From 0d88653c21c03f1f7b4c62fb914f5d95d3c7459e Mon Sep 17 00:00:00 2001 From: _ <_@_> Date: Mon, 25 Sep 2023 03:05:00 -0500 Subject: [PATCH] :star: the nested functions thing is working, though I wish I didn't have to use recursion --- src/loader.rs | 219 ++++++++++++++++++++---------------- src/state.rs | 5 +- src/tests.rs | 3 - test_vectors/functions.lua | 21 ++++ test_vectors/functions.luac | Bin 0 -> 422 bytes test_vectors/functions.txt | 84 ++++++++++++++ 6 files changed, 229 insertions(+), 103 deletions(-) create mode 100644 test_vectors/functions.lua create mode 100644 test_vectors/functions.luac create mode 100644 test_vectors/functions.txt diff --git a/src/loader.rs b/src/loader.rs index ff5ce88..b399efa 100644 --- a/src/loader.rs +++ b/src/loader.rs @@ -29,12 +29,14 @@ pub fn parse_inst (buf: [u8; 4]) -> Option 0x09 => Inst::GetUpVal (a, b), 0x0b => Inst::GetTabUp (a, b, c), 0x0d => Inst::GetI (a, b, c), + 0x0f => Inst::SetTabUp (a, b, c), 0x22 => Inst::Add (a, b, c), 0x24 => Inst::Mul (a, b, c), 0x2e => Inst::MmBin (a, b, c), 0x3c => Inst::EqK (a, b, c), 0x38 => Inst::Jmp (s_j), 0x44 => Inst::Call (a, b, c), + 0x45 => Inst::TailCall (a, b, c, k), 0x46 => Inst::Return (a, b, c, k), 0x47 => Inst::Return0, 0x48 => Inst::Return1 (a), @@ -49,95 +51,122 @@ struct Header { inst_count: u8, } -fn parse_file_name (rdr: &mut R) -> Option { - let file_name_sz = { - let mut file_name_sz = [0u8; 1]; - rdr.read_exact (&mut file_name_sz).ok ()?; - usize::try_from (file_name_sz [0] - 0x80 - 1).ok ()? +// loadString in PUC Lua. Doesn't work with long strings yet. + +fn parse_string (rdr: &mut R) -> Option { + let len = match parse_int (rdr)? { + 0 => 0, + x => x - 1, }; - { - let mut file_name = vec! [0u8; file_name_sz]; - rdr.read_exact (&mut file_name).ok ()?; - Some (String::from_utf8 (file_name).ok ()?) - } + let mut buf = vec! [0u8; len as usize]; + rdr.read_exact (&mut buf).ok ()?; + Some (String::from_utf8 (buf).ok ()?) } -fn parse_header (buf: [u8; 6]) -> Option
{ - if buf [0] & 0x80 != 0x80 { - // Not a function header - return None; - } - - Some (Header { - inst_count: buf [5] - 0x80, - }) -} +// loadByte in PUC Lua -// I don't know what this really is, so I'm calling it a trailer for now -// It appears in luac files after the string table / constants table -// for each function. - -#[derive (Debug, PartialEq)] -struct Trailer { - upvalue_count: u8, -} - -fn parse_trailer (buf: [u8; 6]) -> Option { - Some (Trailer { - upvalue_count: buf [0] - 0x80, - }) -} - -pub fn parse_block (rdr: &mut R) -> Option +fn parse_byte (rdr: &mut R) -> Option { - let header = { - let mut buf = [0u8; 6]; - rdr.read_exact (&mut buf).ok ()?; - parse_header (buf)? - }; + let mut buf = [0u8; 1]; + rdr.read_exact (&mut buf).ok ()?; + Some (buf [0]) +} + +fn parse_int (rdr: &mut R) -> Option +{ + Some ((parse_byte (rdr)? - 0x80) as u32) +} + +// I'm doing this recursively so it's easy to match with the PUC Lua +// code, but I don't like recursion in general, and I don't know +// why PUC wrote it that way. + +pub fn parse_block (rdr: &mut R, blocks: &mut Vec ) +-> Option <()> +{ + // Ignore things I haven't implemented yet - let mut instructions = Vec::with_capacity (header.inst_count as usize); + parse_string (rdr)?; // function name + parse_int (rdr)?; // start line in source code + parse_int (rdr)?; // last line in source code + parse_byte (rdr)?; // num params + parse_byte (rdr)?; // is_vararg + parse_byte (rdr)?; // maxstacksize, might be same as num slots? - for _ in 0..header.inst_count { + let inst_count = parse_int (rdr)?; + let mut instructions = Vec::with_capacity (inst_count as usize); + + for _ in 0..inst_count { let mut buf = [0u8; 4]; rdr.read_exact (&mut buf).ok ()?; instructions.push (parse_inst (buf).expect (&format! ("{buf:?}"))); } - let constant_count = { - let mut buf = [0u8; 1]; - rdr.read_exact (&mut buf).ok ()?; - buf [0] - 0x80 - }; + let constant_count = parse_int (rdr)?; let mut constants = Vec::with_capacity (constant_count as usize); for _ in 0..constant_count { - let mut buf = [0u8; 2]; - rdr.read_exact (&mut buf).ok ()?; + let const_type = parse_byte (rdr)?; + assert_eq! (const_type, 0x04); - let len = ((buf [0] as u32) << 8) + (buf [1] as u32) - 0x0481; - - let mut s = vec! [0u8; len.try_into().ok ()?]; - rdr.read_exact (&mut s).ok ()?; - - let s = String::from_utf8 (s).ok ()?; + let s = parse_string (rdr)?; constants.push (s.into ()); } - let trailer = { - let mut buf = [0u8; 6]; - rdr.read_exact (&mut buf).ok ()?; - - parse_trailer (buf)? - }; + let upvalue_count = parse_int (rdr)? as usize; - Some (Block { + for _ in 0..upvalue_count { + // Just ignore these + + for _ in 0..3 { + parse_byte (rdr)?; + } + } + + blocks.push (Block { constants, instructions, - upvalue_count: trailer.upvalue_count as usize, - }) + upvalue_count, + }); + + // Recursion + + // Subfunctions. PUC calls them protos. + let protos_count = parse_int (rdr)?; + for _ in 0..protos_count { + parse_block (rdr, blocks)?; + } + + // Skip over debug stuff + + // I think this is delta line numbers, e.g. most instructions + // have 0, but when you go to a new source line it's 1+. + + let lineinfo_count = parse_int (rdr)?; + for _ in 0..lineinfo_count { + parse_byte (rdr)?; + } + + // Absolute line info, didn't see that in my test files + + let abslineinfo_count = parse_int (rdr)?; + assert_eq! (abslineinfo_count, 0); + + let local_count = parse_int (rdr)?; + for _ in 0..local_count { + parse_string(rdr)?; + parse_int (rdr)?; + parse_int (rdr)?; + } + + let upvalue_count = parse_int (rdr)?; + for _ in 0..upvalue_count { + parse_string (rdr)?; + } + + Some (()) } @@ -148,16 +177,13 @@ pub fn parse_chunk (rdr: &mut R) -> Option { let mut hdr = [0u8; 32]; rdr.read_exact (&mut hdr).ok ()?; - let file_name = parse_file_name (rdr)?; - let mut blocks = vec![]; - while let Some (block) = parse_block (rdr) { - blocks.push (block); + while let Some (_) = parse_block (rdr, &mut blocks) { + // } Some (Chunk { - file_name, blocks, }) } @@ -206,36 +232,33 @@ mod tests { } #[test] - fn parse_header () { - for (input, expected) in [ - // Bytes 0 and 1 are first line and last line for debugging - // Byte 2 is numparams - // Byte 3 is is_vararg - // Byte 4 is slot count / max stack size - // Byte 5 is instruction count + fn parse_nested_functions () { + use std::io::Read; + + let bytecode = include_bytes! ("../test_vectors/functions.luac"); + + { + let mut rdr = std::io::Cursor::new (bytecode.clone ()); - ([0x80, 0x80, 0x00, 0x01, 0x04, 0x92], (18,)), - ([0x81, 0x89, 0x00, 0x00, 0x03, 0x87], (7,)), - ([0x85, 0x88, 0x00, 0x00, 0x02, 0x86], (6,)), - ] { - let actual = super::parse_header (input).unwrap (); - assert_eq! (actual, super::Header { - inst_count: expected.0, - }); + let mut buf = [0u8; 32]; + rdr.read_exact (&mut buf).unwrap (); + + let mut blocks = vec! []; + + super::parse_block (&mut rdr, &mut blocks).unwrap (); + + assert_eq! (blocks [0].instructions.len (), 15); + assert_eq! (blocks [1].instructions.len (), 6); + assert_eq! (blocks [2].instructions.len (), 4); + assert_eq! (blocks [3].instructions.len (), 4); + assert_eq! (blocks [4].instructions.len (), 4); } - } - - #[test] - fn parse_trailer () { - for (input, expected) in [ - ([0x81, 0x01, 0x00, 0x00, 0x81, 0x80], (1,)), - ([0x81, 0x00, 0x00, 0x00, 0x81, 0x80], (1,)), - ([0x82, 0x00, 0x00, 0x00, 0x01, 0x00], (2,)), - ] { - let actual = super::parse_trailer (input).unwrap (); - assert_eq! (actual, super::Trailer { - upvalue_count: expected.0, - }); + + if false { + let mut rdr = std::io::Cursor::new (bytecode.clone ()); + let file = crate::loader::parse_chunk (&mut rdr).unwrap (); + + assert_eq! (file.blocks.len (), 5); } } } diff --git a/src/state.rs b/src/state.rs index 3b98641..86cefa4 100644 --- a/src/state.rs +++ b/src/state.rs @@ -48,7 +48,9 @@ pub enum Instruction { // Return just one register Return1 (u8), - TailCall (u8, u8, u8), + SetTabUp (u8, u8, u8), + + TailCall (u8, u8, u8, bool), Test (u8, i32), @@ -136,7 +138,6 @@ pub struct Block { } pub struct Chunk { - pub file_name: String, pub blocks: Vec , } diff --git a/src/tests.rs b/src/tests.rs index 7371af3..b3e13ff 100644 --- a/src/tests.rs +++ b/src/tests.rs @@ -3,7 +3,6 @@ use crate::state::{ Chunk, Instruction as Inst, State, - Value, }; #[test] @@ -23,7 +22,6 @@ fn bools () { */ let chunk = Chunk { - file_name: "".to_string (), blocks: vec! [ Block { instructions: vec! [ @@ -136,7 +134,6 @@ fn floats () { upvalue_count: 1, }; let chunk = Chunk { - file_name: "".to_string (), blocks: vec! [block], }; diff --git a/test_vectors/functions.lua b/test_vectors/functions.lua new file mode 100644 index 0000000..84ced25 --- /dev/null +++ b/test_vectors/functions.lua @@ -0,0 +1,21 @@ +print "p_1" + +function aa () + print "p_2" + + function bb () + print "p_3" + end +end + +print "p_4" + +local function cc () + print "p_5" +end + +local dd = function () + print "p_6" +end + +print "p_7" diff --git a/test_vectors/functions.luac b/test_vectors/functions.luac new file mode 100644 index 0000000000000000000000000000000000000000..4d2b9913d496262a6d0f46554ad28db20cb5544f GIT binary patch literal 422 zcmb34DNPJvketlRCB?YnU;>%K#OY)0~_0vl8l1no4^NRIy zN)sCz7#Lal1A!X2fw;MWfx(4=iP0a(=4W7niZMdOfO6*<7#X-3f$A8U7+e^c7~L2d z8CzM}3W_rGN?2M7;tg4v6B9uU6A)qE2vXY6+{wVe)P~J?cc5|2NZO59nv;?mfp#}G zv~>VAwIFK(DQ-eiYz$J^&;qoAv7w={HQv=PtPRLv`Ns_7HuUxrGQ (15 instructions at 0x559bfdbb1c90) +0+ params, 4 slots, 1 upvalue, 2 locals, 5 constants, 3 functions + 1 [1] VARARGPREP 0 + 2 [1] GETTABUP 0 0 0 ; _ENV "print" + 3 [1] LOADK 1 1 ; "p_1" + 4 [1] CALL 0 2 1 ; 1 in 0 out + 5 [9] CLOSURE 0 0 ; 0x559bfdbb1f40 + 6 [3] SETTABUP 0 2 0 ; _ENV "aa" + 7 [11] GETTABUP 0 0 0 ; _ENV "print" + 8 [11] LOADK 1 3 ; "p_4" + 9 [11] CALL 0 2 1 ; 1 in 0 out + 10 [15] CLOSURE 0 1 ; 0x559bfdbb2240 + 11 [19] CLOSURE 1 2 ; 0x559bfdbb2390 + 12 [21] GETTABUP 2 0 0 ; _ENV "print" + 13 [21] LOADK 3 4 ; "p_7" + 14 [21] CALL 2 2 1 ; 1 in 0 out + 15 [21] RETURN 2 1 1 ; 0 out +constants (5) for 0x559bfdbb1c90: + 0 S "print" + 1 S "p_1" + 2 S "aa" + 3 S "p_4" + 4 S "p_7" +locals (2) for 0x559bfdbb1c90: + 0 cc 11 16 + 1 dd 12 16 +upvalues (1) for 0x559bfdbb1c90: + 0 _ENV 1 0 + +function (6 instructions at 0x559bfdbb1f40) +0 params, 2 slots, 1 upvalue, 0 locals, 3 constants, 1 function + 1 [4] GETTABUP 0 0 0 ; _ENV "print" + 2 [4] LOADK 1 1 ; "p_2" + 3 [4] CALL 0 2 1 ; 1 in 0 out + 4 [8] CLOSURE 0 0 ; 0x559bfdbb20d0 + 5 [6] SETTABUP 0 2 0 ; _ENV "bb" + 6 [9] RETURN0 +constants (3) for 0x559bfdbb1f40: + 0 S "print" + 1 S "p_2" + 2 S "bb" +locals (0) for 0x559bfdbb1f40: +upvalues (1) for 0x559bfdbb1f40: + 0 _ENV 0 0 + +function (4 instructions at 0x559bfdbb20d0) +0 params, 2 slots, 1 upvalue, 0 locals, 2 constants, 0 functions + 1 [7] GETTABUP 0 0 0 ; _ENV "print" + 2 [7] LOADK 1 1 ; "p_3" + 3 [7] CALL 0 2 1 ; 1 in 0 out + 4 [8] RETURN0 +constants (2) for 0x559bfdbb20d0: + 0 S "print" + 1 S "p_3" +locals (0) for 0x559bfdbb20d0: +upvalues (1) for 0x559bfdbb20d0: + 0 _ENV 0 0 + +function (4 instructions at 0x559bfdbb2240) +0 params, 2 slots, 1 upvalue, 0 locals, 2 constants, 0 functions + 1 [14] GETTABUP 0 0 0 ; _ENV "print" + 2 [14] LOADK 1 1 ; "p_5" + 3 [14] CALL 0 2 1 ; 1 in 0 out + 4 [15] RETURN0 +constants (2) for 0x559bfdbb2240: + 0 S "print" + 1 S "p_5" +locals (0) for 0x559bfdbb2240: +upvalues (1) for 0x559bfdbb2240: + 0 _ENV 0 0 + +function (4 instructions at 0x559bfdbb2390) +0 params, 2 slots, 1 upvalue, 0 locals, 2 constants, 0 functions + 1 [18] GETTABUP 0 0 0 ; _ENV "print" + 2 [18] LOADK 1 1 ; "p_6" + 3 [18] CALL 0 2 1 ; 1 in 0 out + 4 [19] RETURN0 +constants (2) for 0x559bfdbb2390: + 0 S "print" + 1 S "p_6" +locals (0) for 0x559bfdbb2390: +upvalues (1) for 0x559bfdbb2390: + 0 _ENV 0 0