From c718ba67cf846fd5957861247496cd6e97187f4f Mon Sep 17 00:00:00 2001 From: _ <_@_> Date: Sun, 24 Sep 2023 20:40:28 -0500 Subject: [PATCH] :star: make a luac loader sufficient for what I've implemented --- src/loader.rs | 230 ++++++++++++++++++++++++++++++++++++++ src/main.rs | 59 ++++------ src/state.rs | 40 +++++-- src/tests.rs | 110 ++++++++++++++++-- test_vectors/closure.lua | 15 +++ test_vectors/closure.luac | Bin 0 -> 342 bytes test_vectors/fma.lua | 15 +++ 7 files changed, 412 insertions(+), 57 deletions(-) create mode 100644 src/loader.rs create mode 100644 test_vectors/closure.lua create mode 100644 test_vectors/closure.luac create mode 100644 test_vectors/fma.lua diff --git a/src/loader.rs b/src/loader.rs new file mode 100644 index 0000000..8703a67 --- /dev/null +++ b/src/loader.rs @@ -0,0 +1,230 @@ +use std::io::Read; + +use crate::state::{ + Block, + Instruction as Inst, +}; + +pub fn parse_inst (buf: [u8; 4]) -> Option +{ + let opcode = buf [0] & 0x7f; + + let a = (buf [0] >> 7) | ((buf [1] & 0x7f) << 1); + let b = buf [2]; + let c = buf [3]; + let bx = + (((buf [1] >> 7) as u32) << 0) | + ((buf [2] as u32) << 1) | + ((buf [3] as u32) << 9); + let bx = bx.try_into().ok ()?; + let sbx = bx - 65535; + let k = (buf [1] & 0x80) >> 7 == 1; + + Some (match opcode { + 0x00 => Inst::Move (a, b), + 0x01 => Inst::LoadI (a, sbx), + 0x03 => Inst::LoadK (a, bx), + 0x09 => Inst::GetUpVal (a, b), + 0x0b => Inst::GetTabUp (a, 0, 0), + 0x44 => Inst::Call (a, b, c), + 0x46 => Inst::Return (a, b, c, k), + 0x47 => Inst::Return0, + 0x48 => Inst::Return1 (a), + 0x4f => Inst::Closure (0, 0), + 0x51 => Inst::VarArgPrep (0), + _ => return None, + }) +} + +#[derive (Debug, PartialEq)] +struct Header { + inst_count: u8, +} + +fn parse_file_name (rdr: &mut R) -> Option { + let file_name_sz = { + let mut file_name_sz = [0u8; 1]; + rdr.read_exact (&mut file_name_sz).ok ()?; + usize::try_from (file_name_sz [0] - 0x80 - 1).ok ()? + }; + + { + let mut file_name = vec! [0u8; file_name_sz]; + rdr.read_exact (&mut file_name).ok ()?; + Some (String::from_utf8 (file_name).ok ()?) + } +} + +fn parse_header (buf: [u8; 6]) -> Option
{ + if buf [0] & 0x80 != 0x80 { + // Not a function header + return None; + } + + Some (Header { + inst_count: buf [5] - 0x80, + }) +} + +// I don't know what this really is, so I'm calling it a trailer for now +// It appears in luac files after the string table / constants table +// for each function. + +#[derive (Debug, PartialEq)] +struct Trailer { + upvalue_count: u8, +} + +fn parse_trailer (buf: [u8; 6]) -> Option { + Some (Trailer { + upvalue_count: buf [0] - 0x80, + }) +} + +pub fn parse_block (rdr: &mut R) -> Option +{ + let header = { + let mut buf = [0u8; 6]; + rdr.read_exact (&mut buf).ok ()?; + parse_header (buf)? + }; + + let mut instructions = Vec::with_capacity (header.inst_count as usize); + + for _ in 0..header.inst_count { + let mut buf = [0u8; 4]; + rdr.read_exact (&mut buf).ok ()?; + instructions.push (parse_inst (buf)?); + } + + let constant_count = { + let mut buf = [0u8; 1]; + rdr.read_exact (&mut buf).ok ()?; + buf [0] - 0x80 + }; + + let mut constants = Vec::with_capacity (constant_count as usize); + + for _ in 0..constant_count { + let mut buf = [0u8; 2]; + rdr.read_exact (&mut buf).ok ()?; + + let len = ((buf [0] as u32) << 8) + (buf [1] as u32) - 0x0481; + + let mut s = vec! [0u8; len.try_into().ok ()?]; + rdr.read_exact (&mut s).ok ()?; + + let s = String::from_utf8 (s).ok ()?; + constants.push (s.into ()); + } + + let trailer = { + let mut buf = [0u8; 6]; + rdr.read_exact (&mut buf).ok ()?; + + parse_trailer (buf)? + }; + + Some (Block { + constants, + instructions, + upvalue_count: trailer.upvalue_count as usize, + }) +} + +pub struct File { + pub file_name: String, + pub blocks: Vec , +} + +pub fn parse_file (rdr: &mut R) -> Option { + // Discard 32 bytes from the start of the file. + // This is magic number, version number, etc. + + let mut hdr = [0u8; 32]; + rdr.read_exact (&mut hdr).ok ()?; + + let file_name = parse_file_name (rdr)?; + + let mut blocks = vec![]; + + while let Some (block) = parse_block (rdr) { + blocks.push (block); + } + + Some (File { + file_name, + blocks, + }) +} + +#[cfg (test)] +mod tests { + #[test] + fn parse_inst () { + use super::Inst; + + for (input, expected) in [ + ([0x51, 0x00, 0x00, 0x00], Inst::VarArgPrep (0)), + ([0x4f, 0x00, 0x00, 0x00], Inst::Closure (0, 0)), + ([0x8b, 0x00, 0x00, 0x00], Inst::GetTabUp (1, 0, 0)), + ([0x03, 0x81, 0x00, 0x00], Inst::LoadK (2, 1)), + ([0xc4, 0x00, 0x02, 0x01], Inst::Call (1, 2, 1)), + ([0x80, 0x00, 0x00, 0x00], Inst::Move (1, 0)), + ([0xc4, 0x00, 0x01, 0x02], Inst::Call (1, 1, 2)), + ([0x0b, 0x01, 0x00, 0x00], Inst::GetTabUp (2, 0, 0)), + ([0x83, 0x01, 0x01, 0x00], Inst::LoadK (3, 2)), + ([0x44, 0x01, 0x02, 0x01], Inst::Call (2, 2, 1)), + ([0x0b, 0x01, 0x00, 0x00], Inst::GetTabUp (2, 0, 0)), + ([0x80, 0x01, 0x01, 0x00], Inst::Move (3, 1)), + ([0xc4, 0x01, 0x01, 0x00], Inst::Call (3, 1, 0)), + ([0x44, 0x01, 0x00, 0x01], Inst::Call (2, 0, 1)), + ([0x0b, 0x01, 0x00, 0x00], Inst::GetTabUp (2, 0, 0)), + ([0x83, 0x81, 0x01, 0x00], Inst::LoadK (3, 3)), + ([0x44, 0x01, 0x02, 0x01], Inst::Call (2, 2, 1)), + ([0x46, 0x01, 0x01, 0x01], Inst::Return (2, 1, 1, false)), + + ([0x01, 0x00, 0x02, 0x80], Inst::LoadI (0, 5)), + ([0xc6, 0x80, 0x02, 0x00], Inst::Return (1, 2, 0, true)), + + ([0x09, 0x00, 0x01, 0x00], Inst::GetUpVal (0, 1)), + ([0x48, 0x00, 0x02, 0x00], Inst::Return1 (0)), + ([0x47, 0x00, 0x01, 0x00], Inst::Return0), + ] { + let actual = super::parse_inst (input).unwrap (); + assert_eq!(actual, expected); + } + } + + #[test] + fn parse_header () { + for (input, expected) in [ + // Bytes 0 and 1 are line and column for debugging + // Byte 4 is slot count + // Byte 5 is instruction count + + ([0x80, 0x80, 0x00, 0x01, 0x04, 0x92], (18,)), + ([0x81, 0x89, 0x00, 0x00, 0x03, 0x87], (7,)), + ([0x85, 0x88, 0x00, 0x00, 0x02, 0x86], (6,)), + ] { + let actual = super::parse_header (input).unwrap (); + assert_eq! (actual, super::Header { + inst_count: expected.0, + }); + } + } + + #[test] + fn parse_trailer () { + for (input, expected) in [ + ([0x81, 0x01, 0x00, 0x00, 0x81, 0x80], (1,)), + ([0x81, 0x00, 0x00, 0x00, 0x81, 0x80], (1,)), + ([0x82, 0x00, 0x00, 0x00, 0x01, 0x00], (2,)), + ] { + let actual = super::parse_trailer (input).unwrap (); + assert_eq! (actual, super::Trailer { + upvalue_count: expected.0, + }); + } + } +} diff --git a/src/main.rs b/src/main.rs index f75dc63..ccf896e 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,3 +1,4 @@ +mod loader; mod state; #[cfg (test)] @@ -11,65 +12,43 @@ fn main() { State, }; - /* - local function bool_to_x (b) - if b then - return 99 - else - return 98 - end - end - - local x = bool_to_x (not not arg [1]) - print (x) - return x - */ - let chunk = Chunk { blocks: vec! [ Block { instructions: vec! [ Inst::VarArgPrep (0), Inst::Closure (0, 0), - Inst::GetTabUp (1, 0, 0), - Inst::Move (2, 0), - Inst::LoadFalse (3), - Inst::Call (2, 2, 0), - Inst::Call (1, 0, 1), - Inst::GetTabUp (1, 0, 0), - Inst::Move (2, 0), - Inst::LoadTrue (3), - Inst::Call (2, 2, 0), - Inst::Call (1, 0, 1), Inst::Move (1, 0), - Inst::GetTabUp (2, 0, 1), - Inst::GetI (2, 2, 1), - Inst::Not (2, 2), - Inst::Not (2, 2), - Inst::Call (1, 2, 2), + Inst::Call (1, 1, 2), Inst::GetTabUp (2, 0, 0), Inst::Move (3, 1), - Inst::Call (2, 2, 1), - Inst::Return (1, 2, 1), - Inst::Return (2, 1, 1), + Inst::Call (3, 1, 0), + Inst::Call (2, 0, 1), + Inst::Return (2, 1, 1, false), ], constants: vec! [ "print".into (), - "arg".into (), ], + upvalue_count: 1, }, Block { instructions: vec! [ - Inst::Test (0, 0), - Inst::Jmp (3), - Inst::LoadI (1, 99), - Inst::Return1 (1), - Inst::Jmp (2), - Inst::LoadI (1, 98), - Inst::Return1 (1), + Inst::LoadI (0, 5), + Inst::Closure (1, 0), + Inst::Return (1, 2, 0, true), // k? + Inst::Return (1, 1, 0, true), // k? + ], + constants: vec! [], + upvalue_count: 0, + }, + Block { + instructions: vec! [ + Inst::GetUpVal (0, 0), + Inst::Return1 (0), Inst::Return0, ], constants: vec! [], + upvalue_count: 1, }, ], }; diff --git a/src/state.rs b/src/state.rs index 0313bdb..590c420 100644 --- a/src/state.rs +++ b/src/state.rs @@ -1,6 +1,6 @@ use std::collections::BTreeMap; -#[derive (Debug)] +#[derive (Debug, PartialEq)] pub enum Instruction { Add (u8, u8, u8), @@ -10,11 +10,13 @@ pub enum Instruction { // Equals Constant? EqK (u8, u8, u8), + // Get Immediate? + GetI (u8, u8, u8), + // Get Table, Upvalue GetTabUp (u8, u8, u8), - // Get Immediate? - GetI (u8, u8, u8), + GetUpVal (u8, u8), // Jump Jmp (i32), @@ -34,16 +36,20 @@ pub enum Instruction { Move (u8, u8), + Mul (u8, u8, u8), + Not (u8, u8), - // (A, B, _C) Return B - 1 registers starting with A - Return (u8, u8, u8), + // (A, B, _C, k) Return B - 1 registers starting with A + Return (u8, u8, u8, bool), Return0, // Return just one register Return1 (u8), + TailCall (u8, u8, u8), + Test (u8, i32), VarArgPrep (i32), @@ -117,6 +123,7 @@ impl Value { pub struct Block { pub instructions: Vec , pub constants: Vec , + pub upvalue_count: usize, } pub struct Chunk { @@ -224,7 +231,7 @@ impl State { match v_a { Value::BogusClosure (idx) => { let block_idx = frame.block_idx; - let target_block = idx + 1; + let target_block = *idx; let current_frame = &stack [stack.len () - 1]; @@ -259,7 +266,7 @@ impl State { let a = usize::try_from (*a).unwrap (); let b = usize::try_from (*b).unwrap (); - r [a] = Value::BogusClosure (b); + r [a] = Value::BogusClosure (b + frame.block_idx + 1); }, Instruction::EqK (a, b, c_k) => { let a = usize::try_from (*a).unwrap (); @@ -352,11 +359,26 @@ impl State { r [a] = Value::Boolean (! r [b].is_truthy()); } - Instruction::Return (a, b, _c) => { + Instruction::Return (a, b, _c, _k) => { let a = usize::try_from (*a).unwrap (); let b = usize::try_from (*b).unwrap (); - return r [a..(a + b - 1)].to_vec(); + let popped_frame = stack.pop ().unwrap (); + + if self.debug_print { + let old_block = popped_frame.block_idx; + let old_pc = popped_frame.program_counter; + println! ("Inst {old_block}:{old_pc} returns"); + let stack_depth = stack.len (); + println! ("stack_depth: {stack_depth}"); + } + + if let Some (new_frame) = stack.last() { + next_pc = new_frame.program_counter; + } + else { + return r [a..(a + b - 1)].to_vec(); + } }, Instruction::Return1 (a) => { let a = usize::try_from (*a).unwrap (); diff --git a/src/tests.rs b/src/tests.rs index 25a2303..a27ec15 100644 --- a/src/tests.rs +++ b/src/tests.rs @@ -46,13 +46,14 @@ fn bools () { Inst::GetTabUp (2, 0, 1), Inst::Move (3, 1), Inst::Call (2, 2, 1), - Inst::Return (1, 2, 1), - Inst::Return (2, 1, 1), + Inst::Return (1, 2, 1, false), + Inst::Return (2, 1, 1, false), ], constants: vec! [ "arg".into (), "print".into (), ], + upvalue_count: 1, }, Block { instructions: vec! [ @@ -66,6 +67,7 @@ fn bools () { Inst::Return0, ], constants: vec! [], + upvalue_count: 0, }, ], }; @@ -104,13 +106,14 @@ fn floats () { Inst::GetTabUp (3, 0, 1), Inst::Move (4, 2), Inst::Call (3, 2, 1), - Inst::Return (2, 2, 1), - Inst::Return (3, 1, 1), + Inst::Return (2, 2, 1, false), + Inst::Return (3, 1, 1, false), ], constants: vec! [ 0.5.into (), "print".into (), ], + upvalue_count: 1, }; let chunk = Chunk { blocks: vec! [block], @@ -128,6 +131,86 @@ fn floats () { } } +#[test] +fn fma () { + /* + + */ + + let chunk = Chunk { + blocks: vec! [ + Block { + instructions: vec! [ + Inst::VarArgPrep (0), + Inst::Closure (0, 0), + Inst::Closure (1, 1), + Inst::Closure (2, 2), + Inst::Move (3, 2), + Inst::LoadI (4, 10), + Inst::LoadI (5, 11), + Inst::LoadI (6, 12), + Inst::Call (3, 4, 2), + Inst::GetTabUp (4, 0, 0), + Inst::Move (5, 3), + Inst::Call (4, 2, 1), + Inst::Return (3, 2, 1, false), // k? + Inst::Return (3, 2, 1, false), // k? + ], + constants: vec! [ + "print".into (), + ], + upvalue_count: 1, + }, + Block { + instructions: vec! [ + Inst::Add (2, 0, 1), + Inst::MmBin (0, 1, 6), + Inst::Return1 (2), + Inst::Return0, + ], + constants: vec! [], + upvalue_count: 0, + }, + Block { + instructions: vec! [ + Inst::Mul (2, 0, 1), + Inst::MmBin (0, 1, 8), + Inst::Return1 (2), + Inst::Return0, + ], + constants: vec! [], + upvalue_count: 0, + }, + Block { + instructions: vec! [ + Inst::GetUpVal (3, 0), // add + Inst::GetUpVal (4, 1), // mul + Inst::Move (5, 0), + Inst::Move (6, 1), + Inst::Call (4, 3, 2), + Inst::Move (5, 2), + Inst::TailCall (3, 3, 0), + Inst::Return (3, 0, 0, false), + Inst::Return0, + ], + constants: vec! [], + upvalue_count: 2, + }, + ], + }; + + for (arg, expected) in [ + (vec! ["_exe_name"], vec! [122.into ()]), + (vec! ["_exe_name"], vec! [122.into ()]), + ] { + let mut vm = State::default (); + let upvalues = State::upvalues_from_args (arg.into_iter ().map (|s| s.to_string ())); + let actual = vm.execute_chunk (&chunk, &upvalues); + + assert_eq! (actual, expected); + } +} + #[test] fn is_93 () { /* @@ -151,14 +234,14 @@ fn is_93 () { Inst::LoadK (2, 3), Inst::Call (1, 2, 1), Inst::LoadI (1, 0), - Inst::Return (1, 2, 1), + Inst::Return (1, 2, 1, false), Inst::Jmp (5), Inst::GetTabUp (1, 0, 2), Inst::LoadK (2, 4), Inst::Call (1, 2, 1), Inst::LoadI (1, 1), - Inst::Return (1, 2, 1), - Inst::Return (1, 1, 1), + Inst::Return (1, 2, 1, false), + Inst::Return (1, 1, 1, false), ], constants: vec! [ "arg", @@ -167,6 +250,7 @@ fn is_93 () { "it's 93", "it's not 93", ].into_iter ().map (Value::from).collect (), + upvalue_count: 1, }; let chunk = Chunk { blocks: vec! [block], @@ -183,4 +267,14 @@ fn is_93 () { assert_eq! (actual, expected); } -} \ No newline at end of file +} + +#[test] +fn loader () { + let bytecode = include_bytes! ("../test_vectors/closure.luac"); + let mut rdr = std::io::Cursor::new (bytecode); + let file = crate::loader::parse_file (&mut rdr).unwrap (); + + assert_eq! (file.file_name, "@test_vectors/closure.lua"); + assert_eq! (file.blocks.len (), 3); +} diff --git a/test_vectors/closure.lua b/test_vectors/closure.lua new file mode 100644 index 0000000..c1917de --- /dev/null +++ b/test_vectors/closure.lua @@ -0,0 +1,15 @@ +local function make_closure () + local x = 5 + print "B" + + return function () + print "D" + return x + end +end + +print "A" +local f = make_closure () +print "C" +print (f ()) +print "E" diff --git a/test_vectors/closure.luac b/test_vectors/closure.luac new file mode 100644 index 0000000000000000000000000000000000000000..421ab9eaf1d7fc507b91b17a18ee9622b0e80ecb GIT binary patch literal 342 zcmYjN%SyyR5Ud_Yh`aa!>JNC*UsyG(9z+pjuOTcE#MPBNG{>Hfn<(NX-u#~6)nBqR zE3N}QUEN(ZUCho|5no~JV{@=Qut88xusXX>Bl;ZWNtQ>C$uys*+5U7k&x$nJn-wuL z$hMbCztDKoSaU!JL#((1nIQoc5kfN5V^-Yx8osXFa&}Ba&$fO@Z|-v|2Ud=(jCBUb z?p2>{2!{VJ_|^qqY%~b+Cp^^kDp|Y$rXDJ;7zowR1_-AxIMKYKhoY3;Rjn6+m}L=- p&nHqg>JjVOaZ!w|@e*iM|OYr~z literal 0 HcmV?d00001 diff --git a/test_vectors/fma.lua b/test_vectors/fma.lua new file mode 100644 index 0000000..7525a54 --- /dev/null +++ b/test_vectors/fma.lua @@ -0,0 +1,15 @@ +local function add (a, b) + return a + b +end + +local function mul (a, b) + return a * b +end + +local function fma (a, b, c) + return add (mul (a, b), c) +end + +local x = fma (10, 11, 12) +print (x) +return x