From 96c82c27c8b2723268c99a689ede42e9fe613b8b Mon Sep 17 00:00:00 2001 From: _ <_@_> Date: Thu, 28 Sep 2023 00:27:05 -0500 Subject: [PATCH] :bug: bug: fix long string loading and int adding Long strings didn't work before because I hadn't implemented Lua's varint decoding. It's an easy translation from the original C. Int adding previously created a float because I always coerced to float. Also an easy fix. --- src/loader.rs | 113 +++++++++++++++++++++++++++++----------- src/main.rs | 1 - src/state.rs | 14 +++-- src/tests.rs | 19 ++----- src/value.rs | 4 ++ test_vectors/varint.lua | 2 + 6 files changed, 104 insertions(+), 49 deletions(-) create mode 100644 test_vectors/varint.lua diff --git a/src/loader.rs b/src/loader.rs index ba67bbc..531dac7 100644 --- a/src/loader.rs +++ b/src/loader.rs @@ -135,15 +135,42 @@ struct Header { inst_count: u8, } +/// loadUnsigned in PUC Lua +/// Decodes a varint format that has 7 bits per byte and the 8th bit +/// is set to 1 on the last byte. + +fn load_unsigned (rdr: &mut R, limit: usize) -> usize { + // Shrink the limit so we can tell when we pass it + let limit = limit >> 7; + + let mut x = 0; + for _ in 0..32 { + let b = parse_byte (rdr).unwrap (); + if x >= limit { + panic! ("integer overflow {x} >= {limit}"); + } + x = (x << 7) | (b as usize & 0x7f); + if (b & 0x80) != 0 { + break; + } + } + + x +} + +fn load_size (rdr: &mut R) -> usize { + load_unsigned (rdr, usize::MAX) +} + // loadString in PUC Lua. Doesn't work with long strings yet. fn parse_string (rdr: &mut R) -> Option { - let len = match parse_int (rdr)? { - 0 => 0, + let len = match load_size (rdr) { + 0 => return Some (String::new ()), x => x - 1, }; - let mut buf = vec! [0u8; len as usize]; + let mut buf = vec! [0u8; len]; rdr.read_exact (&mut buf).ok ()?; Some (String::from_utf8 (buf).ok ()?) 
} @@ -186,22 +213,22 @@ pub fn parse_block (rdr: &mut R, blocks: &mut Vec ) use crate::value::Value; parse_string (rdr)?; // function name - parse_int (rdr)?; // start line in source code - parse_int (rdr)?; // last line in source code - parse_byte (rdr)?; // num params - parse_byte (rdr)?; // is_vararg - parse_byte (rdr)?; // maxstacksize, might be same as num slots? + parse_int (rdr).unwrap (); // start line in source code + parse_int (rdr).unwrap (); // last line in source code + parse_byte (rdr).unwrap (); // num params + parse_byte (rdr).unwrap (); // is_vararg + parse_byte (rdr).unwrap (); // maxstacksize, might be same as num slots? - let inst_count = parse_int (rdr)?; + let inst_count = parse_int (rdr).unwrap (); let mut instructions = Vec::with_capacity (inst_count as usize); for _ in 0..inst_count { let mut buf = [0u8; 4]; - rdr.read_exact (&mut buf).ok ()?; + rdr.read_exact (&mut buf).ok ().unwrap (); instructions.push (parse_inst (buf).expect (&format! ("{buf:?}"))); } - let constant_count = parse_int (rdr)?; + let constant_count = parse_int (rdr).unwrap (); let mut constants = Vec::with_capacity (constant_count as usize); @@ -210,22 +237,26 @@ pub fn parse_block (rdr: &mut R, blocks: &mut Vec ) let const_type = parse_byte (rdr)?; let val = match const_type { - 3 => Value::from (parse_i64 (rdr)?), - 4 => parse_string (rdr)?.into (), - 19 => Value::from (parse_float (rdr)?), + 3 => parse_i64 (rdr).unwrap ().into (), + 4 => parse_string (rdr).unwrap ().into (), + + // For LUA_TNUMBER, PUC Lua uses a macro that adds 16 to signify a float + 19 => parse_float (rdr).unwrap ().into (), + // 0x10 + 4 = long string + 20 => parse_string (rdr).unwrap ().into (), x => panic! ("Constant {} has type {}", i, x), }; constants.push (val); } - let upvalue_count = parse_int (rdr)? 
as usize; + let upvalue_count = parse_int (rdr).unwrap () as usize; for _ in 0..upvalue_count { // Just ignore these for _ in 0..3 { - parse_byte (rdr)?; + parse_byte (rdr).unwrap (); } } @@ -238,9 +269,9 @@ pub fn parse_block (rdr: &mut R, blocks: &mut Vec ) // Recursion // Subfunctions. PUC calls them protos. - let protos_count = parse_int (rdr)?; + let protos_count = parse_int (rdr).unwrap (); for _ in 0..protos_count { - parse_block (rdr, blocks)?; + parse_block (rdr, blocks).unwrap (); } // Skip over debug stuff @@ -248,26 +279,26 @@ pub fn parse_block (rdr: &mut R, blocks: &mut Vec ) // I think this is delta line numbers, e.g. most instructions // have 0, but when you go to a new source line it's 1+. - let lineinfo_count = parse_int (rdr)?; + let lineinfo_count = parse_int (rdr).unwrap (); for _ in 0..lineinfo_count { - parse_byte (rdr)?; + parse_byte (rdr).unwrap (); } // Absolute line info, didn't see that in my test files - let abslineinfo_count = parse_int (rdr)?; + let abslineinfo_count = parse_int (rdr).unwrap (); assert_eq! (abslineinfo_count, 0); - let local_count = parse_int (rdr)?; + let local_count = parse_int (rdr).unwrap (); for _ in 0..local_count { - parse_string(rdr)?; - parse_int (rdr)?; - parse_int (rdr)?; + parse_string(rdr).unwrap (); + parse_int (rdr).unwrap (); + parse_int (rdr).unwrap (); } - let upvalue_count = parse_int (rdr)?; + let upvalue_count = parse_int (rdr).unwrap (); for _ in 0..upvalue_count { - parse_string (rdr)?; + parse_string (rdr).unwrap (); } Some (()) @@ -281,11 +312,11 @@ pub fn parse_chunk (rdr: &mut R) -> Option { let mut hdr = [0u8; 32]; rdr.read_exact (&mut hdr).ok ()?; + assert_eq! 
(&hdr [0..8], &[0x1b, 0x4c, 0x75, 0x61, 0x54, 0x00, 0x19, 0x93], "This isn't a Lua 5.4 bytecode file"); + let mut blocks = vec![]; - while let Some (_) = parse_block (rdr, &mut blocks) { - // - } + parse_block (rdr, &mut blocks).unwrap (); Some (Chunk { blocks, @@ -299,6 +330,28 @@ pub fn parse_chunk_from_bytes (b: &[u8]) -> Option { #[cfg (test)] mod tests { + #[test] + fn load_size () { + let f = |input: &[u8]| { + let mut cursor = std::io::Cursor::new (input); + super::load_size (&mut cursor) + }; + + assert_eq! (f (&[0x80]), 0); + assert_eq! (f (&[0x81]), 1); + assert_eq! (f (&[0x82]), 2); + + assert_eq! (f (&[0xff]), 127); + + assert_eq! (f (&[0x01, 0x80]), 128); + assert_eq! (f (&[0x01, 0x81]), 129); + assert_eq! (f (&[0x02, 0x80]), 256); + + assert_eq! (f (&[0x7f, 0xfe]), 16382); + assert_eq! (f (&[0x7f, 0xff]), 16383); + assert_eq! (f (&[0x01, 0x00, 0x80]), 16384); + } + #[test] fn parse_inst () { use super::Inst; diff --git a/src/main.rs b/src/main.rs index 6291171..5bd590f 100644 --- a/src/main.rs +++ b/src/main.rs @@ -30,7 +30,6 @@ fn main () { let chunk = if let Some (script) = script { let bytecode = loader::compile_bytecode_from_file (&script); - dbg! 
(&bytecode [0..48]); let mut rdr = std::io::Cursor::new (bytecode); loader::parse_chunk (&mut rdr).unwrap () } diff --git a/src/state.rs b/src/state.rs index 7f953ce..a96fcae 100644 --- a/src/state.rs +++ b/src/state.rs @@ -157,10 +157,18 @@ impl State { match instruction { Instruction::Add (a, b, c) => { - let v_b = self.reg (*b).as_float ().unwrap (); - let v_c = self.reg (*c).as_float ().unwrap (); + let v_b = self.reg (*b); + let v_c = self.reg (*c); - *self.reg_mut (*a) = Value::from (v_b + v_c); + let sum = if let (Some (v_b), Some (v_c)) = (v_b.as_int (), v_c.as_int ()) + { + Value::from (v_b + v_c) + } + else { + Value::from (v_b.as_float ().unwrap () + v_c.as_float ().unwrap ()) + }; + + *self.reg_mut (*a) = sum; }, Instruction::Call (a, b, _c) => { let b = usize::from (*b); diff --git a/src/tests.rs b/src/tests.rs index 6c81843..1ee7c47 100644 --- a/src/tests.rs +++ b/src/tests.rs @@ -128,22 +128,11 @@ fn bools () { #[test] fn closure () { - let bytecode = include_bytes! ("../test_vectors/closure.luac"); - let mut rdr = std::io::Cursor::new (bytecode); - let file = crate::loader::parse_chunk (&mut rdr).unwrap (); + let source = include_bytes! ("../test_vectors/closure.lua"); + let bytecode = &crate::loader::compile_bytecode_from_stdin (source.to_vec ()); + let chunk = crate::loader::parse_chunk_from_bytes (bytecode).unwrap (); - for (arg, expected) in [ - // Run the same test twice so clippy won't complain about a vec of 1 element - (vec! ["_exe_name"], vec! [23.0.into ()]), - (vec! ["_exe_name"], vec! [23.0.into ()]), - ] { - let expected: Vec = expected; - let mut vm = State::default (); - let upvalues = State::upvalues_from_args (arg.into_iter ().map (|s| s.to_string ())); - let actual = vm.execute_chunk (&file, &upvalues); - - assert_eq! (actual, expected); - } + assert_eq! (run_chunk (&["_exe_name"], &chunk), vec! 
[Value::from (23i64)]); } #[test] diff --git a/src/value.rs b/src/value.rs index 21b6a97..648dd5b 100644 --- a/src/value.rs +++ b/src/value.rs @@ -156,6 +156,8 @@ impl PartialEq for Value { } impl Value { + /// Coerces ints to float + pub fn as_float (&self) -> Option { match self { Self::Float (x) => Some (*x), @@ -165,6 +167,8 @@ impl Value { } } + /// Does not coerce floats + pub fn as_int (&self) -> Option { match self { Self::Integer (x) => Some (*x), diff --git a/test_vectors/varint.lua b/test_vectors/varint.lua new file mode 100644 index 0000000..dc14692 --- /dev/null +++ b/test_vectors/varint.lua @@ -0,0 +1,2 @@ +local s = "a very long string with more than 128 characters, which will require 2 bytes to encode in Lua's bytecode format. This allows me to debug my bytecode loader, which doesn't seem to be handling string sizes properly." +print (#s)