🐛 bug: fix long string loading and int adding

Long strings didn't work before because I hadn't implemented Lua's varint
decoding. It's an easy translation from the original C.

Int adding previously created a float because I always coerced to float.
Also an easy fix.
main
_ 2023-09-28 00:27:05 -05:00
parent 5649f38698
commit 96c82c27c8
6 changed files with 104 additions and 49 deletions

View File

@ -135,15 +135,42 @@ struct Header {
inst_count: u8,
}
/// `loadUnsigned` in PUC Lua.
///
/// Decodes a variable-length unsigned integer from `rdr`. Each byte carries
/// 7 bits of payload, most-significant group first, and the 8th bit (0x80)
/// is set only on the last byte.
///
/// `limit` is an upper bound for the decoded value. It is shifted right by
/// 7 bits up front so the bound can be checked *before* each left shift,
/// which means the accumulator can never silently wrap.
///
/// # Panics
///
/// - if `parse_byte` cannot supply another byte,
/// - if the accumulated value reaches the shifted limit ("integer overflow"),
/// - if no terminating byte is found within 32 bytes. A well-formed dump
///   never needs that many, so this is treated as corrupt input instead of
///   returning a partially decoded value.
fn load_unsigned <R: Read> (rdr: &mut R, limit: usize) -> usize {
	// Shrink the limit so we can tell when we pass it
	let limit = limit >> 7;
	let mut x = 0;

	for _ in 0..32 {
		let b = parse_byte (rdr).expect ("unexpected end of input while reading varint");
		if x >= limit {
			panic! ("integer overflow {x} >= {limit}");
		}
		x = (x << 7) | (b as usize & 0x7f);
		if (b & 0x80) != 0 {
			// High bit marks the final byte of the varint
			return x;
		}
	}

	// Falling out of the loop means we never saw the terminator bit.
	panic! ("varint not terminated after 32 bytes");
}
/// `loadSize` in PUC Lua.
///
/// Reads one varint-encoded size from `rdr`, using the full range of
/// `usize` as the limit passed to `load_unsigned`.
fn load_size <R: Read> (rdr: &mut R) -> usize {
	let limit = usize::MAX;
	load_unsigned (rdr, limit)
}
// loadString in PUC Lua. The length prefix is a varint read with load_size.
fn parse_string <R: Read> (rdr: &mut R) -> Option <String> {
let len = match parse_int (rdr)? {
0 => 0,
let len = match load_size (rdr) {
0 => return Some (String::new ()),
x => x - 1,
};
let mut buf = vec! [0u8; len as usize];
let mut buf = vec! [0u8; len];
rdr.read_exact (&mut buf).ok ()?;
Some (String::from_utf8 (buf).ok ()?)
}
@ -186,22 +213,22 @@ pub fn parse_block <R: Read> (rdr: &mut R, blocks: &mut Vec <Block>)
use crate::value::Value;
parse_string (rdr)?; // function name
parse_int (rdr)?; // start line in source code
parse_int (rdr)?; // last line in source code
parse_byte (rdr)?; // num params
parse_byte (rdr)?; // is_vararg
parse_byte (rdr)?; // maxstacksize, might be same as num slots?
parse_int (rdr).unwrap (); // start line in source code
parse_int (rdr).unwrap (); // last line in source code
parse_byte (rdr).unwrap (); // num params
parse_byte (rdr).unwrap (); // is_vararg
parse_byte (rdr).unwrap (); // maxstacksize, might be same as num slots?
let inst_count = parse_int (rdr)?;
let inst_count = parse_int (rdr).unwrap ();
let mut instructions = Vec::with_capacity (inst_count as usize);
for _ in 0..inst_count {
let mut buf = [0u8; 4];
rdr.read_exact (&mut buf).ok ()?;
rdr.read_exact (&mut buf).ok ().unwrap ();
instructions.push (parse_inst (buf).expect (&format! ("{buf:?}")));
}
let constant_count = parse_int (rdr)?;
let constant_count = parse_int (rdr).unwrap ();
let mut constants = Vec::with_capacity (constant_count as usize);
@ -210,22 +237,26 @@ pub fn parse_block <R: Read> (rdr: &mut R, blocks: &mut Vec <Block>)
let const_type = parse_byte (rdr)?;
let val = match const_type {
3 => Value::from (parse_i64 (rdr)?),
4 => parse_string (rdr)?.into (),
19 => Value::from (parse_float (rdr)?),
3 => parse_i64 (rdr).unwrap ().into (),
4 => parse_string (rdr).unwrap ().into (),
// For LUA_TNUMBER, PUC Lua uses a macro that adds 16 to signify a float
19 => parse_float (rdr).unwrap ().into (),
// 0x10 + 4 = long string
20 => parse_string (rdr).unwrap ().into (),
x => panic! ("Constant {} has type {}", i, x),
};
constants.push (val);
}
let upvalue_count = parse_int (rdr)? as usize;
let upvalue_count = parse_int (rdr).unwrap () as usize;
for _ in 0..upvalue_count {
// Just ignore these
for _ in 0..3 {
parse_byte (rdr)?;
parse_byte (rdr).unwrap ();
}
}
@ -238,9 +269,9 @@ pub fn parse_block <R: Read> (rdr: &mut R, blocks: &mut Vec <Block>)
// Recursion
// Subfunctions. PUC calls them protos.
let protos_count = parse_int (rdr)?;
let protos_count = parse_int (rdr).unwrap ();
for _ in 0..protos_count {
parse_block (rdr, blocks)?;
parse_block (rdr, blocks).unwrap ();
}
// Skip over debug stuff
@ -248,26 +279,26 @@ pub fn parse_block <R: Read> (rdr: &mut R, blocks: &mut Vec <Block>)
// I think this is delta line numbers, e.g. most instructions
// have 0, but when you go to a new source line it's 1+.
let lineinfo_count = parse_int (rdr)?;
let lineinfo_count = parse_int (rdr).unwrap ();
for _ in 0..lineinfo_count {
parse_byte (rdr)?;
parse_byte (rdr).unwrap ();
}
// Absolute line info, didn't see that in my test files
let abslineinfo_count = parse_int (rdr)?;
let abslineinfo_count = parse_int (rdr).unwrap ();
assert_eq! (abslineinfo_count, 0);
let local_count = parse_int (rdr)?;
let local_count = parse_int (rdr).unwrap ();
for _ in 0..local_count {
parse_string(rdr)?;
parse_int (rdr)?;
parse_int (rdr)?;
parse_string(rdr).unwrap ();
parse_int (rdr).unwrap ();
parse_int (rdr).unwrap ();
}
let upvalue_count = parse_int (rdr)?;
let upvalue_count = parse_int (rdr).unwrap ();
for _ in 0..upvalue_count {
parse_string (rdr)?;
parse_string (rdr).unwrap ();
}
Some (())
@ -281,11 +312,11 @@ pub fn parse_chunk <R: Read> (rdr: &mut R) -> Option <Chunk> {
let mut hdr = [0u8; 32];
rdr.read_exact (&mut hdr).ok ()?;
assert_eq! (&hdr [0..8], &[0x1b, 0x4c, 0x75, 0x61, 0x54, 0x00, 0x19, 0x93], "This isn't a Lua 5.4 bytecode file");
let mut blocks = vec![];
while let Some (_) = parse_block (rdr, &mut blocks) {
//
}
parse_block (rdr, &mut blocks).unwrap ();
Some (Chunk {
blocks,
@ -299,6 +330,28 @@ pub fn parse_chunk_from_bytes (b: &[u8]) -> Option <Chunk> {
#[cfg (test)]
mod tests {
#[test]
fn load_size () {
	/// Decodes `encoded` with `super::load_size` and checks the result.
	fn check (encoded: &[u8], expected: usize) {
		let mut rdr = std::io::Cursor::new (encoded);
		assert_eq! (super::load_size (&mut rdr), expected);
	}

	// Single-byte encodings: the terminator bit is set on the only byte
	check (&[0x80], 0);
	check (&[0x81], 1);
	check (&[0x82], 2);
	check (&[0xff], 127);

	// Two-byte encodings
	check (&[0x01, 0x80], 128);
	check (&[0x01, 0x81], 129);
	check (&[0x02, 0x80], 256);
	check (&[0x7f, 0xfe], 16382);
	check (&[0x7f, 0xff], 16383);

	// First value that needs three bytes
	check (&[0x01, 0x00, 0x80], 16384);
}
#[test]
fn parse_inst () {
use super::Inst;

View File

@ -30,7 +30,6 @@ fn main () {
let chunk = if let Some (script) = script {
let bytecode = loader::compile_bytecode_from_file (&script);
dbg! (&bytecode [0..48]);
let mut rdr = std::io::Cursor::new (bytecode);
loader::parse_chunk (&mut rdr).unwrap ()
}

View File

@ -157,10 +157,18 @@ impl State {
match instruction {
Instruction::Add (a, b, c) => {
let v_b = self.reg (*b).as_float ().unwrap ();
let v_c = self.reg (*c).as_float ().unwrap ();
let v_b = self.reg (*b);
let v_c = self.reg (*c);
*self.reg_mut (*a) = Value::from (v_b + v_c);
let sum = if let (Some (v_b), Some (v_c)) = (v_b.as_int (), v_c.as_int ())
{
Value::from (v_b + v_c)
}
else {
Value::from (v_b.as_float ().unwrap () + v_c.as_float ().unwrap ())
};
*self.reg_mut (*a) = sum;
},
Instruction::Call (a, b, _c) => {
let b = usize::from (*b);

View File

@ -128,22 +128,11 @@ fn bools () {
#[test]
fn closure () {
let bytecode = include_bytes! ("../test_vectors/closure.luac");
let mut rdr = std::io::Cursor::new (bytecode);
let file = crate::loader::parse_chunk (&mut rdr).unwrap ();
let source = include_bytes! ("../test_vectors/closure.lua");
let bytecode = &crate::loader::compile_bytecode_from_stdin (source.to_vec ());
let chunk = crate::loader::parse_chunk_from_bytes (bytecode).unwrap ();
for (arg, expected) in [
// Run the same test twice so clippy won't complain about a vec of 1 element
(vec! ["_exe_name"], vec! [23.0.into ()]),
(vec! ["_exe_name"], vec! [23.0.into ()]),
] {
let expected: Vec <Value> = expected;
let mut vm = State::default ();
let upvalues = State::upvalues_from_args (arg.into_iter ().map (|s| s.to_string ()));
let actual = vm.execute_chunk (&file, &upvalues);
assert_eq! (actual, expected);
}
assert_eq! (run_chunk (&["_exe_name"], &chunk), vec! [Value::from (23i64)]);
}
#[test]

View File

@ -156,6 +156,8 @@ impl PartialEq <i64> for Value {
}
impl Value {
/// Coerces ints to float
pub fn as_float (&self) -> Option <f64> {
match self {
Self::Float (x) => Some (*x),
@ -165,6 +167,8 @@ impl Value {
}
}
/// Does not coerce floats
pub fn as_int (&self) -> Option <i64> {
match self {
Self::Integer (x) => Some (*x),

2
test_vectors/varint.lua Normal file
View File

@ -0,0 +1,2 @@
local s = "a very long string with more than 128 characters, which will require 2 bytes to encode in Lua's bytecode format. This allows me to debug my bytecode loader, which doesn't seem to be handling string sizes properly."
print (#s)