🐛 bug: fix long string loading and int adding
Long strings didn't work before because I hadn't implemented Lua's varint decoding. It's an easy translation from the original C. Int adding previously created a float because I always coerced to float. Also an easy fix.
main
parent
5649f38698
commit
96c82c27c8
113
src/loader.rs
113
src/loader.rs
|
@ -135,15 +135,42 @@ struct Header {
|
|||
inst_count: u8,
|
||||
}
|
||||
|
||||
/// loadUnsigned in PUC Lua
|
||||
/// Decodes a varint format that has 7 bits per bytes and the 8th bit
|
||||
/// is set to 1 on the last byte.
|
||||
|
||||
fn load_unsigned <R: Read> (rdr: &mut R, limit: usize) -> usize {
|
||||
// Shrink the limit so we can tell when we pass it
|
||||
let limit = limit >> 7;
|
||||
|
||||
let mut x = 0;
|
||||
for _ in 0..32 {
|
||||
let b = parse_byte (rdr).unwrap ();
|
||||
if x >= limit {
|
||||
panic! ("integer overflow {x} >= {limit}");
|
||||
}
|
||||
x = (x << 7) | (b as usize & 0x7f);
|
||||
if (b & 0x80) != 0 {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
x
|
||||
}
|
||||
|
||||
fn load_size <R: Read> (rdr: &mut R) -> usize {
|
||||
load_unsigned (rdr, usize::MAX)
|
||||
}
|
||||
|
||||
// loadString in PUC Lua. The length prefix is a varint read with load_size, so long strings work too.
|
||||
|
||||
fn parse_string <R: Read> (rdr: &mut R) -> Option <String> {
|
||||
let len = match parse_int (rdr)? {
|
||||
0 => 0,
|
||||
let len = match load_size (rdr) {
|
||||
0 => return Some (String::new ()),
|
||||
x => x - 1,
|
||||
};
|
||||
|
||||
let mut buf = vec! [0u8; len as usize];
|
||||
let mut buf = vec! [0u8; len];
|
||||
rdr.read_exact (&mut buf).ok ()?;
|
||||
Some (String::from_utf8 (buf).ok ()?)
|
||||
}
|
||||
|
@ -186,22 +213,22 @@ pub fn parse_block <R: Read> (rdr: &mut R, blocks: &mut Vec <Block>)
|
|||
use crate::value::Value;
|
||||
|
||||
parse_string (rdr)?; // function name
|
||||
parse_int (rdr)?; // start line in source code
|
||||
parse_int (rdr)?; // last line in source code
|
||||
parse_byte (rdr)?; // num params
|
||||
parse_byte (rdr)?; // is_vararg
|
||||
parse_byte (rdr)?; // maxstacksize, might be same as num slots?
|
||||
parse_int (rdr).unwrap (); // start line in source code
|
||||
parse_int (rdr).unwrap (); // last line in source code
|
||||
parse_byte (rdr).unwrap (); // num params
|
||||
parse_byte (rdr).unwrap (); // is_vararg
|
||||
parse_byte (rdr).unwrap (); // maxstacksize, might be same as num slots?
|
||||
|
||||
let inst_count = parse_int (rdr)?;
|
||||
let inst_count = parse_int (rdr).unwrap ();
|
||||
let mut instructions = Vec::with_capacity (inst_count as usize);
|
||||
|
||||
for _ in 0..inst_count {
|
||||
let mut buf = [0u8; 4];
|
||||
rdr.read_exact (&mut buf).ok ()?;
|
||||
rdr.read_exact (&mut buf).ok ().unwrap ();
|
||||
instructions.push (parse_inst (buf).expect (&format! ("{buf:?}")));
|
||||
}
|
||||
|
||||
let constant_count = parse_int (rdr)?;
|
||||
let constant_count = parse_int (rdr).unwrap ();
|
||||
|
||||
let mut constants = Vec::with_capacity (constant_count as usize);
|
||||
|
||||
|
@ -210,22 +237,26 @@ pub fn parse_block <R: Read> (rdr: &mut R, blocks: &mut Vec <Block>)
|
|||
let const_type = parse_byte (rdr)?;
|
||||
|
||||
let val = match const_type {
|
||||
3 => Value::from (parse_i64 (rdr)?),
|
||||
4 => parse_string (rdr)?.into (),
|
||||
19 => Value::from (parse_float (rdr)?),
|
||||
3 => parse_i64 (rdr).unwrap ().into (),
|
||||
4 => parse_string (rdr).unwrap ().into (),
|
||||
|
||||
// For LUA_TNUMBER, PUC Lua uses a macro that adds 16 to signify a float
|
||||
19 => parse_float (rdr).unwrap ().into (),
|
||||
// 0x10 + 4 = long string
|
||||
20 => parse_string (rdr).unwrap ().into (),
|
||||
x => panic! ("Constant {} has type {}", i, x),
|
||||
};
|
||||
|
||||
constants.push (val);
|
||||
}
|
||||
|
||||
let upvalue_count = parse_int (rdr)? as usize;
|
||||
let upvalue_count = parse_int (rdr).unwrap () as usize;
|
||||
|
||||
for _ in 0..upvalue_count {
|
||||
// Just ignore these
|
||||
|
||||
for _ in 0..3 {
|
||||
parse_byte (rdr)?;
|
||||
parse_byte (rdr).unwrap ();
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -238,9 +269,9 @@ pub fn parse_block <R: Read> (rdr: &mut R, blocks: &mut Vec <Block>)
|
|||
// Recursion
|
||||
|
||||
// Subfunctions. PUC calls them protos.
|
||||
let protos_count = parse_int (rdr)?;
|
||||
let protos_count = parse_int (rdr).unwrap ();
|
||||
for _ in 0..protos_count {
|
||||
parse_block (rdr, blocks)?;
|
||||
parse_block (rdr, blocks).unwrap ();
|
||||
}
|
||||
|
||||
// Skip over debug stuff
|
||||
|
@ -248,26 +279,26 @@ pub fn parse_block <R: Read> (rdr: &mut R, blocks: &mut Vec <Block>)
|
|||
// I think this is delta line numbers, e.g. most instructions
|
||||
// have 0, but when you go to a new source line it's 1+.
|
||||
|
||||
let lineinfo_count = parse_int (rdr)?;
|
||||
let lineinfo_count = parse_int (rdr).unwrap ();
|
||||
for _ in 0..lineinfo_count {
|
||||
parse_byte (rdr)?;
|
||||
parse_byte (rdr).unwrap ();
|
||||
}
|
||||
|
||||
// Absolute line info, didn't see that in my test files
|
||||
|
||||
let abslineinfo_count = parse_int (rdr)?;
|
||||
let abslineinfo_count = parse_int (rdr).unwrap ();
|
||||
assert_eq! (abslineinfo_count, 0);
|
||||
|
||||
let local_count = parse_int (rdr)?;
|
||||
let local_count = parse_int (rdr).unwrap ();
|
||||
for _ in 0..local_count {
|
||||
parse_string(rdr)?;
|
||||
parse_int (rdr)?;
|
||||
parse_int (rdr)?;
|
||||
parse_string(rdr).unwrap ();
|
||||
parse_int (rdr).unwrap ();
|
||||
parse_int (rdr).unwrap ();
|
||||
}
|
||||
|
||||
let upvalue_count = parse_int (rdr)?;
|
||||
let upvalue_count = parse_int (rdr).unwrap ();
|
||||
for _ in 0..upvalue_count {
|
||||
parse_string (rdr)?;
|
||||
parse_string (rdr).unwrap ();
|
||||
}
|
||||
|
||||
Some (())
|
||||
|
@ -281,11 +312,11 @@ pub fn parse_chunk <R: Read> (rdr: &mut R) -> Option <Chunk> {
|
|||
let mut hdr = [0u8; 32];
|
||||
rdr.read_exact (&mut hdr).ok ()?;
|
||||
|
||||
assert_eq! (&hdr [0..8], &[0x1b, 0x4c, 0x75, 0x61, 0x54, 0x00, 0x19, 0x93], "This isn't a Lua 5.4 bytecode file");
|
||||
|
||||
let mut blocks = vec![];
|
||||
|
||||
while let Some (_) = parse_block (rdr, &mut blocks) {
|
||||
//
|
||||
}
|
||||
parse_block (rdr, &mut blocks).unwrap ();
|
||||
|
||||
Some (Chunk {
|
||||
blocks,
|
||||
|
@ -299,6 +330,28 @@ pub fn parse_chunk_from_bytes (b: &[u8]) -> Option <Chunk> {
|
|||
|
||||
#[cfg (test)]
|
||||
mod tests {
|
||||
#[test]
fn load_size () {
    /// Runs `super::load_size` over an in-memory byte slice.
    fn decode (bytes: &[u8]) -> usize {
        let mut rdr = std::io::Cursor::new (bytes);
        super::load_size (&mut rdr)
    }

    // One byte: terminator bit set, 7 payload bits
    assert_eq! (decode (&[0x80]), 0);
    assert_eq! (decode (&[0x81]), 1);
    assert_eq! (decode (&[0x82]), 2);
    assert_eq! (decode (&[0xff]), 127);

    // Two bytes: the first has no terminator bit and holds the high group
    assert_eq! (decode (&[0x01, 0x80]), 128);
    assert_eq! (decode (&[0x01, 0x81]), 129);
    assert_eq! (decode (&[0x02, 0x80]), 256);
    assert_eq! (decode (&[0x7f, 0xfe]), 16382);
    assert_eq! (decode (&[0x7f, 0xff]), 16383);

    // Three bytes: first value that no longer fits in 14 bits
    assert_eq! (decode (&[0x01, 0x00, 0x80]), 16384);
}
|
||||
|
||||
#[test]
|
||||
fn parse_inst () {
|
||||
use super::Inst;
|
||||
|
|
|
@ -30,7 +30,6 @@ fn main () {
|
|||
|
||||
let chunk = if let Some (script) = script {
|
||||
let bytecode = loader::compile_bytecode_from_file (&script);
|
||||
dbg! (&bytecode [0..48]);
|
||||
let mut rdr = std::io::Cursor::new (bytecode);
|
||||
loader::parse_chunk (&mut rdr).unwrap ()
|
||||
}
|
||||
|
|
14
src/state.rs
14
src/state.rs
|
@ -157,10 +157,18 @@ impl State {
|
|||
|
||||
match instruction {
|
||||
Instruction::Add (a, b, c) => {
|
||||
let v_b = self.reg (*b).as_float ().unwrap ();
|
||||
let v_c = self.reg (*c).as_float ().unwrap ();
|
||||
let v_b = self.reg (*b);
|
||||
let v_c = self.reg (*c);
|
||||
|
||||
*self.reg_mut (*a) = Value::from (v_b + v_c);
|
||||
let sum = if let (Some (v_b), Some (v_c)) = (v_b.as_int (), v_c.as_int ())
|
||||
{
|
||||
Value::from (v_b + v_c)
|
||||
}
|
||||
else {
|
||||
Value::from (v_b.as_float ().unwrap () + v_c.as_float ().unwrap ())
|
||||
};
|
||||
|
||||
*self.reg_mut (*a) = sum;
|
||||
},
|
||||
Instruction::Call (a, b, _c) => {
|
||||
let b = usize::from (*b);
|
||||
|
|
19
src/tests.rs
19
src/tests.rs
|
@ -128,22 +128,11 @@ fn bools () {
|
|||
|
||||
#[test]
|
||||
fn closure () {
|
||||
let bytecode = include_bytes! ("../test_vectors/closure.luac");
|
||||
let mut rdr = std::io::Cursor::new (bytecode);
|
||||
let file = crate::loader::parse_chunk (&mut rdr).unwrap ();
|
||||
let source = include_bytes! ("../test_vectors/closure.lua");
|
||||
let bytecode = &crate::loader::compile_bytecode_from_stdin (source.to_vec ());
|
||||
let chunk = crate::loader::parse_chunk_from_bytes (bytecode).unwrap ();
|
||||
|
||||
for (arg, expected) in [
|
||||
// Run the same test twice so clippy won't complain about a vec of 1 element
|
||||
(vec! ["_exe_name"], vec! [23.0.into ()]),
|
||||
(vec! ["_exe_name"], vec! [23.0.into ()]),
|
||||
] {
|
||||
let expected: Vec <Value> = expected;
|
||||
let mut vm = State::default ();
|
||||
let upvalues = State::upvalues_from_args (arg.into_iter ().map (|s| s.to_string ()));
|
||||
let actual = vm.execute_chunk (&file, &upvalues);
|
||||
|
||||
assert_eq! (actual, expected);
|
||||
}
|
||||
assert_eq! (run_chunk (&["_exe_name"], &chunk), vec! [Value::from (23i64)]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
|
|
@ -156,6 +156,8 @@ impl PartialEq <i64> for Value {
|
|||
}
|
||||
|
||||
impl Value {
|
||||
/// Coerces ints to float
|
||||
|
||||
pub fn as_float (&self) -> Option <f64> {
|
||||
match self {
|
||||
Self::Float (x) => Some (*x),
|
||||
|
@ -165,6 +167,8 @@ impl Value {
|
|||
}
|
||||
}
|
||||
|
||||
/// Does not coerce floats
|
||||
|
||||
pub fn as_int (&self) -> Option <i64> {
|
||||
match self {
|
||||
Self::Integer (x) => Some (*x),
|
||||
|
|
|
@ -0,0 +1,2 @@
|
|||
local s = "a very long string with more than 128 characters, which will require 2 bytes to encode in Lua's bytecode format. This allows me to debug my bytecode loader, which doesn't seem to be handling string sizes properly."
|
||||
print (#s)
|
Loading…
Reference in New Issue