🐛 bug: fix long string loading and int adding
Long strings didn't work before because I hadn't implemented Lua's varint decoding; it's an easy translation from the original C. Integer addition previously produced a float because I always coerced both operands to float, which was also an easy fix.
main
parent
5649f38698
commit
96c82c27c8
113
src/loader.rs
113
src/loader.rs
|
@ -135,15 +135,42 @@ struct Header {
|
||||||
inst_count: u8,
|
inst_count: u8,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// loadUnsigned in PUC Lua
|
||||||
|
/// Decodes a varint format that has 7 bits per byte and the 8th bit
|
||||||
|
/// is set to 1 on the last byte.
|
||||||
|
|
||||||
|
fn load_unsigned <R: Read> (rdr: &mut R, limit: usize) -> usize {
|
||||||
|
// Shrink the limit so we can tell when we pass it
|
||||||
|
let limit = limit >> 7;
|
||||||
|
|
||||||
|
let mut x = 0;
|
||||||
|
for _ in 0..32 {
|
||||||
|
let b = parse_byte (rdr).unwrap ();
|
||||||
|
if x >= limit {
|
||||||
|
panic! ("integer overflow {x} >= {limit}");
|
||||||
|
}
|
||||||
|
x = (x << 7) | (b as usize & 0x7f);
|
||||||
|
if (b & 0x80) != 0 {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
x
|
||||||
|
}
|
||||||
|
|
||||||
|
fn load_size <R: Read> (rdr: &mut R) -> usize {
|
||||||
|
load_unsigned (rdr, usize::MAX)
|
||||||
|
}
|
||||||
|
|
||||||
// loadString in PUC Lua. Doesn't work with long strings yet.
|
// loadString in PUC Lua. Doesn't work with long strings yet.
|
||||||
|
|
||||||
fn parse_string <R: Read> (rdr: &mut R) -> Option <String> {
|
fn parse_string <R: Read> (rdr: &mut R) -> Option <String> {
|
||||||
let len = match parse_int (rdr)? {
|
let len = match load_size (rdr) {
|
||||||
0 => 0,
|
0 => return Some (String::new ()),
|
||||||
x => x - 1,
|
x => x - 1,
|
||||||
};
|
};
|
||||||
|
|
||||||
let mut buf = vec! [0u8; len as usize];
|
let mut buf = vec! [0u8; len];
|
||||||
rdr.read_exact (&mut buf).ok ()?;
|
rdr.read_exact (&mut buf).ok ()?;
|
||||||
Some (String::from_utf8 (buf).ok ()?)
|
Some (String::from_utf8 (buf).ok ()?)
|
||||||
}
|
}
|
||||||
|
@ -186,22 +213,22 @@ pub fn parse_block <R: Read> (rdr: &mut R, blocks: &mut Vec <Block>)
|
||||||
use crate::value::Value;
|
use crate::value::Value;
|
||||||
|
|
||||||
parse_string (rdr)?; // function name
|
parse_string (rdr)?; // function name
|
||||||
parse_int (rdr)?; // start line in source code
|
parse_int (rdr).unwrap (); // start line in source code
|
||||||
parse_int (rdr)?; // last line in source code
|
parse_int (rdr).unwrap (); // last line in source code
|
||||||
parse_byte (rdr)?; // num params
|
parse_byte (rdr).unwrap (); // num params
|
||||||
parse_byte (rdr)?; // is_vararg
|
parse_byte (rdr).unwrap (); // is_vararg
|
||||||
parse_byte (rdr)?; // maxstacksize, might be same as num slots?
|
parse_byte (rdr).unwrap (); // maxstacksize, might be same as num slots?
|
||||||
|
|
||||||
let inst_count = parse_int (rdr)?;
|
let inst_count = parse_int (rdr).unwrap ();
|
||||||
let mut instructions = Vec::with_capacity (inst_count as usize);
|
let mut instructions = Vec::with_capacity (inst_count as usize);
|
||||||
|
|
||||||
for _ in 0..inst_count {
|
for _ in 0..inst_count {
|
||||||
let mut buf = [0u8; 4];
|
let mut buf = [0u8; 4];
|
||||||
rdr.read_exact (&mut buf).ok ()?;
|
rdr.read_exact (&mut buf).ok ().unwrap ();
|
||||||
instructions.push (parse_inst (buf).expect (&format! ("{buf:?}")));
|
instructions.push (parse_inst (buf).expect (&format! ("{buf:?}")));
|
||||||
}
|
}
|
||||||
|
|
||||||
let constant_count = parse_int (rdr)?;
|
let constant_count = parse_int (rdr).unwrap ();
|
||||||
|
|
||||||
let mut constants = Vec::with_capacity (constant_count as usize);
|
let mut constants = Vec::with_capacity (constant_count as usize);
|
||||||
|
|
||||||
|
@ -210,22 +237,26 @@ pub fn parse_block <R: Read> (rdr: &mut R, blocks: &mut Vec <Block>)
|
||||||
let const_type = parse_byte (rdr)?;
|
let const_type = parse_byte (rdr)?;
|
||||||
|
|
||||||
let val = match const_type {
|
let val = match const_type {
|
||||||
3 => Value::from (parse_i64 (rdr)?),
|
3 => parse_i64 (rdr).unwrap ().into (),
|
||||||
4 => parse_string (rdr)?.into (),
|
4 => parse_string (rdr).unwrap ().into (),
|
||||||
19 => Value::from (parse_float (rdr)?),
|
|
||||||
|
// For LUA_TNUMBER, PUC Lua uses a macro that adds 16 to signify a float
|
||||||
|
19 => parse_float (rdr).unwrap ().into (),
|
||||||
|
// 0x10 + 4 = long string
|
||||||
|
20 => parse_string (rdr).unwrap ().into (),
|
||||||
x => panic! ("Constant {} has type {}", i, x),
|
x => panic! ("Constant {} has type {}", i, x),
|
||||||
};
|
};
|
||||||
|
|
||||||
constants.push (val);
|
constants.push (val);
|
||||||
}
|
}
|
||||||
|
|
||||||
let upvalue_count = parse_int (rdr)? as usize;
|
let upvalue_count = parse_int (rdr).unwrap () as usize;
|
||||||
|
|
||||||
for _ in 0..upvalue_count {
|
for _ in 0..upvalue_count {
|
||||||
// Just ignore these
|
// Just ignore these
|
||||||
|
|
||||||
for _ in 0..3 {
|
for _ in 0..3 {
|
||||||
parse_byte (rdr)?;
|
parse_byte (rdr).unwrap ();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -238,9 +269,9 @@ pub fn parse_block <R: Read> (rdr: &mut R, blocks: &mut Vec <Block>)
|
||||||
// Recursion
|
// Recursion
|
||||||
|
|
||||||
// Subfunctions. PUC calls them protos.
|
// Subfunctions. PUC calls them protos.
|
||||||
let protos_count = parse_int (rdr)?;
|
let protos_count = parse_int (rdr).unwrap ();
|
||||||
for _ in 0..protos_count {
|
for _ in 0..protos_count {
|
||||||
parse_block (rdr, blocks)?;
|
parse_block (rdr, blocks).unwrap ();
|
||||||
}
|
}
|
||||||
|
|
||||||
// Skip over debug stuff
|
// Skip over debug stuff
|
||||||
|
@ -248,26 +279,26 @@ pub fn parse_block <R: Read> (rdr: &mut R, blocks: &mut Vec <Block>)
|
||||||
// I think this is delta line numbers, e.g. most instructions
|
// I think this is delta line numbers, e.g. most instructions
|
||||||
// have 0, but when you go to a new source line it's 1+.
|
// have 0, but when you go to a new source line it's 1+.
|
||||||
|
|
||||||
let lineinfo_count = parse_int (rdr)?;
|
let lineinfo_count = parse_int (rdr).unwrap ();
|
||||||
for _ in 0..lineinfo_count {
|
for _ in 0..lineinfo_count {
|
||||||
parse_byte (rdr)?;
|
parse_byte (rdr).unwrap ();
|
||||||
}
|
}
|
||||||
|
|
||||||
// Absolute line info, didn't see that in my test files
|
// Absolute line info, didn't see that in my test files
|
||||||
|
|
||||||
let abslineinfo_count = parse_int (rdr)?;
|
let abslineinfo_count = parse_int (rdr).unwrap ();
|
||||||
assert_eq! (abslineinfo_count, 0);
|
assert_eq! (abslineinfo_count, 0);
|
||||||
|
|
||||||
let local_count = parse_int (rdr)?;
|
let local_count = parse_int (rdr).unwrap ();
|
||||||
for _ in 0..local_count {
|
for _ in 0..local_count {
|
||||||
parse_string(rdr)?;
|
parse_string(rdr).unwrap ();
|
||||||
parse_int (rdr)?;
|
parse_int (rdr).unwrap ();
|
||||||
parse_int (rdr)?;
|
parse_int (rdr).unwrap ();
|
||||||
}
|
}
|
||||||
|
|
||||||
let upvalue_count = parse_int (rdr)?;
|
let upvalue_count = parse_int (rdr).unwrap ();
|
||||||
for _ in 0..upvalue_count {
|
for _ in 0..upvalue_count {
|
||||||
parse_string (rdr)?;
|
parse_string (rdr).unwrap ();
|
||||||
}
|
}
|
||||||
|
|
||||||
Some (())
|
Some (())
|
||||||
|
@ -281,11 +312,11 @@ pub fn parse_chunk <R: Read> (rdr: &mut R) -> Option <Chunk> {
|
||||||
let mut hdr = [0u8; 32];
|
let mut hdr = [0u8; 32];
|
||||||
rdr.read_exact (&mut hdr).ok ()?;
|
rdr.read_exact (&mut hdr).ok ()?;
|
||||||
|
|
||||||
|
assert_eq! (&hdr [0..8], &[0x1b, 0x4c, 0x75, 0x61, 0x54, 0x00, 0x19, 0x93], "This isn't a Lua 5.4 bytecode file");
|
||||||
|
|
||||||
let mut blocks = vec![];
|
let mut blocks = vec![];
|
||||||
|
|
||||||
while let Some (_) = parse_block (rdr, &mut blocks) {
|
parse_block (rdr, &mut blocks).unwrap ();
|
||||||
//
|
|
||||||
}
|
|
||||||
|
|
||||||
Some (Chunk {
|
Some (Chunk {
|
||||||
blocks,
|
blocks,
|
||||||
|
@ -299,6 +330,28 @@ pub fn parse_chunk_from_bytes (b: &[u8]) -> Option <Chunk> {
|
||||||
|
|
||||||
#[cfg (test)]
|
#[cfg (test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
|
#[test]
|
||||||
|
fn load_size () {
|
||||||
|
let f = |input: &[u8]| {
|
||||||
|
let mut cursor = std::io::Cursor::new (input);
|
||||||
|
super::load_size (&mut cursor)
|
||||||
|
};
|
||||||
|
|
||||||
|
assert_eq! (f (&[0x80]), 0);
|
||||||
|
assert_eq! (f (&[0x81]), 1);
|
||||||
|
assert_eq! (f (&[0x82]), 2);
|
||||||
|
|
||||||
|
assert_eq! (f (&[0xff]), 127);
|
||||||
|
|
||||||
|
assert_eq! (f (&[0x01, 0x80]), 128);
|
||||||
|
assert_eq! (f (&[0x01, 0x81]), 129);
|
||||||
|
assert_eq! (f (&[0x02, 0x80]), 256);
|
||||||
|
|
||||||
|
assert_eq! (f (&[0x7f, 0xfe]), 16382);
|
||||||
|
assert_eq! (f (&[0x7f, 0xff]), 16383);
|
||||||
|
assert_eq! (f (&[0x01, 0x00, 0x80]), 16384);
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn parse_inst () {
|
fn parse_inst () {
|
||||||
use super::Inst;
|
use super::Inst;
|
||||||
|
|
|
@ -30,7 +30,6 @@ fn main () {
|
||||||
|
|
||||||
let chunk = if let Some (script) = script {
|
let chunk = if let Some (script) = script {
|
||||||
let bytecode = loader::compile_bytecode_from_file (&script);
|
let bytecode = loader::compile_bytecode_from_file (&script);
|
||||||
dbg! (&bytecode [0..48]);
|
|
||||||
let mut rdr = std::io::Cursor::new (bytecode);
|
let mut rdr = std::io::Cursor::new (bytecode);
|
||||||
loader::parse_chunk (&mut rdr).unwrap ()
|
loader::parse_chunk (&mut rdr).unwrap ()
|
||||||
}
|
}
|
||||||
|
|
14
src/state.rs
14
src/state.rs
|
@ -157,10 +157,18 @@ impl State {
|
||||||
|
|
||||||
match instruction {
|
match instruction {
|
||||||
Instruction::Add (a, b, c) => {
|
Instruction::Add (a, b, c) => {
|
||||||
let v_b = self.reg (*b).as_float ().unwrap ();
|
let v_b = self.reg (*b);
|
||||||
let v_c = self.reg (*c).as_float ().unwrap ();
|
let v_c = self.reg (*c);
|
||||||
|
|
||||||
*self.reg_mut (*a) = Value::from (v_b + v_c);
|
let sum = if let (Some (v_b), Some (v_c)) = (v_b.as_int (), v_c.as_int ())
|
||||||
|
{
|
||||||
|
Value::from (v_b + v_c)
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
Value::from (v_b.as_float ().unwrap () + v_c.as_float ().unwrap ())
|
||||||
|
};
|
||||||
|
|
||||||
|
*self.reg_mut (*a) = sum;
|
||||||
},
|
},
|
||||||
Instruction::Call (a, b, _c) => {
|
Instruction::Call (a, b, _c) => {
|
||||||
let b = usize::from (*b);
|
let b = usize::from (*b);
|
||||||
|
|
19
src/tests.rs
19
src/tests.rs
|
@ -128,22 +128,11 @@ fn bools () {
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn closure () {
|
fn closure () {
|
||||||
let bytecode = include_bytes! ("../test_vectors/closure.luac");
|
let source = include_bytes! ("../test_vectors/closure.lua");
|
||||||
let mut rdr = std::io::Cursor::new (bytecode);
|
let bytecode = &crate::loader::compile_bytecode_from_stdin (source.to_vec ());
|
||||||
let file = crate::loader::parse_chunk (&mut rdr).unwrap ();
|
let chunk = crate::loader::parse_chunk_from_bytes (bytecode).unwrap ();
|
||||||
|
|
||||||
for (arg, expected) in [
|
assert_eq! (run_chunk (&["_exe_name"], &chunk), vec! [Value::from (23i64)]);
|
||||||
// Run the same test twice so clippy won't complain about a vec of 1 element
|
|
||||||
(vec! ["_exe_name"], vec! [23.0.into ()]),
|
|
||||||
(vec! ["_exe_name"], vec! [23.0.into ()]),
|
|
||||||
] {
|
|
||||||
let expected: Vec <Value> = expected;
|
|
||||||
let mut vm = State::default ();
|
|
||||||
let upvalues = State::upvalues_from_args (arg.into_iter ().map (|s| s.to_string ()));
|
|
||||||
let actual = vm.execute_chunk (&file, &upvalues);
|
|
||||||
|
|
||||||
assert_eq! (actual, expected);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
|
|
@ -156,6 +156,8 @@ impl PartialEq <i64> for Value {
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Value {
|
impl Value {
|
||||||
|
/// Coerces ints to float
|
||||||
|
|
||||||
pub fn as_float (&self) -> Option <f64> {
|
pub fn as_float (&self) -> Option <f64> {
|
||||||
match self {
|
match self {
|
||||||
Self::Float (x) => Some (*x),
|
Self::Float (x) => Some (*x),
|
||||||
|
@ -165,6 +167,8 @@ impl Value {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Does not coerce floats
|
||||||
|
|
||||||
pub fn as_int (&self) -> Option <i64> {
|
pub fn as_int (&self) -> Option <i64> {
|
||||||
match self {
|
match self {
|
||||||
Self::Integer (x) => Some (*x),
|
Self::Integer (x) => Some (*x),
|
||||||
|
|
|
@ -0,0 +1,2 @@
|
||||||
|
local s = "a very long string with more than 128 characters, which will require 2 bytes to encode in Lua's bytecode format. This allows me to debug my bytecode loader, which doesn't seem to be handling string sizes properly."
|
||||||
|
print (#s)
|
Loading…
Reference in New Issue