Compare commits

..

3 Commits

Author SHA1 Message Date
_ b639d02027 remove unused file 2023-09-28 00:28:33 -05:00
_ 96c82c27c8 🐛 bug: fix long string loading and int adding
Long strings didn't work before because I hadn't implemented Lua's varint
decoding. It's an easy translation from the original C.

Int adding previously created a float because I always coerced to float.
Also an easy fix.
2023-09-28 00:27:05 -05:00
_ 5649f38698 add OP_LEN and allow telling luac a file name
This makes debugging easier, since the bytecodes will match between
a terminal and lunar_wave
2023-09-28 00:11:25 -05:00
8 changed files with 152 additions and 55 deletions

View File

@ -31,6 +31,8 @@ pub enum Instruction {
// Jump // Jump
Jmp (i32), Jmp (i32),
Len (u8, u8),
// Load F (Float?) // Load F (Float?)
LoadF (u8, i32), LoadF (u8, i32),

View File

@ -8,12 +8,35 @@ use crate::{
} }
}; };
/// Compile the Lua source file at `path` to bytecode by running `luac5.4`
/// as a subprocess and capturing what it writes to standard output.
///
/// Passing the file name to `luac` (instead of piping the source through
/// stdin) keeps the chunk name in the bytecode tied to the real file.
///
/// # Panics
///
/// Panics if `luac5.4` can't be started, if waiting on it fails, or if it
/// exits with a failure status (for example, on a Lua syntax error).
pub (crate) fn compile_bytecode_from_file (path: &str) -> Vec <u8> {
	use std::process::{
		Command,
		Stdio,
	};

	let child = Command::new ("luac5.4")
	.arg ("-o") // Output to...
	.arg ("-") // Standard output
	.arg (path)
	.stdout (Stdio::piped ())
	.spawn ()
	.expect ("failed to execute `luac5.4`. Is Lua installed?");

	let output = child
	.wait_with_output ()
	.expect ("failed to wait on child");

	// Without this check a failed compile would hand back empty or partial
	// bytecode, and the failure would only surface later as a parse error.
	assert! (
		output.status.success (),
		"`luac5.4` failed to compile `{path}`: {}",
		output.status,
	);

	// `output.stdout` is already an owned Vec, so there's no need to copy it
	output.stdout
}
/// Invoke `luac` as a subprocess /// Invoke `luac` as a subprocess
/// Luckily luac is single-pass, so we can just pipe in and out /// Luckily luac is single-pass, so we can just pipe in and out
/// ///
/// `source` is a Vec because we move it to a worker thread /// `source` is a Vec because we move it to a worker thread
pub (crate) fn compile_bytecode (source: Vec <u8>) -> Vec <u8> { pub (crate) fn compile_bytecode_from_stdin (source: Vec <u8>) -> Vec <u8> {
use std::{ use std::{
io::Write, io::Write,
process::{ process::{
@ -87,6 +110,7 @@ pub fn parse_inst (buf: [u8; 4]) -> Option <Inst>
0x2e => Inst::MmBin (a, b, c), 0x2e => Inst::MmBin (a, b, c),
0x30 => Inst::MmBinK (a, b, c, k), 0x30 => Inst::MmBinK (a, b, c, k),
0x33 => Inst::Not (a, b), 0x33 => Inst::Not (a, b),
0x34 => Inst::Len (a, b),
0x3c => Inst::EqK (a, b, k), 0x3c => Inst::EqK (a, b, k),
0x3d => Inst::EqI (a, i_sb (buf)?, k), 0x3d => Inst::EqI (a, i_sb (buf)?, k),
0x38 => Inst::Jmp (s_j), 0x38 => Inst::Jmp (s_j),
@ -111,15 +135,42 @@ struct Header {
inst_count: u8, inst_count: u8,
} }
/// loadUnsigned in PUC Lua
/// Decodes a varint format that has 7 bits per byte, most significant
/// group first, where the 8th bit is set to 1 on the last byte.
///
/// `limit` bounds the decoded value: before each 7-bit shift the
/// accumulator is compared against `limit >> 7`.
///
/// # Panics
///
/// Panics if the reader runs out of bytes, if the accumulator reaches the
/// shrunken limit, or if no terminating byte shows up within 32 bytes.
fn load_unsigned <R: Read> (rdr: &mut R, limit: usize) -> usize {
	// Shrink the limit so we can tell when we pass it
	let limit = limit >> 7;
	let mut x = 0;

	for _ in 0..32 {
		let b = parse_byte (rdr).expect ("unexpected end of input inside a varint");
		if x >= limit {
			panic! ("integer overflow {x} >= {limit}");
		}
		x = (x << 7) | (b as usize & 0x7f);
		if (b & 0x80) != 0 {
			// High bit set means this was the final byte
			return x;
		}
	}

	// Falling out of the loop means the input never marked a last byte.
	// Returning the partial value would silently desync the reader.
	panic! ("varint has no terminating byte within 32 bytes");
}
/// Reads one varint-encoded size from `rdr`.
/// Delegates to `load_unsigned`, passing `usize::MAX` as the limit.
/// NOTE(review): presumably the counterpart of PUC Lua's `loadSize` - confirm
fn load_size <R: Read> (rdr: &mut R) -> usize {
	let limit = usize::MAX;
	load_unsigned (rdr, limit)
}
// loadString in PUC Lua. Doesn't work with long strings yet. // loadString in PUC Lua. Doesn't work with long strings yet.
fn parse_string <R: Read> (rdr: &mut R) -> Option <String> { fn parse_string <R: Read> (rdr: &mut R) -> Option <String> {
let len = match parse_int (rdr)? { let len = match load_size (rdr) {
0 => 0, 0 => return Some (String::new ()),
x => x - 1, x => x - 1,
}; };
let mut buf = vec! [0u8; len as usize]; let mut buf = vec! [0u8; len];
rdr.read_exact (&mut buf).ok ()?; rdr.read_exact (&mut buf).ok ()?;
Some (String::from_utf8 (buf).ok ()?) Some (String::from_utf8 (buf).ok ()?)
} }
@ -162,22 +213,22 @@ pub fn parse_block <R: Read> (rdr: &mut R, blocks: &mut Vec <Block>)
use crate::value::Value; use crate::value::Value;
parse_string (rdr)?; // function name parse_string (rdr)?; // function name
parse_int (rdr)?; // start line in source code parse_int (rdr).unwrap (); // start line in source code
parse_int (rdr)?; // last line in source code parse_int (rdr).unwrap (); // last line in source code
parse_byte (rdr)?; // num params parse_byte (rdr).unwrap (); // num params
parse_byte (rdr)?; // is_vararg parse_byte (rdr).unwrap (); // is_vararg
parse_byte (rdr)?; // maxstacksize, might be same as num slots? parse_byte (rdr).unwrap (); // maxstacksize, might be same as num slots?
let inst_count = parse_int (rdr)?; let inst_count = parse_int (rdr).unwrap ();
let mut instructions = Vec::with_capacity (inst_count as usize); let mut instructions = Vec::with_capacity (inst_count as usize);
for _ in 0..inst_count { for _ in 0..inst_count {
let mut buf = [0u8; 4]; let mut buf = [0u8; 4];
rdr.read_exact (&mut buf).ok ()?; rdr.read_exact (&mut buf).ok ().unwrap ();
instructions.push (parse_inst (buf).expect (&format! ("{buf:?}"))); instructions.push (parse_inst (buf).expect (&format! ("{buf:?}")));
} }
let constant_count = parse_int (rdr)?; let constant_count = parse_int (rdr).unwrap ();
let mut constants = Vec::with_capacity (constant_count as usize); let mut constants = Vec::with_capacity (constant_count as usize);
@ -186,22 +237,26 @@ pub fn parse_block <R: Read> (rdr: &mut R, blocks: &mut Vec <Block>)
let const_type = parse_byte (rdr)?; let const_type = parse_byte (rdr)?;
let val = match const_type { let val = match const_type {
3 => Value::from (parse_i64 (rdr)?), 3 => parse_i64 (rdr).unwrap ().into (),
4 => parse_string (rdr)?.into (), 4 => parse_string (rdr).unwrap ().into (),
19 => Value::from (parse_float (rdr)?),
// For LUA_TNUMBER, PUC Lua uses a macro that adds 16 to signify a float
19 => parse_float (rdr).unwrap ().into (),
// 0x10 + 4 = long string
20 => parse_string (rdr).unwrap ().into (),
x => panic! ("Constant {} has type {}", i, x), x => panic! ("Constant {} has type {}", i, x),
}; };
constants.push (val); constants.push (val);
} }
let upvalue_count = parse_int (rdr)? as usize; let upvalue_count = parse_int (rdr).unwrap () as usize;
for _ in 0..upvalue_count { for _ in 0..upvalue_count {
// Just ignore these // Just ignore these
for _ in 0..3 { for _ in 0..3 {
parse_byte (rdr)?; parse_byte (rdr).unwrap ();
} }
} }
@ -214,9 +269,9 @@ pub fn parse_block <R: Read> (rdr: &mut R, blocks: &mut Vec <Block>)
// Recursion // Recursion
// Subfunctions. PUC calls them protos. // Subfunctions. PUC calls them protos.
let protos_count = parse_int (rdr)?; let protos_count = parse_int (rdr).unwrap ();
for _ in 0..protos_count { for _ in 0..protos_count {
parse_block (rdr, blocks)?; parse_block (rdr, blocks).unwrap ();
} }
// Skip over debug stuff // Skip over debug stuff
@ -224,26 +279,26 @@ pub fn parse_block <R: Read> (rdr: &mut R, blocks: &mut Vec <Block>)
// I think this is delta line numbers, e.g. most instructions // I think this is delta line numbers, e.g. most instructions
// have 0, but when you go to a new source line it's 1+. // have 0, but when you go to a new source line it's 1+.
let lineinfo_count = parse_int (rdr)?; let lineinfo_count = parse_int (rdr).unwrap ();
for _ in 0..lineinfo_count { for _ in 0..lineinfo_count {
parse_byte (rdr)?; parse_byte (rdr).unwrap ();
} }
// Absolute line info, didn't see that in my test files // Absolute line info, didn't see that in my test files
let abslineinfo_count = parse_int (rdr)?; let abslineinfo_count = parse_int (rdr).unwrap ();
assert_eq! (abslineinfo_count, 0); assert_eq! (abslineinfo_count, 0);
let local_count = parse_int (rdr)?; let local_count = parse_int (rdr).unwrap ();
for _ in 0..local_count { for _ in 0..local_count {
parse_string(rdr)?; parse_string(rdr).unwrap ();
parse_int (rdr)?; parse_int (rdr).unwrap ();
parse_int (rdr)?; parse_int (rdr).unwrap ();
} }
let upvalue_count = parse_int (rdr)?; let upvalue_count = parse_int (rdr).unwrap ();
for _ in 0..upvalue_count { for _ in 0..upvalue_count {
parse_string (rdr)?; parse_string (rdr).unwrap ();
} }
Some (()) Some (())
@ -257,11 +312,11 @@ pub fn parse_chunk <R: Read> (rdr: &mut R) -> Option <Chunk> {
let mut hdr = [0u8; 32]; let mut hdr = [0u8; 32];
rdr.read_exact (&mut hdr).ok ()?; rdr.read_exact (&mut hdr).ok ()?;
assert_eq! (&hdr [0..8], &[0x1b, 0x4c, 0x75, 0x61, 0x54, 0x00, 0x19, 0x93], "This isn't a Lua 5.4 bytecode file");
let mut blocks = vec![]; let mut blocks = vec![];
while let Some (_) = parse_block (rdr, &mut blocks) { parse_block (rdr, &mut blocks).unwrap ();
//
}
Some (Chunk { Some (Chunk {
blocks, blocks,
@ -275,6 +330,28 @@ pub fn parse_chunk_from_bytes (b: &[u8]) -> Option <Chunk> {
#[cfg (test)] #[cfg (test)]
mod tests { mod tests {
#[test]
fn load_size () {
	// Each case pairs an encoded varint with the size it should decode to
	let cases: &[(&[u8], usize)] = &[
		(&[0x80], 0),
		(&[0x81], 1),
		(&[0x82], 2),
		(&[0xff], 127),
		(&[0x01, 0x80], 128),
		(&[0x01, 0x81], 129),
		(&[0x02, 0x80], 256),
		(&[0x7f, 0xfe], 16382),
		(&[0x7f, 0xff], 16383),
		(&[0x01, 0x00, 0x80], 16384),
	];

	for &(encoded, expected) in cases {
		let mut cursor = std::io::Cursor::new (encoded);
		assert_eq! (super::load_size (&mut cursor), expected);
	}
}
#[test] #[test]
fn parse_inst () { fn parse_inst () {
use super::Inst; use super::Inst;

View File

@ -11,6 +11,7 @@ mod tests;
fn main () { fn main () {
use state::State; use state::State;
let mut list_bytecode = false;
let mut pipe_bytecode = false; let mut pipe_bytecode = false;
let mut script = None; let mut script = None;
@ -19,6 +20,7 @@ fn main () {
while let Some (arg) = args.next () { while let Some (arg) = args.next () {
match arg.as_str () { match arg.as_str () {
"--list-bytecode" => list_bytecode = true,
"--pipe-bytecode" => pipe_bytecode = true, "--pipe-bytecode" => pipe_bytecode = true,
"--script" => script = Some (args.next ().unwrap ()), "--script" => script = Some (args.next ().unwrap ()),
"--" => break, "--" => break,
@ -26,9 +28,8 @@ fn main () {
} }
} }
let lua_file = if let Some (script) = script { let chunk = if let Some (script) = script {
let source = std::fs::read (script).expect ("couldn't load Lua source code"); let bytecode = loader::compile_bytecode_from_file (&script);
let bytecode = loader::compile_bytecode(source);
let mut rdr = std::io::Cursor::new (bytecode); let mut rdr = std::io::Cursor::new (bytecode);
loader::parse_chunk (&mut rdr).unwrap () loader::parse_chunk (&mut rdr).unwrap ()
} }
@ -40,6 +41,10 @@ fn main () {
unimplemented!(); unimplemented!();
}; };
if list_bytecode {
dbg! (&chunk);
}
let mut vm = State::default (); let mut vm = State::default ();
if std::env::var("LUA_DEBUG").is_ok() { if std::env::var("LUA_DEBUG").is_ok() {
vm.debug_print = true; vm.debug_print = true;
@ -52,5 +57,5 @@ fn main () {
program_counter: 0, program_counter: 0,
}); });
vm.execute_chunk (&lua_file, &upvalues); vm.execute_chunk (&chunk, &upvalues);
} }

View File

@ -8,12 +8,14 @@ use crate::{
}, },
}; };
#[derive (Debug)]
pub struct Block { pub struct Block {
pub instructions: Vec <Instruction>, pub instructions: Vec <Instruction>,
pub constants: Vec <Value>, pub constants: Vec <Value>,
pub upvalue_count: usize, pub upvalue_count: usize,
} }
#[derive (Debug)]
pub struct Chunk { pub struct Chunk {
pub blocks: Vec <Block>, pub blocks: Vec <Block>,
} }
@ -155,10 +157,18 @@ impl State {
match instruction { match instruction {
Instruction::Add (a, b, c) => { Instruction::Add (a, b, c) => {
let v_b = self.reg (*b).as_float ().unwrap (); let v_b = self.reg (*b);
let v_c = self.reg (*c).as_float ().unwrap (); let v_c = self.reg (*c);
*self.reg_mut (*a) = Value::from (v_b + v_c); let sum = if let (Some (v_b), Some (v_c)) = (v_b.as_int (), v_c.as_int ())
{
Value::from (v_b + v_c)
}
else {
Value::from (v_b.as_float ().unwrap () + v_c.as_float ().unwrap ())
};
*self.reg_mut (*a) = sum;
}, },
Instruction::Call (a, b, _c) => { Instruction::Call (a, b, _c) => {
let b = usize::from (*b); let b = usize::from (*b);
@ -338,6 +348,14 @@ impl State {
*self.reg_mut (*a) = upvalues [b].clone (); *self.reg_mut (*a) = upvalues [b].clone ();
}, },
Instruction::Jmp (s_j) => next_pc += s_j, Instruction::Jmp (s_j) => next_pc += s_j,
Instruction::Len (a, b) => {
let len = match self.reg (*b) {
Value::String (s) => s.len (),
_ => unimplemented!(),
};
*self.reg_mut (*a) = len.into ();
}
Instruction::LoadF (a, sbx) => { Instruction::LoadF (a, sbx) => {
*self.reg_mut (*a) = Value::Float (*sbx as f64); *self.reg_mut (*a) = Value::Float (*sbx as f64);
} }

View File

@ -42,7 +42,7 @@ fn run_bytecode (args: &[&str], bc: &[u8]) -> Vec <Value> {
/// and returns the output /// and returns the output
fn run_source (args: &[&str], s: &str) -> Vec <Value> { fn run_source (args: &[&str], s: &str) -> Vec <Value> {
let bc = loader::compile_bytecode (s.as_bytes ().to_vec ()); let bc = loader::compile_bytecode_from_stdin (s.as_bytes ().to_vec ());
run_bytecode (args, &bc) run_bytecode (args, &bc)
} }
@ -128,22 +128,11 @@ fn bools () {
#[test] #[test]
fn closure () { fn closure () {
let bytecode = include_bytes! ("../test_vectors/closure.luac"); let source = include_bytes! ("../test_vectors/closure.lua");
let mut rdr = std::io::Cursor::new (bytecode); let bytecode = &crate::loader::compile_bytecode_from_stdin (source.to_vec ());
let file = crate::loader::parse_chunk (&mut rdr).unwrap (); let chunk = crate::loader::parse_chunk_from_bytes (bytecode).unwrap ();
for (arg, expected) in [ assert_eq! (run_chunk (&["_exe_name"], &chunk), vec! [Value::from (23i64)]);
// Run the same test twice so clippy won't complain about a vec of 1 element
(vec! ["_exe_name"], vec! [23.0.into ()]),
(vec! ["_exe_name"], vec! [23.0.into ()]),
] {
let expected: Vec <Value> = expected;
let mut vm = State::default ();
let upvalues = State::upvalues_from_args (arg.into_iter ().map (|s| s.to_string ()));
let actual = vm.execute_chunk (&file, &upvalues);
assert_eq! (actual, expected);
}
} }
#[test] #[test]
@ -289,7 +278,7 @@ fn is_93 () {
end end
"#; "#;
let bc = loader::compile_bytecode (src.as_bytes ().to_vec ()); let bc = loader::compile_bytecode_from_stdin (src.as_bytes ().to_vec ());
let chunk = loader::parse_chunk_from_bytes (&bc).unwrap (); let chunk = loader::parse_chunk_from_bytes (&bc).unwrap ();
assert_eq! (chunk.blocks [0].instructions [3], Inst::EqK (0, 1, false)); assert_eq! (chunk.blocks [0].instructions [3], Inst::EqK (0, 1, false));

View File

@ -156,6 +156,8 @@ impl PartialEq <i64> for Value {
} }
impl Value { impl Value {
/// Coerces ints to float
pub fn as_float (&self) -> Option <f64> { pub fn as_float (&self) -> Option <f64> {
match self { match self {
Self::Float (x) => Some (*x), Self::Float (x) => Some (*x),
@ -165,6 +167,8 @@ impl Value {
} }
} }
/// Does not coerce floats
pub fn as_int (&self) -> Option <i64> { pub fn as_int (&self) -> Option <i64> {
match self { match self {
Self::Integer (x) => Some (*x), Self::Integer (x) => Some (*x),

Binary file not shown.

2
test_vectors/varint.lua Normal file
View File

@ -0,0 +1,2 @@
local s = "a very long string with more than 128 characters, which will require 2 bytes to encode in Lua's bytecode format. This allows me to debug my bytecode loader, which doesn't seem to be handling string sizes properly."
print (#s)