Compare commits
3 Commits
565fd19e66
...
b639d02027
Author | SHA1 | Date |
---|---|---|
_ | b639d02027 | |
_ | 96c82c27c8 | |
_ | 5649f38698 |
|
@ -31,6 +31,8 @@ pub enum Instruction {
|
|||
// Jump
|
||||
Jmp (i32),
|
||||
|
||||
Len (u8, u8),
|
||||
|
||||
// Load F (Float?)
|
||||
LoadF (u8, i32),
|
||||
|
||||
|
|
139
src/loader.rs
139
src/loader.rs
|
@ -8,12 +8,35 @@ use crate::{
|
|||
}
|
||||
};
|
||||
|
||||
/// Invoke `luac5.4` as a subprocess to compile the Lua source file at `path`
///
/// The compiler is told to write the bytecode to its standard output
/// (`-o -`), which is captured and returned as raw bytes.
///
/// # Panics
///
/// Panics if `luac5.4` can't be spawned, if waiting on it fails, or if it
/// exits unsuccessfully. The last case is checked explicitly so a failed
/// compile is reported here instead of handing back whatever the child
/// happened to print.
pub (crate) fn compile_bytecode_from_file (path: &str) -> Vec <u8> {
    use std::process::{
        Command,
        Stdio,
    };

    let child = Command::new ("luac5.4")
        .arg ("-o") // Output to...
        .arg ("-") // Standard output
        .arg (path)
        .stdout (Stdio::piped ())
        .spawn ()
        .expect ("failed to execute `luac5.4`. Is Lua installed?");

    let output = child
        .wait_with_output ()
        .expect ("failed to wait on child");

    assert! (
        output.status.success (),
        "`luac5.4` failed to compile `{path}`: {}",
        output.status,
    );

    // `stdout` is already an owned Vec, so hand it over without copying
    output.stdout
}
|
||||
|
||||
/// Invoke `luac` as a subprocess
|
||||
/// Luckily luac is single-pass, so we can just pipe in and out
|
||||
///
|
||||
/// `source` is a Vec because we move it to a worker thread
|
||||
|
||||
pub (crate) fn compile_bytecode (source: Vec <u8>) -> Vec <u8> {
|
||||
pub (crate) fn compile_bytecode_from_stdin (source: Vec <u8>) -> Vec <u8> {
|
||||
use std::{
|
||||
io::Write,
|
||||
process::{
|
||||
|
@ -87,6 +110,7 @@ pub fn parse_inst (buf: [u8; 4]) -> Option <Inst>
|
|||
0x2e => Inst::MmBin (a, b, c),
|
||||
0x30 => Inst::MmBinK (a, b, c, k),
|
||||
0x33 => Inst::Not (a, b),
|
||||
0x34 => Inst::Len (a, b),
|
||||
0x3c => Inst::EqK (a, b, k),
|
||||
0x3d => Inst::EqI (a, i_sb (buf)?, k),
|
||||
0x38 => Inst::Jmp (s_j),
|
||||
|
@ -111,15 +135,42 @@ struct Header {
|
|||
inst_count: u8,
|
||||
}
|
||||
|
||||
/// loadUnsigned in PUC Lua
|
||||
/// Decodes a varint format that has 7 bits per bytes and the 8th bit
|
||||
/// is set to 1 on the last byte.
|
||||
|
||||
fn load_unsigned <R: Read> (rdr: &mut R, limit: usize) -> usize {
|
||||
// Shrink the limit so we can tell when we pass it
|
||||
let limit = limit >> 7;
|
||||
|
||||
let mut x = 0;
|
||||
for _ in 0..32 {
|
||||
let b = parse_byte (rdr).unwrap ();
|
||||
if x >= limit {
|
||||
panic! ("integer overflow {x} >= {limit}");
|
||||
}
|
||||
x = (x << 7) | (b as usize & 0x7f);
|
||||
if (b & 0x80) != 0 {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
x
|
||||
}
|
||||
|
||||
fn load_size <R: Read> (rdr: &mut R) -> usize {
|
||||
load_unsigned (rdr, usize::MAX)
|
||||
}
|
||||
|
||||
// loadString in PUC Lua. Doesn't work with long strings yet.
|
||||
|
||||
fn parse_string <R: Read> (rdr: &mut R) -> Option <String> {
|
||||
let len = match parse_int (rdr)? {
|
||||
0 => 0,
|
||||
let len = match load_size (rdr) {
|
||||
0 => return Some (String::new ()),
|
||||
x => x - 1,
|
||||
};
|
||||
|
||||
let mut buf = vec! [0u8; len as usize];
|
||||
let mut buf = vec! [0u8; len];
|
||||
rdr.read_exact (&mut buf).ok ()?;
|
||||
Some (String::from_utf8 (buf).ok ()?)
|
||||
}
|
||||
|
@ -162,22 +213,22 @@ pub fn parse_block <R: Read> (rdr: &mut R, blocks: &mut Vec <Block>)
|
|||
use crate::value::Value;
|
||||
|
||||
parse_string (rdr)?; // function name
|
||||
parse_int (rdr)?; // start line in source code
|
||||
parse_int (rdr)?; // last line in source code
|
||||
parse_byte (rdr)?; // num params
|
||||
parse_byte (rdr)?; // is_vararg
|
||||
parse_byte (rdr)?; // maxstacksize, might be same as num slots?
|
||||
parse_int (rdr).unwrap (); // start line in source code
|
||||
parse_int (rdr).unwrap (); // last line in source code
|
||||
parse_byte (rdr).unwrap (); // num params
|
||||
parse_byte (rdr).unwrap (); // is_vararg
|
||||
parse_byte (rdr).unwrap (); // maxstacksize, might be same as num slots?
|
||||
|
||||
let inst_count = parse_int (rdr)?;
|
||||
let inst_count = parse_int (rdr).unwrap ();
|
||||
let mut instructions = Vec::with_capacity (inst_count as usize);
|
||||
|
||||
for _ in 0..inst_count {
|
||||
let mut buf = [0u8; 4];
|
||||
rdr.read_exact (&mut buf).ok ()?;
|
||||
rdr.read_exact (&mut buf).ok ().unwrap ();
|
||||
instructions.push (parse_inst (buf).expect (&format! ("{buf:?}")));
|
||||
}
|
||||
|
||||
let constant_count = parse_int (rdr)?;
|
||||
let constant_count = parse_int (rdr).unwrap ();
|
||||
|
||||
let mut constants = Vec::with_capacity (constant_count as usize);
|
||||
|
||||
|
@ -186,22 +237,26 @@ pub fn parse_block <R: Read> (rdr: &mut R, blocks: &mut Vec <Block>)
|
|||
let const_type = parse_byte (rdr)?;
|
||||
|
||||
let val = match const_type {
|
||||
3 => Value::from (parse_i64 (rdr)?),
|
||||
4 => parse_string (rdr)?.into (),
|
||||
19 => Value::from (parse_float (rdr)?),
|
||||
3 => parse_i64 (rdr).unwrap ().into (),
|
||||
4 => parse_string (rdr).unwrap ().into (),
|
||||
|
||||
// For LUA_TNUMBER, PUC Lua uses a macro that adds 16 to signify a float
|
||||
19 => parse_float (rdr).unwrap ().into (),
|
||||
// 0x10 + 4 = long string
|
||||
20 => parse_string (rdr).unwrap ().into (),
|
||||
x => panic! ("Constant {} has type {}", i, x),
|
||||
};
|
||||
|
||||
constants.push (val);
|
||||
}
|
||||
|
||||
let upvalue_count = parse_int (rdr)? as usize;
|
||||
let upvalue_count = parse_int (rdr).unwrap () as usize;
|
||||
|
||||
for _ in 0..upvalue_count {
|
||||
// Just ignore these
|
||||
|
||||
for _ in 0..3 {
|
||||
parse_byte (rdr)?;
|
||||
parse_byte (rdr).unwrap ();
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -214,9 +269,9 @@ pub fn parse_block <R: Read> (rdr: &mut R, blocks: &mut Vec <Block>)
|
|||
// Recursion
|
||||
|
||||
// Subfunctions. PUC calls them protos.
|
||||
let protos_count = parse_int (rdr)?;
|
||||
let protos_count = parse_int (rdr).unwrap ();
|
||||
for _ in 0..protos_count {
|
||||
parse_block (rdr, blocks)?;
|
||||
parse_block (rdr, blocks).unwrap ();
|
||||
}
|
||||
|
||||
// Skip over debug stuff
|
||||
|
@ -224,26 +279,26 @@ pub fn parse_block <R: Read> (rdr: &mut R, blocks: &mut Vec <Block>)
|
|||
// I think this is delta line numbers, e.g. most instructions
|
||||
// have 0, but when you go to a new source line it's 1+.
|
||||
|
||||
let lineinfo_count = parse_int (rdr)?;
|
||||
let lineinfo_count = parse_int (rdr).unwrap ();
|
||||
for _ in 0..lineinfo_count {
|
||||
parse_byte (rdr)?;
|
||||
parse_byte (rdr).unwrap ();
|
||||
}
|
||||
|
||||
// Absolute line info, didn't see that in my test files
|
||||
|
||||
let abslineinfo_count = parse_int (rdr)?;
|
||||
let abslineinfo_count = parse_int (rdr).unwrap ();
|
||||
assert_eq! (abslineinfo_count, 0);
|
||||
|
||||
let local_count = parse_int (rdr)?;
|
||||
let local_count = parse_int (rdr).unwrap ();
|
||||
for _ in 0..local_count {
|
||||
parse_string(rdr)?;
|
||||
parse_int (rdr)?;
|
||||
parse_int (rdr)?;
|
||||
parse_string(rdr).unwrap ();
|
||||
parse_int (rdr).unwrap ();
|
||||
parse_int (rdr).unwrap ();
|
||||
}
|
||||
|
||||
let upvalue_count = parse_int (rdr)?;
|
||||
let upvalue_count = parse_int (rdr).unwrap ();
|
||||
for _ in 0..upvalue_count {
|
||||
parse_string (rdr)?;
|
||||
parse_string (rdr).unwrap ();
|
||||
}
|
||||
|
||||
Some (())
|
||||
|
@ -257,11 +312,11 @@ pub fn parse_chunk <R: Read> (rdr: &mut R) -> Option <Chunk> {
|
|||
let mut hdr = [0u8; 32];
|
||||
rdr.read_exact (&mut hdr).ok ()?;
|
||||
|
||||
assert_eq! (&hdr [0..8], &[0x1b, 0x4c, 0x75, 0x61, 0x54, 0x00, 0x19, 0x93], "This isn't a Lua 5.4 bytecode file");
|
||||
|
||||
let mut blocks = vec![];
|
||||
|
||||
while let Some (_) = parse_block (rdr, &mut blocks) {
|
||||
//
|
||||
}
|
||||
parse_block (rdr, &mut blocks).unwrap ();
|
||||
|
||||
Some (Chunk {
|
||||
blocks,
|
||||
|
@ -275,6 +330,28 @@ pub fn parse_chunk_from_bytes (b: &[u8]) -> Option <Chunk> {
|
|||
|
||||
#[cfg (test)]
|
||||
mod tests {
|
||||
#[test]
fn load_size () {
    // Decode `bytes` through an in-memory reader
    let decode = |bytes: &[u8]| super::load_size (&mut std::io::Cursor::new (bytes));

    // (encoded bytes, expected value)
    let cases: &[(&[u8], usize)] = &[
        // Single byte, terminator bit set
        (&[0x80], 0),
        (&[0x81], 1),
        (&[0x82], 2),
        (&[0xff], 127),
        // Two bytes, 7 bits each
        (&[0x01, 0x80], 128),
        (&[0x01, 0x81], 129),
        (&[0x02, 0x80], 256),
        (&[0x7f, 0xfe], 16382),
        (&[0x7f, 0xff], 16383),
        // Smallest value that needs three bytes
        (&[0x01, 0x00, 0x80], 16384),
    ];

    for &(bytes, expected) in cases {
        assert_eq! (decode (bytes), expected);
    }
}
|
||||
|
||||
#[test]
|
||||
fn parse_inst () {
|
||||
use super::Inst;
|
||||
|
|
13
src/main.rs
13
src/main.rs
|
@ -11,6 +11,7 @@ mod tests;
|
|||
fn main () {
|
||||
use state::State;
|
||||
|
||||
let mut list_bytecode = false;
|
||||
let mut pipe_bytecode = false;
|
||||
let mut script = None;
|
||||
|
||||
|
@ -19,6 +20,7 @@ fn main () {
|
|||
|
||||
while let Some (arg) = args.next () {
|
||||
match arg.as_str () {
|
||||
"--list-bytecode" => list_bytecode = true,
|
||||
"--pipe-bytecode" => pipe_bytecode = true,
|
||||
"--script" => script = Some (args.next ().unwrap ()),
|
||||
"--" => break,
|
||||
|
@ -26,9 +28,8 @@ fn main () {
|
|||
}
|
||||
}
|
||||
|
||||
let lua_file = if let Some (script) = script {
|
||||
let source = std::fs::read (script).expect ("couldn't load Lua source code");
|
||||
let bytecode = loader::compile_bytecode(source);
|
||||
let chunk = if let Some (script) = script {
|
||||
let bytecode = loader::compile_bytecode_from_file (&script);
|
||||
let mut rdr = std::io::Cursor::new (bytecode);
|
||||
loader::parse_chunk (&mut rdr).unwrap ()
|
||||
}
|
||||
|
@ -40,6 +41,10 @@ fn main () {
|
|||
unimplemented!();
|
||||
};
|
||||
|
||||
if list_bytecode {
|
||||
dbg! (&chunk);
|
||||
}
|
||||
|
||||
let mut vm = State::default ();
|
||||
if std::env::var("LUA_DEBUG").is_ok() {
|
||||
vm.debug_print = true;
|
||||
|
@ -52,5 +57,5 @@ fn main () {
|
|||
program_counter: 0,
|
||||
});
|
||||
|
||||
vm.execute_chunk (&lua_file, &upvalues);
|
||||
vm.execute_chunk (&chunk, &upvalues);
|
||||
}
|
||||
|
|
24
src/state.rs
24
src/state.rs
|
@ -8,12 +8,14 @@ use crate::{
|
|||
},
|
||||
};
|
||||
|
||||
#[derive (Debug)]
|
||||
pub struct Block {
|
||||
pub instructions: Vec <Instruction>,
|
||||
pub constants: Vec <Value>,
|
||||
pub upvalue_count: usize,
|
||||
}
|
||||
|
||||
#[derive (Debug)]
|
||||
pub struct Chunk {
|
||||
pub blocks: Vec <Block>,
|
||||
}
|
||||
|
@ -155,10 +157,18 @@ impl State {
|
|||
|
||||
match instruction {
|
||||
Instruction::Add (a, b, c) => {
|
||||
let v_b = self.reg (*b).as_float ().unwrap ();
|
||||
let v_c = self.reg (*c).as_float ().unwrap ();
|
||||
let v_b = self.reg (*b);
|
||||
let v_c = self.reg (*c);
|
||||
|
||||
*self.reg_mut (*a) = Value::from (v_b + v_c);
|
||||
let sum = if let (Some (v_b), Some (v_c)) = (v_b.as_int (), v_c.as_int ())
|
||||
{
|
||||
Value::from (v_b + v_c)
|
||||
}
|
||||
else {
|
||||
Value::from (v_b.as_float ().unwrap () + v_c.as_float ().unwrap ())
|
||||
};
|
||||
|
||||
*self.reg_mut (*a) = sum;
|
||||
},
|
||||
Instruction::Call (a, b, _c) => {
|
||||
let b = usize::from (*b);
|
||||
|
@ -338,6 +348,14 @@ impl State {
|
|||
*self.reg_mut (*a) = upvalues [b].clone ();
|
||||
},
|
||||
Instruction::Jmp (s_j) => next_pc += s_j,
|
||||
Instruction::Len (a, b) => {
|
||||
let len = match self.reg (*b) {
|
||||
Value::String (s) => s.len (),
|
||||
_ => unimplemented!(),
|
||||
};
|
||||
|
||||
*self.reg_mut (*a) = len.into ();
|
||||
}
|
||||
Instruction::LoadF (a, sbx) => {
|
||||
*self.reg_mut (*a) = Value::Float (*sbx as f64);
|
||||
}
|
||||
|
|
23
src/tests.rs
23
src/tests.rs
|
@ -42,7 +42,7 @@ fn run_bytecode (args: &[&str], bc: &[u8]) -> Vec <Value> {
|
|||
/// and returns the output
|
||||
|
||||
fn run_source (args: &[&str], s: &str) -> Vec <Value> {
|
||||
let bc = loader::compile_bytecode (s.as_bytes ().to_vec ());
|
||||
let bc = loader::compile_bytecode_from_stdin (s.as_bytes ().to_vec ());
|
||||
run_bytecode (args, &bc)
|
||||
}
|
||||
|
||||
|
@ -128,22 +128,11 @@ fn bools () {
|
|||
|
||||
#[test]
|
||||
fn closure () {
|
||||
let bytecode = include_bytes! ("../test_vectors/closure.luac");
|
||||
let mut rdr = std::io::Cursor::new (bytecode);
|
||||
let file = crate::loader::parse_chunk (&mut rdr).unwrap ();
|
||||
let source = include_bytes! ("../test_vectors/closure.lua");
|
||||
let bytecode = &crate::loader::compile_bytecode_from_stdin (source.to_vec ());
|
||||
let chunk = crate::loader::parse_chunk_from_bytes (bytecode).unwrap ();
|
||||
|
||||
for (arg, expected) in [
|
||||
// Run the same test twice so clippy won't complain about a vec of 1 element
|
||||
(vec! ["_exe_name"], vec! [23.0.into ()]),
|
||||
(vec! ["_exe_name"], vec! [23.0.into ()]),
|
||||
] {
|
||||
let expected: Vec <Value> = expected;
|
||||
let mut vm = State::default ();
|
||||
let upvalues = State::upvalues_from_args (arg.into_iter ().map (|s| s.to_string ()));
|
||||
let actual = vm.execute_chunk (&file, &upvalues);
|
||||
|
||||
assert_eq! (actual, expected);
|
||||
}
|
||||
assert_eq! (run_chunk (&["_exe_name"], &chunk), vec! [Value::from (23i64)]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
@ -289,7 +278,7 @@ fn is_93 () {
|
|||
end
|
||||
"#;
|
||||
|
||||
let bc = loader::compile_bytecode (src.as_bytes ().to_vec ());
|
||||
let bc = loader::compile_bytecode_from_stdin (src.as_bytes ().to_vec ());
|
||||
let chunk = loader::parse_chunk_from_bytes (&bc).unwrap ();
|
||||
|
||||
assert_eq! (chunk.blocks [0].instructions [3], Inst::EqK (0, 1, false));
|
||||
|
|
|
@ -156,6 +156,8 @@ impl PartialEq <i64> for Value {
|
|||
}
|
||||
|
||||
impl Value {
|
||||
/// Coerces ints to float
|
||||
|
||||
pub fn as_float (&self) -> Option <f64> {
|
||||
match self {
|
||||
Self::Float (x) => Some (*x),
|
||||
|
@ -165,6 +167,8 @@ impl Value {
|
|||
}
|
||||
}
|
||||
|
||||
/// Does not coerce floats
|
||||
|
||||
pub fn as_int (&self) -> Option <i64> {
|
||||
match self {
|
||||
Self::Integer (x) => Some (*x),
|
||||
|
|
Binary file not shown.
|
@ -0,0 +1,2 @@
|
|||
local s = "a very long string with more than 128 characters, which will require 2 bytes to encode in Lua's bytecode format. This allows me to debug my bytecode loader, which doesn't seem to be handling string sizes properly."
|
||||
print (#s)
|
Loading…
Reference in New Issue