Compare commits

...

2 Commits

Author SHA1 Message Date
_ 8baea40e82 ♻️ refactor: extract `State::eval` for testing bugs the REPL finds 2023-10-01 23:07:52 -05:00
_ f9e8f26ac3 star: add a primitive REPL
This turned up a few new bugs in the VM
2023-10-01 22:50:06 -05:00
10 changed files with 269 additions and 66 deletions

58
Cargo.lock generated
View File

@ -12,3 +12,61 @@ dependencies = [
[[package]]
name = "lunar_wave_vm"
version = "0.1.0"
dependencies = [
"thiserror",
]
[[package]]
name = "proc-macro2"
version = "1.0.67"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3d433d9f1a3e8c1263d9456598b16fec66f4acc9a74dacffd35c7bb09b3a1328"
dependencies = [
"unicode-ident",
]
[[package]]
name = "quote"
version = "1.0.33"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5267fca4496028628a95160fc423a33e8b2e6af8a5302579e322e4b520293cae"
dependencies = [
"proc-macro2",
]
[[package]]
name = "syn"
version = "2.0.37"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7303ef2c05cd654186cb250d29049a24840ca25d2747c25c0381c8d9e2f582e8"
dependencies = [
"proc-macro2",
"quote",
"unicode-ident",
]
[[package]]
name = "thiserror"
version = "1.0.49"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1177e8c6d7ede7afde3585fd2513e611227efd6481bd78d2e82ba1ce16557ed4"
dependencies = [
"thiserror-impl",
]
[[package]]
name = "thiserror-impl"
version = "1.0.49"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "10712f02019e9288794769fba95cd6847df9874d49d871d062172f9dd41bc4cc"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "unicode-ident"
version = "1.0.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b"

View File

@ -1,6 +1,9 @@
// cargo run -- --script lunar_wave_vm/test_vectors/fizz_buzz.lua
use std::io::Read;
use std::io::{
Read,
Write,
};
use lunar_wave_vm as lwvm;
@ -36,9 +39,8 @@ fn lunar_wave (args: Vec <String>) -> Result <Vec <lwvm::Value>, lwvm::StepError
"-" => {
let mut buf = vec! [];
std::io::stdin ().read_to_end (&mut buf).unwrap ();
let bc = lwvm::ensure_bytecode (buf);
let mut rdr = std::io::Cursor::new (bc);
chunk = Some (lwvm::parse_chunk (&mut rdr).unwrap ());
let bc = lwvm::ensure_bytecode (buf).unwrap ();
chunk = Some (lwvm::parse_chunk (&bc).unwrap ());
lua_args = vec! ["-".to_string ()];
},
@ -49,8 +51,7 @@ fn lunar_wave (args: Vec <String>) -> Result <Vec <lwvm::Value>, lwvm::StepError
}
else if chunk.is_none () {
let bc = lwvm::compile_bytecode_from_file (x);
let mut rdr = std::io::Cursor::new (bc);
chunk = Some (lwvm::parse_chunk (&mut rdr).unwrap ());
chunk = Some (lwvm::parse_chunk (&bc).unwrap ());
lua_args = vec! [x.to_string ()];
}
@ -61,24 +62,55 @@ fn lunar_wave (args: Vec <String>) -> Result <Vec <lwvm::Value>, lwvm::StepError
}
}
let chunk = chunk.unwrap ();
match chunk {
Some (chunk) => debugger (DebuggerParams {
breakpoints,
chunk,
list_bytecode,
lua_args,
}),
None => repl (ReplParams {
list_bytecode,
lua_args,
}),
}
}
if list_bytecode {
dbg! (&chunk);
/// Everything `debugger` needs to run a loaded chunk.
struct DebuggerParams {
    /// Breakpoints checked against the VM on every pass of the debugger loop.
    breakpoints: Vec<lwvm::Breakpoint>,
    /// The parsed chunk handed to the VM.
    chunk: lwvm::Chunk,
    /// When set, the chunk is dumped with `dbg!` before execution starts.
    list_bytecode: bool,
    /// Arguments turned into the VM's upvalues.
    lua_args: Vec<String>,
}
/// Everything `repl` needs; unlike the debugger there is no pre-loaded chunk.
struct ReplParams {
    /// When set, each chunk entered at the prompt is dumped with `dbg!`.
    list_bytecode: bool,
    /// Arguments turned into the VM's upvalues.
    lua_args: Vec<String>,
}
/// The interpreter mode, which has optional debugging abilities
/// sort of like a cut-down gdb.
fn debugger (params: DebuggerParams) -> Result <Vec <lwvm::Value>, lwvm::StepError>
{
if params.list_bytecode {
dbg! (&params.chunk);
}
let upvalues = lwvm::State::upvalues_from_args (lua_args.into_iter ());
let upvalues = lwvm::State::upvalues_from_args (params.lua_args.into_iter ());
let mut vm = lwvm::State::new (chunk, upvalues);
let mut vm = lwvm::State::new (params.chunk, upvalues);
if std::env::var("LWVM_DEBUG").is_ok() {
vm.debug_print = true;
}
// Variables for interactive debugging
let mut in_break = false;
let mut last_input = String::new ();
loop {
if in_break || breakpoints.iter ().any (|bp| vm.at_breakpoint (bp)) {
if in_break || params.breakpoints.iter ().any (|bp| vm.at_breakpoint (bp)) {
in_break = true;
dbg! (&vm.stack);
@ -122,3 +154,52 @@ fn lunar_wave (args: Vec <String>) -> Result <Vec <lwvm::Value>, lwvm::StepError
}
}
/// A REPL that's sort of like the PUC Lua or LuaJIT REPL,
/// but with fewer features.
/// It still has to cheat and run `luac5.4` as a subprocess.
///
/// Each line read from stdin is compiled, parsed into a chunk, installed
/// into one long-lived VM with `set_chunk`, and executed. Compile errors,
/// unparseable bytecode and VM errors are reported and the loop continues.
///
/// Returns `Ok` with an empty vector once stdin reaches end-of-file.
///
/// # Panics
/// Panics if writing the prompt to stdout or reading from stdin fails.
fn repl(params: ReplParams) -> Result<Vec<lwvm::Value>, lwvm::StepError> {
    let upvalues = lwvm::State::upvalues_from_args(params.lua_args.into_iter());
    let mut vm = lwvm::State::new(lwvm::Chunk::default(), upvalues);

    println!("Lunar Wave 0.1.0-modified Copyright (C) 2023 ReactorScram (implements Lua 5.4 Copyright (C) 1994-2022 Lua.org, PUC-Rio)");

    loop {
        {
            // Scope the lock so it is released before we block on stdin.
            let mut stdout = std::io::stdout().lock();
            stdout.write_all(b"> ").unwrap();
            stdout.flush().unwrap();
        }

        let mut input = String::new();
        std::io::stdin().read_line(&mut input).unwrap();

        // `read_line` leaves the buffer empty only at end-of-file; a blank
        // line still contains its newline.
        if input.is_empty() {
            println!();
            return Ok(vec![]);
        }

        let bytecode = match lwvm::compile_bytecode(input.into_bytes()) {
            Ok(x) => x,
            Err(e) => {
                eprintln!("Compile error from luac subprocess:");
                eprintln!("{}", e);
                continue;
            }
        };

        // A parse failure must not take the whole interactive session down.
        let chunk = match lwvm::parse_chunk(&bytecode) {
            Some(x) => x,
            None => {
                eprintln!("Could not parse the bytecode produced by the luac subprocess");
                continue;
            }
        };

        if params.list_bytecode {
            dbg!(&chunk);
        }

        vm.set_chunk(chunk);
        match vm.execute() {
            Ok(x) => {
                if !x.is_empty() {
                    println!("{x:?}")
                }
            }
            Err(e) => println!("{e:?}"),
        }
    }
}

View File

@ -4,3 +4,6 @@ description = "A Lua virtual machine implementation"
version = "0.1.0"
edition = "2021"
authors = ["ReactorScram"]
[dependencies]
thiserror = "1.0.49"

View File

@ -0,0 +1,7 @@
#[derive (Debug, thiserror::Error)]
pub enum Error {
#[error ("loader")]
Loader (#[from] crate::loader::Error),
#[error ("VM step")]
VmStep (#[from] crate::state::StepError),
}

View File

@ -5,6 +5,7 @@ pub enum Instruction {
Call (u8, u8, u8),
Closure (u8, u32),
Concat (u8, u8),
Div (u8, u8, u8),
@ -84,7 +85,7 @@ pub enum Instruction {
SetList (u8, u8, u8, bool),
SetTabUp (u8, u8, u8),
SetTabUp (u8, u8, u8, bool),
Sub (u8, u8, u8),

View File

@ -1,13 +1,17 @@
mod error;
mod instruction;
mod loader;
mod state;
mod value;
pub use error::Error as Error;
pub use loader::compile_bytecode_from_file as compile_bytecode_from_file;
pub use loader::compile_bytecode as compile_bytecode;
pub use loader::ensure_bytecode as ensure_bytecode;
pub use loader::parse_chunk as parse_chunk;
pub use loader::parse_chunk_from_reader as parse_chunk_from_reader;
pub use state::Breakpoint as Breakpoint;
pub use state::Chunk as Chunk;
pub use state::State as State;
pub use state::StepError as StepError;
pub use state::StepOutput as StepOutput;

View File

@ -29,12 +29,18 @@ pub fn compile_bytecode_from_file (path: &str) -> Vec <u8> {
output.stdout.as_slice ().to_vec ()
}
#[derive (Debug, thiserror::Error)]
pub enum Error {
#[error ("compile")]
Compile (String)
}
/// Invoke `luac` as a subprocess
/// Luckily luac is single-pass, so we can just pipe in and out
///
/// `source` is a Vec because we move it to a worker thread
pub fn compile_bytecode (source: Vec <u8>) -> Vec <u8> {
pub fn compile_bytecode (source: Vec <u8>) -> Result <Vec <u8>, Error> {
use std::{
io::Write,
process::{
@ -48,6 +54,7 @@ pub fn compile_bytecode (source: Vec <u8>) -> Vec <u8> {
.arg ("-") // Standard output
.arg ("-") // Input from standard input
.stdin (Stdio::piped ())
.stderr (Stdio::piped ())
.stdout (Stdio::piped ())
.spawn ()
.expect ("failed to execute `luac5.4`. Is Lua installed?");
@ -61,17 +68,23 @@ pub fn compile_bytecode (source: Vec <u8>) -> Vec <u8> {
.wait_with_output ()
.expect ("failed to wait on child");
output.stdout.as_slice ().to_vec ()
if output.status.success () && output.status.code () == Some (0)
{
Ok (output.stdout)
}
else {
Err (Error::Compile (String::from_utf8 (output.stderr).unwrap ()))
}
}
/// Checks whether the input is already bytecode, or is possibly
/// Lua source code. If it's source code, compiles and returns bytecode.
/// If it's bytecode, just returns the input.
pub fn ensure_bytecode (buffer: Vec <u8>) -> Vec <u8> {
pub fn ensure_bytecode (buffer: Vec <u8>) -> Result <Vec <u8>, Error> {
let bytecode_header = &[0x1b, 0x4c, 0x75, 0x61, 0x54, 0x00, 0x19, 0x93];
if buffer.starts_with (bytecode_header) {
return buffer;
return Ok (buffer);
}
compile_bytecode (buffer)
@ -116,7 +129,7 @@ pub fn parse_inst (buf: [u8; 4]) -> Option <Inst>
0x0c => Inst::GetTable (a, b, c),
0x0d => Inst::GetI (a, b, c),
0x0e => Inst::GetField (a, b, c),
0x0f => Inst::SetTabUp (a, b, c),
0x0f => Inst::SetTabUp (a, b, c, k),
0x11 => Inst::SetI (a, b, c, k),
0x12 => Inst::SetField (a, b, c, k),
0x13 => Inst::NewTable (a),
@ -133,6 +146,7 @@ pub fn parse_inst (buf: [u8; 4]) -> Option <Inst>
0x31 => Inst::UnM (a, b),
0x33 => Inst::Not (a, b),
0x34 => Inst::Len (a, b),
0x35 => Inst::Concat (a, b),
0x3c => Inst::EqK (a, b, k),
0x3d => Inst::EqI (a, i_sb (buf)?, k),
0x38 => Inst::Jmp (s_j),
@ -333,8 +347,12 @@ pub fn parse_block <R: Read> (rdr: &mut R, blocks: &mut Vec <Block>)
Some (())
}
/// Parses a chunk from an in-memory byte slice.
///
/// Wraps `buf` in a `std::io::Cursor` and delegates to
/// `parse_chunk_from_reader`, returning whatever it returns.
pub fn parse_chunk(buf: &[u8]) -> Option<Chunk> {
    parse_chunk_from_reader(&mut std::io::Cursor::new(buf))
}
pub fn parse_chunk <R: Read> (rdr: &mut R) -> Option <Chunk> {
pub fn parse_chunk_from_reader <R: Read> (rdr: &mut R) -> Option <Chunk> {
// Discard 32 bytes from the start of the file.
// This is magic number, version number, etc.
@ -352,11 +370,6 @@ pub fn parse_chunk <R: Read> (rdr: &mut R) -> Option <Chunk> {
})
}
pub fn parse_chunk_from_bytes (b: &[u8]) -> Option <Chunk> {
let mut rdr = std::io::Cursor::new (b);
parse_chunk (&mut rdr)
}
#[cfg (test)]
mod tests {
#[test]
@ -447,8 +460,7 @@ mod tests {
}
if false {
let mut rdr = std::io::Cursor::new (bytecode.clone ());
let file = crate::loader::parse_chunk (&mut rdr).unwrap ();
let file = crate::loader::parse_chunk (bytecode).unwrap ();
assert_eq! (file.blocks.len (), 5);
}

View File

@ -20,12 +20,12 @@ pub struct Block {
pub upvalues: Vec <Upvalue>,
}
#[derive (Clone, Debug)]
#[derive (Clone, Debug, Default)]
pub struct Chunk {
pub blocks: Vec <Block>,
}
#[derive (Clone, Debug)]
#[derive (Clone, Debug, Default)]
pub struct StackFrame {
// i32 makes it a little easier to implement jumps
// Starts at 0 right after OP_CALL
@ -133,11 +133,14 @@ pub enum StepOutput {
ChunkReturned (Vec <Value>),
}
#[derive (Debug)]
pub struct StepError {
frame: StackFrame,
inst: Instruction,
msg: &'static str,
#[derive (Debug, thiserror::Error)]
pub enum StepError {
#[error ("generic")]
Generic {
frame: StackFrame,
inst: Instruction,
msg: &'static str,
},
}
impl State {
@ -148,11 +151,7 @@ impl State {
registers: vec! [Value::Nil; 256],
top: 0,
stack: vec! [
StackFrame {
program_counter: 0,
block_idx: 0,
register_offset: 0,
},
StackFrame::default (),
],
debug_print: false,
step_count: 0,
@ -221,7 +220,7 @@ impl State {
fn make_step_error (&self, msg: &'static str, inst: &Instruction) -> StepError
{
StepError {
StepError::Generic {
frame: self.stack.last ().unwrap ().clone (),
inst: inst.clone (),
msg,
@ -386,6 +385,9 @@ impl State {
upvalues: new_upvalues,
});
},
Instruction::Concat (a, b) => {
unimplemented! ("OP_CONCAT")
},
Instruction::Div (a, b, c) => {
let v_b = self.reg (b);
let v_c = self.reg (c);
@ -760,7 +762,22 @@ impl State {
dst.insert_int (i64::from (c + i), src.clone ());
}
},
Instruction::SetTabUp (_a, _b, _c) => unimplemented! (),
Instruction::SetTabUp (a, b, c, k_flag) => {
let a = usize::try_from (a).unwrap ();
let b = usize::try_from (b).unwrap ();
let value = if k_flag {
&k [usize::from (c)]
}
else {
self.reg (c)
}
.clone ();
let table = self.upvalues.get_mut (a).unwrap ().as_table ().unwrap ();
let key = k.get (b).unwrap ().as_str ().expect ("SetTabUp K[B] must be a string");
table.borrow_mut ().insert_str (key, value);
},
Instruction::Sub (a, b, c) => {
let v_b = self.reg (b);
let v_c = self.reg (c);
@ -886,15 +903,20 @@ impl State {
Ok (None)
}
pub fn execute_chunk (&mut self, breakpoints: &[Breakpoint])
pub fn eval (&mut self, src: &str) -> Result <Vec <Value>, crate::Error>
{
let bytecode = crate::compile_bytecode (src.as_bytes ().to_vec ())?;
let chunk = crate::parse_chunk (&bytecode).unwrap ();
self.set_chunk (chunk);
Ok (self.execute ()?)
}
pub fn execute (&mut self)
-> Result <Vec <Value>, StepError> {
let max_iters = 2000;
for _ in 0..max_iters {
if breakpoints.iter ().any (|bp| self.at_breakpoint (bp)) {
dbg! (&self);
}
match self.step ()? {
None => (),
Some (StepOutput::ChunkReturned (x)) => return Ok (x),
@ -904,4 +926,9 @@ impl State {
dbg! (self);
panic! ("Hit max iterations before block returned");
}
pub fn set_chunk (&mut self, chunk: Chunk) {
self.stack = vec! [Default::default ()];
self.chunk = chunk;
}
}

View File

@ -25,14 +25,14 @@ fn calculate_hash<T: Hash>(t: &T) -> u64 {
fn run_chunk (args: &[&str], chunk: Chunk) -> Vec <Value> {
let upvalues = State::upvalues_from_args (args.into_iter ().map (|s| s.to_string ()));
let mut vm = State::new (chunk, upvalues);
vm.execute_chunk (&[]).unwrap ()
vm.execute ().unwrap ()
}
/// Takes arguments and Lua bytecode, loads it, runs it,
/// and return the output
fn run_bytecode (args: &[&str], bc: &[u8]) -> Vec <Value> {
let chunk = loader::parse_chunk_from_bytes (&bc).unwrap ();
let chunk = loader::parse_chunk (&bc).unwrap ();
run_chunk (args, chunk)
}
@ -42,7 +42,7 @@ fn run_bytecode (args: &[&str], bc: &[u8]) -> Vec <Value> {
/// and returns the output
fn run_source (args: &[&str], s: &str) -> Vec <Value> {
let bc = loader::compile_bytecode (s.as_bytes ().to_vec ());
let bc = loader::compile_bytecode (s.as_bytes ().to_vec ()).unwrap ();
run_bytecode (args, &bc)
}
@ -120,7 +120,7 @@ fn bools () {
let upvalues = State::upvalues_from_args (arg.into_iter ().map (|s| s.to_string ()));
let mut vm = State::new (chunk.clone (), upvalues);
let actual = vm.execute_chunk (&[]).unwrap ();
let actual = vm.execute ().unwrap ();
assert_eq! (actual, expected);
}
}
@ -128,8 +128,8 @@ fn bools () {
#[test]
fn closure () {
let source = include_bytes! ("../test_vectors/closure.lua");
let bytecode = &crate::loader::compile_bytecode (source.to_vec ());
let chunk = crate::loader::parse_chunk_from_bytes (bytecode).unwrap ();
let bytecode = &crate::loader::compile_bytecode (source.to_vec ()).unwrap ();
let chunk = crate::loader::parse_chunk (bytecode).unwrap ();
assert_eq! (run_chunk (&["_exe_name"], chunk), vec! [Value::from (23i64)]);
}
@ -176,7 +176,7 @@ fn floats () {
let expected: Vec <Value> = expected;
let upvalues = State::upvalues_from_args (arg.into_iter ().map (|s| s.to_string ()));
let mut vm = State::new (chunk.clone (), upvalues);
let actual = vm.execute_chunk (&[]).unwrap ();
let actual = vm.execute ().unwrap ();
assert_eq! (actual, expected);
}
@ -185,8 +185,8 @@ fn floats () {
#[test]
fn fma () {
let source = include_bytes! ("../test_vectors/fma.lua");
let bytecode = &crate::loader::compile_bytecode (source.to_vec ());
let chunk = crate::loader::parse_chunk_from_bytes (bytecode).unwrap ();
let bytecode = &crate::loader::compile_bytecode (source.to_vec ()).unwrap ();
let chunk = crate::loader::parse_chunk (bytecode).unwrap ();
assert_eq! (chunk.blocks.len (), 5);
assert_eq! (chunk.blocks [3].upvalues.len (), 2);
@ -194,12 +194,23 @@ fn fma () {
let arg = vec! ["_exe_name"];
let upvalues = State::upvalues_from_args (arg.into_iter ().map (|s| s.to_string ()));
let mut vm = State::new (chunk, upvalues);
let actual = vm.execute_chunk (&[]).unwrap ();
let actual = vm.execute ().unwrap ();
let expected = vec! [Value::from (122)];
assert_eq! (actual, expected);
}
/// Feeds a short sequence of snippets through one VM, the way the REPL
/// would. Results are deliberately discarded with `.ok ()`, so only a
/// panic inside the VM can fail this test.
#[test]
fn function_calls() {
    let args = std::iter::once("_exe_name".to_string());
    let upvalues = crate::State::upvalues_from_args(args);
    let mut vm = crate::State::new(crate::Chunk::default(), upvalues);

    let snippets = [
        "print (x ())",
        "x = function () return 5 end",
        "print (x ())",
    ];
    for src in snippets {
        vm.eval(src).ok();
    }
}
#[test]
fn heap () {
use std::{
@ -265,7 +276,7 @@ fn is_93 () {
assert_ne! (calculate_hash (&Value::from ("94")), calculate_hash (&Value::from ("93")));
assert_ne! (Value::Nil, Value::from ("93"));
let src = r#"
let src = br#"
if arg [1] == "93" then
print "it's 93"
return 0
@ -275,8 +286,8 @@ fn is_93 () {
end
"#;
let bc = loader::compile_bytecode (src.as_bytes ().to_vec ());
let chunk = loader::parse_chunk_from_bytes (&bc).unwrap ();
let bc = loader::compile_bytecode (src.to_vec ()).unwrap ();
let chunk = loader::parse_chunk (&bc).unwrap ();
assert_eq! (chunk.blocks [0].instructions [3], Inst::EqK (0, 1, false));
@ -359,12 +370,12 @@ fn tables_2 () {
fn tailcall () {
use crate::instruction::Instruction;
let src = r#"
let src = br#"
return tonumber ("5")
"#;
let bc = loader::compile_bytecode (src.as_bytes ().to_vec ());
let chunk = loader::parse_chunk_from_bytes (&bc).unwrap ();
let bc = loader::compile_bytecode (src.to_vec ()).unwrap ();
let chunk = loader::parse_chunk (&bc).unwrap ();
assert_eq! (chunk.blocks [0].instructions [3], Instruction::TailCall (0, 2, 1, false));

View File

@ -19,9 +19,8 @@ fn embedding () {
1
}
let bytecode = lwvm::compile_bytecode (src.to_vec ());
let mut rdr = std::io::Cursor::new (bytecode);
let chunk = lwvm::parse_chunk (&mut rdr).unwrap ();
let bc = lwvm::compile_bytecode (src.to_vec ()).unwrap ();
let chunk = lwvm::parse_chunk (&bc).unwrap ();
let host_lib = [
("add", Value::RsFunc (host_add)),
@ -36,7 +35,7 @@ fn embedding () {
];
let mut vm = State::new (chunk, upvalues);
let output = vm.execute_chunk (&vec! []).unwrap ();
let output = vm.execute ().unwrap ();
assert_eq! (output, vec! [Value::from (2019)]);
}