From f9e8f26ac3e01572bf965dc7437b6d6e2b4a3134 Mon Sep 17 00:00:00 2001 From: _ <_@_> Date: Sun, 1 Oct 2023 22:50:06 -0500 Subject: [PATCH] :star: star: add a primitive REPL This turned up a few new bugs in the VM --- lunar_wave_cli/src/main.rs | 107 +++++++++++++++++++++++++++---- lunar_wave_vm/src/instruction.rs | 3 +- lunar_wave_vm/src/lib.rs | 2 + lunar_wave_vm/src/loader.rs | 32 +++++---- lunar_wave_vm/src/state.rs | 41 ++++++++---- lunar_wave_vm/src/tests.rs | 63 +++++++++++++----- lunar_wave_vm/tests/embedding.rs | 7 +- 7 files changed, 195 insertions(+), 60 deletions(-) diff --git a/lunar_wave_cli/src/main.rs b/lunar_wave_cli/src/main.rs index f0946ce..054f0c2 100644 --- a/lunar_wave_cli/src/main.rs +++ b/lunar_wave_cli/src/main.rs @@ -1,6 +1,9 @@ // cargo run -- --script lunar_wave_vm/test_vectors/fizz_buzz.lua -use std::io::Read; +use std::io::{ + Read, + Write, +}; use lunar_wave_vm as lwvm; @@ -36,9 +39,8 @@ fn lunar_wave (args: Vec ) -> Result , lwvm::StepError "-" => { let mut buf = vec! []; std::io::stdin ().read_to_end (&mut buf).unwrap (); - let bc = lwvm::ensure_bytecode (buf); - let mut rdr = std::io::Cursor::new (bc); - chunk = Some (lwvm::parse_chunk (&mut rdr).unwrap ()); + let bc = lwvm::ensure_bytecode (buf).unwrap (); + chunk = Some (lwvm::parse_chunk (&bc).unwrap ()); lua_args = vec! ["-".to_string ()]; }, @@ -49,8 +51,7 @@ fn lunar_wave (args: Vec ) -> Result , lwvm::StepError } else if chunk.is_none () { let bc = lwvm::compile_bytecode_from_file (x); - let mut rdr = std::io::Cursor::new (bc); - chunk = Some (lwvm::parse_chunk (&mut rdr).unwrap ()); + chunk = Some (lwvm::parse_chunk (&bc).unwrap ()); lua_args = vec! [x.to_string ()]; } @@ -61,24 +62,55 @@ fn lunar_wave (args: Vec ) -> Result , lwvm::StepError } } - let chunk = chunk.unwrap (); - - if list_bytecode { - dbg! 
(&chunk); + match chunk { + Some (chunk) => debugger (DebuggerParams { + breakpoints, + chunk, + list_bytecode, + lua_args, + }), + None => repl (ReplParams { + list_bytecode, + lua_args, + }), + } +} + +struct DebuggerParams { + breakpoints: Vec , + chunk: lwvm::Chunk, + list_bytecode: bool, + lua_args: Vec , +} + +struct ReplParams { + list_bytecode: bool, + lua_args: Vec , +} + +/// The interpreter mode, which has optional debugging abilities +/// sort of like a cut-down gdb. + +fn debugger (params: DebuggerParams) -> Result , lwvm::StepError> +{ + if params.list_bytecode { + dbg! (&params.chunk); } - let upvalues = lwvm::State::upvalues_from_args (lua_args.into_iter ()); + let upvalues = lwvm::State::upvalues_from_args (params.lua_args.into_iter ()); - let mut vm = lwvm::State::new (chunk, upvalues); + let mut vm = lwvm::State::new (params.chunk, upvalues); if std::env::var("LWVM_DEBUG").is_ok() { vm.debug_print = true; } + // Variables for interactive debugging + let mut in_break = false; let mut last_input = String::new (); loop { - if in_break || breakpoints.iter ().any (|bp| vm.at_breakpoint (bp)) { + if in_break || params.breakpoints.iter ().any (|bp| vm.at_breakpoint (bp)) { in_break = true; dbg! (&vm.stack); @@ -122,3 +154,52 @@ fn lunar_wave (args: Vec ) -> Result , lwvm::StepError } } +/// A REPL that's sort of like the PUC Lua or LuaJIT REPL, +/// but with fewer features. +/// It still has to cheat and run `luac5.4` as a subprocess. + +fn repl (params: ReplParams) -> Result , lwvm::StepError> +{ + let upvalues = lwvm::State::upvalues_from_args (params.lua_args.into_iter ()); + + let mut vm = lwvm::State::new (lwvm::Chunk::default (), upvalues); + + println! 
("Lunar Wave 0.1.0-modified Copyright (C) 2023 ReactorScram (implements Lua 5.4 Copyright (C) 1994-2022 Lua.org, PUC-Rio"); + + loop { + { + let mut stdout = std::io::stdout ().lock (); + stdout.write_all (b"> ").unwrap (); + stdout.flush ().unwrap (); + } + + let mut input = Default::default (); + std::io::stdin ().read_line (&mut input).unwrap (); + if input.is_empty () { + println! (); + return Ok (vec! []); + } + + let bytecode = match lwvm::compile_bytecode (input.into_bytes ()) { + Ok (x) => x, + Err (e) => { + eprintln! ("Compile error from luac subprocess:"); + eprintln! ("{}", e); + continue; + }, + }; + let chunk = lwvm::parse_chunk (&bytecode).unwrap (); + + if params.list_bytecode { + dbg! (&chunk); + } + + vm.set_chunk (chunk); + match vm.execute () { + Ok (x) => if ! x.is_empty () { + println! ("{x:?}") + }, + Err (e) => println! ("{e:?}"), + } + } +} diff --git a/lunar_wave_vm/src/instruction.rs b/lunar_wave_vm/src/instruction.rs index fb76c0e..dcb97d2 100644 --- a/lunar_wave_vm/src/instruction.rs +++ b/lunar_wave_vm/src/instruction.rs @@ -5,6 +5,7 @@ pub enum Instruction { Call (u8, u8, u8), Closure (u8, u32), + Concat (u8, u8), Div (u8, u8, u8), @@ -84,7 +85,7 @@ pub enum Instruction { SetList (u8, u8, u8, bool), - SetTabUp (u8, u8, u8), + SetTabUp (u8, u8, u8, bool), Sub (u8, u8, u8), diff --git a/lunar_wave_vm/src/lib.rs b/lunar_wave_vm/src/lib.rs index 173ca6d..f7d66cd 100644 --- a/lunar_wave_vm/src/lib.rs +++ b/lunar_wave_vm/src/lib.rs @@ -7,7 +7,9 @@ pub use loader::compile_bytecode_from_file as compile_bytecode_from_file; pub use loader::compile_bytecode as compile_bytecode; pub use loader::ensure_bytecode as ensure_bytecode; pub use loader::parse_chunk as parse_chunk; +pub use loader::parse_chunk_from_reader as parse_chunk_from_reader; pub use state::Breakpoint as Breakpoint; +pub use state::Chunk as Chunk; pub use state::State as State; pub use state::StepError as StepError; pub use state::StepOutput as StepOutput; diff --git 
a/lunar_wave_vm/src/loader.rs b/lunar_wave_vm/src/loader.rs index b4d63a9..16c7c15 100644 --- a/lunar_wave_vm/src/loader.rs +++ b/lunar_wave_vm/src/loader.rs @@ -34,7 +34,7 @@ pub fn compile_bytecode_from_file (path: &str) -> Vec { /// /// `source` is a Vec because we move it to a worker thread -pub fn compile_bytecode (source: Vec ) -> Vec { +pub fn compile_bytecode (source: Vec ) -> Result , String> { use std::{ io::Write, process::{ @@ -48,6 +48,7 @@ pub fn compile_bytecode (source: Vec ) -> Vec { .arg ("-") // Standard output .arg ("-") // Input from standard input .stdin (Stdio::piped ()) + .stderr (Stdio::piped ()) .stdout (Stdio::piped ()) .spawn () .expect ("failed to execute `luac5.4`. Is Lua installed?"); @@ -61,17 +62,23 @@ pub fn compile_bytecode (source: Vec ) -> Vec { .wait_with_output () .expect ("failed to wait on child"); - output.stdout.as_slice ().to_vec () + if output.status.success () && output.status.code () == Some (0) + { + Ok (output.stdout) + } + else { + Err (String::from_utf8 (output.stderr).unwrap ()) + } } /// Checks whether the input is already bytecode, or is possibly /// Lua source code. If it's source code, compiles and returns bytecode. /// If it's bytecode, just returns the input. 
-pub fn ensure_bytecode (buffer: Vec ) -> Vec { +pub fn ensure_bytecode (buffer: Vec ) -> Result , String> { let bytecode_header = &[0x1b, 0x4c, 0x75, 0x61, 0x54, 0x00, 0x19, 0x93]; if buffer.starts_with (bytecode_header) { - return buffer; + return Ok (buffer); } compile_bytecode (buffer) @@ -116,7 +123,7 @@ pub fn parse_inst (buf: [u8; 4]) -> Option 0x0c => Inst::GetTable (a, b, c), 0x0d => Inst::GetI (a, b, c), 0x0e => Inst::GetField (a, b, c), - 0x0f => Inst::SetTabUp (a, b, c), + 0x0f => Inst::SetTabUp (a, b, c, k), 0x11 => Inst::SetI (a, b, c, k), 0x12 => Inst::SetField (a, b, c, k), 0x13 => Inst::NewTable (a), @@ -133,6 +140,7 @@ pub fn parse_inst (buf: [u8; 4]) -> Option 0x31 => Inst::UnM (a, b), 0x33 => Inst::Not (a, b), 0x34 => Inst::Len (a, b), + 0x35 => Inst::Concat (a, b), 0x3c => Inst::EqK (a, b, k), 0x3d => Inst::EqI (a, i_sb (buf)?, k), 0x38 => Inst::Jmp (s_j), @@ -333,8 +341,12 @@ pub fn parse_block (rdr: &mut R, blocks: &mut Vec ) Some (()) } +pub fn parse_chunk (buf: &[u8]) -> Option { + let mut rdr = std::io::Cursor::new (buf); + parse_chunk_from_reader (&mut rdr) +} -pub fn parse_chunk (rdr: &mut R) -> Option { +pub fn parse_chunk_from_reader (rdr: &mut R) -> Option { // Discard 32 bytes from the start of the file. // This is magic number, version number, etc. @@ -352,11 +364,6 @@ pub fn parse_chunk (rdr: &mut R) -> Option { }) } -pub fn parse_chunk_from_bytes (b: &[u8]) -> Option { - let mut rdr = std::io::Cursor::new (b); - parse_chunk (&mut rdr) -} - #[cfg (test)] mod tests { #[test] @@ -447,8 +454,7 @@ mod tests { } if false { - let mut rdr = std::io::Cursor::new (bytecode.clone ()); - let file = crate::loader::parse_chunk (&mut rdr).unwrap (); + let file = crate::loader::parse_chunk (bytecode).unwrap (); assert_eq! 
(file.blocks.len (), 5); } diff --git a/lunar_wave_vm/src/state.rs b/lunar_wave_vm/src/state.rs index 2e33800..cb3e1e1 100644 --- a/lunar_wave_vm/src/state.rs +++ b/lunar_wave_vm/src/state.rs @@ -20,12 +20,12 @@ pub struct Block { pub upvalues: Vec , } -#[derive (Clone, Debug)] +#[derive (Clone, Debug, Default)] pub struct Chunk { pub blocks: Vec , } -#[derive (Clone, Debug)] +#[derive (Clone, Debug, Default)] pub struct StackFrame { // i32 makes it a little easier to implement jumps // Starts at 0 right after OP_CALL @@ -148,11 +148,7 @@ impl State { registers: vec! [Value::Nil; 256], top: 0, stack: vec! [ - StackFrame { - program_counter: 0, - block_idx: 0, - register_offset: 0, - }, + StackFrame::default (), ], debug_print: false, step_count: 0, @@ -386,6 +382,9 @@ impl State { upvalues: new_upvalues, }); }, + Instruction::Concat (a, b) => { + unimplemented! ("OP_CONCAT") + }, Instruction::Div (a, b, c) => { let v_b = self.reg (b); let v_c = self.reg (c); @@ -760,7 +759,22 @@ impl State { dst.insert_int (i64::from (c + i), src.clone ()); } }, - Instruction::SetTabUp (_a, _b, _c) => unimplemented! (), + Instruction::SetTabUp (a, b, c, k_flag) => { + let a = usize::try_from (a).unwrap (); + let b = usize::try_from (b).unwrap (); + + let value = if k_flag { + &k [usize::from (c)] + } + else { + self.reg (c) + } + .clone (); + + let table = self.upvalues.get_mut (a).unwrap ().as_table ().unwrap (); + let key = k.get (b).unwrap ().as_str ().expect ("SetTabUp K[B] must be a string"); + table.borrow_mut ().insert_str (key, value); + }, Instruction::Sub (a, b, c) => { let v_b = self.reg (b); let v_c = self.reg (c); @@ -886,15 +900,11 @@ impl State { Ok (None) } - pub fn execute_chunk (&mut self, breakpoints: &[Breakpoint]) + pub fn execute (&mut self) -> Result , StepError> { let max_iters = 2000; for _ in 0..max_iters { - if breakpoints.iter ().any (|bp| self.at_breakpoint (bp)) { - dbg! (&self); - } - match self.step ()? 
{ None => (), Some (StepOutput::ChunkReturned (x)) => return Ok (x), @@ -904,4 +914,9 @@ impl State { dbg! (self); panic! ("Hit max iterations before block returned"); } + + pub fn set_chunk (&mut self, chunk: Chunk) { + self.stack = vec! [Default::default ()]; + self.chunk = chunk; + } } diff --git a/lunar_wave_vm/src/tests.rs b/lunar_wave_vm/src/tests.rs index 15d65d9..5a98873 100644 --- a/lunar_wave_vm/src/tests.rs +++ b/lunar_wave_vm/src/tests.rs @@ -25,14 +25,14 @@ fn calculate_hash(t: &T) -> u64 { fn run_chunk (args: &[&str], chunk: Chunk) -> Vec { let upvalues = State::upvalues_from_args (args.into_iter ().map (|s| s.to_string ())); let mut vm = State::new (chunk, upvalues); - vm.execute_chunk (&[]).unwrap () + vm.execute ().unwrap () } /// Takes arguments and Lua bytecode, loads it, runs it, /// and return the output fn run_bytecode (args: &[&str], bc: &[u8]) -> Vec { - let chunk = loader::parse_chunk_from_bytes (&bc).unwrap (); + let chunk = loader::parse_chunk (&bc).unwrap (); run_chunk (args, chunk) } @@ -42,7 +42,7 @@ fn run_bytecode (args: &[&str], bc: &[u8]) -> Vec { /// and returns the output fn run_source (args: &[&str], s: &str) -> Vec { - let bc = loader::compile_bytecode (s.as_bytes ().to_vec ()); + let bc = loader::compile_bytecode (s.as_bytes ().to_vec ()).unwrap (); run_bytecode (args, &bc) } @@ -120,7 +120,7 @@ fn bools () { let upvalues = State::upvalues_from_args (arg.into_iter ().map (|s| s.to_string ())); let mut vm = State::new (chunk.clone (), upvalues); - let actual = vm.execute_chunk (&[]).unwrap (); + let actual = vm.execute ().unwrap (); assert_eq! (actual, expected); } } @@ -128,8 +128,8 @@ fn bools () { #[test] fn closure () { let source = include_bytes! 
("../test_vectors/closure.lua"); - let bytecode = &crate::loader::compile_bytecode (source.to_vec ()); - let chunk = crate::loader::parse_chunk_from_bytes (bytecode).unwrap (); + let bytecode = &crate::loader::compile_bytecode (source.to_vec ()).unwrap (); + let chunk = crate::loader::parse_chunk (bytecode).unwrap (); assert_eq! (run_chunk (&["_exe_name"], chunk), vec! [Value::from (23i64)]); } @@ -176,7 +176,7 @@ fn floats () { let expected: Vec = expected; let upvalues = State::upvalues_from_args (arg.into_iter ().map (|s| s.to_string ())); let mut vm = State::new (chunk.clone (), upvalues); - let actual = vm.execute_chunk (&[]).unwrap (); + let actual = vm.execute ().unwrap (); assert_eq! (actual, expected); } @@ -185,8 +185,8 @@ fn floats () { #[test] fn fma () { let source = include_bytes! ("../test_vectors/fma.lua"); - let bytecode = &crate::loader::compile_bytecode (source.to_vec ()); - let chunk = crate::loader::parse_chunk_from_bytes (bytecode).unwrap (); + let bytecode = &crate::loader::compile_bytecode (source.to_vec ()).unwrap (); + let chunk = crate::loader::parse_chunk (bytecode).unwrap (); assert_eq! (chunk.blocks.len (), 5); assert_eq! (chunk.blocks [3].upvalues.len (), 2); @@ -194,12 +194,43 @@ fn fma () { let arg = vec! ["_exe_name"]; let upvalues = State::upvalues_from_args (arg.into_iter ().map (|s| s.to_string ())); let mut vm = State::new (chunk, upvalues); - let actual = vm.execute_chunk (&[]).unwrap (); + let actual = vm.execute ().unwrap (); let expected = vec! [Value::from (122)]; assert_eq! (actual, expected); } +#[test] +fn function_calls () { + let upvalues = crate::State::upvalues_from_args (vec! 
["_exe_name".to_string ()].into_iter ()); + + let mut vm = crate::State::new (crate::Chunk::default (), upvalues); + + { + let bc = crate::compile_bytecode (b"print (x ())".to_vec ()).unwrap (); + let chunk = crate::parse_chunk (&bc).unwrap (); + + vm.set_chunk (chunk); + vm.execute ().unwrap (); + } + + { + let bc = crate::compile_bytecode (b"x = function () return 5 end".to_vec ()).unwrap (); + let chunk = crate::parse_chunk (&bc).unwrap (); + + vm.set_chunk (chunk); + vm.execute ().unwrap (); + } + + { + let bc = crate::compile_bytecode (b"print (x ())".to_vec ()).unwrap (); + let chunk = crate::parse_chunk (&bc).unwrap (); + + vm.set_chunk (chunk); + vm.execute ().unwrap (); + } +} + #[test] fn heap () { use std::{ @@ -265,7 +296,7 @@ fn is_93 () { assert_ne! (calculate_hash (&Value::from ("94")), calculate_hash (&Value::from ("93"))); assert_ne! (Value::Nil, Value::from ("93")); - let src = r#" + let src = br#" if arg [1] == "93" then print "it's 93" return 0 @@ -275,8 +306,8 @@ fn is_93 () { end "#; - let bc = loader::compile_bytecode (src.as_bytes ().to_vec ()); - let chunk = loader::parse_chunk_from_bytes (&bc).unwrap (); + let bc = loader::compile_bytecode (src.to_vec ()).unwrap (); + let chunk = loader::parse_chunk (&bc).unwrap (); assert_eq! (chunk.blocks [0].instructions [3], Inst::EqK (0, 1, false)); @@ -359,12 +390,12 @@ fn tables_2 () { fn tailcall () { use crate::instruction::Instruction; - let src = r#" + let src = br#" return tonumber ("5") "#; - let bc = loader::compile_bytecode (src.as_bytes ().to_vec ()); - let chunk = loader::parse_chunk_from_bytes (&bc).unwrap (); + let bc = loader::compile_bytecode (src.to_vec ()).unwrap (); + let chunk = loader::parse_chunk (&bc).unwrap (); assert_eq! 
(chunk.blocks [0].instructions [3], Instruction::TailCall (0, 2, 1, false)); diff --git a/lunar_wave_vm/tests/embedding.rs b/lunar_wave_vm/tests/embedding.rs index 0d77181..ba171bd 100644 --- a/lunar_wave_vm/tests/embedding.rs +++ b/lunar_wave_vm/tests/embedding.rs @@ -19,9 +19,8 @@ fn embedding () { 1 } - let bytecode = lwvm::compile_bytecode (src.to_vec ()); - let mut rdr = std::io::Cursor::new (bytecode); - let chunk = lwvm::parse_chunk (&mut rdr).unwrap (); + let bc = lwvm::compile_bytecode (src.to_vec ()).unwrap (); + let chunk = lwvm::parse_chunk (&bc).unwrap (); let host_lib = [ ("add", Value::RsFunc (host_add)), @@ -36,7 +35,7 @@ fn embedding () { ]; let mut vm = State::new (chunk, upvalues); - let output = vm.execute_chunk (&vec! []).unwrap (); + let output = vm.execute ().unwrap (); assert_eq! (output, vec! [Value::from (2019)]); }