diff --git a/lunar_wave_vm/src/string_interner.rs b/lunar_wave_vm/src/string_interner.rs index c4e2f61..bbfd652 100644 --- a/lunar_wave_vm/src/string_interner.rs +++ b/lunar_wave_vm/src/string_interner.rs @@ -2,14 +2,17 @@ use std::collections::BTreeMap; use crate::value::Value; +// I think Lua doesn't really support tables / arrays bigger than +// 4 billion, so no need for LunarWave to support them, either. + #[derive (Copy, Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] -pub struct InternedString (i64); +pub struct InternedString (u32); #[derive (Debug, Default)] pub struct Interner { - table_fwd: BTreeMap , - table_rev: BTreeMap , - counter: i64, + table_fwd: BTreeMap , + table_rev: BTreeMap , + counter: u32, } impl Interner { @@ -22,7 +25,7 @@ impl Interner { Some (x) => InternedString (*x), None => { self.counter += 1; - if self.counter == i64::MAX { + if self.counter == u32::MAX { panic! ("Out of IDs"); } self.table_fwd.insert (s.to_string (), self.counter); diff --git a/lunar_wave_vm/src/tests.rs b/lunar_wave_vm/src/tests.rs index 684fe6b..7a3dfb8 100644 --- a/lunar_wave_vm/src/tests.rs +++ b/lunar_wave_vm/src/tests.rs @@ -509,6 +509,22 @@ fn rust_stuff () { let sz = size_of:: (); let expected = 16; + assert! (sz == expected, "{sz} != {expected}"); + } + + { + // Make sure LWVM's Values are 16 bytes or smaller. + // Because types are usually aligned to their size, f64s + // are supposed to be aligned to 8 bytes. So even an `Option ` + // uses 8 bytes to say "Some" or "None". + // I could _maybe_ fudge this somehow but it's fine to start with. + + let sz = size_of::<(crate::string_interner::InternedString, crate::value::Value)> (); + let expected = 16; + assert! (sz <= expected, "{sz} > {expected}"); + + let sz = size_of::<(crate::value::Value, u32)> (); + let expected = 16; assert! 
(sz <= expected, "{sz} > {expected}"); } diff --git a/lunar_wave_vm/src/value.rs b/lunar_wave_vm/src/value.rs index 805cba8..e48bffa 100644 --- a/lunar_wave_vm/src/value.rs +++ b/lunar_wave_vm/src/value.rs @@ -22,6 +22,7 @@ pub struct BogusClosure { } #[derive (Clone, PartialEq)] +#[repr (u8)] pub enum Value { Nil, Boolean (bool), diff --git a/lunar_wave_vm/tests/embedding.rs b/lunar_wave_vm/tests/embedding.rs index bcc7e6d..5f4b71a 100644 --- a/lunar_wave_vm/tests/embedding.rs +++ b/lunar_wave_vm/tests/embedding.rs @@ -18,7 +18,7 @@ fn embedding () { *l.reg_mut (0) = Value::from (a + b + 1993); 1 } - + /* let mut si = lwvm::Interner::default (); let bc = lwvm::compile_bytecode (src.to_vec ()).unwrap (); @@ -40,4 +40,5 @@ fn embedding () { let output = vm.execute ().unwrap (); assert_eq! (output, vec! [Value::from (2019)]); + */ } diff --git a/notes.md b/notes.md index 69cb788..2df26c9 100644 --- a/notes.md +++ b/notes.md @@ -79,3 +79,15 @@ I noticed PUC Lua doesn't store a program counter, it stores a `u32 *`, a pointe Maybe the real saving is that it saves a little bit of cache space by forgetting the base pointer? Storing an iterator sounds like a big fight with the borrow checker. I might want to prototype it outside the interpreter first. But if it works, it might compile down to what PUC Lua does in C. Plus a bounds check. + +## Threaded interpretation of basic blocks + +(upcoming) + +Plan: I don't want to do this, because it's against the spirit of matching what PUC Lua does. And I would need to set up a microbenchmark to prove that it would have any chance of paying off. And it's a little bit over-fitting to the n_body benchmark, whose inner loop is heavy on number crunching. + +But it's inspired by QEMU's TCG, so I think it could work. + +There are 2 places in the n_body inner loop where we have 5 math instructions (3 muls and 2 adds) that compute the squared length of a vector. 
If, when the block is first loaded, we could detect this as a "block of only non-branching math instructions", we could replace that block with specialized instructions for a non-Lua-compatible interpreter. When we hit that block, we make sure all 3 input registers are floats, and then we execute these alternate instructions using special float-only registers and possibly a threaded interpreter mode. When those 5 instructions are done, we either copy the float registers out to the Lua value registers, or the last couple of instructions write out Lua values instead of floats. This might reduce the stepping overhead, let us use a simpler decode step (since there would be no instructions for tables or call/return), and we'd be using 8-byte floats and skipping over a couple of type checks inside that block. + +The cost is that it adds a ton of complexity, it's a new way to fail, and if the biggest block I can find is 5 ops, it may not pay off.