Compare commits

..

No commits in common. "32ddedc066167b5299fd1c994559f226ff7a4d40" and "130330b688acb309216759962de51c4df22fccca" have entirely different histories.

6 changed files with 380 additions and 610 deletions

View File

@ -8,10 +8,6 @@ authors = ["ReactorScram"]
[dependencies] [dependencies]
lunar_wave_vm = { path = "../lunar_wave_vm" } lunar_wave_vm = { path = "../lunar_wave_vm" }
[profile.release]
codegen-units = 1
lto = "fat"
[target.x86_64-unknown-linux-gnu] [target.x86_64-unknown-linux-gnu]
linker = "/usr/bin/clang" linker = "/usr/bin/clang"
# Recommended for flamegraph # Recommended for flamegraph

View File

@ -1,7 +1,4 @@
use std::{ use std::io::Read;
io::Read,
rc::Rc,
};
use crate::{ use crate::{
instruction::Instruction as Inst, instruction::Instruction as Inst,
@ -104,68 +101,6 @@ fn i_sc (buf: [u8; 4]) -> Option <i8> {
i8::try_from (i32::try_from (c).ok ()? - 127).ok () i8::try_from (i32::try_from (c).ok ()? - 127).ok ()
} }
/// Field accessors for one encoded 32-bit instruction word.
///
/// Bit layout as read by the `u32` implementation below (bit 0 is the
/// least significant bit):
///
/// - opcode: bits 0..=6
/// - A: bits 7..=14
/// - k: bit 15
/// - B: bits 16..=23
/// - C: bits 24..=31
/// - Bx: bits 15..=31 (overlaps k, B and C)
/// - Ax and sJ: bits 7..=31 (everything above the opcode)
///
/// The signed variants (`sb`, `sbx`, `sc`, `sj`) are the matching unsigned
/// field minus a fixed bias.
pub trait DecodeInstruction {
    /// The 7-bit opcode.
    fn opcode (self) -> u8;
    /// The 8-bit A field.
    fn a (self) -> u8;
    /// The 25-bit Ax field.
    fn ax (self) -> u32;
    /// The 8-bit B field.
    fn b (self) -> u8;
    /// The 17-bit Bx field.
    fn bx (self) -> u32;
    /// The 8-bit C field.
    fn c (self) -> u8;
    /// The single-bit k flag.
    fn k (self) -> bool;
    /// The B field minus 127.
    fn sb (self) -> i8;
    /// The Bx field minus 65535.
    fn sbx (self) -> i32;
    /// The C field minus 127.
    fn sc (self) -> i8;
    /// The 25-bit jump field minus 0xffffff.
    fn sj (self) -> i32;
}

impl DecodeInstruction for u32 {
    #[inline(always)]
    fn opcode (self) -> u8 {
        (self & 0x7f) as u8
    }

    fn a (self) -> u8 {
        // The truncating cast keeps exactly the low 8 bits of the shifted
        // word, so the k bit and everything above it are dropped.
        (self >> 7) as u8
    }

    fn ax (self) -> u32 {
        self >> 7
    }

    fn b (self) -> u8 {
        // Bits 16..=23 are the third little-endian byte.
        let [_, _, b, _] = self.to_le_bytes ();
        b
    }

    fn bx (self) -> u32 {
        self >> 15
    }

    fn c (self) -> u8 {
        // Bits 24..=31 are the most significant byte.
        let [_, _, _, c] = self.to_le_bytes ();
        c
    }

    fn k (self) -> bool {
        (self & (1 << 15)) != 0
    }

    fn sb (self) -> i8 {
        // NOTE(review): a raw field of 255 gives 128, which wraps to -128 in
        // the `as i8` cast. Representing it needs a wider return type.
        (i16::from (self.b ()) - 127) as i8
    }

    fn sbx (self) -> i32 {
        // Bx is at most 17 bits wide, so the cast to i32 is lossless.
        self.bx () as i32 - 0xffff
    }

    fn sc (self) -> i8 {
        // NOTE(review): same wrap as `sb` for a raw field of 255.
        (i16::from (self.c ()) - 127) as i8
    }

    fn sj (self) -> i32 {
        // The jump field is at most 25 bits wide, so the cast to i32 is lossless.
        self.ax () as i32 - 0xff_ffff
    }
}
pub fn parse_inst (buf: [u8; 4]) -> Option <Inst> pub fn parse_inst (buf: [u8; 4]) -> Option <Inst>
{ {
let opcode = buf [0] & 0x7f; let opcode = buf [0] & 0x7f;
@ -213,9 +148,9 @@ pub fn parse_inst (buf: [u8; 4]) -> Option <Inst>
0x33 => Inst::Not (a, b), 0x33 => Inst::Not (a, b),
0x34 => Inst::Len (a, b), 0x34 => Inst::Len (a, b),
0x35 => Inst::Concat (a, b), 0x35 => Inst::Concat (a, b),
0x38 => Inst::Jmp (s_j),
0x3c => Inst::EqK (a, b, k), 0x3c => Inst::EqK (a, b, k),
0x3d => Inst::EqI (a, i_sb (buf)?, k), 0x3d => Inst::EqI (a, i_sb (buf)?, k),
0x38 => Inst::Jmp (s_j),
0x42 => Inst::Test (a, k), 0x42 => Inst::Test (a, k),
0x44 => Inst::Call (a, b, c), 0x44 => Inst::Call (a, b, c),
0x45 => Inst::TailCall (a, b, c, k), 0x45 => Inst::TailCall (a, b, c, k),
@ -308,7 +243,7 @@ fn parse_i64 <R: Read> (rdr: &mut R) -> Option <i64> {
// code, but I don't like recursion in general, and I don't know // code, but I don't like recursion in general, and I don't know
// why PUC wrote it that way. // why PUC wrote it that way.
pub fn parse_block <R: Read> (rdr: &mut R, si: &mut Interner, blocks: &mut Vec <Rc <Block>>) pub fn parse_block <R: Read> (rdr: &mut R, si: &mut Interner, blocks: &mut Vec <Block>)
-> Option <()> -> Option <()>
{ {
// Ignore things I haven't implemented yet // Ignore things I haven't implemented yet
@ -326,11 +261,9 @@ pub fn parse_block <R: Read> (rdr: &mut R, si: &mut Interner, blocks: &mut Vec <
for _ in 0..inst_count { for _ in 0..inst_count {
let mut buf = [0u8; 4]; let mut buf = [0u8; 4];
rdr.read_exact (&mut buf).ok ().unwrap (); rdr.read_exact (&mut buf).ok ().unwrap ();
instructions.push (u32::from_le_bytes (buf)); instructions.push (parse_inst (buf).expect (&format! ("{buf:?}")));
} }
let instructions = Rc::from (instructions);
let constant_count = parse_int (rdr).unwrap (); let constant_count = parse_int (rdr).unwrap ();
let mut constants = Vec::with_capacity (constant_count as usize); let mut constants = Vec::with_capacity (constant_count as usize);
@ -373,7 +306,7 @@ pub fn parse_block <R: Read> (rdr: &mut R, si: &mut Interner, blocks: &mut Vec <
constants, constants,
instructions, instructions,
upvalues, upvalues,
}.into ()); });
// Recursion // Recursion

File diff suppressed because it is too large Load Diff

View File

@ -1,14 +1,8 @@
use std::{ use std::hash::Hash;
hash::Hash,
rc::Rc,
};
use crate::{ use crate::{
instruction::Instruction as Inst, instruction::Instruction as Inst,
loader::{ loader,
self,
DecodeInstruction,
},
state::{ state::{
Block, Block,
Chunk, Chunk,
@ -45,7 +39,8 @@ fn run_bytecode (vm: &mut State, args: &[&str], bc: &[u8]) -> Vec <Value> {
/// Takes arguments and Lua source code, /// Takes arguments and Lua source code,
/// invokes `luac` to compile it to bytecode, /// invokes `luac` to compile it to bytecode,
/// runs it, and returns the output /// runs it,
/// and returns the output
fn run_source (vm: &mut State, args: &[&str], s: &str) -> Vec <Value> { fn run_source (vm: &mut State, args: &[&str], s: &str) -> Vec <Value> {
let bc = loader::compile_bytecode (s.as_bytes ().to_vec ()).unwrap (); let bc = loader::compile_bytecode (s.as_bytes ().to_vec ()).unwrap ();
@ -69,7 +64,7 @@ fn bools () {
*/ */
let mut si = Interner::default (); let mut si = Interner::default ();
/*
let chunk = Chunk { let chunk = Chunk {
blocks: vec! [ blocks: vec! [
Block { Block {
@ -102,7 +97,7 @@ fn bools () {
si.to_value ("print"), si.to_value ("print"),
], ],
upvalues: vec! [], upvalues: vec! [],
}.into (), },
Block { Block {
instructions: vec! [ instructions: vec! [
Inst::Test (0, false), Inst::Test (0, false),
@ -116,7 +111,7 @@ fn bools () {
], ],
constants: vec! [], constants: vec! [],
upvalues: vec! [], upvalues: vec! [],
}.into (), },
], ],
}; };
@ -131,7 +126,6 @@ fn bools () {
let actual = run_chunk (&mut vm, &arg, chunk.clone ()); let actual = run_chunk (&mut vm, &arg, chunk.clone ());
assert_eq! (actual, expected); assert_eq! (actual, expected);
} }
*/
} }
#[test] #[test]
@ -158,7 +152,7 @@ fn floats () {
*/ */
let mut si = Interner::default (); let mut si = Interner::default ();
/*
let block = Block { let block = Block {
instructions: vec! [ instructions: vec! [
Inst::VarArgPrep (0), Inst::VarArgPrep (0),
@ -179,7 +173,7 @@ fn floats () {
upvalues: vec! [], upvalues: vec! [],
}; };
let chunk = Chunk { let chunk = Chunk {
blocks: vec! [block.into ()], blocks: vec! [block],
}; };
let mut vm = crate::State::new_with_args (Chunk::default (), si, vec! [].into_iter()); let mut vm = crate::State::new_with_args (Chunk::default (), si, vec! [].into_iter());
@ -193,7 +187,6 @@ fn floats () {
assert_eq! (actual, expected); assert_eq! (actual, expected);
} }
*/
} }
#[test] #[test]
@ -202,49 +195,10 @@ fn fma () {
let mut si = Interner::default (); let mut si = Interner::default ();
let bytecode = &crate::loader::compile_bytecode (source.to_vec ()).unwrap (); let bytecode = &crate::loader::compile_bytecode (source.to_vec ()).unwrap ();
let chunk = crate::loader::parse_chunk (bytecode, &mut si).unwrap (); let chunk = crate::loader::parse_chunk (bytecode, &mut si).unwrap ();
assert_eq! (chunk.blocks.len (), 5); assert_eq! (chunk.blocks.len (), 5);
assert_eq! (chunk.blocks [3].upvalues.len (), 2); assert_eq! (chunk.blocks [3].upvalues.len (), 2);
let i = chunk.blocks [1].instructions [0];
assert_eq! (i.opcode (), 0x22);
assert_eq! (i.a (), 2);
assert_eq! (i.b (), 0);
assert_eq! (i.c (), 1);
let i = chunk.blocks [1].instructions [1];
assert_eq! (i.opcode (), 0x2e);
assert_eq! (i.a (), 0);
assert_eq! (i.b (), 1);
assert_eq! (i.c (), 6);
let i = chunk.blocks [2].instructions [0];
assert_eq! (i.opcode (), 0x24);
assert_eq! (i.a (), 2);
assert_eq! (i.b (), 0);
assert_eq! (i.c (), 1);
let i = chunk.blocks [2].instructions [1];
assert_eq! (i.opcode (), 0x2e);
assert_eq! (i.a (), 0);
assert_eq! (i.b (), 1);
assert_eq! (i.c (), 8);
let i = chunk.blocks [3].instructions [2];
assert_eq! (i.opcode (), 0x00);
assert_eq! (i.a (), 5);
assert_eq! (i.b (), 0);
let i = chunk.blocks [3].instructions [4];
assert_eq! (i.opcode (), 0x44);
assert_eq! (i.a (), 4);
assert_eq! (i.b (), 3);
assert_eq! (i.c (), 2);
let i = chunk.blocks [4].instructions [1];
assert_eq! (i.opcode (), 0x01);
assert_eq! (i.a (), 1);
assert_eq! (i.sbx (), 10);
let mut vm = crate::State::new_with_args (chunk, si, vec! ["_exe_name".to_string ()].into_iter ()); let mut vm = crate::State::new_with_args (chunk, si, vec! ["_exe_name".to_string ()].into_iter ());
let actual = vm.execute ().unwrap (); let actual = vm.execute ().unwrap ();
@ -365,15 +319,7 @@ fn is_93 () {
let bc = loader::compile_bytecode (src.to_vec ()).unwrap (); let bc = loader::compile_bytecode (src.to_vec ()).unwrap ();
let chunk = loader::parse_chunk (&bc, &mut si).unwrap (); let chunk = loader::parse_chunk (&bc, &mut si).unwrap ();
let i = chunk.blocks [0].instructions [3]; assert_eq! (chunk.blocks [0].instructions [3], Inst::EqK (0, 1, false));
assert_eq! (i.opcode (), 0x3c);
assert_eq! (i.a (), 0);
assert_eq! (i.b (), 1);
assert_eq! (i.k (), false);
let i = chunk.blocks [0].instructions [4];
assert_eq! (i.opcode (), 0x38);
assert_eq! (i.sj (), 6);
let mut vm = crate::State::new_with_args (Chunk::default (), si, vec! [].into_iter()); let mut vm = crate::State::new_with_args (Chunk::default (), si, vec! [].into_iter());
@ -458,6 +404,8 @@ fn tables_2 () {
#[test] #[test]
fn tailcall () { fn tailcall () {
use crate::instruction::Instruction;
let mut si = Interner::default (); let mut si = Interner::default ();
let src = br#" let src = br#"
@ -467,8 +415,7 @@ fn tailcall () {
let bc = loader::compile_bytecode (src.to_vec ()).unwrap (); let bc = loader::compile_bytecode (src.to_vec ()).unwrap ();
let chunk = loader::parse_chunk (&bc, &mut si).unwrap (); let chunk = loader::parse_chunk (&bc, &mut si).unwrap ();
// assert_eq! (chunk.blocks [0].instructions [3].opcode (), Instruction::TailCall (0, 2, 1, false)); assert_eq! (chunk.blocks [0].instructions [3], Instruction::TailCall (0, 2, 1, false));
assert_eq! (chunk.blocks [0].instructions [3].opcode (), 0x45);
let mut vm = crate::State::new_with_args (Chunk::default (), si, vec! [].into_iter()); let mut vm = crate::State::new_with_args (Chunk::default (), si, vec! [].into_iter());
@ -479,7 +426,7 @@ fn tailcall () {
} }
#[test] #[test]
fn rust_stuff () { fn value_size () {
// Per https://www.lua.org/doc/jucs05.pdf, // Per https://www.lua.org/doc/jucs05.pdf,
// "The Implementation of Lua 5.0", // "The Implementation of Lua 5.0",
// //
@ -533,9 +480,4 @@ fn rust_stuff () {
let expected = 8; let expected = 8;
assert! (sz == expected, "{sz} != {expected}"); assert! (sz == expected, "{sz} != {expected}");
} }
assert_eq! (size_of::<crate::instruction::Instruction> (), 8);
let x = vec! [100, 101, 102, 103];
let x: Rc <[u32]> = Rc::from (x);
} }

View File

@ -247,7 +247,6 @@ impl Value {
pub struct Table { pub struct Table {
array: Vec <Value>, array: Vec <Value>,
hash: HashMap <Value, Value>, hash: HashMap <Value, Value>,
strings: Vec <(InternedString, Value)>,
map: BTreeMap <InternedString, Value>, map: BTreeMap <InternedString, Value>,
} }
@ -263,7 +262,7 @@ impl Table {
fn get_inner (&self, key: &Value) -> &Value { fn get_inner (&self, key: &Value) -> &Value {
match key { match key {
Value::Nil => &NIL, Value::Nil => &NIL,
Value::String (x) => self.get_str (*x), Value::String (x) => self.map.get (x).unwrap_or (&NIL),
Value::Integer (x) => self.array.get (usize::try_from (*x).unwrap ()).unwrap_or (&NIL), Value::Integer (x) => self.array.get (usize::try_from (*x).unwrap ()).unwrap_or (&NIL),
x => self.hash.get (x).unwrap_or (&NIL), x => self.hash.get (x).unwrap_or (&NIL),
} }
@ -278,7 +277,10 @@ impl Table {
} }
pub fn get_str (&self, key: InternedString) -> &Value { pub fn get_str (&self, key: InternedString) -> &Value {
self.strings.iter ().find (|(hay, _)| hay == &key).map (|(_, v)| v).unwrap_or (&NIL) match self.map.get (&key) {
None => &NIL,
Some (x) => x,
}
} }
/// Insert value at arbitrary key /// Insert value at arbitrary key
@ -291,7 +293,9 @@ impl Table {
match a.into () { match a.into () {
Value::Integer (x) => self.insert_int (x, b), Value::Integer (x) => self.insert_int (x, b),
Value::Nil => (), Value::Nil => (),
Value::String (x) => self.insert_str (x, b.into ()), Value::String (x) => {
self.map.insert (x, b.into ());
},
x => { x => {
self.hash.insert (x, b.into ()); self.hash.insert (x, b.into ());
}, },
@ -308,11 +312,7 @@ impl Table {
} }
pub fn insert_str (&mut self, key: InternedString, v: Value) { pub fn insert_str (&mut self, key: InternedString, v: Value) {
match self.strings.iter_mut ().find (|(hay, _)| hay == &key).map (|(_, v)| v) self.map.insert (key, v);
{
None => self.strings.push ((key, v)),
Some (x) => *x = v,
}
} }
pub fn length (&self) -> i64 { pub fn length (&self) -> i64 {

View File

@ -1,67 +0,0 @@
# Optimizations
Making notes on optimizations I've made and plan to make, so I can remember which ones paid off.
## String interning
Worked well. PUC Lua does this. I think it's faster not because it avoids
hashing or comparing strings, but because it avoids the pointer deref.
I still ended up hashing ints after this change.
## Linear search
The n_body benchmark uses tables with about 7 slots in its hot loop.
The hashing overhead of HashMap for i64 seems pretty bad for this.
BTreeMap was faster, but not fast enough.
I switched to just an unsorted Vec and linear search, and it's the
fastest by a small margin.
I don't think PUC Lua does this, but PUC Lua might have a faster, less
secure hash algorithm than Rust's default.
Flamegraph reveals we still spend a lot of time linearly searching tables.
## Lazy instruction decoding
I think this actually slowed it down. PUC Lua keeps instructions in their
encoded u32 form and decodes them lazily inside the interpreter's main loop.
I did this mostly to match PUC Lua, although I didn't think it would work. My enum for decoded instructions is only 64 bits, and I didn't think the extra bit fiddling was cheap enough.
Maybe if I tweaked it, it would pay off. It just really doesn't look like it should work.
## Caching the current block
I think this one paid off. The idea was to avoid some `chunk.blocks [i]` derefs and bound checks in the inner loop.
I used an `Rc` to make it work. PUC Lua probably just keeps a raw pointer to the block.
## Caching the current instruction list
I think this one paid off more. Instead of caching the current block I just cached its instructions, since the inner loop doesn't use constants or upvalues much, but every step requires access to the instruction list.
Using `Rc <[u32]>` was fun, too. I never stored a slice directly in a smart pointer before.
## Fat LTO and codegen-units = 1
Did absolutely nothing. I couldn't outsmart LLVM.
## Remove RefCell
(upcoming)
I think the `borrow` and `borrow_mut` calls slow down OP_GETFIELD and OP_SETFIELD. I can remove them if I store all the tables in State directly, replacing `Rc <RefCell <Table>>` with my own ref counting. This might
remove a layer of indirection, too.
It's a big change, but I'd need _something_ like this for adding a GC anyway, and sometimes big changes have paid off.
## Iterating over instruction list
(upcoming)
I noticed PUC Lua doesn't store a program counter; it stores a `u32 *`, a pointer to the next instruction itself. This might save a single cycle or so. I can't believe it does anything, but it could, because it skips the "look at the instruction list, multiply the index by 4, add it to the base pointer" step.
Maybe the real saving is that it saves a little bit of cache space by forgetting the base pointer?
Storing an iterator sounds like a big fight with the borrow checker. I might want to prototype it outside the interpreter first. But if it works, it might compile down to what PUC Lua does in C. Plus a bounds check.