Compare commits
12 Commits
130330b688
...
32ddedc066
Author | SHA1 | Date |
---|---|---|
_ | 32ddedc066 | |
_ | 43a3294c57 | |
_ | 47d5fe3df1 | |
_ | 3698feeb90 | |
_ | 55ea0c233e | |
_ | 7878efc235 | |
_ | 4e23f51634 | |
_ | b35dc346e8 | |
_ | 52df317326 | |
_ | 11fd5b6cbc | |
_ | 66fe54adef | |
_ | e11026a553 |
|
@ -8,6 +8,10 @@ authors = ["ReactorScram"]
|
|||
[dependencies]
|
||||
lunar_wave_vm = { path = "../lunar_wave_vm" }
|
||||
|
||||
[profile.release]
|
||||
codegen-units = 1
|
||||
lto = "fat"
|
||||
|
||||
[target.x86_64-unknown-linux-gnu]
|
||||
linker = "/usr/bin/clang"
|
||||
# Recommended for flamegraph
|
||||
|
|
|
@ -1,4 +1,7 @@
|
|||
use std::io::Read;
|
||||
use std::{
|
||||
io::Read,
|
||||
rc::Rc,
|
||||
};
|
||||
|
||||
use crate::{
|
||||
instruction::Instruction as Inst,
|
||||
|
@ -101,6 +104,68 @@ fn i_sc (buf: [u8; 4]) -> Option <i8> {
|
|||
i8::try_from (i32::try_from (c).ok ()? - 127).ok ()
|
||||
}
|
||||
|
||||
pub trait DecodeInstruction {
|
||||
fn opcode (self) -> u8;
|
||||
|
||||
fn a (self) -> u8;
|
||||
fn ax (self) -> u32;
|
||||
fn b (self) -> u8;
|
||||
fn bx (self) -> u32;
|
||||
fn c (self) -> u8;
|
||||
fn k (self) -> bool;
|
||||
fn sb (self) -> i8;
|
||||
fn sbx (self) -> i32;
|
||||
fn sc (self) -> i8;
|
||||
fn sj (self) -> i32;
|
||||
}
|
||||
|
||||
impl DecodeInstruction for u32 {
|
||||
#[inline(always)]
|
||||
fn opcode (self) -> u8 {
|
||||
((self >> 0) & 0x7f) as u8
|
||||
}
|
||||
|
||||
fn a (self) -> u8 {
|
||||
((self >> 7) & 0xff) as u8
|
||||
}
|
||||
|
||||
fn ax (self) -> u32 {
|
||||
self >> 7
|
||||
}
|
||||
|
||||
fn b (self) -> u8 {
|
||||
((self >> 16) & 0xff) as u8
|
||||
}
|
||||
|
||||
fn bx (self) -> u32 {
|
||||
(self >> 15) as u32
|
||||
}
|
||||
|
||||
fn c (self) -> u8 {
|
||||
(self >> 24) as u8
|
||||
}
|
||||
|
||||
fn k (self) -> bool {
|
||||
((self >> 15) & 0x1) == 1
|
||||
}
|
||||
|
||||
fn sb (self) -> i8 {
|
||||
((((self >> 16) & 0xff) as i16) - 127) as i8
|
||||
}
|
||||
|
||||
fn sbx (self) -> i32 {
|
||||
(self >> 15) as i32 - 65535
|
||||
}
|
||||
|
||||
fn sc (self) -> i8 {
|
||||
(((self >> 24) as i16) - 127) as i8
|
||||
}
|
||||
|
||||
fn sj (self) -> i32 {
|
||||
((self >> 7) as i32) - 0xffffff
|
||||
}
|
||||
}
|
||||
|
||||
pub fn parse_inst (buf: [u8; 4]) -> Option <Inst>
|
||||
{
|
||||
let opcode = buf [0] & 0x7f;
|
||||
|
@ -148,9 +213,9 @@ pub fn parse_inst (buf: [u8; 4]) -> Option <Inst>
|
|||
0x33 => Inst::Not (a, b),
|
||||
0x34 => Inst::Len (a, b),
|
||||
0x35 => Inst::Concat (a, b),
|
||||
0x38 => Inst::Jmp (s_j),
|
||||
0x3c => Inst::EqK (a, b, k),
|
||||
0x3d => Inst::EqI (a, i_sb (buf)?, k),
|
||||
0x38 => Inst::Jmp (s_j),
|
||||
0x42 => Inst::Test (a, k),
|
||||
0x44 => Inst::Call (a, b, c),
|
||||
0x45 => Inst::TailCall (a, b, c, k),
|
||||
|
@ -243,7 +308,7 @@ fn parse_i64 <R: Read> (rdr: &mut R) -> Option <i64> {
|
|||
// code, but I don't like recursion in general, and I don't know
|
||||
// why PUC wrote it that way.
|
||||
|
||||
pub fn parse_block <R: Read> (rdr: &mut R, si: &mut Interner, blocks: &mut Vec <Block>)
|
||||
pub fn parse_block <R: Read> (rdr: &mut R, si: &mut Interner, blocks: &mut Vec <Rc <Block>>)
|
||||
-> Option <()>
|
||||
{
|
||||
// Ignore things I haven't implemented yet
|
||||
|
@ -261,9 +326,11 @@ pub fn parse_block <R: Read> (rdr: &mut R, si: &mut Interner, blocks: &mut Vec <
|
|||
for _ in 0..inst_count {
|
||||
let mut buf = [0u8; 4];
|
||||
rdr.read_exact (&mut buf).ok ().unwrap ();
|
||||
instructions.push (parse_inst (buf).expect (&format! ("{buf:?}")));
|
||||
instructions.push (u32::from_le_bytes (buf));
|
||||
}
|
||||
|
||||
let instructions = Rc::from (instructions);
|
||||
|
||||
let constant_count = parse_int (rdr).unwrap ();
|
||||
|
||||
let mut constants = Vec::with_capacity (constant_count as usize);
|
||||
|
@ -306,7 +373,7 @@ pub fn parse_block <R: Read> (rdr: &mut R, si: &mut Interner, blocks: &mut Vec <
|
|||
constants,
|
||||
instructions,
|
||||
upvalues,
|
||||
});
|
||||
}.into ());
|
||||
|
||||
// Recursion
|
||||
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -1,8 +1,14 @@
|
|||
use std::hash::Hash;
|
||||
use std::{
|
||||
hash::Hash,
|
||||
rc::Rc,
|
||||
};
|
||||
|
||||
use crate::{
|
||||
instruction::Instruction as Inst,
|
||||
loader,
|
||||
loader::{
|
||||
self,
|
||||
DecodeInstruction,
|
||||
},
|
||||
state::{
|
||||
Block,
|
||||
Chunk,
|
||||
|
@ -39,8 +45,7 @@ fn run_bytecode (vm: &mut State, args: &[&str], bc: &[u8]) -> Vec <Value> {
|
|||
|
||||
/// Takes arguments and Lua source code,
|
||||
/// invokes `luac` to compile it to bytecode,
|
||||
/// runs it,
|
||||
/// and returns the output
|
||||
/// runs it, and returns the output
|
||||
|
||||
fn run_source (vm: &mut State, args: &[&str], s: &str) -> Vec <Value> {
|
||||
let bc = loader::compile_bytecode (s.as_bytes ().to_vec ()).unwrap ();
|
||||
|
@ -64,7 +69,7 @@ fn bools () {
|
|||
*/
|
||||
|
||||
let mut si = Interner::default ();
|
||||
|
||||
/*
|
||||
let chunk = Chunk {
|
||||
blocks: vec! [
|
||||
Block {
|
||||
|
@ -97,7 +102,7 @@ fn bools () {
|
|||
si.to_value ("print"),
|
||||
],
|
||||
upvalues: vec! [],
|
||||
},
|
||||
}.into (),
|
||||
Block {
|
||||
instructions: vec! [
|
||||
Inst::Test (0, false),
|
||||
|
@ -111,7 +116,7 @@ fn bools () {
|
|||
],
|
||||
constants: vec! [],
|
||||
upvalues: vec! [],
|
||||
},
|
||||
}.into (),
|
||||
],
|
||||
};
|
||||
|
||||
|
@ -126,6 +131,7 @@ fn bools () {
|
|||
let actual = run_chunk (&mut vm, &arg, chunk.clone ());
|
||||
assert_eq! (actual, expected);
|
||||
}
|
||||
*/
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
@ -152,7 +158,7 @@ fn floats () {
|
|||
*/
|
||||
|
||||
let mut si = Interner::default ();
|
||||
|
||||
/*
|
||||
let block = Block {
|
||||
instructions: vec! [
|
||||
Inst::VarArgPrep (0),
|
||||
|
@ -173,7 +179,7 @@ fn floats () {
|
|||
upvalues: vec! [],
|
||||
};
|
||||
let chunk = Chunk {
|
||||
blocks: vec! [block],
|
||||
blocks: vec! [block.into ()],
|
||||
};
|
||||
|
||||
let mut vm = crate::State::new_with_args (Chunk::default (), si, vec! [].into_iter());
|
||||
|
@ -187,6 +193,7 @@ fn floats () {
|
|||
|
||||
assert_eq! (actual, expected);
|
||||
}
|
||||
*/
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
@ -195,10 +202,49 @@ fn fma () {
|
|||
let mut si = Interner::default ();
|
||||
let bytecode = &crate::loader::compile_bytecode (source.to_vec ()).unwrap ();
|
||||
let chunk = crate::loader::parse_chunk (bytecode, &mut si).unwrap ();
|
||||
assert_eq! (chunk.blocks.len (), 5);
|
||||
|
||||
assert_eq! (chunk.blocks.len (), 5);
|
||||
assert_eq! (chunk.blocks [3].upvalues.len (), 2);
|
||||
|
||||
let i = chunk.blocks [1].instructions [0];
|
||||
assert_eq! (i.opcode (), 0x22);
|
||||
assert_eq! (i.a (), 2);
|
||||
assert_eq! (i.b (), 0);
|
||||
assert_eq! (i.c (), 1);
|
||||
|
||||
let i = chunk.blocks [1].instructions [1];
|
||||
assert_eq! (i.opcode (), 0x2e);
|
||||
assert_eq! (i.a (), 0);
|
||||
assert_eq! (i.b (), 1);
|
||||
assert_eq! (i.c (), 6);
|
||||
|
||||
let i = chunk.blocks [2].instructions [0];
|
||||
assert_eq! (i.opcode (), 0x24);
|
||||
assert_eq! (i.a (), 2);
|
||||
assert_eq! (i.b (), 0);
|
||||
assert_eq! (i.c (), 1);
|
||||
|
||||
let i = chunk.blocks [2].instructions [1];
|
||||
assert_eq! (i.opcode (), 0x2e);
|
||||
assert_eq! (i.a (), 0);
|
||||
assert_eq! (i.b (), 1);
|
||||
assert_eq! (i.c (), 8);
|
||||
|
||||
let i = chunk.blocks [3].instructions [2];
|
||||
assert_eq! (i.opcode (), 0x00);
|
||||
assert_eq! (i.a (), 5);
|
||||
assert_eq! (i.b (), 0);
|
||||
|
||||
let i = chunk.blocks [3].instructions [4];
|
||||
assert_eq! (i.opcode (), 0x44);
|
||||
assert_eq! (i.a (), 4);
|
||||
assert_eq! (i.b (), 3);
|
||||
assert_eq! (i.c (), 2);
|
||||
|
||||
let i = chunk.blocks [4].instructions [1];
|
||||
assert_eq! (i.opcode (), 0x01);
|
||||
assert_eq! (i.a (), 1);
|
||||
assert_eq! (i.sbx (), 10);
|
||||
|
||||
let mut vm = crate::State::new_with_args (chunk, si, vec! ["_exe_name".to_string ()].into_iter ());
|
||||
let actual = vm.execute ().unwrap ();
|
||||
|
@ -319,7 +365,15 @@ fn is_93 () {
|
|||
let bc = loader::compile_bytecode (src.to_vec ()).unwrap ();
|
||||
let chunk = loader::parse_chunk (&bc, &mut si).unwrap ();
|
||||
|
||||
assert_eq! (chunk.blocks [0].instructions [3], Inst::EqK (0, 1, false));
|
||||
let i = chunk.blocks [0].instructions [3];
|
||||
assert_eq! (i.opcode (), 0x3c);
|
||||
assert_eq! (i.a (), 0);
|
||||
assert_eq! (i.b (), 1);
|
||||
assert_eq! (i.k (), false);
|
||||
|
||||
let i = chunk.blocks [0].instructions [4];
|
||||
assert_eq! (i.opcode (), 0x38);
|
||||
assert_eq! (i.sj (), 6);
|
||||
|
||||
let mut vm = crate::State::new_with_args (Chunk::default (), si, vec! [].into_iter());
|
||||
|
||||
|
@ -404,8 +458,6 @@ fn tables_2 () {
|
|||
|
||||
#[test]
|
||||
fn tailcall () {
|
||||
use crate::instruction::Instruction;
|
||||
|
||||
let mut si = Interner::default ();
|
||||
|
||||
let src = br#"
|
||||
|
@ -415,7 +467,8 @@ fn tailcall () {
|
|||
let bc = loader::compile_bytecode (src.to_vec ()).unwrap ();
|
||||
let chunk = loader::parse_chunk (&bc, &mut si).unwrap ();
|
||||
|
||||
assert_eq! (chunk.blocks [0].instructions [3], Instruction::TailCall (0, 2, 1, false));
|
||||
// assert_eq! (chunk.blocks [0].instructions [3].opcode (), Instruction::TailCall (0, 2, 1, false));
|
||||
assert_eq! (chunk.blocks [0].instructions [3].opcode (), 0x45);
|
||||
|
||||
let mut vm = crate::State::new_with_args (Chunk::default (), si, vec! [].into_iter());
|
||||
|
||||
|
@ -426,7 +479,7 @@ fn tailcall () {
|
|||
}
|
||||
|
||||
#[test]
|
||||
fn value_size () {
|
||||
fn rust_stuff () {
|
||||
// Per https://www.lua.org/doc/jucs05.pdf,
|
||||
// "The Implementation of Lua 5.0",
|
||||
//
|
||||
|
@ -480,4 +533,9 @@ fn value_size () {
|
|||
let expected = 8;
|
||||
assert! (sz == expected, "{sz} != {expected}");
|
||||
}
|
||||
|
||||
assert_eq! (size_of::<crate::instruction::Instruction> (), 8);
|
||||
|
||||
let x = vec! [100, 101, 102, 103];
|
||||
let x: Rc <[u32]> = Rc::from (x);
|
||||
}
|
||||
|
|
|
@ -247,6 +247,7 @@ impl Value {
|
|||
pub struct Table {
|
||||
array: Vec <Value>,
|
||||
hash: HashMap <Value, Value>,
|
||||
strings: Vec <(InternedString, Value)>,
|
||||
map: BTreeMap <InternedString, Value>,
|
||||
}
|
||||
|
||||
|
@ -262,7 +263,7 @@ impl Table {
|
|||
fn get_inner (&self, key: &Value) -> &Value {
|
||||
match key {
|
||||
Value::Nil => &NIL,
|
||||
Value::String (x) => self.map.get (x).unwrap_or (&NIL),
|
||||
Value::String (x) => self.get_str (*x),
|
||||
Value::Integer (x) => self.array.get (usize::try_from (*x).unwrap ()).unwrap_or (&NIL),
|
||||
x => self.hash.get (x).unwrap_or (&NIL),
|
||||
}
|
||||
|
@ -277,10 +278,7 @@ impl Table {
|
|||
}
|
||||
|
||||
pub fn get_str (&self, key: InternedString) -> &Value {
|
||||
match self.map.get (&key) {
|
||||
None => &NIL,
|
||||
Some (x) => x,
|
||||
}
|
||||
self.strings.iter ().find (|(hay, _)| hay == &key).map (|(_, v)| v).unwrap_or (&NIL)
|
||||
}
|
||||
|
||||
/// Insert value at arbitrary key
|
||||
|
@ -293,9 +291,7 @@ impl Table {
|
|||
match a.into () {
|
||||
Value::Integer (x) => self.insert_int (x, b),
|
||||
Value::Nil => (),
|
||||
Value::String (x) => {
|
||||
self.map.insert (x, b.into ());
|
||||
},
|
||||
Value::String (x) => self.insert_str (x, b.into ()),
|
||||
x => {
|
||||
self.hash.insert (x, b.into ());
|
||||
},
|
||||
|
@ -312,7 +308,11 @@ impl Table {
|
|||
}
|
||||
|
||||
pub fn insert_str (&mut self, key: InternedString, v: Value) {
|
||||
self.map.insert (key, v);
|
||||
match self.strings.iter_mut ().find (|(hay, _)| hay == &key).map (|(_, v)| v)
|
||||
{
|
||||
None => self.strings.push ((key, v)),
|
||||
Some (x) => *x = v,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn length (&self) -> i64 {
|
||||
|
|
|
@ -0,0 +1,67 @@
|
|||
# Optimizations
|
||||
|
||||
Making notes on optimizations I've made and plan to make, so I can remember which ones paid off.
|
||||
|
||||
## String interning
|
||||
|
||||
Worked well. PUC Lua does this. I think it's faster not because it avoids
|
||||
hashing or comparing strings, but because it avoids the pointer deref.
|
||||
I still ended up hashing ints after this change.
|
||||
|
||||
## Linear search
|
||||
|
||||
The n_body benchmark uses tables with about 7 slots in its hot loop.
|
||||
The hashing overhead of HashMap for i64 seems pretty bad for this.
|
||||
BTreeMap was faster, but not fast enough.
|
||||
|
||||
I switched to just an unsorted Vec and linear search, and it's the
|
||||
fastest by a small margin.
|
||||
|
||||
I don't think PUC Lua does this, but PUC Lua might have a faster, less
|
||||
secure hash algorithm than Rust's default.
|
||||
|
||||
Flamegraph reveals we still spend a lot of time in linear searching tables.
|
||||
|
||||
## Lazy instruction decoding
|
||||
|
||||
I think this actually slowed it down. PUC Lua keeps instructions in their
|
||||
encoded u32 form and decodes them lazily inside the interpreter's main loop.
|
||||
|
||||
I did this mostly to match PUC Lua, although I didn't think it would work. My enum for decoded instructions is only 64 bits, and I didn't think the extra bit fiddling was cheap enough.
|
||||
|
||||
Maybe if I tweaked it, it would pay off. It just really doesn't look like it should work.
|
||||
|
||||
## Caching the current block
|
||||
|
||||
I think this one paid off. The idea was to avoid some `chunk.blocks [i]` derefs and bound checks in the inner loop.
|
||||
|
||||
I used an `Rc` to make it work. PUC Lua probably just keeps a raw pointer to the block.
|
||||
|
||||
## Caching the current instruction list
|
||||
|
||||
I think this one paid off more. Instead of caching the current block I just cached its instructions, since the inner loop doesn't use constants or upvalues much, but every step requires access to the instruction list.
|
||||
|
||||
Using `Rc <[u32]>` was fun, too. I never stored a slice directly in a smart pointer before.
|
||||
|
||||
## Fat LTO and codegen-units = 1
|
||||
|
||||
Did absolutely nothing. I couldn't outsmart LLVM.
|
||||
|
||||
## Remove RefCell
|
||||
|
||||
(upcoming)
|
||||
|
||||
I think the `borrow` and `borrow_mut` calls slow down OP_GETFIELD and OP_SETFIELD. I can remove them if I store all the tables in State directly, replacing `Rc <RefCell <Table>>` with my own ref counting. This might
|
||||
remove a layer of indirection, too.
|
||||
|
||||
It's a big change, but I'd need _something_ like this for adding a GC anyway, and sometimes big changes have paid off.
|
||||
|
||||
## Iterating over instruction list
|
||||
|
||||
(upcoming)
|
||||
|
||||
I noticed PUC Lua doesn't store a program counter, it stores a `u32 *`, a pointer to the next instruction itself. This might save, like, 1 single cycle or something, I can't believe it does anything, but it could. Because it saves you that "Look at the instruction list, multiply the index by 4, add it to the base pointer" step.
|
||||
|
||||
Maybe the real saving is that it saves a little bit of cache space by forgetting the base pointer?
|
||||
|
||||
Storing an iterator sounds like a big fight with the borrow checker. I might want to prototype it outside the interpreter first. But if it works, it might compile down to what PUC Lua does in C. Plus a bounds check.
|
Loading…
Reference in New Issue