📈 performance: down to 800 ms / 3.3x by changing PC handling

main
_ 2023-10-02 17:39:33 -05:00
parent 130330b688
commit e11026a553
1 changed file with 162 additions and 135 deletions

View File

@ -28,10 +28,9 @@ pub struct Chunk {
#[derive (Clone, Debug, Default)]
pub struct StackFrame {
// i32 makes it a little easier to implement jumps
// Starts at 0 right after OP_CALL
program_counter: i32,
program_counter: usize,
// Starts from 0 for main and 1 for the first closure
block_idx: usize,
@ -42,7 +41,7 @@ pub struct StackFrame {
#[derive (Debug)]
pub struct Breakpoint {
pub block_idx: usize,
pub program_counter: i32,
pub program_counter: usize,
}
#[derive (Debug)]
@ -59,6 +58,7 @@ pub struct State {
pub si: Interner,
register_offset: usize,
block_idx: usize,
next_pc: usize,
}
fn lw_io_write (l: &mut State, num_args: usize) -> usize {
@ -198,6 +198,7 @@ impl State {
si: Default::default (),
register_offset: 0,
block_idx: 0,
next_pc: 0,
}
}
@ -219,6 +220,7 @@ impl State {
si,
register_offset: 0,
block_idx: 0,
next_pc: 0,
}
}
@ -308,6 +310,101 @@ impl State {
true
}
/// Executes `OP_CALL`: calls the value held in register `a`.
///
/// * Arguments are taken from registers `[a + 1, a + b)`.
/// * The callee is the value in register `a`.
/// * Return values end up in registers `[a, a + c - 1)`.
///
/// In other words: call `a` with `b - 1` arguments and expect `c` returns.
/// For example `CALL 0 2 1` means "call register 0 with 1 argument and
/// return 1 value", as when printing a constant. When `b` is 0 the argument
/// count is derived from `self.top` instead.
///
/// Returns `true` when a closure frame was pushed and `next_pc` was already
/// reset to 0, so the caller must skip its usual PC increment. Returns
/// `false` after a native function has run to completion.
///
/// # Panics
///
/// Panics if register `a` holds neither a closure nor a native function.
fn op_call (&mut self, a: u8, b: u8, c: u8) -> bool {
    let callee_slot = usize::from (a);
    let arg_field = usize::from (b);

    // The callee is cloned out of its register so that `self` can be
    // borrowed mutably while the call is set up. A smarter borrow
    // structure could probably avoid the clone.
    let callee = self.reg (a).clone ();

    match callee {
        Value::BogusClosure (closure) => {
            let target_block = closure.borrow ().idx;
            let callee_offset = self.register_offset + callee_slot + 1;

            // Point the interpreter at the first instruction of the closure
            self.next_pc = 0;
            self.block_idx = target_block;
            self.register_offset = callee_offset;

            self.stack.push (StackFrame {
                program_counter: 0,
                block_idx: target_block,
                register_offset: callee_offset,
            });

            // Tell the caller to skip the PC increment at the bottom of the loop
            true
        },
        Value::RsFunc (native) => {
            let caller_offset = self.register_offset;
            let native_offset = caller_offset + callee_slot + 1;
            self.register_offset = native_offset;

            // Push a deliberately bogus frame so that it can never be
            // mistaken for a frame of a valid Lua function
            self.stack.push (StackFrame {
                program_counter: 65535, // Bogus for native functions
                block_idx: 65535, // Bogus
                register_offset: native_offset,
            });

            let num_args = match arg_field {
                0 => self.top - native_offset,
                n => n - 1,
            };

            // Run the native function
            let num_results = native (self, num_args);

            let popped_frame = self.stack.pop ().unwrap ();
            self.register_offset = caller_offset;

            // Shift every result down one slot so the first one lands in
            // the register that held the callee
            let first_result = caller_offset + callee_slot;
            for i in first_result..(first_result + num_results) {
                self.registers [i] = self.registers [i + 1].take ();
            }

            // Set up top for the next call
            if c == 0 {
                self.top = popped_frame.register_offset - 1 + num_results;
            }

            false
        },
        x => {
            panic! ("Cannot call value {x:?}");
        },
    }
}
/// Executes `OP_DIV`: stores `reg[b] / reg[c]` into register `a`.
///
/// The division is always carried out in floating point; integer operands
/// are converted to `f64` first.
///
/// Returns `true` when both operands were numbers and the result was
/// stored. Returns `false`, leaving register `a` untouched, for any other
/// combination of operand types.
fn op_div (&mut self, a: u8, b: u8, c: u8) -> bool {
    let quotient: f64 = match (self.reg (b), self.reg (c)) {
        (Value::Float (lhs), Value::Float (rhs)) => *lhs / *rhs,
        (Value::Integer (lhs), Value::Integer (rhs)) => (*lhs as f64) / (*rhs as f64),
        (Value::Integer (lhs), Value::Float (rhs)) => (*lhs as f64) / *rhs,
        (Value::Float (lhs), Value::Integer (rhs)) => *lhs / (*rhs as f64),
        // Not a pair of numbers: report that nothing was computed
        _ => return false,
    };

    *self.reg_mut (a) = Value::from (quotient);
    true
}
fn op_get_field (&mut self, a: u8, b: u8, c: u8) {
let block = self.chunk.blocks.get (self.block_idx).unwrap ();
let constants = &block.constants;
@ -392,19 +489,28 @@ impl State {
dst.insert_str (key, value);
}
/// Executes `OP_SUB`: stores `reg[b] - reg[c]` into register `a`.
///
/// Two integers produce an integer; any combination involving a float
/// produces a float, with the integer operand converted to `f64`.
///
/// Integer subtraction wraps around on overflow, as Lua specifies for
/// integer arithmetic. The plain `-` operator would instead panic in
/// builds with overflow checks enabled.
///
/// Returns `true` when both operands were numbers and the result was
/// stored; `step` then skips the instruction that follows. Returns `false`,
/// leaving register `a` untouched, for any other combination of operand
/// types.
fn op_sub (&mut self, a: u8, b: u8, c: u8) -> bool {
    let difference = match (self.reg (b), self.reg (c)) {
        (Value::Float (lhs), Value::Float (rhs)) => Value::from (*lhs - *rhs),
        // Wrap instead of panicking, e.g. for `math.mininteger - 1`
        (Value::Integer (lhs), Value::Integer (rhs)) => Value::from (lhs.wrapping_sub (*rhs)),
        (Value::Integer (lhs), Value::Float (rhs)) => Value::from (*lhs as f64 - *rhs),
        (Value::Float (lhs), Value::Integer (rhs)) => Value::from (*lhs - *rhs as f64),
        // Not a pair of numbers: report that nothing was computed
        _ => return false,
    };

    *self.reg_mut (a) = difference;
    true
}
pub fn step (&mut self) -> Result <Option <StepOutput>, StepError>
{
self.step_count += 1;
let frame = self.stack.last ().unwrap ();
self.block_idx = frame.block_idx;
self.register_offset = frame.register_offset;
let block_idx = frame.block_idx;
let block = self.chunk.blocks.get (block_idx).unwrap ();
let block = self.chunk.blocks.get (self.block_idx).unwrap ();
let mut next_pc = frame.program_counter;
let pc = usize::try_from (frame.program_counter).expect ("program_counter is not a valid usize");
let pc = usize::try_from (self.next_pc).expect ("program_counter is not a valid usize");
let instruction = match block.instructions.get (pc) {
Some (x) => *x,
None => {
@ -423,7 +529,7 @@ impl State {
match instruction {
Instruction::Add (a, b, c) => {
if self.op_add (a, b, c) {
next_pc += 1;
self.next_pc += 1;
}
},
Instruction::AddI (a, b, s_c) => {
@ -441,86 +547,15 @@ impl State {
*self.reg_mut (a) = x;
},
Instruction::Call (a, b, c) => {
let b = usize::from (b);
// Take arguments from registers [a + 1, a + b)
// Call the function in register [a]
// Return values in registers [a, a + c - 1)
//
// That is, call a with b - 1 arguments and expect c returns
//
// e.g. CALL 0 2 1 means "Call 0 with 1 argument, return 1 value", like for printing a constant
// Do a clone here to avoid a borrow problem.
// Should be fixable with more clever code.
let v_a = self.reg (a).clone ();
match v_a {
Value::BogusClosure (rc) => {
let idx = rc.borrow ().idx;
let target_block = idx;
self.stack.push (StackFrame {
program_counter: 0,
block_idx: target_block,
register_offset: self.register_offset + a as usize + 1,
});
if self.debug_print {
println! ("Inst {block_idx}:{pc} calls {target_block}:0");
let stack_depth = self.stack.len ();
println! ("stack_depth: {stack_depth}");
}
// Skip the PC increment at the bottom of the loop
return Ok (None);
},
Value::RsFunc (x) => {
let old_offset = self.register_offset;
self.register_offset = old_offset + usize::from (a) + 1;
// Trash the stack frame so it doesn't point to a
// valid Lua function
self.stack.push (StackFrame {
program_counter: 65535, // Bogus for native functions
block_idx: 65535, // Bogus
register_offset: self.register_offset,
});
let num_args = if b == 0 {
self.top - self.register_offset
}
else {
b - 1
};
// Call
let num_results = x (self, num_args);
let popped_frame = self.stack.pop ().unwrap ();
self.register_offset = old_offset;
let offset = old_offset + usize::from (a);
for i in (offset)..(offset + usize::try_from (num_results).unwrap ()) {
self.registers [i] = self.registers [i + 1].take ();
}
// Set up top for the next call
if c == 0 {
self.top = popped_frame.register_offset - 1 + num_results;
}
},
_ => {
Err (make_step_error ("Cannot call value"))?;
},
if self.op_call (a, b, c) {
// Skip the PC increment at the bottom
return Ok (None);
}
},
Instruction::Closure (a, b) => {
let b = usize::try_from (b).unwrap ();
let idx = frame.block_idx + b + 1;
let idx = self.block_idx + b + 1;
let block = &self.chunk.blocks [idx];
let mut new_upvalues = Vec::with_capacity (block.upvalues.len ());
@ -544,26 +579,21 @@ impl State {
unimplemented! ("OP_CONCAT")
},
Instruction::Div (a, b, c) => {
let v_b = self.reg (b);
let v_c = self.reg (c);
let v_b = v_b.as_float ().unwrap_or_else (|| panic! ("{v_b}"));
let v_c = v_c.as_float ().ok_or_else (|| make_step_error ("C must be a number"))?;
*self.reg_mut (a) = Value::from (v_b / v_c);
if self.op_div (a, b, c) {
self.next_pc += 1;
}
},
Instruction::EqI (a, sb, k_flag) => {
if (self.reg (a).as_int ().unwrap () == sb as i64) != k_flag
{
next_pc += 1;
self.next_pc += 1;
}
},
Instruction::EqK (a, b, k_flag) => {
let b = usize::from (b);
if (*self.reg (a) == k [b]) != k_flag {
next_pc += 1;
self.next_pc += 1;
}
},
Instruction::ExtraArg (ax) => {
@ -580,7 +610,7 @@ impl State {
let stop = self.reg (a + 1).as_int ().unwrap ();
if iter <= stop {
next_pc -= i32::try_from (bx).unwrap ();
self.next_pc -= usize::try_from (bx).unwrap ();
}
},
Instruction::ForPrep (a, bx) => {
@ -588,7 +618,7 @@ impl State {
let stop = self.reg (a + 1).as_int ().unwrap ();
if start > stop {
next_pc += i32::try_from (bx).unwrap () + 1;
self.next_pc += usize::try_from (bx).unwrap () + 1;
}
*self.reg_mut (a + 3) = start.into ();
@ -657,7 +687,7 @@ impl State {
};
*self.reg_mut (a) = upvalue;
},
Instruction::Jmp (s_j) => next_pc += s_j,
Instruction::Jmp (s_j) => self.next_pc = usize::try_from (i32::try_from (self.next_pc).unwrap () + s_j).unwrap (),
Instruction::Len (a, b) => {
let len = match self.reg (b) {
Value::BogusClosure (_) => Err (make_step_error ("attempt to get length of a function value"))?,
@ -718,7 +748,7 @@ impl State {
// skip the OP_MMBIN that probably comes after this
if self.op_mul (a, b, c) {
next_pc += 1;
self.next_pc += 1;
}
},
Instruction::MulK (a, b, c) => {
@ -779,7 +809,9 @@ impl State {
}
if let Some (new_frame) = self.stack.last() {
next_pc = new_frame.program_counter;
self.block_idx = new_frame.block_idx;
self.next_pc = new_frame.program_counter;
self.register_offset = new_frame.register_offset;
// Shift our output registers down so the caller
// can grab them
@ -802,7 +834,10 @@ impl State {
},
Instruction::Return0 => {
let popped_frame = self.stack.pop ().unwrap ();
next_pc = self.stack.last ().unwrap ().program_counter;
let new_frame = self.stack.last ().unwrap ();
self.block_idx = new_frame.block_idx;
self.next_pc = new_frame.program_counter;
self.register_offset = new_frame.register_offset;
self.top = popped_frame.register_offset - 1 + 0;
},
Instruction::Return1 (a) => {
@ -813,16 +848,9 @@ impl State {
self.registers [popped_frame.register_offset - 1] = self.register_window ()[a].clone ();
let frame = self.stack.last ().unwrap ();
let new_block = frame.block_idx;
next_pc = frame.program_counter;
if self.debug_print {
let old_block = popped_frame.block_idx;
let old_pc = popped_frame.program_counter;
println! ("Inst {old_block}:{old_pc} returns to inst {new_block}:{next_pc}");
let stack_depth = self.stack.len ();
println! ("stack_depth: {stack_depth}");
}
self.block_idx = frame.block_idx;
self.next_pc = frame.program_counter;
self.register_offset = frame.register_offset;
// Shift output register down
let offset = popped_frame.register_offset;
@ -878,26 +906,15 @@ impl State {
table.borrow_mut ().insert_str (key, value);
},
Instruction::Sub (a, b, c) => {
let v_b = self.reg (b);
let v_c = self.reg (c);
let x = if let (Some (v_b), Some (v_c)) = (v_b.as_int (), v_c.as_int ())
{
Value::from (v_b - v_c)
if self.op_sub (a, b, c) {
self.next_pc += 1;
}
else {
let v_b = v_b.as_float ().unwrap_or_else (|| panic! ("{v_b}"));
let v_c = v_c.as_float ().unwrap_or_else (|| panic! ("{v_c}"));
Value::from (v_b - v_c)
};
*self.reg_mut (a) = x;
},
Instruction::TailCall (a, b, c, k) => {
let a = usize::from (a);
assert! (!k, "closing over values in tail calls not implemented");
let offset = frame.register_offset;
let offset = self.register_offset;
let value = self.registers [offset + a].take ();
match value {
Value::BogusClosure (closure) => {
@ -923,6 +940,8 @@ impl State {
let frame = self.stack.last_mut ().unwrap ();
frame.program_counter = 0;
frame.block_idx = closure.idx;
self.block_idx = closure.idx;
self.next_pc = 0;
// Skip the PC increment
return Ok (None);
@ -954,16 +973,20 @@ impl State {
let num_results = x (self, num_args);
let popped_frame = self.stack.pop ().unwrap ();
if self.stack.is_empty () {
// The whole chunk is exiting
return Ok (Some (StepOutput::ChunkReturned (self.registers [a..(a + num_results)].to_vec())));
}
else {
if let Some (new_frame) = self.stack.last () {
self.block_idx = new_frame.block_idx;
self.next_pc = new_frame.program_counter;
self.register_offset = new_frame.register_offset;
// Set up top for the next call
if c == 0 {
self.top = popped_frame.register_offset - 1 + num_results;
}
}
else {
// The whole chunk is exiting
return Ok (Some (StepOutput::ChunkReturned (self.registers [a..(a + num_results)].to_vec())));
}
},
_ => {
dbg! (&self.stack);
@ -973,7 +996,7 @@ impl State {
},
Instruction::Test (a, k_flag) => {
if self.reg (a).is_truthy() != k_flag {
next_pc += 1;
self.next_pc += 1;
}
},
Instruction::UnM (a, b) => {
@ -993,10 +1016,11 @@ impl State {
Instruction::VarArgPrep (_) => (),
}
next_pc += 1;
self.next_pc += 1;
{
let frame = self.stack.last_mut ().unwrap ();
frame.program_counter = next_pc;
frame.program_counter = self.next_pc;
self.register_offset = frame.register_offset;
}
Ok (None)
@ -1029,6 +1053,9 @@ impl State {
pub fn set_chunk (&mut self, chunk: Chunk) {
self.stack = vec! [Default::default ()];
self.chunk = chunk;
self.block_idx = 0;
self.next_pc = 0;
self.register_offset = 0;
}
pub fn to_string (&mut self, s: &str) -> Value {