⭐ the nested functions thing is working, though I wish I didn't have to use recursion
parent
3870dc2c02
commit
0d88653c21
219
src/loader.rs
219
src/loader.rs
|
@ -29,12 +29,14 @@ pub fn parse_inst (buf: [u8; 4]) -> Option <Inst>
|
||||||
0x09 => Inst::GetUpVal (a, b),
|
0x09 => Inst::GetUpVal (a, b),
|
||||||
0x0b => Inst::GetTabUp (a, b, c),
|
0x0b => Inst::GetTabUp (a, b, c),
|
||||||
0x0d => Inst::GetI (a, b, c),
|
0x0d => Inst::GetI (a, b, c),
|
||||||
|
0x0f => Inst::SetTabUp (a, b, c),
|
||||||
0x22 => Inst::Add (a, b, c),
|
0x22 => Inst::Add (a, b, c),
|
||||||
0x24 => Inst::Mul (a, b, c),
|
0x24 => Inst::Mul (a, b, c),
|
||||||
0x2e => Inst::MmBin (a, b, c),
|
0x2e => Inst::MmBin (a, b, c),
|
||||||
0x3c => Inst::EqK (a, b, c),
|
0x3c => Inst::EqK (a, b, c),
|
||||||
0x38 => Inst::Jmp (s_j),
|
0x38 => Inst::Jmp (s_j),
|
||||||
0x44 => Inst::Call (a, b, c),
|
0x44 => Inst::Call (a, b, c),
|
||||||
|
0x45 => Inst::TailCall (a, b, c, k),
|
||||||
0x46 => Inst::Return (a, b, c, k),
|
0x46 => Inst::Return (a, b, c, k),
|
||||||
0x47 => Inst::Return0,
|
0x47 => Inst::Return0,
|
||||||
0x48 => Inst::Return1 (a),
|
0x48 => Inst::Return1 (a),
|
||||||
|
@ -49,95 +51,122 @@ struct Header {
|
||||||
inst_count: u8,
|
inst_count: u8,
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parse_file_name <R: Read> (rdr: &mut R) -> Option <String> {
|
// loadString in PUC Lua. Doesn't work with long strings yet.
|
||||||
let file_name_sz = {
|
|
||||||
let mut file_name_sz = [0u8; 1];
|
fn parse_string <R: Read> (rdr: &mut R) -> Option <String> {
|
||||||
rdr.read_exact (&mut file_name_sz).ok ()?;
|
let len = match parse_int (rdr)? {
|
||||||
usize::try_from (file_name_sz [0] - 0x80 - 1).ok ()?
|
0 => 0,
|
||||||
|
x => x - 1,
|
||||||
};
|
};
|
||||||
|
|
||||||
{
|
let mut buf = vec! [0u8; len as usize];
|
||||||
let mut file_name = vec! [0u8; file_name_sz];
|
rdr.read_exact (&mut buf).ok ()?;
|
||||||
rdr.read_exact (&mut file_name).ok ()?;
|
Some (String::from_utf8 (buf).ok ()?)
|
||||||
Some (String::from_utf8 (file_name).ok ()?)
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parse_header (buf: [u8; 6]) -> Option <Header> {
|
// loadByte in PUC Lua
|
||||||
if buf [0] & 0x80 != 0x80 {
|
|
||||||
// Not a function header
|
|
||||||
return None;
|
|
||||||
}
|
|
||||||
|
|
||||||
Some (Header {
|
|
||||||
inst_count: buf [5] - 0x80,
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
// I don't know what this really is, so I'm calling it a trailer for now
|
fn parse_byte <R: Read> (rdr: &mut R) -> Option <u8>
|
||||||
// It appears in luac files after the string table / constants table
|
|
||||||
// for each function.
|
|
||||||
|
|
||||||
#[derive (Debug, PartialEq)]
|
|
||||||
struct Trailer {
|
|
||||||
upvalue_count: u8,
|
|
||||||
}
|
|
||||||
|
|
||||||
fn parse_trailer (buf: [u8; 6]) -> Option <Trailer> {
|
|
||||||
Some (Trailer {
|
|
||||||
upvalue_count: buf [0] - 0x80,
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn parse_block <R: Read> (rdr: &mut R) -> Option <Block>
|
|
||||||
{
|
{
|
||||||
let header = {
|
let mut buf = [0u8; 1];
|
||||||
let mut buf = [0u8; 6];
|
rdr.read_exact (&mut buf).ok ()?;
|
||||||
rdr.read_exact (&mut buf).ok ()?;
|
Some (buf [0])
|
||||||
parse_header (buf)?
|
}
|
||||||
};
|
|
||||||
|
fn parse_int <R: Read> (rdr: &mut R) -> Option <u32>
|
||||||
|
{
|
||||||
|
Some ((parse_byte (rdr)? - 0x80) as u32)
|
||||||
|
}
|
||||||
|
|
||||||
|
// I'm doing this recursively so it's easy to match with the PUC Lua
|
||||||
|
// code, but I don't like recursion in general, and I don't know
|
||||||
|
// why PUC wrote it that way.
|
||||||
|
|
||||||
|
pub fn parse_block <R: Read> (rdr: &mut R, blocks: &mut Vec <Block>)
|
||||||
|
-> Option <()>
|
||||||
|
{
|
||||||
|
// Ignore things I haven't implemented yet
|
||||||
|
|
||||||
let mut instructions = Vec::with_capacity (header.inst_count as usize);
|
parse_string (rdr)?; // function name
|
||||||
|
parse_int (rdr)?; // start line in source code
|
||||||
|
parse_int (rdr)?; // last line in source code
|
||||||
|
parse_byte (rdr)?; // num params
|
||||||
|
parse_byte (rdr)?; // is_vararg
|
||||||
|
parse_byte (rdr)?; // maxstacksize, might be same as num slots?
|
||||||
|
|
||||||
for _ in 0..header.inst_count {
|
let inst_count = parse_int (rdr)?;
|
||||||
|
let mut instructions = Vec::with_capacity (inst_count as usize);
|
||||||
|
|
||||||
|
for _ in 0..inst_count {
|
||||||
let mut buf = [0u8; 4];
|
let mut buf = [0u8; 4];
|
||||||
rdr.read_exact (&mut buf).ok ()?;
|
rdr.read_exact (&mut buf).ok ()?;
|
||||||
instructions.push (parse_inst (buf).expect (&format! ("{buf:?}")));
|
instructions.push (parse_inst (buf).expect (&format! ("{buf:?}")));
|
||||||
}
|
}
|
||||||
|
|
||||||
let constant_count = {
|
let constant_count = parse_int (rdr)?;
|
||||||
let mut buf = [0u8; 1];
|
|
||||||
rdr.read_exact (&mut buf).ok ()?;
|
|
||||||
buf [0] - 0x80
|
|
||||||
};
|
|
||||||
|
|
||||||
let mut constants = Vec::with_capacity (constant_count as usize);
|
let mut constants = Vec::with_capacity (constant_count as usize);
|
||||||
|
|
||||||
for _ in 0..constant_count {
|
for _ in 0..constant_count {
|
||||||
let mut buf = [0u8; 2];
|
let const_type = parse_byte (rdr)?;
|
||||||
rdr.read_exact (&mut buf).ok ()?;
|
assert_eq! (const_type, 0x04);
|
||||||
|
|
||||||
let len = ((buf [0] as u32) << 8) + (buf [1] as u32) - 0x0481;
|
let s = parse_string (rdr)?;
|
||||||
|
|
||||||
let mut s = vec! [0u8; len.try_into().ok ()?];
|
|
||||||
rdr.read_exact (&mut s).ok ()?;
|
|
||||||
|
|
||||||
let s = String::from_utf8 (s).ok ()?;
|
|
||||||
constants.push (s.into ());
|
constants.push (s.into ());
|
||||||
}
|
}
|
||||||
|
|
||||||
let trailer = {
|
let upvalue_count = parse_int (rdr)? as usize;
|
||||||
let mut buf = [0u8; 6];
|
|
||||||
rdr.read_exact (&mut buf).ok ()?;
|
|
||||||
|
|
||||||
parse_trailer (buf)?
|
|
||||||
};
|
|
||||||
|
|
||||||
Some (Block {
|
for _ in 0..upvalue_count {
|
||||||
|
// Just ignore these
|
||||||
|
|
||||||
|
for _ in 0..3 {
|
||||||
|
parse_byte (rdr)?;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
blocks.push (Block {
|
||||||
constants,
|
constants,
|
||||||
instructions,
|
instructions,
|
||||||
upvalue_count: trailer.upvalue_count as usize,
|
upvalue_count,
|
||||||
})
|
});
|
||||||
|
|
||||||
|
// Recursion
|
||||||
|
|
||||||
|
// Subfunctions. PUC calls them protos.
|
||||||
|
let protos_count = parse_int (rdr)?;
|
||||||
|
for _ in 0..protos_count {
|
||||||
|
parse_block (rdr, blocks)?;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Skip over debug stuff
|
||||||
|
|
||||||
|
// I think this is delta line numbers, e.g. most instructions
|
||||||
|
// have 0, but when you go to a new source line it's 1+.
|
||||||
|
|
||||||
|
let lineinfo_count = parse_int (rdr)?;
|
||||||
|
for _ in 0..lineinfo_count {
|
||||||
|
parse_byte (rdr)?;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Absolute line info, didn't see that in my test files
|
||||||
|
|
||||||
|
let abslineinfo_count = parse_int (rdr)?;
|
||||||
|
assert_eq! (abslineinfo_count, 0);
|
||||||
|
|
||||||
|
let local_count = parse_int (rdr)?;
|
||||||
|
for _ in 0..local_count {
|
||||||
|
parse_string(rdr)?;
|
||||||
|
parse_int (rdr)?;
|
||||||
|
parse_int (rdr)?;
|
||||||
|
}
|
||||||
|
|
||||||
|
let upvalue_count = parse_int (rdr)?;
|
||||||
|
for _ in 0..upvalue_count {
|
||||||
|
parse_string (rdr)?;
|
||||||
|
}
|
||||||
|
|
||||||
|
Some (())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -148,16 +177,13 @@ pub fn parse_chunk <R: Read> (rdr: &mut R) -> Option <Chunk> {
|
||||||
let mut hdr = [0u8; 32];
|
let mut hdr = [0u8; 32];
|
||||||
rdr.read_exact (&mut hdr).ok ()?;
|
rdr.read_exact (&mut hdr).ok ()?;
|
||||||
|
|
||||||
let file_name = parse_file_name (rdr)?;
|
|
||||||
|
|
||||||
let mut blocks = vec![];
|
let mut blocks = vec![];
|
||||||
|
|
||||||
while let Some (block) = parse_block (rdr) {
|
while let Some (_) = parse_block (rdr, &mut blocks) {
|
||||||
blocks.push (block);
|
//
|
||||||
}
|
}
|
||||||
|
|
||||||
Some (Chunk {
|
Some (Chunk {
|
||||||
file_name,
|
|
||||||
blocks,
|
blocks,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
@ -206,36 +232,33 @@ mod tests {
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn parse_header () {
|
fn parse_nested_functions () {
|
||||||
for (input, expected) in [
|
use std::io::Read;
|
||||||
// Bytes 0 and 1 are first line and last line for debugging
|
|
||||||
// Byte 2 is numparams
|
let bytecode = include_bytes! ("../test_vectors/functions.luac");
|
||||||
// Byte 3 is is_vararg
|
|
||||||
// Byte 4 is slot count / max stack size
|
{
|
||||||
// Byte 5 is instruction count
|
let mut rdr = std::io::Cursor::new (bytecode.clone ());
|
||||||
|
|
||||||
([0x80, 0x80, 0x00, 0x01, 0x04, 0x92], (18,)),
|
let mut buf = [0u8; 32];
|
||||||
([0x81, 0x89, 0x00, 0x00, 0x03, 0x87], (7,)),
|
rdr.read_exact (&mut buf).unwrap ();
|
||||||
([0x85, 0x88, 0x00, 0x00, 0x02, 0x86], (6,)),
|
|
||||||
] {
|
let mut blocks = vec! [];
|
||||||
let actual = super::parse_header (input).unwrap ();
|
|
||||||
assert_eq! (actual, super::Header {
|
super::parse_block (&mut rdr, &mut blocks).unwrap ();
|
||||||
inst_count: expected.0,
|
|
||||||
});
|
assert_eq! (blocks [0].instructions.len (), 15);
|
||||||
|
assert_eq! (blocks [1].instructions.len (), 6);
|
||||||
|
assert_eq! (blocks [2].instructions.len (), 4);
|
||||||
|
assert_eq! (blocks [3].instructions.len (), 4);
|
||||||
|
assert_eq! (blocks [4].instructions.len (), 4);
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
if false {
|
||||||
#[test]
|
let mut rdr = std::io::Cursor::new (bytecode.clone ());
|
||||||
fn parse_trailer () {
|
let file = crate::loader::parse_chunk (&mut rdr).unwrap ();
|
||||||
for (input, expected) in [
|
|
||||||
([0x81, 0x01, 0x00, 0x00, 0x81, 0x80], (1,)),
|
assert_eq! (file.blocks.len (), 5);
|
||||||
([0x81, 0x00, 0x00, 0x00, 0x81, 0x80], (1,)),
|
|
||||||
([0x82, 0x00, 0x00, 0x00, 0x01, 0x00], (2,)),
|
|
||||||
] {
|
|
||||||
let actual = super::parse_trailer (input).unwrap ();
|
|
||||||
assert_eq! (actual, super::Trailer {
|
|
||||||
upvalue_count: expected.0,
|
|
||||||
});
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -48,7 +48,9 @@ pub enum Instruction {
|
||||||
// Return just one register
|
// Return just one register
|
||||||
Return1 (u8),
|
Return1 (u8),
|
||||||
|
|
||||||
TailCall (u8, u8, u8),
|
SetTabUp (u8, u8, u8),
|
||||||
|
|
||||||
|
TailCall (u8, u8, u8, bool),
|
||||||
|
|
||||||
Test (u8, i32),
|
Test (u8, i32),
|
||||||
|
|
||||||
|
@ -136,7 +138,6 @@ pub struct Block {
|
||||||
}
|
}
|
||||||
|
|
||||||
pub struct Chunk {
|
pub struct Chunk {
|
||||||
pub file_name: String,
|
|
||||||
pub blocks: Vec <Block>,
|
pub blocks: Vec <Block>,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -3,7 +3,6 @@ use crate::state::{
|
||||||
Chunk,
|
Chunk,
|
||||||
Instruction as Inst,
|
Instruction as Inst,
|
||||||
State,
|
State,
|
||||||
Value,
|
|
||||||
};
|
};
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
@ -23,7 +22,6 @@ fn bools () {
|
||||||
*/
|
*/
|
||||||
|
|
||||||
let chunk = Chunk {
|
let chunk = Chunk {
|
||||||
file_name: "".to_string (),
|
|
||||||
blocks: vec! [
|
blocks: vec! [
|
||||||
Block {
|
Block {
|
||||||
instructions: vec! [
|
instructions: vec! [
|
||||||
|
@ -136,7 +134,6 @@ fn floats () {
|
||||||
upvalue_count: 1,
|
upvalue_count: 1,
|
||||||
};
|
};
|
||||||
let chunk = Chunk {
|
let chunk = Chunk {
|
||||||
file_name: "".to_string (),
|
|
||||||
blocks: vec! [block],
|
blocks: vec! [block],
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,21 @@
|
||||||
|
print "p_1"
|
||||||
|
|
||||||
|
function aa ()
|
||||||
|
print "p_2"
|
||||||
|
|
||||||
|
function bb ()
|
||||||
|
print "p_3"
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
print "p_4"
|
||||||
|
|
||||||
|
local function cc ()
|
||||||
|
print "p_5"
|
||||||
|
end
|
||||||
|
|
||||||
|
local dd = function ()
|
||||||
|
print "p_6"
|
||||||
|
end
|
||||||
|
|
||||||
|
print "p_7"
|
Binary file not shown.
|
@ -0,0 +1,84 @@
|
||||||
|
|
||||||
|
main <test_vectors/functions.lua:0,0> (15 instructions at 0x559bfdbb1c90)
|
||||||
|
0+ params, 4 slots, 1 upvalue, 2 locals, 5 constants, 3 functions
|
||||||
|
1 [1] VARARGPREP 0
|
||||||
|
2 [1] GETTABUP 0 0 0 ; _ENV "print"
|
||||||
|
3 [1] LOADK 1 1 ; "p_1"
|
||||||
|
4 [1] CALL 0 2 1 ; 1 in 0 out
|
||||||
|
5 [9] CLOSURE 0 0 ; 0x559bfdbb1f40
|
||||||
|
6 [3] SETTABUP 0 2 0 ; _ENV "aa"
|
||||||
|
7 [11] GETTABUP 0 0 0 ; _ENV "print"
|
||||||
|
8 [11] LOADK 1 3 ; "p_4"
|
||||||
|
9 [11] CALL 0 2 1 ; 1 in 0 out
|
||||||
|
10 [15] CLOSURE 0 1 ; 0x559bfdbb2240
|
||||||
|
11 [19] CLOSURE 1 2 ; 0x559bfdbb2390
|
||||||
|
12 [21] GETTABUP 2 0 0 ; _ENV "print"
|
||||||
|
13 [21] LOADK 3 4 ; "p_7"
|
||||||
|
14 [21] CALL 2 2 1 ; 1 in 0 out
|
||||||
|
15 [21] RETURN 2 1 1 ; 0 out
|
||||||
|
constants (5) for 0x559bfdbb1c90:
|
||||||
|
0 S "print"
|
||||||
|
1 S "p_1"
|
||||||
|
2 S "aa"
|
||||||
|
3 S "p_4"
|
||||||
|
4 S "p_7"
|
||||||
|
locals (2) for 0x559bfdbb1c90:
|
||||||
|
0 cc 11 16
|
||||||
|
1 dd 12 16
|
||||||
|
upvalues (1) for 0x559bfdbb1c90:
|
||||||
|
0 _ENV 1 0
|
||||||
|
|
||||||
|
function <test_vectors/functions.lua:3,9> (6 instructions at 0x559bfdbb1f40)
|
||||||
|
0 params, 2 slots, 1 upvalue, 0 locals, 3 constants, 1 function
|
||||||
|
1 [4] GETTABUP 0 0 0 ; _ENV "print"
|
||||||
|
2 [4] LOADK 1 1 ; "p_2"
|
||||||
|
3 [4] CALL 0 2 1 ; 1 in 0 out
|
||||||
|
4 [8] CLOSURE 0 0 ; 0x559bfdbb20d0
|
||||||
|
5 [6] SETTABUP 0 2 0 ; _ENV "bb"
|
||||||
|
6 [9] RETURN0
|
||||||
|
constants (3) for 0x559bfdbb1f40:
|
||||||
|
0 S "print"
|
||||||
|
1 S "p_2"
|
||||||
|
2 S "bb"
|
||||||
|
locals (0) for 0x559bfdbb1f40:
|
||||||
|
upvalues (1) for 0x559bfdbb1f40:
|
||||||
|
0 _ENV 0 0
|
||||||
|
|
||||||
|
function <test_vectors/functions.lua:6,8> (4 instructions at 0x559bfdbb20d0)
|
||||||
|
0 params, 2 slots, 1 upvalue, 0 locals, 2 constants, 0 functions
|
||||||
|
1 [7] GETTABUP 0 0 0 ; _ENV "print"
|
||||||
|
2 [7] LOADK 1 1 ; "p_3"
|
||||||
|
3 [7] CALL 0 2 1 ; 1 in 0 out
|
||||||
|
4 [8] RETURN0
|
||||||
|
constants (2) for 0x559bfdbb20d0:
|
||||||
|
0 S "print"
|
||||||
|
1 S "p_3"
|
||||||
|
locals (0) for 0x559bfdbb20d0:
|
||||||
|
upvalues (1) for 0x559bfdbb20d0:
|
||||||
|
0 _ENV 0 0
|
||||||
|
|
||||||
|
function <test_vectors/functions.lua:13,15> (4 instructions at 0x559bfdbb2240)
|
||||||
|
0 params, 2 slots, 1 upvalue, 0 locals, 2 constants, 0 functions
|
||||||
|
1 [14] GETTABUP 0 0 0 ; _ENV "print"
|
||||||
|
2 [14] LOADK 1 1 ; "p_5"
|
||||||
|
3 [14] CALL 0 2 1 ; 1 in 0 out
|
||||||
|
4 [15] RETURN0
|
||||||
|
constants (2) for 0x559bfdbb2240:
|
||||||
|
0 S "print"
|
||||||
|
1 S "p_5"
|
||||||
|
locals (0) for 0x559bfdbb2240:
|
||||||
|
upvalues (1) for 0x559bfdbb2240:
|
||||||
|
0 _ENV 0 0
|
||||||
|
|
||||||
|
function <test_vectors/functions.lua:17,19> (4 instructions at 0x559bfdbb2390)
|
||||||
|
0 params, 2 slots, 1 upvalue, 0 locals, 2 constants, 0 functions
|
||||||
|
1 [18] GETTABUP 0 0 0 ; _ENV "print"
|
||||||
|
2 [18] LOADK 1 1 ; "p_6"
|
||||||
|
3 [18] CALL 0 2 1 ; 1 in 0 out
|
||||||
|
4 [19] RETURN0
|
||||||
|
constants (2) for 0x559bfdbb2390:
|
||||||
|
0 S "print"
|
||||||
|
1 S "p_6"
|
||||||
|
locals (0) for 0x559bfdbb2390:
|
||||||
|
upvalues (1) for 0x559bfdbb2390:
|
||||||
|
0 _ENV 0 0
|
Loading…
Reference in New Issue