⭐ the nested functions thing is working, though I wish I didn't have to use recursion
parent
3870dc2c02
commit
0d88653c21
221
src/loader.rs
221
src/loader.rs
|
@ -29,12 +29,14 @@ pub fn parse_inst (buf: [u8; 4]) -> Option <Inst>
|
|||
0x09 => Inst::GetUpVal (a, b),
|
||||
0x0b => Inst::GetTabUp (a, b, c),
|
||||
0x0d => Inst::GetI (a, b, c),
|
||||
0x0f => Inst::SetTabUp (a, b, c),
|
||||
0x22 => Inst::Add (a, b, c),
|
||||
0x24 => Inst::Mul (a, b, c),
|
||||
0x2e => Inst::MmBin (a, b, c),
|
||||
0x3c => Inst::EqK (a, b, c),
|
||||
0x38 => Inst::Jmp (s_j),
|
||||
0x44 => Inst::Call (a, b, c),
|
||||
0x45 => Inst::TailCall (a, b, c, k),
|
||||
0x46 => Inst::Return (a, b, c, k),
|
||||
0x47 => Inst::Return0,
|
||||
0x48 => Inst::Return1 (a),
|
||||
|
@ -49,95 +51,122 @@ struct Header {
|
|||
inst_count: u8,
|
||||
}
|
||||
|
||||
fn parse_file_name <R: Read> (rdr: &mut R) -> Option <String> {
|
||||
let file_name_sz = {
|
||||
let mut file_name_sz = [0u8; 1];
|
||||
rdr.read_exact (&mut file_name_sz).ok ()?;
|
||||
usize::try_from (file_name_sz [0] - 0x80 - 1).ok ()?
|
||||
// loadString in PUC Lua. Doesn't work with long strings yet.
|
||||
|
||||
fn parse_string <R: Read> (rdr: &mut R) -> Option <String> {
|
||||
let len = match parse_int (rdr)? {
|
||||
0 => 0,
|
||||
x => x - 1,
|
||||
};
|
||||
|
||||
{
|
||||
let mut file_name = vec! [0u8; file_name_sz];
|
||||
rdr.read_exact (&mut file_name).ok ()?;
|
||||
Some (String::from_utf8 (file_name).ok ()?)
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_header (buf: [u8; 6]) -> Option <Header> {
|
||||
if buf [0] & 0x80 != 0x80 {
|
||||
// Not a function header
|
||||
return None;
|
||||
}
|
||||
|
||||
Some (Header {
|
||||
inst_count: buf [5] - 0x80,
|
||||
})
|
||||
}
|
||||
|
||||
// I don't know what this really is, so I'm calling it a trailer for now
|
||||
// It appears in luac files after the string table / constants table
|
||||
// for each function.
|
||||
|
||||
/// Data pulled out of the 6-byte record that follows a function's constants.
///
/// Only the upvalue count is decoded; the remaining bytes are not interpreted.
#[derive (Debug, PartialEq)]
struct Trailer {
    upvalue_count: u8,
}

/// Decodes a [`Trailer`] from its 6-byte on-disk form.
///
/// Byte 0 holds the upvalue count with the high bit (0x80) set as a marker;
/// bytes 1 through 5 are ignored here.
///
/// Returns `None` when byte 0 is below 0x80. Previously that case underflowed
/// the subtraction and panicked in debug builds.
fn parse_trailer (buf: [u8; 6]) -> Option <Trailer> {
    // Strip the 0x80 marker without risking an arithmetic underflow.
    let upvalue_count = buf [0].checked_sub (0x80)?;

    Some (Trailer {
        upvalue_count,
    })
}
|
||||
|
||||
pub fn parse_block <R: Read> (rdr: &mut R) -> Option <Block>
|
||||
{
|
||||
let header = {
|
||||
let mut buf = [0u8; 6];
|
||||
let mut buf = vec! [0u8; len as usize];
|
||||
rdr.read_exact (&mut buf).ok ()?;
|
||||
parse_header (buf)?
|
||||
};
|
||||
Some (String::from_utf8 (buf).ok ()?)
|
||||
}
|
||||
|
||||
let mut instructions = Vec::with_capacity (header.inst_count as usize);
|
||||
// loadByte in PUC Lua
|
||||
|
||||
for _ in 0..header.inst_count {
|
||||
/// Reads a single byte from `rdr` (loadByte in PUC Lua).
///
/// Returns `None` if the reader cannot supply one more byte.
fn parse_byte <R: Read> (rdr: &mut R) -> Option <u8>
{
    let mut buf = [0u8; 1];
    rdr.read_exact (&mut buf).ok ()?;
    Some (buf [0])
}

/// Reads a variable-length unsigned integer (loadUnsigned / loadSize in
/// PUC Lua 5.4).
///
/// The value is stored as groups of 7 bits, most-significant group first.
/// Every byte contributes its low 7 bits, and the final byte of the number is
/// flagged by having its high bit (0x80) set. A single byte such as `0x85`
/// therefore decodes to 5, and the pair `0x01 0xC8` decodes to 200.
///
/// Returns `None` if the reader runs out before the terminating byte is seen,
/// or if the decoded value does not fit in a `u32`.
fn parse_int <R: Read> (rdr: &mut R) -> Option <u32>
{
    let mut value: u32 = 0;

    loop {
        let byte = parse_byte (rdr)?;

        // Shifting left by 7 would drop set bits: the number is too large.
        if value >> (u32::BITS - 7) != 0 {
            return None;
        }

        value = (value << 7) | u32::from (byte & 0x7f);

        // The high bit marks the last byte of the number.
        if byte & 0x80 != 0 {
            return Some (value);
        }
    }
}
|
||||
|
||||
// I'm doing this recursively so it's easy to match with the PUC Lua
|
||||
// code, but I don't like recursion in general, and I don't know
|
||||
// why PUC wrote it that way.
|
||||
|
||||
pub fn parse_block <R: Read> (rdr: &mut R, blocks: &mut Vec <Block>)
|
||||
-> Option <()>
|
||||
{
|
||||
// Ignore things I haven't implemented yet
|
||||
|
||||
parse_string (rdr)?; // function name
|
||||
parse_int (rdr)?; // start line in source code
|
||||
parse_int (rdr)?; // last line in source code
|
||||
parse_byte (rdr)?; // num params
|
||||
parse_byte (rdr)?; // is_vararg
|
||||
parse_byte (rdr)?; // maxstacksize, might be same as num slots?
|
||||
|
||||
let inst_count = parse_int (rdr)?;
|
||||
let mut instructions = Vec::with_capacity (inst_count as usize);
|
||||
|
||||
for _ in 0..inst_count {
|
||||
let mut buf = [0u8; 4];
|
||||
rdr.read_exact (&mut buf).ok ()?;
|
||||
instructions.push (parse_inst (buf).expect (&format! ("{buf:?}")));
|
||||
}
|
||||
|
||||
let constant_count = {
|
||||
let mut buf = [0u8; 1];
|
||||
rdr.read_exact (&mut buf).ok ()?;
|
||||
buf [0] - 0x80
|
||||
};
|
||||
let constant_count = parse_int (rdr)?;
|
||||
|
||||
let mut constants = Vec::with_capacity (constant_count as usize);
|
||||
|
||||
for _ in 0..constant_count {
|
||||
let mut buf = [0u8; 2];
|
||||
rdr.read_exact (&mut buf).ok ()?;
|
||||
let const_type = parse_byte (rdr)?;
|
||||
assert_eq! (const_type, 0x04);
|
||||
|
||||
let len = ((buf [0] as u32) << 8) + (buf [1] as u32) - 0x0481;
|
||||
|
||||
let mut s = vec! [0u8; len.try_into().ok ()?];
|
||||
rdr.read_exact (&mut s).ok ()?;
|
||||
|
||||
let s = String::from_utf8 (s).ok ()?;
|
||||
let s = parse_string (rdr)?;
|
||||
constants.push (s.into ());
|
||||
}
|
||||
|
||||
let trailer = {
|
||||
let mut buf = [0u8; 6];
|
||||
rdr.read_exact (&mut buf).ok ()?;
|
||||
let upvalue_count = parse_int (rdr)? as usize;
|
||||
|
||||
parse_trailer (buf)?
|
||||
};
|
||||
for _ in 0..upvalue_count {
|
||||
// Just ignore these
|
||||
|
||||
Some (Block {
|
||||
for _ in 0..3 {
|
||||
parse_byte (rdr)?;
|
||||
}
|
||||
}
|
||||
|
||||
blocks.push (Block {
|
||||
constants,
|
||||
instructions,
|
||||
upvalue_count: trailer.upvalue_count as usize,
|
||||
})
|
||||
upvalue_count,
|
||||
});
|
||||
|
||||
// Recursion
|
||||
|
||||
// Subfunctions. PUC calls them protos.
|
||||
let protos_count = parse_int (rdr)?;
|
||||
for _ in 0..protos_count {
|
||||
parse_block (rdr, blocks)?;
|
||||
}
|
||||
|
||||
// Skip over debug stuff
|
||||
|
||||
// I think this is delta line numbers, e.g. most instructions
|
||||
// have 0, but when you go to a new source line it's 1+.
|
||||
|
||||
let lineinfo_count = parse_int (rdr)?;
|
||||
for _ in 0..lineinfo_count {
|
||||
parse_byte (rdr)?;
|
||||
}
|
||||
|
||||
// Absolute line info, didn't see that in my test files
|
||||
|
||||
let abslineinfo_count = parse_int (rdr)?;
|
||||
assert_eq! (abslineinfo_count, 0);
|
||||
|
||||
let local_count = parse_int (rdr)?;
|
||||
for _ in 0..local_count {
|
||||
parse_string(rdr)?;
|
||||
parse_int (rdr)?;
|
||||
parse_int (rdr)?;
|
||||
}
|
||||
|
||||
let upvalue_count = parse_int (rdr)?;
|
||||
for _ in 0..upvalue_count {
|
||||
parse_string (rdr)?;
|
||||
}
|
||||
|
||||
Some (())
|
||||
}
|
||||
|
||||
|
||||
|
@ -148,16 +177,13 @@ pub fn parse_chunk <R: Read> (rdr: &mut R) -> Option <Chunk> {
|
|||
let mut hdr = [0u8; 32];
|
||||
rdr.read_exact (&mut hdr).ok ()?;
|
||||
|
||||
let file_name = parse_file_name (rdr)?;
|
||||
|
||||
let mut blocks = vec![];
|
||||
|
||||
while let Some (block) = parse_block (rdr) {
|
||||
blocks.push (block);
|
||||
while let Some (_) = parse_block (rdr, &mut blocks) {
|
||||
//
|
||||
}
|
||||
|
||||
Some (Chunk {
|
||||
file_name,
|
||||
blocks,
|
||||
})
|
||||
}
|
||||
|
@ -206,36 +232,33 @@ mod tests {
|
|||
}
|
||||
|
||||
#[test]
|
||||
fn parse_header () {
|
||||
for (input, expected) in [
|
||||
// Bytes 0 and 1 are first line and last line for debugging
|
||||
// Byte 2 is numparams
|
||||
// Byte 3 is is_vararg
|
||||
// Byte 4 is slot count / max stack size
|
||||
// Byte 5 is instruction count
|
||||
fn parse_nested_functions () {
|
||||
use std::io::Read;
|
||||
|
||||
([0x80, 0x80, 0x00, 0x01, 0x04, 0x92], (18,)),
|
||||
([0x81, 0x89, 0x00, 0x00, 0x03, 0x87], (7,)),
|
||||
([0x85, 0x88, 0x00, 0x00, 0x02, 0x86], (6,)),
|
||||
] {
|
||||
let actual = super::parse_header (input).unwrap ();
|
||||
assert_eq! (actual, super::Header {
|
||||
inst_count: expected.0,
|
||||
});
|
||||
}
|
||||
let bytecode = include_bytes! ("../test_vectors/functions.luac");
|
||||
|
||||
{
|
||||
let mut rdr = std::io::Cursor::new (bytecode.clone ());
|
||||
|
||||
let mut buf = [0u8; 32];
|
||||
rdr.read_exact (&mut buf).unwrap ();
|
||||
|
||||
let mut blocks = vec! [];
|
||||
|
||||
super::parse_block (&mut rdr, &mut blocks).unwrap ();
|
||||
|
||||
assert_eq! (blocks [0].instructions.len (), 15);
|
||||
assert_eq! (blocks [1].instructions.len (), 6);
|
||||
assert_eq! (blocks [2].instructions.len (), 4);
|
||||
assert_eq! (blocks [3].instructions.len (), 4);
|
||||
assert_eq! (blocks [4].instructions.len (), 4);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn parse_trailer () {
|
||||
for (input, expected) in [
|
||||
([0x81, 0x01, 0x00, 0x00, 0x81, 0x80], (1,)),
|
||||
([0x81, 0x00, 0x00, 0x00, 0x81, 0x80], (1,)),
|
||||
([0x82, 0x00, 0x00, 0x00, 0x01, 0x00], (2,)),
|
||||
] {
|
||||
let actual = super::parse_trailer (input).unwrap ();
|
||||
assert_eq! (actual, super::Trailer {
|
||||
upvalue_count: expected.0,
|
||||
});
|
||||
if false {
|
||||
let mut rdr = std::io::Cursor::new (bytecode.clone ());
|
||||
let file = crate::loader::parse_chunk (&mut rdr).unwrap ();
|
||||
|
||||
assert_eq! (file.blocks.len (), 5);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -48,7 +48,9 @@ pub enum Instruction {
|
|||
// Return just one register
|
||||
Return1 (u8),
|
||||
|
||||
TailCall (u8, u8, u8),
|
||||
SetTabUp (u8, u8, u8),
|
||||
|
||||
TailCall (u8, u8, u8, bool),
|
||||
|
||||
Test (u8, i32),
|
||||
|
||||
|
@ -136,7 +138,6 @@ pub struct Block {
|
|||
}
|
||||
|
||||
pub struct Chunk {
|
||||
pub file_name: String,
|
||||
pub blocks: Vec <Block>,
|
||||
}
|
||||
|
||||
|
|
|
@ -3,7 +3,6 @@ use crate::state::{
|
|||
Chunk,
|
||||
Instruction as Inst,
|
||||
State,
|
||||
Value,
|
||||
};
|
||||
|
||||
#[test]
|
||||
|
@ -23,7 +22,6 @@ fn bools () {
|
|||
*/
|
||||
|
||||
let chunk = Chunk {
|
||||
file_name: "".to_string (),
|
||||
blocks: vec! [
|
||||
Block {
|
||||
instructions: vec! [
|
||||
|
@ -136,7 +134,6 @@ fn floats () {
|
|||
upvalue_count: 1,
|
||||
};
|
||||
let chunk = Chunk {
|
||||
file_name: "".to_string (),
|
||||
blocks: vec! [block],
|
||||
};
|
||||
|
||||
|
|
|
@ -0,0 +1,21 @@
|
|||
print "p_1"
|
||||
|
||||
function aa ()
|
||||
print "p_2"
|
||||
|
||||
function bb ()
|
||||
print "p_3"
|
||||
end
|
||||
end
|
||||
|
||||
print "p_4"
|
||||
|
||||
local function cc ()
|
||||
print "p_5"
|
||||
end
|
||||
|
||||
local dd = function ()
|
||||
print "p_6"
|
||||
end
|
||||
|
||||
print "p_7"
|
Binary file not shown.
|
@ -0,0 +1,84 @@
|
|||
|
||||
main <test_vectors/functions.lua:0,0> (15 instructions at 0x559bfdbb1c90)
|
||||
0+ params, 4 slots, 1 upvalue, 2 locals, 5 constants, 3 functions
|
||||
1 [1] VARARGPREP 0
|
||||
2 [1] GETTABUP 0 0 0 ; _ENV "print"
|
||||
3 [1] LOADK 1 1 ; "p_1"
|
||||
4 [1] CALL 0 2 1 ; 1 in 0 out
|
||||
5 [9] CLOSURE 0 0 ; 0x559bfdbb1f40
|
||||
6 [3] SETTABUP 0 2 0 ; _ENV "aa"
|
||||
7 [11] GETTABUP 0 0 0 ; _ENV "print"
|
||||
8 [11] LOADK 1 3 ; "p_4"
|
||||
9 [11] CALL 0 2 1 ; 1 in 0 out
|
||||
10 [15] CLOSURE 0 1 ; 0x559bfdbb2240
|
||||
11 [19] CLOSURE 1 2 ; 0x559bfdbb2390
|
||||
12 [21] GETTABUP 2 0 0 ; _ENV "print"
|
||||
13 [21] LOADK 3 4 ; "p_7"
|
||||
14 [21] CALL 2 2 1 ; 1 in 0 out
|
||||
15 [21] RETURN 2 1 1 ; 0 out
|
||||
constants (5) for 0x559bfdbb1c90:
|
||||
0 S "print"
|
||||
1 S "p_1"
|
||||
2 S "aa"
|
||||
3 S "p_4"
|
||||
4 S "p_7"
|
||||
locals (2) for 0x559bfdbb1c90:
|
||||
0 cc 11 16
|
||||
1 dd 12 16
|
||||
upvalues (1) for 0x559bfdbb1c90:
|
||||
0 _ENV 1 0
|
||||
|
||||
function <test_vectors/functions.lua:3,9> (6 instructions at 0x559bfdbb1f40)
|
||||
0 params, 2 slots, 1 upvalue, 0 locals, 3 constants, 1 function
|
||||
1 [4] GETTABUP 0 0 0 ; _ENV "print"
|
||||
2 [4] LOADK 1 1 ; "p_2"
|
||||
3 [4] CALL 0 2 1 ; 1 in 0 out
|
||||
4 [8] CLOSURE 0 0 ; 0x559bfdbb20d0
|
||||
5 [6] SETTABUP 0 2 0 ; _ENV "bb"
|
||||
6 [9] RETURN0
|
||||
constants (3) for 0x559bfdbb1f40:
|
||||
0 S "print"
|
||||
1 S "p_2"
|
||||
2 S "bb"
|
||||
locals (0) for 0x559bfdbb1f40:
|
||||
upvalues (1) for 0x559bfdbb1f40:
|
||||
0 _ENV 0 0
|
||||
|
||||
function <test_vectors/functions.lua:6,8> (4 instructions at 0x559bfdbb20d0)
|
||||
0 params, 2 slots, 1 upvalue, 0 locals, 2 constants, 0 functions
|
||||
1 [7] GETTABUP 0 0 0 ; _ENV "print"
|
||||
2 [7] LOADK 1 1 ; "p_3"
|
||||
3 [7] CALL 0 2 1 ; 1 in 0 out
|
||||
4 [8] RETURN0
|
||||
constants (2) for 0x559bfdbb20d0:
|
||||
0 S "print"
|
||||
1 S "p_3"
|
||||
locals (0) for 0x559bfdbb20d0:
|
||||
upvalues (1) for 0x559bfdbb20d0:
|
||||
0 _ENV 0 0
|
||||
|
||||
function <test_vectors/functions.lua:13,15> (4 instructions at 0x559bfdbb2240)
|
||||
0 params, 2 slots, 1 upvalue, 0 locals, 2 constants, 0 functions
|
||||
1 [14] GETTABUP 0 0 0 ; _ENV "print"
|
||||
2 [14] LOADK 1 1 ; "p_5"
|
||||
3 [14] CALL 0 2 1 ; 1 in 0 out
|
||||
4 [15] RETURN0
|
||||
constants (2) for 0x559bfdbb2240:
|
||||
0 S "print"
|
||||
1 S "p_5"
|
||||
locals (0) for 0x559bfdbb2240:
|
||||
upvalues (1) for 0x559bfdbb2240:
|
||||
0 _ENV 0 0
|
||||
|
||||
function <test_vectors/functions.lua:17,19> (4 instructions at 0x559bfdbb2390)
|
||||
0 params, 2 slots, 1 upvalue, 0 locals, 2 constants, 0 functions
|
||||
1 [18] GETTABUP 0 0 0 ; _ENV "print"
|
||||
2 [18] LOADK 1 1 ; "p_6"
|
||||
3 [18] CALL 0 2 1 ; 1 in 0 out
|
||||
4 [19] RETURN0
|
||||
constants (2) for 0x559bfdbb2390:
|
||||
0 S "print"
|
||||
1 S "p_6"
|
||||
locals (0) for 0x559bfdbb2390:
|
||||
upvalues (1) for 0x559bfdbb2390:
|
||||
0 _ENV 0 0
|
Loading…
Reference in New Issue