the nested functions thing is working, though I wish I didn't have to use recursion

main
_ 2023-09-25 03:05:00 -05:00
parent 3870dc2c02
commit 0d88653c21
6 changed files with 229 additions and 103 deletions

View File

@ -29,12 +29,14 @@ pub fn parse_inst (buf: [u8; 4]) -> Option <Inst>
0x09 => Inst::GetUpVal (a, b),
0x0b => Inst::GetTabUp (a, b, c),
0x0d => Inst::GetI (a, b, c),
0x0f => Inst::SetTabUp (a, b, c),
0x22 => Inst::Add (a, b, c),
0x24 => Inst::Mul (a, b, c),
0x2e => Inst::MmBin (a, b, c),
0x3c => Inst::EqK (a, b, c),
0x38 => Inst::Jmp (s_j),
0x44 => Inst::Call (a, b, c),
0x45 => Inst::TailCall (a, b, c, k),
0x46 => Inst::Return (a, b, c, k),
0x47 => Inst::Return0,
0x48 => Inst::Return1 (a),
@ -49,95 +51,122 @@ struct Header {
inst_count: u8,
}
fn parse_file_name <R: Read> (rdr: &mut R) -> Option <String> {
let file_name_sz = {
let mut file_name_sz = [0u8; 1];
rdr.read_exact (&mut file_name_sz).ok ()?;
usize::try_from (file_name_sz [0] - 0x80 - 1).ok ()?
// loadString in PUC Lua. Doesn't work with long strings yet.
fn parse_string <R: Read> (rdr: &mut R) -> Option <String> {
let len = match parse_int (rdr)? {
0 => 0,
x => x - 1,
};
{
let mut file_name = vec! [0u8; file_name_sz];
rdr.read_exact (&mut file_name).ok ()?;
Some (String::from_utf8 (file_name).ok ()?)
}
let mut buf = vec! [0u8; len as usize];
rdr.read_exact (&mut buf).ok ()?;
Some (String::from_utf8 (buf).ok ()?)
}
/// Decodes the fixed 6-byte function header that precedes a function's
/// instruction list.
///
/// Only two bytes are inspected:
/// - byte 0 must have its high bit (0x80) set, otherwise the buffer is
///   not treated as a function header;
/// - byte 5 is the instruction count, stored with a 0x80 bias.
///
/// Returns `None` when byte 0 lacks the high bit, or when byte 5 is
/// smaller than 0x80. The latter used to underflow the `u8` subtraction
/// (a panic in debug builds, a silently wrapped count in release builds).
fn parse_header (buf: [u8; 6]) -> Option <Header> {
    if buf [0] & 0x80 != 0x80 {
        // Not a function header
        return None;
    }

    // A count byte below 0x80 cannot be un-biased; report it as
    // unparseable instead of wrapping around.
    let inst_count = buf [5].checked_sub (0x80)?;

    Some (Header {
        inst_count,
    })
}
// loadByte in PUC Lua
// I don't know what this really is, so I'm calling it a trailer for now
// It appears in luac files after the string table / constants table
// for each function.

/// The part of a function's 6-byte trailer that is currently decoded.
#[derive (Debug, PartialEq)]
struct Trailer {
    /// First trailer byte with its 0x80 bias removed.
    upvalue_count: u8,
}

/// Decodes a 6-byte trailer record.
///
/// Only byte 0 is read; it holds the upvalue count with a 0x80 bias.
/// The remaining five bytes are ignored.
///
/// Returns `None` when byte 0 is smaller than 0x80. That case used to
/// underflow the `u8` subtraction (a panic in debug builds, a silently
/// wrapped count in release builds).
fn parse_trailer (buf: [u8; 6]) -> Option <Trailer> {
    let upvalue_count = buf [0].checked_sub (0x80)?;

    Some (Trailer {
        upvalue_count,
    })
}
pub fn parse_block <R: Read> (rdr: &mut R) -> Option <Block>
fn parse_byte <R: Read> (rdr: &mut R) -> Option <u8>
{
let header = {
let mut buf = [0u8; 6];
rdr.read_exact (&mut buf).ok ()?;
parse_header (buf)?
};
let mut buf = [0u8; 1];
rdr.read_exact (&mut buf).ok ()?;
Some (buf [0])
}
/// Reads one unsigned variable-length integer (loadUnsigned in PUC Lua).
///
/// The value is stored in groups of 7 bits, most significant group
/// first. Every byte contributes its low 7 bits, and the byte with the
/// high bit (0x80) set marks the end of the number. A single byte
/// `0x80 + n` therefore still decodes to `n`, exactly as before, while
/// values of 128 and above (which span several bytes) are now decoded
/// instead of underflowing the `u8` subtraction.
///
/// Returns `None` if the reader runs out of bytes before the final
/// byte is seen, or if the value does not fit in a `u32`.
fn parse_int <R: Read> (rdr: &mut R) -> Option <u32>
{
    let mut value = 0u32;

    loop {
        let byte = parse_byte (rdr)?;

        // Shifting left by 7 would drop significant bits: the encoded
        // number is too large for a u32.
        if value > (u32::MAX >> 7) {
            return None;
        }
        value = (value << 7) | u32::from (byte & 0x7f);

        // High bit set means this was the last byte
        if byte & 0x80 != 0 {
            return Some (value);
        }
    }
}
// I'm doing this recursively so it's easy to match with the PUC Lua
// code, but I don't like recursion in general, and I don't know
// why PUC wrote it that way.
pub fn parse_block <R: Read> (rdr: &mut R, blocks: &mut Vec <Block>)
-> Option <()>
{
// Ignore things I haven't implemented yet
let mut instructions = Vec::with_capacity (header.inst_count as usize);
parse_string (rdr)?; // function name
parse_int (rdr)?; // start line in source code
parse_int (rdr)?; // last line in source code
parse_byte (rdr)?; // num params
parse_byte (rdr)?; // is_vararg
parse_byte (rdr)?; // maxstacksize, might be same as num slots?
for _ in 0..header.inst_count {
let inst_count = parse_int (rdr)?;
let mut instructions = Vec::with_capacity (inst_count as usize);
for _ in 0..inst_count {
let mut buf = [0u8; 4];
rdr.read_exact (&mut buf).ok ()?;
instructions.push (parse_inst (buf).expect (&format! ("{buf:?}")));
}
let constant_count = {
let mut buf = [0u8; 1];
rdr.read_exact (&mut buf).ok ()?;
buf [0] - 0x80
};
let constant_count = parse_int (rdr)?;
let mut constants = Vec::with_capacity (constant_count as usize);
for _ in 0..constant_count {
let mut buf = [0u8; 2];
rdr.read_exact (&mut buf).ok ()?;
let const_type = parse_byte (rdr)?;
assert_eq! (const_type, 0x04);
let len = ((buf [0] as u32) << 8) + (buf [1] as u32) - 0x0481;
let mut s = vec! [0u8; len.try_into().ok ()?];
rdr.read_exact (&mut s).ok ()?;
let s = String::from_utf8 (s).ok ()?;
let s = parse_string (rdr)?;
constants.push (s.into ());
}
let trailer = {
let mut buf = [0u8; 6];
rdr.read_exact (&mut buf).ok ()?;
parse_trailer (buf)?
};
let upvalue_count = parse_int (rdr)? as usize;
Some (Block {
for _ in 0..upvalue_count {
// Just ignore these
for _ in 0..3 {
parse_byte (rdr)?;
}
}
blocks.push (Block {
constants,
instructions,
upvalue_count: trailer.upvalue_count as usize,
})
upvalue_count,
});
// Recursion
// Subfunctions. PUC calls them protos.
let protos_count = parse_int (rdr)?;
for _ in 0..protos_count {
parse_block (rdr, blocks)?;
}
// Skip over debug stuff
// I think this is delta line numbers, e.g. most instructions
// have 0, but when you go to a new source line it's 1+.
let lineinfo_count = parse_int (rdr)?;
for _ in 0..lineinfo_count {
parse_byte (rdr)?;
}
// Absolute line info, didn't see that in my test files
let abslineinfo_count = parse_int (rdr)?;
assert_eq! (abslineinfo_count, 0);
let local_count = parse_int (rdr)?;
for _ in 0..local_count {
parse_string(rdr)?;
parse_int (rdr)?;
parse_int (rdr)?;
}
let upvalue_count = parse_int (rdr)?;
for _ in 0..upvalue_count {
parse_string (rdr)?;
}
Some (())
}
@ -148,16 +177,13 @@ pub fn parse_chunk <R: Read> (rdr: &mut R) -> Option <Chunk> {
let mut hdr = [0u8; 32];
rdr.read_exact (&mut hdr).ok ()?;
let file_name = parse_file_name (rdr)?;
let mut blocks = vec![];
while let Some (block) = parse_block (rdr) {
blocks.push (block);
while let Some (_) = parse_block (rdr, &mut blocks) {
//
}
Some (Chunk {
file_name,
blocks,
})
}
@ -206,36 +232,33 @@ mod tests {
}
#[test]
fn parse_header () {
for (input, expected) in [
// Bytes 0 and 1 are first line and last line for debugging
// Byte 2 is numparams
// Byte 3 is is_vararg
// Byte 4 is slot count / max stack size
// Byte 5 is instruction count
fn parse_nested_functions () {
use std::io::Read;
let bytecode = include_bytes! ("../test_vectors/functions.luac");
{
let mut rdr = std::io::Cursor::new (bytecode.clone ());
([0x80, 0x80, 0x00, 0x01, 0x04, 0x92], (18,)),
([0x81, 0x89, 0x00, 0x00, 0x03, 0x87], (7,)),
([0x85, 0x88, 0x00, 0x00, 0x02, 0x86], (6,)),
] {
let actual = super::parse_header (input).unwrap ();
assert_eq! (actual, super::Header {
inst_count: expected.0,
});
let mut buf = [0u8; 32];
rdr.read_exact (&mut buf).unwrap ();
let mut blocks = vec! [];
super::parse_block (&mut rdr, &mut blocks).unwrap ();
assert_eq! (blocks [0].instructions.len (), 15);
assert_eq! (blocks [1].instructions.len (), 6);
assert_eq! (blocks [2].instructions.len (), 4);
assert_eq! (blocks [3].instructions.len (), 4);
assert_eq! (blocks [4].instructions.len (), 4);
}
}
#[test]
fn parse_trailer () {
for (input, expected) in [
([0x81, 0x01, 0x00, 0x00, 0x81, 0x80], (1,)),
([0x81, 0x00, 0x00, 0x00, 0x81, 0x80], (1,)),
([0x82, 0x00, 0x00, 0x00, 0x01, 0x00], (2,)),
] {
let actual = super::parse_trailer (input).unwrap ();
assert_eq! (actual, super::Trailer {
upvalue_count: expected.0,
});
if false {
let mut rdr = std::io::Cursor::new (bytecode.clone ());
let file = crate::loader::parse_chunk (&mut rdr).unwrap ();
assert_eq! (file.blocks.len (), 5);
}
}
}

View File

@ -48,7 +48,9 @@ pub enum Instruction {
// Return just one register
Return1 (u8),
TailCall (u8, u8, u8),
SetTabUp (u8, u8, u8),
TailCall (u8, u8, u8, bool),
Test (u8, i32),
@ -136,7 +138,6 @@ pub struct Block {
}
pub struct Chunk {
pub file_name: String,
pub blocks: Vec <Block>,
}

View File

@ -3,7 +3,6 @@ use crate::state::{
Chunk,
Instruction as Inst,
State,
Value,
};
#[test]
@ -23,7 +22,6 @@ fn bools () {
*/
let chunk = Chunk {
file_name: "".to_string (),
blocks: vec! [
Block {
instructions: vec! [
@ -136,7 +134,6 @@ fn floats () {
upvalue_count: 1,
};
let chunk = Chunk {
file_name: "".to_string (),
blocks: vec! [block],
};

View File

@ -0,0 +1,21 @@
print "p_1"
function aa ()
print "p_2"
function bb ()
print "p_3"
end
end
print "p_4"
local function cc ()
print "p_5"
end
local dd = function ()
print "p_6"
end
print "p_7"

BIN
test_vectors/functions.luac Normal file

Binary file not shown.

View File

@ -0,0 +1,84 @@
main <test_vectors/functions.lua:0,0> (15 instructions at 0x559bfdbb1c90)
0+ params, 4 slots, 1 upvalue, 2 locals, 5 constants, 3 functions
1 [1] VARARGPREP 0
2 [1] GETTABUP 0 0 0 ; _ENV "print"
3 [1] LOADK 1 1 ; "p_1"
4 [1] CALL 0 2 1 ; 1 in 0 out
5 [9] CLOSURE 0 0 ; 0x559bfdbb1f40
6 [3] SETTABUP 0 2 0 ; _ENV "aa"
7 [11] GETTABUP 0 0 0 ; _ENV "print"
8 [11] LOADK 1 3 ; "p_4"
9 [11] CALL 0 2 1 ; 1 in 0 out
10 [15] CLOSURE 0 1 ; 0x559bfdbb2240
11 [19] CLOSURE 1 2 ; 0x559bfdbb2390
12 [21] GETTABUP 2 0 0 ; _ENV "print"
13 [21] LOADK 3 4 ; "p_7"
14 [21] CALL 2 2 1 ; 1 in 0 out
15 [21] RETURN 2 1 1 ; 0 out
constants (5) for 0x559bfdbb1c90:
0 S "print"
1 S "p_1"
2 S "aa"
3 S "p_4"
4 S "p_7"
locals (2) for 0x559bfdbb1c90:
0 cc 11 16
1 dd 12 16
upvalues (1) for 0x559bfdbb1c90:
0 _ENV 1 0
function <test_vectors/functions.lua:3,9> (6 instructions at 0x559bfdbb1f40)
0 params, 2 slots, 1 upvalue, 0 locals, 3 constants, 1 function
1 [4] GETTABUP 0 0 0 ; _ENV "print"
2 [4] LOADK 1 1 ; "p_2"
3 [4] CALL 0 2 1 ; 1 in 0 out
4 [8] CLOSURE 0 0 ; 0x559bfdbb20d0
5 [6] SETTABUP 0 2 0 ; _ENV "bb"
6 [9] RETURN0
constants (3) for 0x559bfdbb1f40:
0 S "print"
1 S "p_2"
2 S "bb"
locals (0) for 0x559bfdbb1f40:
upvalues (1) for 0x559bfdbb1f40:
0 _ENV 0 0
function <test_vectors/functions.lua:6,8> (4 instructions at 0x559bfdbb20d0)
0 params, 2 slots, 1 upvalue, 0 locals, 2 constants, 0 functions
1 [7] GETTABUP 0 0 0 ; _ENV "print"
2 [7] LOADK 1 1 ; "p_3"
3 [7] CALL 0 2 1 ; 1 in 0 out
4 [8] RETURN0
constants (2) for 0x559bfdbb20d0:
0 S "print"
1 S "p_3"
locals (0) for 0x559bfdbb20d0:
upvalues (1) for 0x559bfdbb20d0:
0 _ENV 0 0
function <test_vectors/functions.lua:13,15> (4 instructions at 0x559bfdbb2240)
0 params, 2 slots, 1 upvalue, 0 locals, 2 constants, 0 functions
1 [14] GETTABUP 0 0 0 ; _ENV "print"
2 [14] LOADK 1 1 ; "p_5"
3 [14] CALL 0 2 1 ; 1 in 0 out
4 [15] RETURN0
constants (2) for 0x559bfdbb2240:
0 S "print"
1 S "p_5"
locals (0) for 0x559bfdbb2240:
upvalues (1) for 0x559bfdbb2240:
0 _ENV 0 0
function <test_vectors/functions.lua:17,19> (4 instructions at 0x559bfdbb2390)
0 params, 2 slots, 1 upvalue, 0 locals, 2 constants, 0 functions
1 [18] GETTABUP 0 0 0 ; _ENV "print"
2 [18] LOADK 1 1 ; "p_6"
3 [18] CALL 0 2 1 ; 1 in 0 out
4 [19] RETURN0
constants (2) for 0x559bfdbb2390:
0 S "print"
1 S "p_6"
locals (0) for 0x559bfdbb2390:
upvalues (1) for 0x559bfdbb2390:
0 _ENV 0 0