the nested functions thing is working, though I wish I didn't have to use recursion

main
_ 2023-09-25 03:05:00 -05:00
parent 3870dc2c02
commit 0d88653c21
6 changed files with 229 additions and 103 deletions

View File

@ -29,12 +29,14 @@ pub fn parse_inst (buf: [u8; 4]) -> Option <Inst>
0x09 => Inst::GetUpVal (a, b), 0x09 => Inst::GetUpVal (a, b),
0x0b => Inst::GetTabUp (a, b, c), 0x0b => Inst::GetTabUp (a, b, c),
0x0d => Inst::GetI (a, b, c), 0x0d => Inst::GetI (a, b, c),
0x0f => Inst::SetTabUp (a, b, c),
0x22 => Inst::Add (a, b, c), 0x22 => Inst::Add (a, b, c),
0x24 => Inst::Mul (a, b, c), 0x24 => Inst::Mul (a, b, c),
0x2e => Inst::MmBin (a, b, c), 0x2e => Inst::MmBin (a, b, c),
0x3c => Inst::EqK (a, b, c), 0x3c => Inst::EqK (a, b, c),
0x38 => Inst::Jmp (s_j), 0x38 => Inst::Jmp (s_j),
0x44 => Inst::Call (a, b, c), 0x44 => Inst::Call (a, b, c),
0x45 => Inst::TailCall (a, b, c, k),
0x46 => Inst::Return (a, b, c, k), 0x46 => Inst::Return (a, b, c, k),
0x47 => Inst::Return0, 0x47 => Inst::Return0,
0x48 => Inst::Return1 (a), 0x48 => Inst::Return1 (a),
@ -49,95 +51,122 @@ struct Header {
inst_count: u8, inst_count: u8,
} }
fn parse_file_name <R: Read> (rdr: &mut R) -> Option <String> { // loadString in PUC Lua. Doesn't work with long strings yet.
let file_name_sz = {
let mut file_name_sz = [0u8; 1]; fn parse_string <R: Read> (rdr: &mut R) -> Option <String> {
rdr.read_exact (&mut file_name_sz).ok ()?; let len = match parse_int (rdr)? {
usize::try_from (file_name_sz [0] - 0x80 - 1).ok ()? 0 => 0,
x => x - 1,
}; };
{ let mut buf = vec! [0u8; len as usize];
let mut file_name = vec! [0u8; file_name_sz]; rdr.read_exact (&mut buf).ok ()?;
rdr.read_exact (&mut file_name).ok ()?; Some (String::from_utf8 (buf).ok ()?)
Some (String::from_utf8 (file_name).ok ()?)
}
} }
fn parse_header (buf: [u8; 6]) -> Option <Header> { // loadByte in PUC Lua
if buf [0] & 0x80 != 0x80 {
// Not a function header
return None;
}
Some (Header {
inst_count: buf [5] - 0x80,
})
}
// I don't know what this really is, so I'm calling it a trailer for now fn parse_byte <R: Read> (rdr: &mut R) -> Option <u8>
// It appears in luac files after the string table / constants table
// for each function.
#[derive (Debug, PartialEq)]
struct Trailer {
upvalue_count: u8,
}
fn parse_trailer (buf: [u8; 6]) -> Option <Trailer> {
Some (Trailer {
upvalue_count: buf [0] - 0x80,
})
}
pub fn parse_block <R: Read> (rdr: &mut R) -> Option <Block>
{ {
let header = { let mut buf = [0u8; 1];
let mut buf = [0u8; 6]; rdr.read_exact (&mut buf).ok ()?;
rdr.read_exact (&mut buf).ok ()?; Some (buf [0])
parse_header (buf)? }
};
// loadUnsigned / loadSize in PUC Lua.
//
// Reads a variable-length unsigned integer: each byte carries 7 payload
// bits, most-significant group first, and the final byte of the value
// is the one with its high bit (0x80) set.
//
// Returns None if the reader runs out of bytes before the terminating
// byte is seen, or if the value does not fit in a u32.
fn parse_int <R: Read> (rdr: &mut R) -> Option <u32>
{
    // Largest accumulator that can still be shifted left by 7 bits
    // without losing set bits.
    const SHIFT_LIMIT: u32 = u32::MAX >> 7;

    let mut value: u32 = 0;
    loop {
        let mut buf = [0u8; 1];
        rdr.read_exact (&mut buf).ok ()?;
        let byte = buf [0];

        if value > SHIFT_LIMIT {
            // Malformed or oversized input; fail instead of wrapping.
            return None;
        }
        value = (value << 7) | u32::from (byte & 0x7f);

        // High bit set marks the last byte. A byte without it is a
        // continuation, which the old `byte - 0x80` would underflow on.
        if byte & 0x80 != 0 {
            return Some (value);
        }
    }
}
// I'm doing this recursively so it's easy to match with the PUC Lua
// code, but I don't like recursion in general, and I don't know
// why PUC wrote it that way.
pub fn parse_block <R: Read> (rdr: &mut R, blocks: &mut Vec <Block>)
-> Option <()>
{
// Ignore things I haven't implemented yet
let mut instructions = Vec::with_capacity (header.inst_count as usize); parse_string (rdr)?; // function name
parse_int (rdr)?; // start line in source code
parse_int (rdr)?; // last line in source code
parse_byte (rdr)?; // num params
parse_byte (rdr)?; // is_vararg
parse_byte (rdr)?; // maxstacksize, might be same as num slots?
for _ in 0..header.inst_count { let inst_count = parse_int (rdr)?;
let mut instructions = Vec::with_capacity (inst_count as usize);
for _ in 0..inst_count {
let mut buf = [0u8; 4]; let mut buf = [0u8; 4];
rdr.read_exact (&mut buf).ok ()?; rdr.read_exact (&mut buf).ok ()?;
instructions.push (parse_inst (buf).expect (&format! ("{buf:?}"))); instructions.push (parse_inst (buf).expect (&format! ("{buf:?}")));
} }
let constant_count = { let constant_count = parse_int (rdr)?;
let mut buf = [0u8; 1];
rdr.read_exact (&mut buf).ok ()?;
buf [0] - 0x80
};
let mut constants = Vec::with_capacity (constant_count as usize); let mut constants = Vec::with_capacity (constant_count as usize);
for _ in 0..constant_count { for _ in 0..constant_count {
let mut buf = [0u8; 2]; let const_type = parse_byte (rdr)?;
rdr.read_exact (&mut buf).ok ()?; assert_eq! (const_type, 0x04);
let len = ((buf [0] as u32) << 8) + (buf [1] as u32) - 0x0481; let s = parse_string (rdr)?;
let mut s = vec! [0u8; len.try_into().ok ()?];
rdr.read_exact (&mut s).ok ()?;
let s = String::from_utf8 (s).ok ()?;
constants.push (s.into ()); constants.push (s.into ());
} }
let trailer = { let upvalue_count = parse_int (rdr)? as usize;
let mut buf = [0u8; 6];
rdr.read_exact (&mut buf).ok ()?;
parse_trailer (buf)?
};
Some (Block { for _ in 0..upvalue_count {
// Just ignore these
for _ in 0..3 {
parse_byte (rdr)?;
}
}
blocks.push (Block {
constants, constants,
instructions, instructions,
upvalue_count: trailer.upvalue_count as usize, upvalue_count,
}) });
// Recursion
// Subfunctions. PUC calls them protos.
let protos_count = parse_int (rdr)?;
for _ in 0..protos_count {
parse_block (rdr, blocks)?;
}
// Skip over debug stuff
// I think this is delta line numbers, e.g. most instructions
// have 0, but when you go to a new source line it's 1+.
let lineinfo_count = parse_int (rdr)?;
for _ in 0..lineinfo_count {
parse_byte (rdr)?;
}
// Absolute line info, didn't see that in my test files
let abslineinfo_count = parse_int (rdr)?;
assert_eq! (abslineinfo_count, 0);
let local_count = parse_int (rdr)?;
for _ in 0..local_count {
parse_string(rdr)?;
parse_int (rdr)?;
parse_int (rdr)?;
}
let upvalue_count = parse_int (rdr)?;
for _ in 0..upvalue_count {
parse_string (rdr)?;
}
Some (())
} }
@ -148,16 +177,13 @@ pub fn parse_chunk <R: Read> (rdr: &mut R) -> Option <Chunk> {
let mut hdr = [0u8; 32]; let mut hdr = [0u8; 32];
rdr.read_exact (&mut hdr).ok ()?; rdr.read_exact (&mut hdr).ok ()?;
let file_name = parse_file_name (rdr)?;
let mut blocks = vec![]; let mut blocks = vec![];
while let Some (block) = parse_block (rdr) { while let Some (_) = parse_block (rdr, &mut blocks) {
blocks.push (block); //
} }
Some (Chunk { Some (Chunk {
file_name,
blocks, blocks,
}) })
} }
@ -206,36 +232,33 @@ mod tests {
} }
#[test] #[test]
fn parse_header () { fn parse_nested_functions () {
for (input, expected) in [ use std::io::Read;
// Bytes 0 and 1 are first line and last line for debugging
// Byte 2 is numparams let bytecode = include_bytes! ("../test_vectors/functions.luac");
// Byte 3 is is_vararg
// Byte 4 is slot count / max stack size {
// Byte 5 is instruction count let mut rdr = std::io::Cursor::new (bytecode.clone ());
([0x80, 0x80, 0x00, 0x01, 0x04, 0x92], (18,)), let mut buf = [0u8; 32];
([0x81, 0x89, 0x00, 0x00, 0x03, 0x87], (7,)), rdr.read_exact (&mut buf).unwrap ();
([0x85, 0x88, 0x00, 0x00, 0x02, 0x86], (6,)),
] { let mut blocks = vec! [];
let actual = super::parse_header (input).unwrap ();
assert_eq! (actual, super::Header { super::parse_block (&mut rdr, &mut blocks).unwrap ();
inst_count: expected.0,
}); assert_eq! (blocks [0].instructions.len (), 15);
assert_eq! (blocks [1].instructions.len (), 6);
assert_eq! (blocks [2].instructions.len (), 4);
assert_eq! (blocks [3].instructions.len (), 4);
assert_eq! (blocks [4].instructions.len (), 4);
} }
}
if false {
#[test] let mut rdr = std::io::Cursor::new (bytecode.clone ());
fn parse_trailer () { let file = crate::loader::parse_chunk (&mut rdr).unwrap ();
for (input, expected) in [
([0x81, 0x01, 0x00, 0x00, 0x81, 0x80], (1,)), assert_eq! (file.blocks.len (), 5);
([0x81, 0x00, 0x00, 0x00, 0x81, 0x80], (1,)),
([0x82, 0x00, 0x00, 0x00, 0x01, 0x00], (2,)),
] {
let actual = super::parse_trailer (input).unwrap ();
assert_eq! (actual, super::Trailer {
upvalue_count: expected.0,
});
} }
} }
} }

View File

@ -48,7 +48,9 @@ pub enum Instruction {
// Return just one register // Return just one register
Return1 (u8), Return1 (u8),
TailCall (u8, u8, u8), SetTabUp (u8, u8, u8),
TailCall (u8, u8, u8, bool),
Test (u8, i32), Test (u8, i32),
@ -136,7 +138,6 @@ pub struct Block {
} }
pub struct Chunk { pub struct Chunk {
pub file_name: String,
pub blocks: Vec <Block>, pub blocks: Vec <Block>,
} }

View File

@ -3,7 +3,6 @@ use crate::state::{
Chunk, Chunk,
Instruction as Inst, Instruction as Inst,
State, State,
Value,
}; };
#[test] #[test]
@ -23,7 +22,6 @@ fn bools () {
*/ */
let chunk = Chunk { let chunk = Chunk {
file_name: "".to_string (),
blocks: vec! [ blocks: vec! [
Block { Block {
instructions: vec! [ instructions: vec! [
@ -136,7 +134,6 @@ fn floats () {
upvalue_count: 1, upvalue_count: 1,
}; };
let chunk = Chunk { let chunk = Chunk {
file_name: "".to_string (),
blocks: vec! [block], blocks: vec! [block],
}; };

View File

@ -0,0 +1,21 @@
print "p_1" -- test input for nested-function loading; runs in the main chunk
function aa () -- global function, stored into _ENV by the main chunk
print "p_2"
function bb () -- nested inside aa; becomes a global only when aa is called
print "p_3"
end
end
print "p_4"
local function cc () -- local function declared with the `local function` form
print "p_5"
end
local dd = function () -- local holding an anonymous function expression
print "p_6"
end
print "p_7" -- last statement of the main chunk

BIN
test_vectors/functions.luac Normal file

Binary file not shown.

View File

@ -0,0 +1,84 @@
main <test_vectors/functions.lua:0,0> (15 instructions at 0x559bfdbb1c90)
0+ params, 4 slots, 1 upvalue, 2 locals, 5 constants, 3 functions
1 [1] VARARGPREP 0
2 [1] GETTABUP 0 0 0 ; _ENV "print"
3 [1] LOADK 1 1 ; "p_1"
4 [1] CALL 0 2 1 ; 1 in 0 out
5 [9] CLOSURE 0 0 ; 0x559bfdbb1f40
6 [3] SETTABUP 0 2 0 ; _ENV "aa"
7 [11] GETTABUP 0 0 0 ; _ENV "print"
8 [11] LOADK 1 3 ; "p_4"
9 [11] CALL 0 2 1 ; 1 in 0 out
10 [15] CLOSURE 0 1 ; 0x559bfdbb2240
11 [19] CLOSURE 1 2 ; 0x559bfdbb2390
12 [21] GETTABUP 2 0 0 ; _ENV "print"
13 [21] LOADK 3 4 ; "p_7"
14 [21] CALL 2 2 1 ; 1 in 0 out
15 [21] RETURN 2 1 1 ; 0 out
constants (5) for 0x559bfdbb1c90:
0 S "print"
1 S "p_1"
2 S "aa"
3 S "p_4"
4 S "p_7"
locals (2) for 0x559bfdbb1c90:
0 cc 11 16
1 dd 12 16
upvalues (1) for 0x559bfdbb1c90:
0 _ENV 1 0
function <test_vectors/functions.lua:3,9> (6 instructions at 0x559bfdbb1f40)
0 params, 2 slots, 1 upvalue, 0 locals, 3 constants, 1 function
1 [4] GETTABUP 0 0 0 ; _ENV "print"
2 [4] LOADK 1 1 ; "p_2"
3 [4] CALL 0 2 1 ; 1 in 0 out
4 [8] CLOSURE 0 0 ; 0x559bfdbb20d0
5 [6] SETTABUP 0 2 0 ; _ENV "bb"
6 [9] RETURN0
constants (3) for 0x559bfdbb1f40:
0 S "print"
1 S "p_2"
2 S "bb"
locals (0) for 0x559bfdbb1f40:
upvalues (1) for 0x559bfdbb1f40:
0 _ENV 0 0
function <test_vectors/functions.lua:6,8> (4 instructions at 0x559bfdbb20d0)
0 params, 2 slots, 1 upvalue, 0 locals, 2 constants, 0 functions
1 [7] GETTABUP 0 0 0 ; _ENV "print"
2 [7] LOADK 1 1 ; "p_3"
3 [7] CALL 0 2 1 ; 1 in 0 out
4 [8] RETURN0
constants (2) for 0x559bfdbb20d0:
0 S "print"
1 S "p_3"
locals (0) for 0x559bfdbb20d0:
upvalues (1) for 0x559bfdbb20d0:
0 _ENV 0 0
function <test_vectors/functions.lua:13,15> (4 instructions at 0x559bfdbb2240)
0 params, 2 slots, 1 upvalue, 0 locals, 2 constants, 0 functions
1 [14] GETTABUP 0 0 0 ; _ENV "print"
2 [14] LOADK 1 1 ; "p_5"
3 [14] CALL 0 2 1 ; 1 in 0 out
4 [15] RETURN0
constants (2) for 0x559bfdbb2240:
0 S "print"
1 S "p_5"
locals (0) for 0x559bfdbb2240:
upvalues (1) for 0x559bfdbb2240:
0 _ENV 0 0
function <test_vectors/functions.lua:17,19> (4 instructions at 0x559bfdbb2390)
0 params, 2 slots, 1 upvalue, 0 locals, 2 constants, 0 functions
1 [18] GETTABUP 0 0 0 ; _ENV "print"
2 [18] LOADK 1 1 ; "p_6"
3 [18] CALL 0 2 1 ; 1 in 0 out
4 [19] RETURN0
constants (2) for 0x559bfdbb2390:
0 S "print"
1 S "p_6"
locals (0) for 0x559bfdbb2390:
upvalues (1) for 0x559bfdbb2390:
0 _ENV 0 0