2023-09-25 01:40:28 +00:00
|
|
|
use std::io::Read;
|
|
|
|
|
|
|
|
use crate::state::{
|
|
|
|
Block,
|
2023-09-25 03:14:08 +00:00
|
|
|
Chunk,
|
2023-09-25 01:40:28 +00:00
|
|
|
Instruction as Inst,
|
|
|
|
};
|
|
|
|
|
2023-09-26 18:33:59 +00:00
|
|
|
/// Invoke `luac` as a subprocess to compile Lua source into bytecode.
///
/// The source is written to the child's standard input and the compiled
/// bytecode is collected from its standard output (`luac -o - -`).
/// Luckily luac is single-pass, so we can just pipe in and out.
///
/// The exit status of `luac` is not inspected: whatever the child wrote to
/// standard output is returned as-is.
///
/// # Panics
///
/// Panics if `luac` cannot be spawned, if the child's stdin handle cannot be
/// taken, or if waiting on the child fails.
pub(crate) fn compile_bytecode(source: Vec<u8>) -> Vec<u8> {
    use std::{
        io::Write,
        process::{Command, Stdio},
    };

    let mut child = Command::new("luac")
        .arg("-o") // Output to...
        .arg("-") // Standard output
        .arg("-") // Input from standard input
        .stdin(Stdio::piped())
        .stdout(Stdio::piped())
        .spawn()
        .expect("failed to execute `luac`. Is Lua installed?");

    // Write from a separate thread so that feeding stdin and draining stdout
    // happen concurrently. `stdin` is dropped when the closure returns, which
    // closes the pipe and signals end-of-input to the child.
    let mut stdin = child.stdin.take().expect("failed to get stdin");
    std::thread::spawn(move || {
        stdin.write_all(&source).expect("failed to write to stdin");
    });

    let output = child.wait_with_output().expect("failed to wait on child");

    // `output` is owned, so hand its buffer back without copying it.
    output.stdout
}
|
|
|
|
|
2023-09-25 01:40:28 +00:00
|
|
|
pub fn parse_inst (buf: [u8; 4]) -> Option <Inst>
|
|
|
|
{
|
|
|
|
let opcode = buf [0] & 0x7f;
|
|
|
|
|
|
|
|
let a = (buf [0] >> 7) | ((buf [1] & 0x7f) << 1);
|
|
|
|
let b = buf [2];
|
|
|
|
let c = buf [3];
|
|
|
|
let bx =
|
|
|
|
(((buf [1] >> 7) as u32) << 0) |
|
|
|
|
((buf [2] as u32) << 1) |
|
|
|
|
((buf [3] as u32) << 9);
|
|
|
|
let bx = bx.try_into().ok ()?;
|
|
|
|
let sbx = bx - 65535;
|
|
|
|
let k = (buf [1] & 0x80) >> 7 == 1;
|
2023-09-25 05:23:53 +00:00
|
|
|
let s_j = a as i32 + ((b as i32) << 8) + 1;
|
2023-09-25 01:40:28 +00:00
|
|
|
|
|
|
|
Some (match opcode {
|
|
|
|
0x00 => Inst::Move (a, b),
|
|
|
|
0x01 => Inst::LoadI (a, sbx),
|
2023-09-26 18:46:01 +00:00
|
|
|
0x02 => Inst::LoadF (a, sbx),
|
2023-09-25 01:40:28 +00:00
|
|
|
0x03 => Inst::LoadK (a, bx),
|
2023-09-26 18:46:01 +00:00
|
|
|
0x05 => Inst::LoadFalse (a),
|
|
|
|
0x07 => Inst::LoadTrue (a),
|
|
|
|
0x08 => Inst::LoadNil (a),
|
2023-09-25 01:40:28 +00:00
|
|
|
0x09 => Inst::GetUpVal (a, b),
|
2023-09-25 05:23:53 +00:00
|
|
|
0x0b => Inst::GetTabUp (a, b, c),
|
|
|
|
0x0d => Inst::GetI (a, b, c),
|
2023-09-25 08:05:00 +00:00
|
|
|
0x0f => Inst::SetTabUp (a, b, c),
|
2023-09-25 05:23:53 +00:00
|
|
|
0x22 => Inst::Add (a, b, c),
|
2023-09-25 06:57:57 +00:00
|
|
|
0x24 => Inst::Mul (a, b, c),
|
2023-09-25 05:23:53 +00:00
|
|
|
0x2e => Inst::MmBin (a, b, c),
|
|
|
|
0x3c => Inst::EqK (a, b, c),
|
|
|
|
0x38 => Inst::Jmp (s_j),
|
2023-09-25 01:40:28 +00:00
|
|
|
0x44 => Inst::Call (a, b, c),
|
2023-09-25 08:05:00 +00:00
|
|
|
0x45 => Inst::TailCall (a, b, c, k),
|
2023-09-25 01:40:28 +00:00
|
|
|
0x46 => Inst::Return (a, b, c, k),
|
|
|
|
0x47 => Inst::Return0,
|
|
|
|
0x48 => Inst::Return1 (a),
|
2023-09-25 05:23:53 +00:00
|
|
|
0x4f => Inst::Closure (a, bx),
|
|
|
|
0x51 => Inst::VarArgPrep (a.into ()),
|
2023-09-25 01:40:28 +00:00
|
|
|
_ => return None,
|
|
|
|
})
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Summary information about a parsed function header.
///
/// NOTE(review): nothing visible in this file constructs or reads this type;
/// confirm whether it is still needed.
#[derive(Debug, PartialEq)]
struct Header {
    /// Instruction count. NOTE(review): the parser reads counts as `u32`,
    /// so this `u8` can only hold values up to 255 — confirm intended width.
    inst_count: u8,
}
|
|
|
|
|
2023-09-25 08:05:00 +00:00
|
|
|
// loadString in PUC Lua. Doesn't work with long strings yet.
|
2023-09-25 01:40:28 +00:00
|
|
|
|
2023-09-25 08:05:00 +00:00
|
|
|
fn parse_string <R: Read> (rdr: &mut R) -> Option <String> {
|
|
|
|
let len = match parse_int (rdr)? {
|
|
|
|
0 => 0,
|
|
|
|
x => x - 1,
|
|
|
|
};
|
2023-09-25 01:40:28 +00:00
|
|
|
|
2023-09-25 08:05:00 +00:00
|
|
|
let mut buf = vec! [0u8; len as usize];
|
|
|
|
rdr.read_exact (&mut buf).ok ()?;
|
|
|
|
Some (String::from_utf8 (buf).ok ()?)
|
2023-09-25 01:40:28 +00:00
|
|
|
}
|
|
|
|
|
2023-09-25 08:05:00 +00:00
|
|
|
/// Reads exactly one byte from `rdr` (`loadByte` in PUC Lua).
///
/// Returns `None` if the reader cannot supply another byte.
fn parse_byte<R: Read>(rdr: &mut R) -> Option<u8> {
    let mut byte = [0u8; 1];
    rdr.read_exact(&mut byte).ok()?;
    let [value] = byte;
    Some(value)
}
|
|
|
|
|
2023-09-25 08:05:00 +00:00
|
|
|
/// Reads a variable-length unsigned integer (`loadUnsigned` in PUC Lua).
///
/// The value is stored 7 bits per byte, most significant group first. Every
/// byte except the last has its high bit clear; the last byte has its high
/// bit set. A single byte `0x80 + n` therefore decodes to `n`.
///
/// Returns `None` if the reader runs out of data before the terminating
/// byte, or if the value does not fit in a `u32`.
fn parse_int<R: Read>(rdr: &mut R) -> Option<u32> {
    let mut value: u32 = 0;

    loop {
        let mut byte = [0u8; 1];
        rdr.read_exact(&mut byte).ok()?;
        let byte = byte[0];

        // Shifting in seven more bits must not push set bits off the top.
        if value > (u32::MAX >> 7) {
            return None;
        }
        value = (value << 7) | u32::from(byte & 0x7f);

        // The high bit marks the final byte of the encoding.
        if byte & 0x80 != 0 {
            return Some(value);
        }
    }
}
|
|
|
|
|
2023-09-25 08:05:00 +00:00
|
|
|
// I'm doing this recursively so it's easy to match with the PUC Lua
|
|
|
|
// code, but I don't like recursion in general, and I don't know
|
|
|
|
// why PUC wrote it that way.
|
|
|
|
|
|
|
|
pub fn parse_block <R: Read> (rdr: &mut R, blocks: &mut Vec <Block>)
|
|
|
|
-> Option <()>
|
2023-09-25 01:40:28 +00:00
|
|
|
{
|
2023-09-25 08:05:00 +00:00
|
|
|
// Ignore things I haven't implemented yet
|
2023-09-25 01:40:28 +00:00
|
|
|
|
2023-09-25 08:05:00 +00:00
|
|
|
parse_string (rdr)?; // function name
|
|
|
|
parse_int (rdr)?; // start line in source code
|
|
|
|
parse_int (rdr)?; // last line in source code
|
|
|
|
parse_byte (rdr)?; // num params
|
|
|
|
parse_byte (rdr)?; // is_vararg
|
|
|
|
parse_byte (rdr)?; // maxstacksize, might be same as num slots?
|
2023-09-25 01:40:28 +00:00
|
|
|
|
2023-09-25 08:05:00 +00:00
|
|
|
let inst_count = parse_int (rdr)?;
|
|
|
|
let mut instructions = Vec::with_capacity (inst_count as usize);
|
|
|
|
|
|
|
|
for _ in 0..inst_count {
|
2023-09-25 01:40:28 +00:00
|
|
|
let mut buf = [0u8; 4];
|
|
|
|
rdr.read_exact (&mut buf).ok ()?;
|
2023-09-25 05:23:53 +00:00
|
|
|
instructions.push (parse_inst (buf).expect (&format! ("{buf:?}")));
|
2023-09-25 01:40:28 +00:00
|
|
|
}
|
|
|
|
|
2023-09-25 08:05:00 +00:00
|
|
|
let constant_count = parse_int (rdr)?;
|
2023-09-25 01:40:28 +00:00
|
|
|
|
|
|
|
let mut constants = Vec::with_capacity (constant_count as usize);
|
|
|
|
|
|
|
|
for _ in 0..constant_count {
|
2023-09-25 08:05:00 +00:00
|
|
|
let const_type = parse_byte (rdr)?;
|
|
|
|
assert_eq! (const_type, 0x04);
|
2023-09-25 01:40:28 +00:00
|
|
|
|
2023-09-25 08:05:00 +00:00
|
|
|
let s = parse_string (rdr)?;
|
2023-09-25 01:40:28 +00:00
|
|
|
constants.push (s.into ());
|
|
|
|
}
|
|
|
|
|
2023-09-25 08:05:00 +00:00
|
|
|
let upvalue_count = parse_int (rdr)? as usize;
|
|
|
|
|
|
|
|
for _ in 0..upvalue_count {
|
|
|
|
// Just ignore these
|
2023-09-25 01:40:28 +00:00
|
|
|
|
2023-09-25 08:05:00 +00:00
|
|
|
for _ in 0..3 {
|
|
|
|
parse_byte (rdr)?;
|
|
|
|
}
|
|
|
|
}
|
2023-09-25 01:40:28 +00:00
|
|
|
|
2023-09-25 08:05:00 +00:00
|
|
|
blocks.push (Block {
|
2023-09-25 01:40:28 +00:00
|
|
|
constants,
|
|
|
|
instructions,
|
2023-09-25 08:05:00 +00:00
|
|
|
upvalue_count,
|
|
|
|
});
|
|
|
|
|
|
|
|
// Recursion
|
|
|
|
|
|
|
|
// Subfunctions. PUC calls them protos.
|
|
|
|
let protos_count = parse_int (rdr)?;
|
|
|
|
for _ in 0..protos_count {
|
|
|
|
parse_block (rdr, blocks)?;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Skip over debug stuff
|
|
|
|
|
|
|
|
// I think this is delta line numbers, e.g. most instructions
|
|
|
|
// have 0, but when you go to a new source line it's 1+.
|
|
|
|
|
|
|
|
let lineinfo_count = parse_int (rdr)?;
|
|
|
|
for _ in 0..lineinfo_count {
|
|
|
|
parse_byte (rdr)?;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Absolute line info, didn't see that in my test files
|
|
|
|
|
|
|
|
let abslineinfo_count = parse_int (rdr)?;
|
|
|
|
assert_eq! (abslineinfo_count, 0);
|
|
|
|
|
|
|
|
let local_count = parse_int (rdr)?;
|
|
|
|
for _ in 0..local_count {
|
|
|
|
parse_string(rdr)?;
|
|
|
|
parse_int (rdr)?;
|
|
|
|
parse_int (rdr)?;
|
|
|
|
}
|
|
|
|
|
|
|
|
let upvalue_count = parse_int (rdr)?;
|
|
|
|
for _ in 0..upvalue_count {
|
|
|
|
parse_string (rdr)?;
|
|
|
|
}
|
|
|
|
|
|
|
|
Some (())
|
2023-09-25 01:40:28 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
2023-09-25 03:14:08 +00:00
|
|
|
pub fn parse_chunk <R: Read> (rdr: &mut R) -> Option <Chunk> {
|
2023-09-25 01:40:28 +00:00
|
|
|
// Discard 32 bytes from the start of the file.
|
|
|
|
// This is magic number, version number, etc.
|
|
|
|
|
|
|
|
let mut hdr = [0u8; 32];
|
|
|
|
rdr.read_exact (&mut hdr).ok ()?;
|
|
|
|
|
|
|
|
let mut blocks = vec![];
|
|
|
|
|
2023-09-25 08:05:00 +00:00
|
|
|
while let Some (_) = parse_block (rdr, &mut blocks) {
|
|
|
|
//
|
2023-09-25 01:40:28 +00:00
|
|
|
}
|
|
|
|
|
2023-09-25 03:14:08 +00:00
|
|
|
Some (Chunk {
|
2023-09-25 01:40:28 +00:00
|
|
|
blocks,
|
|
|
|
})
|
|
|
|
}
|
|
|
|
|
|
|
|
#[cfg(test)]
mod tests {
    /// Checks instruction decoding against raw 4-byte words and the
    /// instructions they are expected to decode to.
    #[test]
    fn parse_inst() {
        use super::Inst;

        for (input, expected) in [
            ([0x51, 0x00, 0x00, 0x00], Inst::VarArgPrep(0)),
            ([0x4f, 0x00, 0x00, 0x00], Inst::Closure(0, 0)),
            ([0xcf, 0x00, 0x00, 0x00], Inst::Closure(1, 0)),
            ([0x8b, 0x00, 0x00, 0x00], Inst::GetTabUp(1, 0, 0)),
            ([0x03, 0x81, 0x00, 0x00], Inst::LoadK(2, 1)),
            ([0xc4, 0x00, 0x02, 0x01], Inst::Call(1, 2, 1)),
            ([0x80, 0x00, 0x00, 0x00], Inst::Move(1, 0)),
            ([0xc4, 0x00, 0x01, 0x02], Inst::Call(1, 1, 2)),
            ([0x0b, 0x01, 0x00, 0x00], Inst::GetTabUp(2, 0, 0)),
            ([0x83, 0x01, 0x01, 0x00], Inst::LoadK(3, 2)),
            ([0x44, 0x01, 0x02, 0x01], Inst::Call(2, 2, 1)),
            ([0x0b, 0x01, 0x00, 0x00], Inst::GetTabUp(2, 0, 0)),
            ([0x80, 0x01, 0x01, 0x00], Inst::Move(3, 1)),
            ([0xc4, 0x01, 0x01, 0x00], Inst::Call(3, 1, 0)),
            ([0x44, 0x01, 0x00, 0x01], Inst::Call(2, 0, 1)),
            ([0x0b, 0x01, 0x00, 0x00], Inst::GetTabUp(2, 0, 0)),
            ([0x83, 0x81, 0x01, 0x00], Inst::LoadK(3, 3)),
            ([0x44, 0x01, 0x02, 0x01], Inst::Call(2, 2, 1)),
            ([0x46, 0x01, 0x01, 0x01], Inst::Return(2, 1, 1, false)),

            ([0x01, 0x00, 0x02, 0x80], Inst::LoadI(0, 5)),
            ([0xc6, 0x80, 0x02, 0x00], Inst::Return(1, 2, 0, true)),

            ([0x09, 0x00, 0x01, 0x00], Inst::GetUpVal(0, 1)),
            ([0x48, 0x00, 0x02, 0x00], Inst::Return1(0)),
            ([0x47, 0x00, 0x01, 0x00], Inst::Return0),
            ([0x8d, 0x00, 0x01, 0x01], Inst::GetI(1, 1, 1)),
            ([0xbc, 0x00, 0x01, 0x00], Inst::EqK(1, 1, 0)),
            ([0xb8, 0x02, 0x00, 0x80], Inst::Jmp(6)),
            ([0x38, 0x02, 0x00, 0x80], Inst::Jmp(5)),
        ] {
            let actual = super::parse_inst(input).unwrap();
            assert_eq!(actual, expected);
        }
    }

    /// Parses a precompiled test vector and checks the instruction count of
    /// each block that comes out of it.
    #[test]
    fn parse_nested_functions() {
        use std::io::Read;

        let bytecode = include_bytes!("../test_vectors/functions.luac");

        {
            // Borrow the embedded bytes instead of copying the array.
            let mut rdr = std::io::Cursor::new(bytecode.as_slice());

            // Skip the 32 bytes that precede the first function.
            let mut buf = [0u8; 32];
            rdr.read_exact(&mut buf).unwrap();

            let mut blocks = vec![];

            super::parse_block(&mut rdr, &mut blocks).unwrap();

            assert_eq!(blocks[0].instructions.len(), 15);
            assert_eq!(blocks[1].instructions.len(), 6);
            assert_eq!(blocks[2].instructions.len(), 4);
            assert_eq!(blocks[3].instructions.len(), 4);
            assert_eq!(blocks[4].instructions.len(), 4);
        }

        // Deliberately disabled: still type-checked, never executed.
        if false {
            let mut rdr = std::io::Cursor::new(bytecode.as_slice());
            let file = crate::loader::parse_chunk(&mut rdr).unwrap();

            assert_eq!(file.blocks.len(), 5);
        }
    }
}
|