lunar_wave/src/loader.rs

304 lines
7.6 KiB
Rust

use std::io::Read;
use crate::state::{
Block,
Chunk,
Instruction as Inst,
};
/// Compiles Lua source to bytecode by invoking `luac` as a subprocess.
///
/// `source` is piped into the child's standard input and everything the
/// child writes to standard output is returned.
/// Luckily luac is single-pass, so we can just pipe in and out.
///
/// The child's exit status is not inspected; the returned buffer is simply
/// whatever `luac` wrote to stdout before it exited.
///
/// # Panics
///
/// Panics if `luac` cannot be spawned, if its stdin handle is missing, or
/// if waiting on the child fails.
pub (crate) fn compile_bytecode (source: Vec <u8>) -> Vec <u8> {
    use std::{
        io::Write,
        process::{
            Command,
            Stdio,
        },
    };

    let mut child = Command::new ("luac")
        .arg ("-o") // Output to...
        .arg ("-") // Standard output
        .arg ("-") // Input from standard input
        .stdin (Stdio::piped ())
        .stdout (Stdio::piped ())
        .spawn ()
        .expect ("failed to execute `luac`. Is Lua installed?");

    // The source is written from its own thread while this thread collects
    // the output below. `stdin` is moved into the closure, so the pipe is
    // closed when the closure finishes.
    let mut stdin = child.stdin.take ().expect ("failed to get stdin");
    std::thread::spawn (move || {
        stdin.write_all (&source).expect ("failed to write to stdin");
    });

    let output = child
        .wait_with_output ()
        .expect ("failed to wait on child");

    // `output.stdout` is already an owned `Vec <u8>`, no need to copy it
    output.stdout
}
/// Decodes one 32-bit PUC Lua instruction from its four bytes.
///
/// `buf [0]` holds the least significant byte. Fields, counted from the
/// least significant bit:
///
/// - bits 0..=6: opcode
/// - bits 7..=14: `A`
/// - bit 15: `k`
/// - bits 16..=23: `B`
/// - bits 24..=31: `C`
/// - bits 15..=31: `Bx`; `sBx` is `Bx` minus a bias of 65535
/// - bits 7..=31: `sJ`, stored with a bias of `0xff_ffff`
///
/// Returns `None` for opcodes that aren't handled here.
pub fn parse_inst (buf: [u8; 4]) -> Option <Inst>
{
    // Bias added to the signed 25-bit jump offset when it is encoded
    const OFFSET_SJ: i32 = (1 << 24) - 1;

    let word = u32::from_le_bytes (buf);

    let opcode = (word & 0x7f) as u8;
    // The `as u8` casts deliberately keep only the low 8 bits of the shift
    let a = (word >> 7) as u8;
    let k = (word >> 15) & 1 == 1;
    let b = (word >> 16) as u8;
    let c = (word >> 24) as u8;

    let bx = word >> 15;
    let bx = bx.try_into ().ok ()?;
    let sbx = bx - 65535;

    // `sJ` spans all 25 bits above the opcode (A, k, B and C together),
    // so it has to be decoded from the whole word. Building it from `a`
    // and `b` alone only works for short forward jumps.
    let s_j = (word >> 7) as i32 - OFFSET_SJ;

    Some (match opcode {
        0x00 => Inst::Move (a, b),
        0x01 => Inst::LoadI (a, sbx),
        0x02 => Inst::LoadF (a, sbx),
        0x03 => Inst::LoadK (a, bx),
        0x05 => Inst::LoadFalse (a),
        0x07 => Inst::LoadTrue (a),
        0x08 => Inst::LoadNil (a),
        0x09 => Inst::GetUpVal (a, b),
        0x0b => Inst::GetTabUp (a, b, c),
        0x0d => Inst::GetI (a, b, c),
        0x0f => Inst::SetTabUp (a, b, c),
        0x22 => Inst::Add (a, b, c),
        0x24 => Inst::Mul (a, b, c),
        0x2e => Inst::MmBin (a, b, c),
        0x33 => Inst::Not (a, b),
        0x3c => Inst::EqK (a, b, c),
        0x38 => Inst::Jmp (s_j),
        0x42 => Inst::Test (a, k),
        0x44 => Inst::Call (a, b, c),
        0x45 => Inst::TailCall (a, b, c, k),
        0x46 => Inst::Return (a, b, c, k),
        0x47 => Inst::Return0,
        0x48 => Inst::Return1 (a),
        0x4f => Inst::Closure (a, bx),
        0x51 => Inst::VarArgPrep (a.into ()),
        _ => return None,
    })
}
/// Header record. No code in the visible part of this file constructs or
/// reads it.
#[derive (PartialEq, Debug)]
struct Header {
    // NOTE(review): presumably an instruction count, going by the name.
    // Confirm before relying on it.
    inst_count: u8,
}
/// loadString in PUC Lua. Doesn't work with long strings yet.
///
/// Reads a size with `parse_int`, then that many bytes minus one, and
/// returns them as a `String`. A stored size of 0 is read as an empty
/// string, the same as a stored size of 1.
///
/// Returns `None` if the size or the bytes can't be read, or if the bytes
/// aren't valid UTF-8.
fn parse_string <R: Read> (rdr: &mut R) -> Option <String> {
    // Stored size is the byte count plus one; 0 stays 0
    let byte_count = parse_int (rdr)?.saturating_sub (1) as usize;

    let mut raw = vec! [0u8; byte_count];
    rdr.read_exact (&mut raw).ok ()?;

    String::from_utf8 (raw).ok ()
}
/// loadByte in PUC Lua
///
/// Reads exactly one byte, or returns `None` if the reader can't supply it.
fn parse_byte <R: Read> (rdr: &mut R) -> Option <u8>
{
    let mut one = [0u8; 1];
    match rdr.read_exact (&mut one) {
        Ok (()) => Some (one [0]),
        Err (_) => None,
    }
}
/// loadUnsigned in PUC Lua (also used for loadInt and loadSize)
///
/// Integers are stored as a run of 7-bit groups, most significant group
/// first, one group per byte. Only the last byte of a value has its high
/// bit set.
///
/// Returns `None` if the reader runs out before the last byte is seen, or
/// if the value doesn't fit in a `u32`.
fn parse_int <R: Read> (rdr: &mut R) -> Option <u32>
{
    let mut value = 0u32;
    loop {
        let byte = parse_byte (rdr)?;

        // Shifting by 7 must not push any set bits off the top
        if value > (u32::MAX >> 7) {
            return None;
        }
        value = (value << 7) | u32::from (byte & 0x7f);

        // High bit marks the last byte
        if byte & 0x80 != 0 {
            return Some (value);
        }
    }
}
/// Parses one function and, recursively, every function nested inside it,
/// appending a `Block` for each to `blocks`.
///
/// A block is pushed before its nested functions are parsed, so a parent
/// always comes before its children in `blocks`.
///
/// I'm doing this recursively so it's easy to match with the PUC Lua
/// code, but I don't like recursion in general, and I don't know
/// why PUC wrote it that way.
///
/// Returns `None` as soon as any read or string/integer parse fails.
/// Blocks pushed before the failure stay in `blocks`.
///
/// # Panics
///
/// Panics on an instruction `parse_inst` doesn't recognize, on a constant
/// whose type byte isn't `0x04`, and on non-empty absolute line info.
pub fn parse_block <R: Read> (rdr: &mut R, blocks: &mut Vec <Block>)
-> Option <()>
{
    // Ignore things I haven't implemented yet
    parse_string (rdr)?; // function name
    parse_int (rdr)?; // start line in source code
    parse_int (rdr)?; // last line in source code
    parse_byte (rdr)?; // num params
    parse_byte (rdr)?; // is_vararg
    parse_byte (rdr)?; // maxstacksize, might be same as num slots?

    let inst_count = parse_int (rdr)?;
    let mut instructions = Vec::with_capacity (inst_count as usize);
    for _ in 0..inst_count {
        let mut buf = [0u8; 4];
        rdr.read_exact (&mut buf).ok ()?;
        // The panic message (the raw bytes) is only formatted on failure
        let inst = parse_inst (buf).unwrap_or_else (|| panic! ("{:?}", buf));
        instructions.push (inst);
    }

    let constant_count = parse_int (rdr)?;
    let mut constants = Vec::with_capacity (constant_count as usize);
    for _ in 0..constant_count {
        // Only constants tagged 0x04 are handled, and they are read as strings
        let const_type = parse_byte (rdr)?;
        assert_eq! (const_type, 0x04);
        let s = parse_string (rdr)?;
        constants.push (s.into ());
    }

    let upvalue_count = parse_int (rdr)? as usize;
    for _ in 0..upvalue_count {
        // Just ignore these, 3 bytes per upvalue
        for _ in 0..3 {
            parse_byte (rdr)?;
        }
    }

    blocks.push (Block {
        constants,
        instructions,
        upvalue_count,
    });

    // Recursion
    // Subfunctions. PUC calls them protos.
    let protos_count = parse_int (rdr)?;
    for _ in 0..protos_count {
        parse_block (rdr, blocks)?;
    }

    // Skip over debug stuff

    // I think this is delta line numbers, e.g. most instructions
    // have 0, but when you go to a new source line it's 1+.
    let lineinfo_count = parse_int (rdr)?;
    for _ in 0..lineinfo_count {
        parse_byte (rdr)?;
    }

    // Absolute line info, didn't see that in my test files
    let abslineinfo_count = parse_int (rdr)?;
    assert_eq! (abslineinfo_count, 0);

    // Local variables: a string and two integers each, all discarded
    let local_count = parse_int (rdr)?;
    for _ in 0..local_count {
        parse_string (rdr)?;
        parse_int (rdr)?;
        parse_int (rdr)?;
    }

    // One string per entry, discarded
    let upvalue_count = parse_int (rdr)?;
    for _ in 0..upvalue_count {
        parse_string (rdr)?;
    }

    Some (())
}
/// Parses a compiled chunk: skips the 32-byte header, then collects
/// blocks with `parse_block` until it returns `None`.
///
/// Returns `None` only if the header can't be read. A failure while
/// parsing blocks just ends the loop and isn't reported, so the returned
/// `Chunk` holds whatever blocks were pushed up to that point.
pub fn parse_chunk <R: Read> (rdr: &mut R) -> Option <Chunk> {
    // Discard 32 bytes from the start of the file.
    // This is magic number, version number, etc. None of it is checked.
    let mut header = [0u8; 32];
    rdr.read_exact (&mut header).ok ()?;

    let mut blocks = vec! [];
    while parse_block (rdr, &mut blocks).is_some () {
        // Keep going until a block fails to parse
    }

    Some (Chunk { blocks })
}
#[cfg (test)]
mod tests {
    /// Runs `parse_inst` over a table of raw instruction bytes paired
    /// with the instruction each one should decode to.
    #[test]
    fn parse_inst () {
        use super::Inst;

        let cases = [
            ([0x51, 0x00, 0x00, 0x00], Inst::VarArgPrep (0)),
            ([0x4f, 0x00, 0x00, 0x00], Inst::Closure (0, 0)),
            ([0xcf, 0x00, 0x00, 0x00], Inst::Closure (1, 0)),
            ([0x8b, 0x00, 0x00, 0x00], Inst::GetTabUp (1, 0, 0)),
            ([0x03, 0x81, 0x00, 0x00], Inst::LoadK (2, 1)),
            ([0xc4, 0x00, 0x02, 0x01], Inst::Call (1, 2, 1)),
            ([0x80, 0x00, 0x00, 0x00], Inst::Move (1, 0)),
            ([0xc4, 0x00, 0x01, 0x02], Inst::Call (1, 1, 2)),
            ([0x0b, 0x01, 0x00, 0x00], Inst::GetTabUp (2, 0, 0)),
            ([0x83, 0x01, 0x01, 0x00], Inst::LoadK (3, 2)),
            ([0x44, 0x01, 0x02, 0x01], Inst::Call (2, 2, 1)),
            ([0x0b, 0x01, 0x00, 0x00], Inst::GetTabUp (2, 0, 0)),
            ([0x80, 0x01, 0x01, 0x00], Inst::Move (3, 1)),
            ([0xc4, 0x01, 0x01, 0x00], Inst::Call (3, 1, 0)),
            ([0x44, 0x01, 0x00, 0x01], Inst::Call (2, 0, 1)),
            ([0x0b, 0x01, 0x00, 0x00], Inst::GetTabUp (2, 0, 0)),
            ([0x83, 0x81, 0x01, 0x00], Inst::LoadK (3, 3)),
            ([0x44, 0x01, 0x02, 0x01], Inst::Call (2, 2, 1)),
            ([0x46, 0x01, 0x01, 0x01], Inst::Return (2, 1, 1, false)),
            ([0x01, 0x00, 0x02, 0x80], Inst::LoadI (0, 5)),
            ([0xc6, 0x80, 0x02, 0x00], Inst::Return (1, 2, 0, true)),
            ([0x09, 0x00, 0x01, 0x00], Inst::GetUpVal (0, 1)),
            ([0x48, 0x00, 0x02, 0x00], Inst::Return1 (0)),
            ([0x47, 0x00, 0x01, 0x00], Inst::Return0),
            ([0x8d, 0x00, 0x01, 0x01], Inst::GetI (1, 1, 1)),
            ([0xbc, 0x00, 0x01, 0x00], Inst::EqK (1, 1, 0)),
            ([0xb8, 0x02, 0x00, 0x80], Inst::Jmp (6)),
            ([0x38, 0x02, 0x00, 0x80], Inst::Jmp (5)),
        ];

        for (bytes, want) in cases {
            let got = super::parse_inst (bytes).unwrap ();
            assert_eq! (got, want);
        }
    }

    /// Parses a compiled file with nested functions and checks how many
    /// instructions each of the first five blocks has.
    #[test]
    fn parse_nested_functions () {
        use std::io::Read;

        let bytecode = include_bytes! ("../test_vectors/functions.luac");

        {
            let mut rdr = std::io::Cursor::new (&bytecode [..]);

            // parse_block starts after the 32-byte header, so skip it here
            let mut header = [0u8; 32];
            rdr.read_exact (&mut header).unwrap ();

            let mut blocks = vec! [];
            super::parse_block (&mut rdr, &mut blocks).unwrap ();

            for (idx, want) in [15, 6, 4, 4, 4].into_iter ().enumerate () {
                assert_eq! (blocks [idx].instructions.len (), want);
            }
        }

        // Disabled: compiled but never run
        if false {
            let mut rdr = std::io::Cursor::new (&bytecode [..]);
            let file = crate::loader::parse_chunk (&mut rdr).unwrap ();

            assert_eq! (file.blocks.len (), 5);
        }
    }
}