lunar_wave/src/loader.rs

351 lines
8.9 KiB
Rust

use std::io::Read;
use crate::{
instruction::Instruction as Inst,
state::{
Block,
Chunk,
}
};
/// Compiles Lua source to bytecode by invoking `luac5.4` as a subprocess.
///
/// Luckily `luac` is single-pass, so the source is piped into its standard
/// input and the bytecode is collected from its standard output.
///
/// `source` is an owned `Vec` because it is moved to a worker thread, which
/// feeds the child's stdin while this thread collects the child's stdout.
///
/// The child's exit status is not inspected; whatever was written to stdout
/// is returned as-is.
///
/// # Panics
///
/// Panics if `luac5.4` cannot be spawned, if its stdin handle is missing, or
/// if waiting on the child fails. A failed write to the child's stdin only
/// panics the detached writer thread.
pub(crate) fn compile_bytecode(source: Vec<u8>) -> Vec<u8> {
    use std::{
        io::Write,
        process::{Command, Stdio},
    };

    let mut child = Command::new("luac5.4")
        .arg("-o") // Output to...
        .arg("-") // Standard output
        .arg("-") // Input from standard input
        .stdin(Stdio::piped())
        .stdout(Stdio::piped())
        .spawn()
        .expect("failed to execute `luac5.4`. Is Lua installed?");

    let mut stdin = child.stdin.take().expect("failed to get stdin");
    // The handle is moved into the closure and dropped when it returns,
    // which closes the pipe once all of the source has been written.
    std::thread::spawn(move || {
        stdin.write_all(&source).expect("failed to write to stdin");
    });

    let output = child.wait_with_output().expect("failed to wait on child");

    // `output.stdout` is already an owned `Vec<u8>`; return it without copying.
    output.stdout
}
/// Decodes the signed form of the third instruction byte: its unsigned value
/// minus 127.
///
/// Returns `None` when the result does not fit in an `i8`, which only happens
/// for a raw byte of 255.
fn i_sb(buf: [u8; 4]) -> Option<i8> {
    let [_, _, raw, _] = buf;
    let shifted = i32::from(raw) - 127;
    i8::try_from(shifted).ok()
}
/// Decodes one 4-byte, little-endian Lua 5.4 instruction.
///
/// Bit layout, counting from the least significant bit of the 32-bit word:
///
/// * bits 0..7   - opcode
/// * bits 7..15  - `A`
/// * bit  15     - `k`
/// * bits 16..24 - `B`
/// * bits 24..32 - `C`
///
/// The wide operands overlay the same bits: `Bx` is `k`, `B` and `C` taken
/// together (17 bits), `Ax` is everything above the opcode (25 bits). `sBx`
/// and `sJ` are `Bx` and `Ax` stored with an excess of 65535 and 16777215
/// respectively, which is how negative values (e.g. backward jumps) are
/// represented.
///
/// Returns `None` for opcodes that are not handled yet, and for `EqI` when
/// its signed operand does not fit in an `i8`.
pub fn parse_inst(buf: [u8; 4]) -> Option<Inst> {
    let opcode = buf[0] & 0x7f;
    let a = (buf[0] >> 7) | ((buf[1] & 0x7f) << 1);
    let k = buf[1] & 0x80 != 0;
    let b = buf[2];
    let c = buf[3];

    // 17-bit operand: k is the lowest bit, then B, then C.
    let bx = u32::from(k) | (u32::from(b) << 1) | (u32::from(c) << 9);
    let sbx = i32::try_from(bx).ok()? - 0xffff;

    // 25-bit operand: A is the lowest byte, followed by the bits of Bx.
    let ax = u32::from(a) | (bx << 8);
    let s_j = i32::try_from(ax).ok()? - 0x00ff_ffff;

    Some(match opcode {
        0x00 => Inst::Move(a, b),
        0x01 => Inst::LoadI(a, sbx),
        0x02 => Inst::LoadF(a, sbx),
        0x03 => Inst::LoadK(a, bx),
        0x05 => Inst::LoadFalse(a),
        0x07 => Inst::LoadTrue(a),
        0x08 => Inst::LoadNil(a),
        0x09 => Inst::GetUpVal(a, b),
        0x0b => Inst::GetTabUp(a, b, c),
        0x0c => Inst::GetTable(a, b, c),
        0x0d => Inst::GetI(a, b, c),
        0x0e => Inst::GetField(a, b, c),
        0x0f => Inst::SetTabUp(a, b, c),
        0x11 => Inst::SetI(a, b, c, k),
        0x12 => Inst::SetField(a, b, c, k),
        0x13 => Inst::NewTable(a),
        0x19 => Inst::ModK(a, b, c),
        0x22 => Inst::Add(a, b, c),
        0x24 => Inst::Mul(a, b, c),
        0x2e => Inst::MmBin(a, b, c),
        0x30 => Inst::MmBinK(a, b, c, k),
        0x33 => Inst::Not(a, b),
        0x38 => Inst::Jmp(s_j),
        0x3c => Inst::EqK(a, b, k),
        0x3d => Inst::EqI(a, i_sb(buf)?, k),
        0x42 => Inst::Test(a, k),
        0x44 => Inst::Call(a, b, c),
        0x45 => Inst::TailCall(a, b, c, k),
        0x46 => Inst::Return(a, b, c, k),
        0x47 => Inst::Return0,
        0x48 => Inst::Return1(a),
        0x49 => Inst::ForLoop(a, bx),
        0x4a => Inst::ForPrep(a, bx),
        0x4e => Inst::SetList(a, b, c, k),
        0x4f => Inst::Closure(a, bx),
        0x51 => Inst::VarArgPrep(a.into()),
        0x52 => Inst::ExtraArg(ax),
        _ => return None,
    })
}
/// Holds a single one-byte instruction count.
///
/// NOTE(review): nothing in the visible part of this file constructs or reads
/// this struct - confirm whether it is still needed.
#[derive (Debug, PartialEq)]
struct Header {
/// Instruction count, stored as one byte.
inst_count: u8,
}
/// Reads a length-prefixed string; the counterpart of `loadString` in PUC Lua.
///
/// The prefix is decoded with `parse_int` and holds the byte length plus one.
/// A prefix of 0 is read as an empty string as well. Strings are limited to
/// the lengths that `parse_int` is able to decode.
///
/// Returns `None` if the prefix cannot be read, the reader runs out of data,
/// or the bytes are not valid UTF-8.
fn parse_string<R: Read>(rdr: &mut R) -> Option<String> {
    let len = parse_int(rdr)?.saturating_sub(1);
    let mut bytes = vec![0u8; len as usize];
    rdr.read_exact(&mut bytes).ok()?;
    String::from_utf8(bytes).ok()
}
/// Reads a single byte; the counterpart of `loadByte` in PUC Lua.
///
/// Returns `None` when the reader cannot supply another byte.
fn parse_byte<R: Read>(rdr: &mut R) -> Option<u8> {
    let mut byte = [0u8; 1];
    rdr.read_exact(&mut byte).ok().map(|()| byte[0])
}
/// Reads an 8-byte float stored in the host's native byte order.
///
/// Returns `None` when fewer than eight bytes are available.
fn parse_float<R: Read>(rdr: &mut R) -> Option<f64> {
    let mut raw = [0u8; 8];
    rdr.read_exact(&mut raw).ok().map(|()| f64::from_ne_bytes(raw))
}
/// Reads a variable-length unsigned integer; the counterpart of
/// `loadUnsigned` in PUC Lua.
///
/// The value is stored most-significant group first, seven bits per byte.
/// The final byte of the sequence has its high bit set; all earlier bytes
/// have it clear.
///
/// Returns `None` if the reader runs out of data before the final byte, or if
/// the value does not fit in a `u32`.
fn parse_int<R: Read>(rdr: &mut R) -> Option<u32> {
    let mut value: u32 = 0;
    loop {
        let byte = parse_byte(rdr)?;
        // Shifting in seven more bits must not push any set bit out.
        if value > u32::MAX >> 7 {
            return None;
        }
        value = (value << 7) | u32::from(byte & 0x7f);
        if byte & 0x80 != 0 {
            return Some(value);
        }
    }
}
/// Reads an 8-byte signed integer stored in the host's native byte order.
///
/// Returns `None` when fewer than eight bytes are available.
fn parse_i64<R: Read>(rdr: &mut R) -> Option<i64> {
    let mut raw = [0u8; 8];
    rdr.read_exact(&mut raw).ok().map(|()| i64::from_ne_bytes(raw))
}
/// Parses one function prototype and, recursively, all of its nested
/// functions, appending a `Block` for each to `blocks`.
///
/// A function's own block is pushed before its nested functions are parsed,
/// so `blocks` ends up in pre-order: parent first, then each child subtree.
///
/// I'm doing this recursively so it's easy to match with the PUC Lua
/// code, but I don't like recursion in general, and I don't know
/// why PUC wrote it that way.
///
/// Returns `None` if the reader runs out of data or a field cannot be
/// decoded. Blocks pushed before the failure are left in `blocks`.
///
/// # Panics
///
/// Panics on an instruction that `parse_inst` cannot decode and on a constant
/// whose type tag is not handled yet.
pub fn parse_block<R: Read>(rdr: &mut R, blocks: &mut Vec<Block>) -> Option<()> {
    use crate::value::Value;

    // Ignore things I haven't implemented yet
    parse_string(rdr)?; // function name
    parse_int(rdr)?; // start line in source code
    parse_int(rdr)?; // last line in source code
    parse_byte(rdr)?; // num params
    parse_byte(rdr)?; // is_vararg
    parse_byte(rdr)?; // maxstacksize, might be same as num slots?

    let inst_count = parse_int(rdr)?;
    let mut instructions = Vec::with_capacity(inst_count as usize);
    for _ in 0..inst_count {
        let mut buf = [0u8; 4];
        rdr.read_exact(&mut buf).ok()?;
        // Build the panic message only when decoding actually fails.
        let inst = parse_inst(buf).unwrap_or_else(|| panic!("{:?}", buf));
        instructions.push(inst);
    }

    let constant_count = parse_int(rdr)?;
    let mut constants = Vec::with_capacity(constant_count as usize);
    for i in 0..constant_count {
        // LUA_TNIL and friends from `lua.h` in PUC Lua
        let const_type = parse_byte(rdr)?;
        let val = match const_type {
            3 => Value::from(parse_i64(rdr)?),
            // Short (4) and long (20) strings share the same encoding.
            4 | 20 => parse_string(rdr)?.into(),
            19 => Value::from(parse_float(rdr)?),
            x => panic!("Constant {} has type {}", i, x),
        };
        constants.push(val);
    }

    let upvalue_count = parse_int(rdr)? as usize;
    for _ in 0..upvalue_count {
        // Just ignore these; each upvalue description is three bytes
        for _ in 0..3 {
            parse_byte(rdr)?;
        }
    }

    blocks.push(Block {
        constants,
        instructions,
        upvalue_count,
    });

    // Recursion
    // Subfunctions. PUC calls them protos.
    let protos_count = parse_int(rdr)?;
    for _ in 0..protos_count {
        parse_block(rdr, blocks)?;
    }

    // Skip over debug stuff

    // I think this is delta line numbers, e.g. most instructions
    // have 0, but when you go to a new source line it's 1+.
    let lineinfo_count = parse_int(rdr)?;
    for _ in 0..lineinfo_count {
        parse_byte(rdr)?;
    }

    // Absolute line info: each entry is a (pc, line) pair of integers.
    let abslineinfo_count = parse_int(rdr)?;
    for _ in 0..abslineinfo_count {
        parse_int(rdr)?; // pc
        parse_int(rdr)?; // line
    }

    // Local variables: name plus two integers each
    let local_count = parse_int(rdr)?;
    for _ in 0..local_count {
        parse_string(rdr)?;
        parse_int(rdr)?;
        parse_int(rdr)?;
    }

    // Upvalue names
    let upvalue_name_count = parse_int(rdr)?;
    for _ in 0..upvalue_name_count {
        parse_string(rdr)?;
    }

    Some(())
}
/// Parses a complete compiled chunk: a 32-byte header followed by the
/// top-level function and everything nested inside it.
///
/// Returns `None` if the header cannot be read or if the top-level function
/// fails to parse, so a truncated or malformed file is never reported as a
/// successfully loaded chunk with missing blocks.
pub fn parse_chunk<R: Read>(rdr: &mut R) -> Option<Chunk> {
    // Discard 32 bytes from the start of the file.
    // This is magic number, version number, etc.
    let mut hdr = [0u8; 32];
    rdr.read_exact(&mut hdr).ok()?;

    // `parse_block` recurses into nested functions itself, so a single call
    // covers the whole chunk.
    let mut blocks = vec![];
    parse_block(rdr, &mut blocks)?;

    Some(Chunk { blocks })
}
/// Convenience wrapper around `parse_chunk` for bytecode that is already in
/// memory.
pub fn parse_chunk_from_bytes(b: &[u8]) -> Option<Chunk> {
    parse_chunk(&mut std::io::Cursor::new(b))
}
// Unit tests for instruction decoding and block parsing.
#[cfg (test)]
mod tests {
// Table-driven check: each entry pairs the four raw bytes of one instruction
// with the `Inst` value that `parse_inst` is expected to produce for them.
#[test]
fn parse_inst () {
use super::Inst;
for (input, expected) in [
([0x51, 0x00, 0x00, 0x00], Inst::VarArgPrep (0)),
([0x4f, 0x00, 0x00, 0x00], Inst::Closure (0, 0)),
([0xcf, 0x00, 0x00, 0x00], Inst::Closure (1, 0)),
([0x8b, 0x00, 0x00, 0x00], Inst::GetTabUp (1, 0, 0)),
([0x03, 0x81, 0x00, 0x00], Inst::LoadK (2, 1)),
([0xc4, 0x00, 0x02, 0x01], Inst::Call (1, 2, 1)),
([0x80, 0x00, 0x00, 0x00], Inst::Move (1, 0)),
([0xc4, 0x00, 0x01, 0x02], Inst::Call (1, 1, 2)),
([0x0b, 0x01, 0x00, 0x00], Inst::GetTabUp (2, 0, 0)),
([0x83, 0x01, 0x01, 0x00], Inst::LoadK (3, 2)),
([0x44, 0x01, 0x02, 0x01], Inst::Call (2, 2, 1)),
([0x0b, 0x01, 0x00, 0x00], Inst::GetTabUp (2, 0, 0)),
([0x80, 0x01, 0x01, 0x00], Inst::Move (3, 1)),
([0xc4, 0x01, 0x01, 0x00], Inst::Call (3, 1, 0)),
([0x44, 0x01, 0x00, 0x01], Inst::Call (2, 0, 1)),
([0x0b, 0x01, 0x00, 0x00], Inst::GetTabUp (2, 0, 0)),
([0x83, 0x81, 0x01, 0x00], Inst::LoadK (3, 3)),
([0x44, 0x01, 0x02, 0x01], Inst::Call (2, 2, 1)),
([0x46, 0x01, 0x01, 0x01], Inst::Return (2, 1, 1, false)),
([0x01, 0x00, 0x02, 0x80], Inst::LoadI (0, 5)),
([0xc6, 0x80, 0x02, 0x00], Inst::Return (1, 2, 0, true)),
([0x09, 0x00, 0x01, 0x00], Inst::GetUpVal (0, 1)),
([0x48, 0x00, 0x02, 0x00], Inst::Return1 (0)),
([0x47, 0x00, 0x01, 0x00], Inst::Return0),
([0x8d, 0x00, 0x01, 0x01], Inst::GetI (1, 1, 1)),
([0xbc, 0x00, 0x01, 0x00], Inst::EqK (1, 1, false)),
([0xb8, 0x02, 0x00, 0x80], Inst::Jmp (6)),
([0x38, 0x02, 0x00, 0x80], Inst::Jmp (5)),
([0x52, 0x00, 0x00, 0x00], Inst::ExtraArg (0)),
] {
// Every table entry must decode; an unhandled opcode fails the test here.
let actual = super::parse_inst (input).unwrap ();
assert_eq!(actual, expected);
}
}
// Parses a compiled test vector and checks the instruction count of each
// of the first five blocks that `parse_block` collects.
#[test]
fn parse_nested_functions () {
use std::io::Read;
// The bytecode file is embedded into the test binary at compile time.
let bytecode = include_bytes! ("../test_vectors/functions.luac");
{
let mut rdr = std::io::Cursor::new (bytecode.clone ());
// Skip the 32 header bytes by hand, then parse starting at the first block.
let mut buf = [0u8; 32];
rdr.read_exact (&mut buf).unwrap ();
let mut blocks = vec! [];
super::parse_block (&mut rdr, &mut blocks).unwrap ();
assert_eq! (blocks [0].instructions.len (), 15);
assert_eq! (blocks [1].instructions.len (), 6);
assert_eq! (blocks [2].instructions.len (), 4);
assert_eq! (blocks [3].instructions.len (), 4);
assert_eq! (blocks [4].instructions.len (), 4);
}
// Disabled: this branch is still compiled and type-checked, but never runs.
if false {
let mut rdr = std::io::Cursor::new (bytecode.clone ());
let file = crate::loader::parse_chunk (&mut rdr).unwrap ();
assert_eq! (file.blocks.len (), 5);
}
}
}