lunar_wave/lunar_wave_vm/src/loader.rs

541 lines
13 KiB
Rust

use std::{
io::Read,
rc::Rc,
};
use crate::{
instruction::Instruction as Inst,
state::{
Block,
Chunk,
},
string_interner::Interner,
};
/// Compiles the Lua source file at `path` by running `luac5.4 -o - <path>`
/// and returns the bytes the compiler wrote to its standard output.
///
/// The child's exit status is not inspected here, so a failed compilation
/// simply yields whatever (possibly empty) output was produced.
///
/// # Panics
///
/// Panics if `luac5.4` cannot be spawned or if waiting on it fails.
pub fn compile_bytecode_from_file(path: &str) -> Vec<u8> {
    use std::process::{Command, Stdio};

    let child = Command::new("luac5.4")
        .arg("-o") // Output to...
        .arg("-") // Standard output
        .arg(path)
        .stdout(Stdio::piped())
        .spawn()
        .expect("failed to execute `luac5.4`. Is Lua installed?");

    // `wait_with_output` already hands back an owned buffer, so return it
    // directly instead of copying it into a second `Vec`.
    child
        .wait_with_output()
        .expect("failed to wait on child")
        .stdout
}
#[derive (Debug, thiserror::Error)]
pub enum Error {
#[error ("compile")]
Compile (String)
}
/// Invoke `luac` as a subprocess
/// Luckily luac is single-pass, so we can just pipe in and out
///
/// `source` is a Vec because we move it to a worker thread
pub fn compile_bytecode (source: Vec <u8>) -> Result <Vec <u8>, Error> {
use std::{
io::Write,
process::{
Command,
Stdio,
},
};
let mut child = Command::new ("luac5.4")
.arg ("-o") // Output to...
.arg ("-") // Standard output
.arg ("-") // Input from standard input
.stdin (Stdio::piped ())
.stderr (Stdio::piped ())
.stdout (Stdio::piped ())
.spawn ()
.expect ("failed to execute `luac5.4`. Is Lua installed?");
let mut stdin = child.stdin.take ().expect ("failed to get stdin");
std::thread::spawn (move || {
stdin.write_all (&source).expect ("failed to write to stdin");
});
let output = child
.wait_with_output ()
.expect ("failed to wait on child");
if output.status.success () && output.status.code () == Some (0)
{
Ok (output.stdout)
}
else {
Err (Error::Compile (String::from_utf8 (output.stderr).unwrap ()))
}
}
/// Checks whether the input is already bytecode, or is possibly
/// Lua source code. If it's source code, compiles and returns bytecode.
/// If it's bytecode, just returns the input.
pub fn ensure_bytecode (buffer: Vec <u8>) -> Result <Vec <u8>, Error> {
let bytecode_header = &[0x1b, 0x4c, 0x75, 0x61, 0x54, 0x00, 0x19, 0x93];
if buffer.starts_with (bytecode_header) {
return Ok (buffer);
}
compile_bytecode (buffer)
}
/// Reads the byte at index 2 of `buf` as a signed operand by removing a
/// bias of 127.
///
/// Returns `None` when the result does not fit in an `i8`, which happens
/// only for a raw byte of 255 (255 - 127 = 128).
fn i_sb(buf: [u8; 4]) -> Option<i8> {
    let [_, _, raw, _] = buf;
    i8::try_from(i32::from(raw) - 127).ok()
}
/// Reads the byte at index 3 of `buf` as a signed operand by removing a
/// bias of 127.
///
/// Returns `None` when the result does not fit in an `i8`, which happens
/// only for a raw byte of 255 (255 - 127 = 128).
fn i_sc(buf: [u8; 4]) -> Option<i8> {
    let [_, _, _, raw] = buf;
    i8::try_from(i32::from(raw) - 127).ok()
}
/// Field accessors for an encoded instruction.
///
/// Every accessor consumes `self` by value. The bit positions mentioned on
/// the individual methods describe the `u32` implementation in this file.
pub trait DecodeInstruction {
    /// The opcode (bits 0-6 of a `u32`).
    fn opcode(self) -> u8;

    /// Operand A (bits 7-14 of a `u32`).
    fn a(self) -> u8;

    /// Operand Ax: everything above the opcode (bits 7-31 of a `u32`).
    fn ax(self) -> u32;

    /// Operand B (bits 16-23 of a `u32`).
    fn b(self) -> u8;

    /// Operand Bx (bits 15-31 of a `u32`).
    fn bx(self) -> u32;

    /// Operand C (bits 24-31 of a `u32`).
    fn c(self) -> u8;

    /// The `k` flag (bit 15 of a `u32`).
    fn k(self) -> bool;

    /// Operand B interpreted as signed (raw value minus 127 for `u32`).
    fn sb(self) -> i8;

    /// Operand Bx interpreted as signed (raw value minus 65535 for `u32`).
    fn sbx(self) -> i32;

    /// Operand C interpreted as signed (raw value minus 127 for `u32`).
    fn sc(self) -> i8;

    /// Jump offset (bits 7-31 minus 0xffffff for `u32`).
    fn sj(self) -> i32;
}
/// Decodes instruction fields straight out of a 32-bit word.
///
/// Layout used below, least significant bit first: opcode in bits 0-6,
/// A in bits 7-14, `k` in bit 15, B in bits 16-23, C in bits 24-31.
/// Bx overlays bits 15-31, and Ax / sJ overlay bits 7-31.
impl DecodeInstruction for u32 {
    fn opcode(self) -> u8 {
        (self & 0x7f) as u8
    }

    fn a(self) -> u8 {
        // Truncating to `u8` keeps exactly the eight bits of the field.
        (self >> 7) as u8
    }

    fn ax(self) -> u32 {
        self >> 7
    }

    fn b(self) -> u8 {
        (self >> 16) as u8
    }

    fn bx(self) -> u32 {
        self >> 15
    }

    fn c(self) -> u8 {
        (self >> 24) as u8
    }

    fn k(self) -> bool {
        self & (1 << 15) != 0
    }

    fn sb(self) -> i8 {
        // A raw value of 255 yields 128, which wraps to -128 in the cast.
        (i16::from(self.b()) - 127) as i8
    }

    fn sbx(self) -> i32 {
        (self >> 15) as i32 - 65535
    }

    fn sc(self) -> i8 {
        // A raw value of 255 yields 128, which wraps to -128 in the cast.
        (i16::from(self.c()) - 127) as i8
    }

    fn sj(self) -> i32 {
        (self >> 7) as i32 - 0xff_ffff
    }
}
pub fn parse_inst (buf: [u8; 4]) -> Option <Inst>
{
let opcode = buf [0] & 0x7f;
let a = (buf [0] >> 7) | ((buf [1] & 0x7f) << 1);
let b = buf [2];
let ax = a as u32 + ((b as u32) << 8);
let c = buf [3];
let bx =
(((buf [1] >> 7) as u32) << 0) |
((buf [2] as u32) << 1) |
((buf [3] as u32) << 9);
let sbx = i32::try_from (bx).ok ()? - 65535;
let k = (buf [1] & 0x80) >> 7 == 1;
let s_j = a as i32 + ((b as i32) << 8) + 1;
Some (match opcode {
0x00 => Inst::Move (a, b),
0x01 => Inst::LoadI (a, sbx),
0x02 => Inst::LoadF (a, sbx),
0x03 => Inst::LoadK (a, bx),
0x05 => Inst::LoadFalse (a),
0x07 => Inst::LoadTrue (a),
0x08 => Inst::LoadNil (a),
0x09 => Inst::GetUpVal (a, b),
0x0b => Inst::GetTabUp (a, b, c),
0x0c => Inst::GetTable (a, b, c),
0x0d => Inst::GetI (a, b, c),
0x0e => Inst::GetField (a, b, c),
0x0f => Inst::SetTabUp (a, b, c, k),
0x11 => Inst::SetI (a, b, c, k),
0x12 => Inst::SetField (a, b, c, k),
0x13 => Inst::NewTable (a),
0x15 => Inst::AddI (a, b, i_sc (buf)?),
0x18 => Inst::MulK (a, b, c),
0x19 => Inst::ModK (a, b, c),
0x22 => Inst::Add (a, b, c),
0x23 => Inst::Sub (a, b, c),
0x24 => Inst::Mul (a, b, c),
0x27 => Inst::Div (a, b, c),
0x2e => Inst::MmBin (a, b, c),
0x2f => Inst::MmBinI (a, i_sb (buf)?, c, k),
0x30 => Inst::MmBinK (a, b, c, k),
0x31 => Inst::UnM (a, b),
0x33 => Inst::Not (a, b),
0x34 => Inst::Len (a, b),
0x35 => Inst::Concat (a, b),
0x38 => Inst::Jmp (s_j),
0x3c => Inst::EqK (a, b, k),
0x3d => Inst::EqI (a, i_sb (buf)?, k),
0x42 => Inst::Test (a, k),
0x44 => Inst::Call (a, b, c),
0x45 => Inst::TailCall (a, b, c, k),
0x46 => Inst::Return (a, b, c, k),
0x47 => Inst::Return0,
0x48 => Inst::Return1 (a),
0x49 => Inst::ForLoop (a, bx),
0x4a => Inst::ForPrep (a, bx),
0x4e => Inst::SetList (a, b, c, k),
0x4f => Inst::Closure (a, bx),
0x50 => unimplemented! ("OP_VARARG"),
0x51 => Inst::VarArgPrep (a.into ()),
0x52 => Inst::ExtraArg (ax),
_ => return None,
})
}
/// Holds an instruction count.
///
/// NOTE(review): no use of this type is visible in the reviewed portion of
/// the file; confirm whether it is still needed.
#[derive(Debug, PartialEq)]
struct Header {
    /// Number of instructions, stored in a single byte.
    inst_count: u8,
}
/// loadUnsigned in PUC Lua.
///
/// Decodes a variable-length integer: each byte contributes its low seven
/// bits, most significant group first, and the byte with its top bit set is
/// the last one. At most 32 bytes are consumed; if none of them carries the
/// end marker, the value accumulated so far is returned.
///
/// # Panics
///
/// Panics if the reader runs out of bytes, or if the value accumulated
/// before shifting in another group has already reached `limit >> 7`.
fn load_unsigned<R: Read>(rdr: &mut R, limit: usize) -> usize {
    // Comparing against the pre-shifted limit detects overflow before the
    // shift below could lose bits.
    let ceiling = limit >> 7;
    let mut acc = 0usize;
    let mut bytes_left = 32;

    while bytes_left > 0 {
        bytes_left -= 1;

        let byte = parse_byte(rdr).unwrap();
        if acc >= ceiling {
            panic!("integer overflow {acc} >= {ceiling}");
        }
        acc = (acc << 7) | usize::from(byte & 0x7f);

        if byte & 0x80 != 0 {
            return acc;
        }
    }

    acc
}
/// Reads a variable-length size value with the largest limit `usize`
/// allows; see `load_unsigned` for the encoding and its panics.
fn load_size<R: Read>(rdr: &mut R) -> usize {
    const NO_LIMIT: usize = usize::MAX;
    load_unsigned(rdr, NO_LIMIT)
}
/// loadString in PUC Lua.
///
/// Reads a size with `load_size`; a stored size of 0 yields an empty
/// string, otherwise the payload is `size - 1` bytes long and must be valid
/// UTF-8.
///
/// Returns `None` if the payload is truncated, cannot be read, or is not
/// valid UTF-8. `load_size` itself panics when the size cannot be read.
///
/// NOTE(review): the original comment says long strings are not handled
/// yet, but `parse_block` routes the "long string" constant tag through
/// this function as well - confirm which is accurate.
fn parse_string<R: Read>(rdr: &mut R) -> Option<String> {
    let len = match load_size(rdr) {
        0 => return Some(String::new()),
        stored => stored - 1,
    };

    // The length comes from the input, so never allocate it up front: let
    // the buffer grow only as far as bytes actually arrive.
    let wanted = u64::try_from(len).ok()?;
    let mut buf = Vec::new();
    let got = rdr.by_ref().take(wanted).read_to_end(&mut buf).ok()?;
    if got != len {
        // The input ended before the announced number of bytes was read.
        return None;
    }

    String::from_utf8(buf).ok()
}
/// loadByte in PUC Lua.
///
/// Reads exactly one byte, or returns `None` if the reader cannot supply
/// it.
fn parse_byte<R: Read>(rdr: &mut R) -> Option<u8> {
    let mut byte = 0u8;
    rdr.read_exact(std::slice::from_mut(&mut byte)).ok()?;
    Some(byte)
}
/// Reads eight bytes and reinterprets them, in native byte order, as an
/// `f64`. Returns `None` if eight bytes cannot be read.
fn parse_float<R: Read>(rdr: &mut R) -> Option<f64> {
    let mut raw = [0u8; std::mem::size_of::<f64>()];
    rdr.read_exact(&mut raw)
        .ok()
        .map(|()| f64::from_ne_bytes(raw))
}
/// Reads a non-negative integer in the same variable-length encoding that
/// `load_unsigned` decodes: seven bits per byte, most significant group
/// first, with the top bit set on the final byte.
///
/// Values of 128 and above occupy more than one byte, so every byte up to
/// the terminator has to be consumed; reading only the first byte would
/// both mis-decode the value and leave the reader out of position.
///
/// Returns `None` if the input ends early, if no terminating byte appears
/// within 32 bytes, or if the value would not fit in an `i32`.
fn parse_int<R: Read>(rdr: &mut R) -> Option<u32> {
    // Refuse to shift in another group once that could pass `i32::MAX`.
    const CEILING: u32 = (i32::MAX as u32) >> 7;

    let mut value: u32 = 0;
    for _ in 0..32 {
        let byte = parse_byte(rdr)?;
        if value >= CEILING {
            return None;
        }
        value = (value << 7) | u32::from(byte & 0x7f);
        if byte & 0x80 != 0 {
            return Some(value);
        }
    }

    // No terminator within the allowed number of bytes.
    None
}
/// Reads eight bytes and reinterprets them, in native byte order, as an
/// `i64`. Returns `None` if eight bytes cannot be read.
fn parse_i64<R: Read>(rdr: &mut R) -> Option<i64> {
    let mut raw = [0u8; std::mem::size_of::<i64>()];
    rdr.read_exact(&mut raw)
        .ok()
        .map(|()| i64::from_ne_bytes(raw))
}
/// Parses one function prototype from `rdr`, appends it to `blocks`, and
/// then recurses into its nested prototypes, which are appended after it.
///
/// The original author wrote this recursively so that it is easy to compare
/// with the PUC Lua code, while noting a general dislike of recursion.
///
/// Fields are consumed in this order: function name, first and last source
/// line, parameter count, vararg flag, max stack size, instructions,
/// constants, upvalue descriptors, nested prototypes, and finally the debug
/// sections (line info, absolute line info, local variables, upvalue
/// names). Only instructions, constants, and upvalue descriptors are kept;
/// everything else is read solely to advance the reader.
///
/// Returns `None` when one of the `Option`-returning readers fails, for
/// example on truncated input or a non-UTF-8 string.
///
/// # Panics
///
/// Panics on a constant whose type tag is not 3, 4, 19, or 20. The
/// size-reading helpers `load_size` / `load_unsigned` also panic on
/// truncated or overflowing input.
pub fn parse_block<R: Read>(
    rdr: &mut R,
    si: &mut Interner,
    blocks: &mut Vec<Rc<Block>>,
) -> Option<()> {
    // Ignore things I haven't implemented yet
    parse_string(rdr)?; // function name
    parse_int(rdr)?; // start line in source code
    parse_int(rdr)?; // last line in source code
    parse_byte(rdr)?; // num params
    parse_byte(rdr)?; // is_vararg
    parse_byte(rdr)?; // maxstacksize, might be same as num slots?

    let inst_count = load_size(rdr);
    let mut instructions = Vec::with_capacity(inst_count);
    for _ in 0..inst_count {
        let mut buf = [0u8; 4];
        rdr.read_exact(&mut buf).ok()?;
        instructions.push(u32::from_le_bytes(buf));
    }
    let instructions = Rc::from(instructions);

    let constant_count = parse_int(rdr)?;
    let mut constants = Vec::with_capacity(constant_count as usize);
    for i in 0..constant_count {
        // LUA_TNIL and friends from `lua.h` in PUC Lua
        let const_type = parse_byte(rdr)?;
        let val = match const_type {
            3 => parse_i64(rdr)?.into(),
            // For LUA_TNUMBER, PUC Lua uses a macro that adds 16 to signify a float
            19 => parse_float(rdr)?.into(),
            // 4 = string, 0x10 + 4 = long string; both use the same reader
            4 | 20 => si.to_value(parse_string(rdr)?.as_str()),
            x => panic!("Constant {} has type {}", i, x),
        };
        constants.push(val);
    }

    let upvalue_count = parse_int(rdr)? as usize;
    let mut upvalues = Vec::with_capacity(upvalue_count);
    for _ in 0..upvalue_count {
        let in_stack = parse_byte(rdr)? == 1;
        let idx = parse_byte(rdr)?;
        let kind = parse_byte(rdr)?;
        upvalues.push(crate::state::Upvalue {
            in_stack,
            idx,
            kind,
        });
    }

    // Push this block before recursing so that a parent always precedes
    // its nested functions in `blocks`.
    blocks.push(
        Block {
            constants,
            instructions,
            upvalues,
        }
        .into(),
    );

    // Recursion
    // Subfunctions. PUC calls them protos.
    let protos_count = parse_int(rdr)?;
    for _ in 0..protos_count {
        parse_block(rdr, si, blocks)?;
    }

    // Skip over debug stuff

    // I think this is delta line numbers, e.g. most instructions
    // have 0, but when you go to a new source line it's 1+.
    let lineinfo_count = load_size(rdr);
    for _ in 0..lineinfo_count {
        parse_byte(rdr)?;
    }

    // Absolute line info, didn't see that in my test files
    let abslineinfo_count = load_size(rdr);
    for _ in 0..abslineinfo_count {
        load_unsigned(rdr, usize::MAX);
        load_unsigned(rdr, usize::MAX);
    }

    let local_count = load_size(rdr);
    for _ in 0..local_count {
        // NOTE(review): the original discarded this result without checking
        // it, unlike the upvalue names below. That is kept as a deliberate
        // best-effort skip, but made explicit.
        let _ = parse_string(rdr);
        load_unsigned(rdr, usize::MAX);
        load_unsigned(rdr, usize::MAX);
    }

    let upvalue_count = load_size(rdr);
    for _ in 0..upvalue_count {
        parse_string(rdr)?;
    }

    Some(())
}
/// Parses a complete bytecode chunk held in memory.
///
/// Wraps `buf` in a cursor and delegates to [`parse_chunk_from_reader`].
pub fn parse_chunk(buf: &[u8], si: &mut Interner) -> Option<Chunk> {
    parse_chunk_from_reader(&mut std::io::Cursor::new(buf), si)
}
/// Parses a complete bytecode chunk from `rdr`.
///
/// The first 32 bytes (magic number, version number, etc.) are consumed and
/// only their first eight bytes are checked. The top-level function and all
/// functions nested inside it are then read with [`parse_block`].
///
/// Returns `None` if the 32 leading bytes cannot be read or if
/// `parse_block` reports a failure.
///
/// # Panics
///
/// Panics if the first eight bytes are not the expected Lua 5.4 header.
pub fn parse_chunk_from_reader<R: Read>(rdr: &mut R, si: &mut Interner) -> Option<Chunk> {
    // Discard 32 bytes from the start of the file.
    // This is magic number, version number, etc.
    let mut hdr = [0u8; 32];
    rdr.read_exact(&mut hdr).ok()?;
    assert_eq!(
        &hdr[0..8],
        &[0x1b, 0x4c, 0x75, 0x61, 0x54, 0x00, 0x19, 0x93],
        "This isn't a Lua 5.4 bytecode file"
    );

    let mut blocks = vec![];
    // Hand a parse failure to the caller instead of unwrapping it here;
    // this function already reports failure through its `Option`.
    parse_block(rdr, si, &mut blocks)?;

    Some(Chunk { blocks })
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Hand-encoded variable-length sizes, from one to three bytes long.
    #[test]
    fn load_size() {
        let decode = |bytes: &[u8]| super::load_size(&mut std::io::Cursor::new(bytes));

        assert_eq!(decode(&[0x80]), 0);
        assert_eq!(decode(&[0x81]), 1);
        assert_eq!(decode(&[0x82]), 2);
        assert_eq!(decode(&[0xff]), 127);
        assert_eq!(decode(&[0x01, 0x80]), 128);
        assert_eq!(decode(&[0x01, 0x81]), 129);
        assert_eq!(decode(&[0x02, 0x80]), 256);
        assert_eq!(decode(&[0x7f, 0xfe]), 16382);
        assert_eq!(decode(&[0x7f, 0xff]), 16383);
        assert_eq!(decode(&[0x01, 0x00, 0x80]), 16384);
    }

    /// Raw instruction bytes paired with the instruction they decode to.
    #[test]
    fn parse_inst() {
        use super::Inst;

        let cases = [
            ([0x51, 0x00, 0x00, 0x00], Inst::VarArgPrep(0)),
            ([0x4f, 0x00, 0x00, 0x00], Inst::Closure(0, 0)),
            ([0xcf, 0x00, 0x00, 0x00], Inst::Closure(1, 0)),
            ([0x8b, 0x00, 0x00, 0x00], Inst::GetTabUp(1, 0, 0)),
            ([0x03, 0x81, 0x00, 0x00], Inst::LoadK(2, 1)),
            ([0xc4, 0x00, 0x02, 0x01], Inst::Call(1, 2, 1)),
            ([0x80, 0x00, 0x00, 0x00], Inst::Move(1, 0)),
            ([0xc4, 0x00, 0x01, 0x02], Inst::Call(1, 1, 2)),
            ([0x0b, 0x01, 0x00, 0x00], Inst::GetTabUp(2, 0, 0)),
            ([0x83, 0x01, 0x01, 0x00], Inst::LoadK(3, 2)),
            ([0x44, 0x01, 0x02, 0x01], Inst::Call(2, 2, 1)),
            ([0x0b, 0x01, 0x00, 0x00], Inst::GetTabUp(2, 0, 0)),
            ([0x80, 0x01, 0x01, 0x00], Inst::Move(3, 1)),
            ([0xc4, 0x01, 0x01, 0x00], Inst::Call(3, 1, 0)),
            ([0x44, 0x01, 0x00, 0x01], Inst::Call(2, 0, 1)),
            ([0x0b, 0x01, 0x00, 0x00], Inst::GetTabUp(2, 0, 0)),
            ([0x83, 0x81, 0x01, 0x00], Inst::LoadK(3, 3)),
            ([0x44, 0x01, 0x02, 0x01], Inst::Call(2, 2, 1)),
            ([0x46, 0x01, 0x01, 0x01], Inst::Return(2, 1, 1, false)),
            ([0x01, 0x00, 0x02, 0x80], Inst::LoadI(0, 5)),
            ([0xc6, 0x80, 0x02, 0x00], Inst::Return(1, 2, 0, true)),
            ([0x09, 0x00, 0x01, 0x00], Inst::GetUpVal(0, 1)),
            ([0x48, 0x00, 0x02, 0x00], Inst::Return1(0)),
            ([0x47, 0x00, 0x01, 0x00], Inst::Return0),
            ([0x8d, 0x00, 0x01, 0x01], Inst::GetI(1, 1, 1)),
            ([0xbc, 0x00, 0x01, 0x00], Inst::EqK(1, 1, false)),
            ([0xb8, 0x02, 0x00, 0x80], Inst::Jmp(6)),
            ([0x38, 0x02, 0x00, 0x80], Inst::Jmp(5)),
            ([0x52, 0x00, 0x00, 0x00], Inst::ExtraArg(0)),
        ];

        for (bytes, want) in cases {
            let got = super::parse_inst(bytes).unwrap();
            assert_eq!(got, want);
        }
    }

    /// Parses a fixture with nested functions and checks how many
    /// instructions each of the first five blocks holds.
    #[test]
    fn parse_nested_functions() {
        use std::io::Read;

        let mut si = Interner::default();
        let bytecode = include_bytes!("../test_vectors/functions.luac");

        {
            let mut rdr = std::io::Cursor::new(&bytecode[..]);

            // Step over the 32 leading bytes that `parse_block` does not expect.
            let mut skipped = [0u8; 32];
            rdr.read_exact(&mut skipped).unwrap();

            let mut blocks = vec![];
            super::parse_block(&mut rdr, &mut si, &mut blocks).unwrap();

            for (idx, want) in [15, 6, 4, 4, 4].into_iter().enumerate() {
                assert_eq!(blocks[idx].instructions.len(), want);
            }
        }

        // Disabled at runtime, but still type-checked.
        if false {
            let file = crate::loader::parse_chunk(bytecode, &mut si).unwrap();
            assert_eq!(file.blocks.len(), 5);
        }
    }
}