use std::io::Read; use crate::state::{ Block, Chunk, Instruction as Inst, }; /// Invoke `luac` as a subprocess /// Luckily luac is single-pass, so we can just pipe in and out pub (crate) fn compile_bytecode (source: Vec ) -> Vec { use std::{ io::Write, process::{ Command, Stdio, }, }; let mut child = Command::new ("luac5.4") .arg ("-o") // Output to... .arg ("-") // Standard output .arg ("-") // Input from standard input .stdin (Stdio::piped ()) .stdout (Stdio::piped ()) .spawn () .expect ("failed to execute `luac5.4`. Is Lua installed?"); let mut stdin = child.stdin.take ().expect ("failed to get stdin"); std::thread::spawn (move || { stdin.write_all (&source).expect ("failed to write to stdin"); }); let output = child .wait_with_output () .expect ("failed to wait on child"); output.stdout.as_slice ().to_vec () } pub fn parse_inst (buf: [u8; 4]) -> Option { let opcode = buf [0] & 0x7f; let a = (buf [0] >> 7) | ((buf [1] & 0x7f) << 1); let b = buf [2]; let ax = a as u32 + ((b as u32) << 8); let c = buf [3]; let bx = (((buf [1] >> 7) as u32) << 0) | ((buf [2] as u32) << 1) | ((buf [3] as u32) << 9); let bx = bx.try_into().ok ()?; let sbx = bx - 65535; let k = (buf [1] & 0x80) >> 7 == 1; let s_j = a as i32 + ((b as i32) << 8) + 1; Some (match opcode { 0x00 => Inst::Move (a, b), 0x01 => Inst::LoadI (a, sbx), 0x02 => Inst::LoadF (a, sbx), 0x03 => Inst::LoadK (a, bx), 0x05 => Inst::LoadFalse (a), 0x07 => Inst::LoadTrue (a), 0x08 => Inst::LoadNil (a), 0x09 => Inst::GetUpVal (a, b), 0x0b => Inst::GetTabUp (a, b, c), 0x0d => Inst::GetI (a, b, c), 0x0f => Inst::SetTabUp (a, b, c), 0x13 => Inst::NewTable (a), 0x22 => Inst::Add (a, b, c), 0x24 => Inst::Mul (a, b, c), 0x2e => Inst::MmBin (a, b, c), 0x33 => Inst::Not (a, b), 0x3c => Inst::EqK (a, b, c), 0x38 => Inst::Jmp (s_j), 0x42 => Inst::Test (a, k), 0x44 => Inst::Call (a, b, c), 0x45 => Inst::TailCall (a, b, c, k), 0x46 => Inst::Return (a, b, c, k), 0x47 => Inst::Return0, 0x48 => Inst::Return1 (a), 0x4f => Inst::Closure (a, bx), 0x51 => Inst::VarArgPrep (a.into ()), 0x52 => Inst::ExtraArg (ax), _ => return None, }) } #[derive (Debug, PartialEq)] struct Header { inst_count: u8, } // loadString in PUC Lua. Doesn't work with long strings yet. fn parse_string (rdr: &mut R) -> Option { let len = match parse_int (rdr)? { 0 => 0, x => x - 1, }; let mut buf = vec! [0u8; len as usize]; rdr.read_exact (&mut buf).ok ()?; Some (String::from_utf8 (buf).ok ()?) } // loadByte in PUC Lua fn parse_byte (rdr: &mut R) -> Option { let mut buf = [0u8; 1]; rdr.read_exact (&mut buf).ok ()?; Some (buf [0]) } fn parse_int (rdr: &mut R) -> Option { Some ((parse_byte (rdr)? - 0x80) as u32) } // I'm doing this recursively so it's easy to match with the PUC Lua // code, but I don't like recursion in general, and I don't know // why PUC wrote it that way. pub fn parse_block (rdr: &mut R, blocks: &mut Vec ) -> Option <()> { // Ignore things I haven't implemented yet parse_string (rdr)?; // function name parse_int (rdr)?; // start line in source code parse_int (rdr)?; // last line in source code parse_byte (rdr)?; // num params parse_byte (rdr)?; // is_vararg parse_byte (rdr)?; // maxstacksize, might be same as num slots? let inst_count = parse_int (rdr)?; let mut instructions = Vec::with_capacity (inst_count as usize); for _ in 0..inst_count { let mut buf = [0u8; 4]; rdr.read_exact (&mut buf).ok ()?; instructions.push (parse_inst (buf).expect (&format! ("{buf:?}"))); } let constant_count = parse_int (rdr)?; let mut constants = Vec::with_capacity (constant_count as usize); for _ in 0..constant_count { let const_type = parse_byte (rdr)?; assert_eq! (const_type, 0x04); let s = parse_string (rdr)?; constants.push (s.into ()); } let upvalue_count = parse_int (rdr)? as usize; for _ in 0..upvalue_count { // Just ignore these for _ in 0..3 { parse_byte (rdr)?; } } blocks.push (Block { constants, instructions, upvalue_count, }); // Recursion // Subfunctions. PUC calls them protos. let protos_count = parse_int (rdr)?; for _ in 0..protos_count { parse_block (rdr, blocks)?; } // Skip over debug stuff // I think this is delta line numbers, e.g. most instructions // have 0, but when you go to a new source line it's 1+. let lineinfo_count = parse_int (rdr)?; for _ in 0..lineinfo_count { parse_byte (rdr)?; } // Absolute line info, didn't see that in my test files let abslineinfo_count = parse_int (rdr)?; assert_eq! (abslineinfo_count, 0); let local_count = parse_int (rdr)?; for _ in 0..local_count { parse_string(rdr)?; parse_int (rdr)?; parse_int (rdr)?; } let upvalue_count = parse_int (rdr)?; for _ in 0..upvalue_count { parse_string (rdr)?; } Some (()) } pub fn parse_chunk (rdr: &mut R) -> Option { // Discard 32 bytes from the start of the file. // This is magic number, version number, etc. let mut hdr = [0u8; 32]; rdr.read_exact (&mut hdr).ok ()?; let mut blocks = vec![]; while let Some (_) = parse_block (rdr, &mut blocks) { // } Some (Chunk { blocks, }) } #[cfg (test)] mod tests { #[test] fn parse_inst () { use super::Inst; for (input, expected) in [ ([0x51, 0x00, 0x00, 0x00], Inst::VarArgPrep (0)), ([0x4f, 0x00, 0x00, 0x00], Inst::Closure (0, 0)), ([0xcf, 0x00, 0x00, 0x00], Inst::Closure (1, 0)), ([0x8b, 0x00, 0x00, 0x00], Inst::GetTabUp (1, 0, 0)), ([0x03, 0x81, 0x00, 0x00], Inst::LoadK (2, 1)), ([0xc4, 0x00, 0x02, 0x01], Inst::Call (1, 2, 1)), ([0x80, 0x00, 0x00, 0x00], Inst::Move (1, 0)), ([0xc4, 0x00, 0x01, 0x02], Inst::Call (1, 1, 2)), ([0x0b, 0x01, 0x00, 0x00], Inst::GetTabUp (2, 0, 0)), ([0x83, 0x01, 0x01, 0x00], Inst::LoadK (3, 2)), ([0x44, 0x01, 0x02, 0x01], Inst::Call (2, 2, 1)), ([0x0b, 0x01, 0x00, 0x00], Inst::GetTabUp (2, 0, 0)), ([0x80, 0x01, 0x01, 0x00], Inst::Move (3, 1)), ([0xc4, 0x01, 0x01, 0x00], Inst::Call (3, 1, 0)), ([0x44, 0x01, 0x00, 0x01], Inst::Call (2, 0, 1)), ([0x0b, 0x01, 0x00, 0x00], Inst::GetTabUp (2, 0, 0)), ([0x83, 0x81, 0x01, 0x00], Inst::LoadK (3, 3)), ([0x44, 0x01, 0x02, 0x01], Inst::Call (2, 2, 1)), ([0x46, 0x01, 0x01, 0x01], Inst::Return (2, 1, 1, false)), ([0x01, 0x00, 0x02, 0x80], Inst::LoadI (0, 5)), ([0xc6, 0x80, 0x02, 0x00], Inst::Return (1, 2, 0, true)), ([0x09, 0x00, 0x01, 0x00], Inst::GetUpVal (0, 1)), ([0x48, 0x00, 0x02, 0x00], Inst::Return1 (0)), ([0x47, 0x00, 0x01, 0x00], Inst::Return0), ([0x8d, 0x00, 0x01, 0x01], Inst::GetI (1, 1, 1)), ([0xbc, 0x00, 0x01, 0x00], Inst::EqK (1, 1, 0)), ([0xb8, 0x02, 0x00, 0x80], Inst::Jmp (6)), ([0x38, 0x02, 0x00, 0x80], Inst::Jmp (5)), ([0x52, 0x00, 0x00, 0x00], Inst::ExtraArg (0)), ] { let actual = super::parse_inst (input).unwrap (); assert_eq!(actual, expected); } } #[test] fn parse_nested_functions () { use std::io::Read; let bytecode = include_bytes! ("../test_vectors/functions.luac"); { let mut rdr = std::io::Cursor::new (bytecode.clone ()); let mut buf = [0u8; 32]; rdr.read_exact (&mut buf).unwrap (); let mut blocks = vec! []; super::parse_block (&mut rdr, &mut blocks).unwrap (); assert_eq! (blocks [0].instructions.len (), 15); assert_eq! (blocks [1].instructions.len (), 6); assert_eq! (blocks [2].instructions.len (), 4); assert_eq! (blocks [3].instructions.len (), 4); assert_eq! (blocks [4].instructions.len (), 4); } if false { let mut rdr = std::io::Cursor::new (bytecode.clone ()); let file = crate::loader::parse_chunk (&mut rdr).unwrap (); assert_eq! (file.blocks.len (), 5); } } }