Compare commits
	
		
			3 Commits 
		
	
	
		
			565fd19e66
			...
			b639d02027
		
	
	| Author | SHA1 | Date | 
|---|---|---|
|  _ | b639d02027 | |
|  _ | 96c82c27c8 | |
|  _ | 5649f38698 | 
|  | @ -31,6 +31,8 @@ pub enum Instruction { | |||
| 	// Jump
 | ||||
| 	Jmp (i32), | ||||
| 	
 | ||||
| 	Len (u8, u8), | ||||
| 	
 | ||||
| 	// Load F (Float?)
 | ||||
| 	LoadF (u8, i32), | ||||
| 	
 | ||||
|  |  | |||
							
								
								
									
										139
									
								
								src/loader.rs
								
								
								
								
							
							
						
						
									
										139
									
								
								src/loader.rs
								
								
								
								
							|  | @ -8,12 +8,35 @@ use crate::{ | |||
| 	} | ||||
| }; | ||||
| 
 | ||||
| pub (crate) fn compile_bytecode_from_file (path: &str) -> Vec <u8> { | ||||
| 	use std::{ | ||||
| 		process::{ | ||||
| 			Command, | ||||
| 			Stdio, | ||||
| 		}, | ||||
| 	}; | ||||
| 	
 | ||||
| 	let child = Command::new ("luac5.4") | ||||
| 	.arg ("-o") // Output to...
 | ||||
| 	.arg ("-")  // Standard output
 | ||||
| 	.arg (path) 
 | ||||
| 	.stdout (Stdio::piped ()) | ||||
| 	.spawn () | ||||
| 	.expect ("failed to execute `luac5.4`. Is Lua installed?"); | ||||
| 	
 | ||||
| 	let output = child | ||||
| 	.wait_with_output () | ||||
| 	.expect ("failed to wait on child"); | ||||
| 	
 | ||||
| 	output.stdout.as_slice ().to_vec () | ||||
| } | ||||
| 
 | ||||
| /// Invoke `luac` as a subprocess
 | ||||
| /// Luckily luac is single-pass, so we can just pipe in and out
 | ||||
| /// 
 | ||||
| /// `source` is a Vec because we move it to a worker thread
 | ||||
| 
 | ||||
| pub (crate) fn compile_bytecode (source: Vec <u8>) -> Vec <u8> { | ||||
| pub (crate) fn compile_bytecode_from_stdin (source: Vec <u8>) -> Vec <u8> { | ||||
| 	use std::{ | ||||
| 		io::Write, | ||||
| 		process::{ | ||||
|  | @ -87,6 +110,7 @@ pub fn parse_inst (buf: [u8; 4]) -> Option <Inst> | |||
| 		0x2e => Inst::MmBin (a, b, c), | ||||
| 		0x30 => Inst::MmBinK (a, b, c, k), | ||||
| 		0x33 => Inst::Not (a, b), | ||||
| 		0x34 => Inst::Len (a, b), | ||||
| 		0x3c => Inst::EqK (a, b, k), | ||||
| 		0x3d => Inst::EqI (a, i_sb (buf)?, k), | ||||
| 		0x38 => Inst::Jmp (s_j), | ||||
|  | @ -111,15 +135,42 @@ struct Header { | |||
| 	inst_count: u8, | ||||
| } | ||||
| 
 | ||||
| /// loadUnsigned in PUC Lua
 | ||||
| /// Decodes a varint format that has 7 bits per byte and the 8th bit
 | ||||
| /// is set to 1 on the last byte.
 | ||||
| 
 | ||||
| fn load_unsigned <R: Read> (rdr: &mut R, limit: usize) -> usize { | ||||
| 	// Shrink the limit so we can tell when we pass it
 | ||||
| 	let limit = limit >> 7; | ||||
| 	
 | ||||
| 	let mut x = 0; | ||||
| 	for _ in 0..32 { | ||||
| 		let b = parse_byte (rdr).unwrap (); | ||||
| 		if x >= limit { | ||||
| 			panic! ("integer overflow {x} >= {limit}"); | ||||
| 		} | ||||
| 		x = (x << 7) | (b as usize & 0x7f); | ||||
| 		if (b & 0x80) != 0 { | ||||
| 			break; | ||||
| 		} | ||||
| 	} | ||||
| 	
 | ||||
| 	x | ||||
| } | ||||
| 
 | ||||
| fn load_size <R: Read> (rdr: &mut R) -> usize { | ||||
| 	load_unsigned (rdr, usize::MAX) | ||||
| } | ||||
| 
 | ||||
| // loadString in PUC Lua. Doesn't work with long strings yet.
 | ||||
| 
 | ||||
| fn parse_string <R: Read> (rdr: &mut R) -> Option <String> { | ||||
| 	let len = match parse_int (rdr)? { | ||||
| 		0 => 0, | ||||
| 	let len = match load_size (rdr) { | ||||
| 		0 => return Some (String::new ()), | ||||
| 		x => x - 1, | ||||
| 	}; | ||||
| 	
 | ||||
| 	let mut buf = vec! [0u8; len as usize]; | ||||
| 	let mut buf = vec! [0u8; len]; | ||||
| 	rdr.read_exact (&mut buf).ok ()?; | ||||
| 	Some (String::from_utf8 (buf).ok ()?) | ||||
| } | ||||
|  | @ -162,22 +213,22 @@ pub fn parse_block <R: Read> (rdr: &mut R, blocks: &mut Vec <Block>) | |||
| 	use crate::value::Value; | ||||
| 	
 | ||||
| 	parse_string (rdr)?; // function name
 | ||||
| 	parse_int (rdr)?;    // start line in source code
 | ||||
| 	parse_int (rdr)?;    // last line in source code
 | ||||
| 	parse_byte (rdr)?;   // num params
 | ||||
| 	parse_byte (rdr)?;   // is_vararg
 | ||||
| 	parse_byte (rdr)?;   // maxstacksize, might be same as num slots?
 | ||||
| 	parse_int (rdr).unwrap ();    // start line in source code
 | ||||
| 	parse_int (rdr).unwrap ();    // last line in source code
 | ||||
| 	parse_byte (rdr).unwrap ();   // num params
 | ||||
| 	parse_byte (rdr).unwrap ();   // is_vararg
 | ||||
| 	parse_byte (rdr).unwrap ();   // maxstacksize, might be same as num slots?
 | ||||
| 	
 | ||||
| 	let inst_count = parse_int (rdr)?; | ||||
| 	let inst_count = parse_int (rdr).unwrap (); | ||||
| 	let mut instructions = Vec::with_capacity (inst_count as usize); | ||||
| 	
 | ||||
| 	for _ in 0..inst_count { | ||||
| 		let mut buf = [0u8; 4]; | ||||
| 		rdr.read_exact (&mut buf).ok ()?; | ||||
| 		rdr.read_exact (&mut buf).ok ().unwrap (); | ||||
| 		instructions.push (parse_inst (buf).expect (&format! ("{buf:?}"))); | ||||
| 	} | ||||
| 	
 | ||||
| 	let constant_count = parse_int (rdr)?; | ||||
| 	let constant_count = parse_int (rdr).unwrap (); | ||||
| 	
 | ||||
| 	let mut constants = Vec::with_capacity (constant_count as usize); | ||||
| 	
 | ||||
|  | @ -186,22 +237,26 @@ pub fn parse_block <R: Read> (rdr: &mut R, blocks: &mut Vec <Block>) | |||
| 		let const_type = parse_byte (rdr)?; | ||||
| 		
 | ||||
| 		let val = match const_type { | ||||
| 			 3 => Value::from (parse_i64 (rdr)?), | ||||
| 			 4 => parse_string (rdr)?.into (), | ||||
| 			19 => Value::from (parse_float (rdr)?), | ||||
| 			 3 => parse_i64 (rdr).unwrap ().into (), | ||||
| 			 4 => parse_string (rdr).unwrap ().into (), | ||||
| 			 
 | ||||
| 			// For LUA_TNUMBER, PUC Lua uses a macro that adds 16 to signify a float
 | ||||
| 			19 => parse_float (rdr).unwrap ().into (), | ||||
| 			// 0x10 + 4 = long string
 | ||||
| 			20 => parse_string (rdr).unwrap ().into (), | ||||
| 			x => panic! ("Constant {} has type {}", i, x), | ||||
| 		}; | ||||
| 		
 | ||||
| 		constants.push (val); | ||||
| 	} | ||||
| 	
 | ||||
| 	let upvalue_count = parse_int (rdr)? as usize; | ||||
| 	let upvalue_count = parse_int (rdr).unwrap () as usize; | ||||
| 	
 | ||||
| 	for _ in 0..upvalue_count { | ||||
| 		// Just ignore these
 | ||||
| 		
 | ||||
| 		for _ in 0..3 { | ||||
| 			parse_byte (rdr)?; | ||||
| 			parse_byte (rdr).unwrap (); | ||||
| 		} | ||||
| 	} | ||||
| 	
 | ||||
|  | @ -214,9 +269,9 @@ pub fn parse_block <R: Read> (rdr: &mut R, blocks: &mut Vec <Block>) | |||
| 	// Recursion
 | ||||
| 	
 | ||||
| 	// Subfunctions. PUC calls them protos.
 | ||||
| 	let protos_count = parse_int (rdr)?; | ||||
| 	let protos_count = parse_int (rdr).unwrap (); | ||||
| 	for _ in 0..protos_count { | ||||
| 		parse_block (rdr, blocks)?; | ||||
| 		parse_block (rdr, blocks).unwrap (); | ||||
| 	} | ||||
| 	
 | ||||
| 	// Skip over debug stuff
 | ||||
|  | @ -224,26 +279,26 @@ pub fn parse_block <R: Read> (rdr: &mut R, blocks: &mut Vec <Block>) | |||
| 	// I think this is delta line numbers, e.g. most instructions
 | ||||
| 	// have 0, but when you go to a new source line it's 1+.
 | ||||
| 	
 | ||||
| 	let lineinfo_count = parse_int (rdr)?; | ||||
| 	let lineinfo_count = parse_int (rdr).unwrap (); | ||||
| 	for _ in 0..lineinfo_count { | ||||
| 		parse_byte (rdr)?; | ||||
| 		parse_byte (rdr).unwrap (); | ||||
| 	} | ||||
| 	
 | ||||
| 	// Absolute line info, didn't see that in my test files
 | ||||
| 	
 | ||||
| 	let abslineinfo_count = parse_int (rdr)?; | ||||
| 	let abslineinfo_count = parse_int (rdr).unwrap (); | ||||
| 	assert_eq! (abslineinfo_count, 0); | ||||
| 	
 | ||||
| 	let local_count = parse_int (rdr)?; | ||||
| 	let local_count = parse_int (rdr).unwrap (); | ||||
| 	for _ in 0..local_count { | ||||
| 		parse_string(rdr)?; | ||||
| 		parse_int (rdr)?; | ||||
| 		parse_int (rdr)?; | ||||
| 		parse_string(rdr).unwrap (); | ||||
| 		parse_int (rdr).unwrap (); | ||||
| 		parse_int (rdr).unwrap (); | ||||
| 	} | ||||
| 	
 | ||||
| 	let upvalue_count = parse_int (rdr)?; | ||||
| 	let upvalue_count = parse_int (rdr).unwrap (); | ||||
| 	for _ in 0..upvalue_count { | ||||
| 		parse_string (rdr)?; | ||||
| 		parse_string (rdr).unwrap (); | ||||
| 	} | ||||
| 	
 | ||||
| 	Some (()) | ||||
|  | @ -257,11 +312,11 @@ pub fn parse_chunk <R: Read> (rdr: &mut R) -> Option <Chunk> { | |||
| 	let mut hdr = [0u8; 32]; | ||||
| 	rdr.read_exact (&mut hdr).ok ()?; | ||||
| 	
 | ||||
| 	assert_eq! (&hdr [0..8], &[0x1b, 0x4c, 0x75, 0x61, 0x54, 0x00, 0x19, 0x93], "This isn't a Lua 5.4 bytecode file"); | ||||
| 	
 | ||||
| 	let mut blocks = vec![]; | ||||
| 	
 | ||||
| 	while let Some (_) = parse_block (rdr, &mut blocks) { | ||||
| 		// 
 | ||||
| 	} | ||||
| 	parse_block (rdr, &mut blocks).unwrap (); | ||||
| 	
 | ||||
| 	Some (Chunk { | ||||
| 		blocks, | ||||
|  | @ -275,6 +330,28 @@ pub fn parse_chunk_from_bytes (b: &[u8]) -> Option <Chunk> { | |||
| 
 | ||||
| #[cfg (test)] | ||||
| mod tests { | ||||
| 	#[test] | ||||
| 	fn load_size () { | ||||
| 		let f = |input: &[u8]| { | ||||
| 			let mut cursor = std::io::Cursor::new (input); | ||||
| 			super::load_size (&mut cursor) | ||||
| 		}; | ||||
| 		
 | ||||
| 		assert_eq! (f (&[0x80]), 0); | ||||
| 		assert_eq! (f (&[0x81]), 1); | ||||
| 		assert_eq! (f (&[0x82]), 2); | ||||
| 		
 | ||||
| 		assert_eq! (f (&[0xff]), 127); | ||||
| 		
 | ||||
| 		assert_eq! (f (&[0x01, 0x80]), 128); | ||||
| 		assert_eq! (f (&[0x01, 0x81]), 129); | ||||
| 		assert_eq! (f (&[0x02, 0x80]), 256); | ||||
| 		
 | ||||
| 		assert_eq! (f (&[0x7f, 0xfe]), 16382); | ||||
| 		assert_eq! (f (&[0x7f, 0xff]), 16383); | ||||
| 		assert_eq! (f (&[0x01, 0x00, 0x80]), 16384); | ||||
| 	} | ||||
| 	
 | ||||
| 	#[test] | ||||
| 	fn parse_inst () { | ||||
| 		use super::Inst; | ||||
|  |  | |||
							
								
								
									
										13
									
								
								src/main.rs
								
								
								
								
							
							
						
						
									
										13
									
								
								src/main.rs
								
								
								
								
							|  | @ -11,6 +11,7 @@ mod tests; | |||
| fn main () { | ||||
| 	use state::State; | ||||
| 	
 | ||||
| 	let mut list_bytecode = false; | ||||
| 	let mut pipe_bytecode = false; | ||||
| 	let mut script = None; | ||||
| 	
 | ||||
|  | @ -19,6 +20,7 @@ fn main () { | |||
| 	
 | ||||
| 	while let Some (arg) = args.next () { | ||||
| 		match arg.as_str () { | ||||
| 			"--list-bytecode" => list_bytecode = true, | ||||
| 			"--pipe-bytecode" => pipe_bytecode = true, | ||||
| 			"--script" => script = Some (args.next ().unwrap ()), | ||||
| 			"--" => break, | ||||
|  | @ -26,9 +28,8 @@ fn main () { | |||
| 		} | ||||
| 	} | ||||
| 	
 | ||||
| 	let lua_file = if let Some (script) = script { | ||||
| 		let source = std::fs::read (script).expect ("couldn't load Lua source code"); | ||||
| 		let bytecode = loader::compile_bytecode(source); | ||||
| 	let chunk = if let Some (script) = script { | ||||
| 		let bytecode = loader::compile_bytecode_from_file (&script); | ||||
| 		let mut rdr = std::io::Cursor::new (bytecode); | ||||
| 		loader::parse_chunk (&mut rdr).unwrap () | ||||
| 	} | ||||
|  | @ -40,6 +41,10 @@ fn main () { | |||
| 		unimplemented!(); | ||||
| 	}; | ||||
| 	
 | ||||
| 	if list_bytecode { | ||||
| 		dbg! (&chunk); | ||||
| 	} | ||||
| 	
 | ||||
| 	let mut vm = State::default (); | ||||
| 	if std::env::var("LUA_DEBUG").is_ok() { | ||||
| 		vm.debug_print = true; | ||||
|  | @ -52,5 +57,5 @@ fn main () { | |||
| 		program_counter: 0, | ||||
| 	}); | ||||
| 	
 | ||||
| 	vm.execute_chunk (&lua_file, &upvalues); | ||||
| 	vm.execute_chunk (&chunk, &upvalues); | ||||
| } | ||||
|  |  | |||
							
								
								
									
										24
									
								
								src/state.rs
								
								
								
								
							
							
						
						
									
										24
									
								
								src/state.rs
								
								
								
								
							|  | @ -8,12 +8,14 @@ use crate::{ | |||
| 	}, | ||||
| }; | ||||
| 
 | ||||
| #[derive (Debug)] | ||||
| pub struct Block { | ||||
| 	pub instructions: Vec <Instruction>, | ||||
| 	pub constants: Vec <Value>, | ||||
| 	pub upvalue_count: usize, | ||||
| } | ||||
| 
 | ||||
| #[derive (Debug)] | ||||
| pub struct Chunk { | ||||
| 	pub blocks: Vec <Block>, | ||||
| } | ||||
|  | @ -155,10 +157,18 @@ impl State { | |||
| 			
 | ||||
| 			match instruction { | ||||
| 				Instruction::Add (a, b, c) => { | ||||
| 					let v_b = self.reg (*b).as_float ().unwrap (); | ||||
| 					let v_c = self.reg (*c).as_float ().unwrap (); | ||||
| 					let v_b = self.reg (*b); | ||||
| 					let v_c = self.reg (*c); | ||||
| 					
 | ||||
| 					*self.reg_mut (*a) = Value::from (v_b + v_c); | ||||
| 					let sum = if let (Some (v_b), Some (v_c)) = (v_b.as_int (), v_c.as_int ()) 
 | ||||
| 					{ | ||||
| 						Value::from (v_b + v_c) | ||||
| 					} | ||||
| 					else { | ||||
| 						Value::from (v_b.as_float ().unwrap () + v_c.as_float ().unwrap ()) | ||||
| 					}; | ||||
| 					
 | ||||
| 					*self.reg_mut (*a) = sum; | ||||
| 				}, | ||||
| 				Instruction::Call (a, b, _c) => { | ||||
| 					let b = usize::from (*b); | ||||
|  | @ -338,6 +348,14 @@ impl State { | |||
| 					*self.reg_mut (*a) = upvalues [b].clone (); | ||||
| 				}, | ||||
| 				Instruction::Jmp (s_j) => next_pc += s_j, | ||||
| 				Instruction::Len (a, b) => { | ||||
| 					let len = match self.reg (*b) { | ||||
| 						Value::String (s) => s.len (), | ||||
| 						_ => unimplemented!(), | ||||
| 					}; | ||||
| 					
 | ||||
| 					*self.reg_mut (*a) = len.into (); | ||||
| 				} | ||||
| 				Instruction::LoadF (a, sbx) => { | ||||
| 					*self.reg_mut (*a) = Value::Float (*sbx as f64); | ||||
| 				} | ||||
|  |  | |||
							
								
								
									
										23
									
								
								src/tests.rs
								
								
								
								
							
							
						
						
									
										23
									
								
								src/tests.rs
								
								
								
								
							|  | @ -42,7 +42,7 @@ fn run_bytecode (args: &[&str], bc: &[u8]) -> Vec <Value> { | |||
| /// and returns the output
 | ||||
| 
 | ||||
| fn run_source (args: &[&str], s: &str) -> Vec <Value> { | ||||
| 	let bc = loader::compile_bytecode (s.as_bytes ().to_vec ()); | ||||
| 	let bc = loader::compile_bytecode_from_stdin (s.as_bytes ().to_vec ()); | ||||
| 	run_bytecode (args, &bc) | ||||
| } | ||||
| 
 | ||||
|  | @ -128,22 +128,11 @@ fn bools () { | |||
| 
 | ||||
| #[test] | ||||
| fn closure () { | ||||
| 	let bytecode = include_bytes! ("../test_vectors/closure.luac"); | ||||
| 	let mut rdr = std::io::Cursor::new (bytecode); | ||||
| 	let file = crate::loader::parse_chunk (&mut rdr).unwrap (); | ||||
| 	let source = include_bytes! ("../test_vectors/closure.lua"); | ||||
| 	let bytecode = &crate::loader::compile_bytecode_from_stdin (source.to_vec ()); | ||||
| 	let chunk = crate::loader::parse_chunk_from_bytes (bytecode).unwrap (); | ||||
| 	
 | ||||
| 	for (arg, expected) in [ | ||||
| 		// Run the same test twice so clippy won't complain about a vec of 1 element
 | ||||
| 		(vec! ["_exe_name"], vec! [23.0.into ()]), | ||||
| 		(vec! ["_exe_name"], vec! [23.0.into ()]), | ||||
| 	] { | ||||
| 		let expected: Vec <Value> = expected; | ||||
| 		let mut vm = State::default (); | ||||
| 		let upvalues = State::upvalues_from_args (arg.into_iter ().map (|s| s.to_string ())); | ||||
| 		let actual = vm.execute_chunk (&file, &upvalues); | ||||
| 		
 | ||||
| 		assert_eq! (actual, expected); | ||||
| 	} | ||||
| 	assert_eq! (run_chunk (&["_exe_name"], &chunk), vec! [Value::from (23i64)]); | ||||
| } | ||||
| 
 | ||||
| #[test] | ||||
|  | @ -289,7 +278,7 @@ fn is_93 () { | |||
| 	end | ||||
| 	"#;
 | ||||
| 	
 | ||||
| 	let bc = loader::compile_bytecode (src.as_bytes ().to_vec ()); | ||||
| 	let bc = loader::compile_bytecode_from_stdin (src.as_bytes ().to_vec ()); | ||||
| 	let chunk = loader::parse_chunk_from_bytes (&bc).unwrap (); | ||||
| 	
 | ||||
| 	assert_eq! (chunk.blocks [0].instructions [3], Inst::EqK (0, 1, false)); | ||||
|  |  | |||
|  | @ -156,6 +156,8 @@ impl PartialEq <i64> for Value { | |||
| } | ||||
| 
 | ||||
| impl Value { | ||||
| 	/// Coerces ints to float
 | ||||
| 	
 | ||||
| 	pub fn as_float (&self) -> Option <f64> { | ||||
| 		match self { | ||||
| 			Self::Float (x) => Some (*x), | ||||
|  | @ -165,6 +167,8 @@ impl Value { | |||
| 		} | ||||
| 	} | ||||
| 	
 | ||||
| 	/// Does not coerce floats
 | ||||
| 	
 | ||||
| 	pub fn as_int (&self) -> Option <i64> { | ||||
| 		match self { | ||||
| 			Self::Integer (x) => Some (*x), | ||||
|  |  | |||
										
											Binary file not shown.
										
									
								
							|  | @ -0,0 +1,2 @@ | |||
| local s = "a very long string with more than 128 characters, which will require 2 bytes to encode in Lua's bytecode format. This allows me to debug my bytecode loader, which doesn't seem to be handling string sizes properly." | ||||
| print (#s) | ||||
		Loading…
	
		Reference in New Issue