🐛 bug: fix long string loading and int adding
Long strings didn't work before because I hadn't implemented Lua's varint decoding. It's an easy translation from the original C. Int adding previously created a float because I always coerced to float. Also an easy fix.
main
							parent
							
								
									5649f38698
								
							
						
					
					
						commit
						96c82c27c8
					
				
							
								
								
									
										113
									
								
								src/loader.rs
								
								
								
								
							
							
						
						
									
										113
									
								
								src/loader.rs
								
								
								
								
							|  | @ -135,15 +135,42 @@ struct Header { | |||
| 	inst_count: u8, | ||||
| } | ||||
| 
 | ||||
| /// loadUnsigned in PUC Lua
 | ||||
| /// Decodes a varint format that has 7 bits per byte and the 8th bit
 | ||||
| /// is set to 1 on the last byte.
 | ||||
| 
 | ||||
| fn load_unsigned <R: Read> (rdr: &mut R, limit: usize) -> usize { | ||||
| 	// Shrink the limit so we can tell when we pass it
 | ||||
| 	let limit = limit >> 7; | ||||
| 	
 | ||||
| 	let mut x = 0; | ||||
| 	for _ in 0..32 { | ||||
| 		let b = parse_byte (rdr).unwrap (); | ||||
| 		if x >= limit { | ||||
| 			panic! ("integer overflow {x} >= {limit}"); | ||||
| 		} | ||||
| 		x = (x << 7) | (b as usize & 0x7f); | ||||
| 		if (b & 0x80) != 0 { | ||||
| 			break; | ||||
| 		} | ||||
| 	} | ||||
| 	
 | ||||
| 	x | ||||
| } | ||||
| 
 | ||||
| fn load_size <R: Read> (rdr: &mut R) -> usize { | ||||
| 	load_unsigned (rdr, usize::MAX) | ||||
| } | ||||
| 
 | ||||
| // loadString in PUC Lua. Doesn't work with long strings yet.
 | ||||
| 
 | ||||
| fn parse_string <R: Read> (rdr: &mut R) -> Option <String> { | ||||
| 	let len = match parse_int (rdr)? { | ||||
| 		0 => 0, | ||||
| 	let len = match load_size (rdr) { | ||||
| 		0 => return Some (String::new ()), | ||||
| 		x => x - 1, | ||||
| 	}; | ||||
| 	
 | ||||
| 	let mut buf = vec! [0u8; len as usize]; | ||||
| 	let mut buf = vec! [0u8; len]; | ||||
| 	rdr.read_exact (&mut buf).ok ()?; | ||||
| 	Some (String::from_utf8 (buf).ok ()?) | ||||
| } | ||||
|  | @ -186,22 +213,22 @@ pub fn parse_block <R: Read> (rdr: &mut R, blocks: &mut Vec <Block>) | |||
| 	use crate::value::Value; | ||||
| 	
 | ||||
| 	parse_string (rdr)?; // function name
 | ||||
| 	parse_int (rdr)?;    // start line in source code
 | ||||
| 	parse_int (rdr)?;    // last line in source code
 | ||||
| 	parse_byte (rdr)?;   // num params
 | ||||
| 	parse_byte (rdr)?;   // is_vararg
 | ||||
| 	parse_byte (rdr)?;   // maxstacksize, might be same as num slots?
 | ||||
| 	parse_int (rdr).unwrap ();    // start line in source code
 | ||||
| 	parse_int (rdr).unwrap ();    // last line in source code
 | ||||
| 	parse_byte (rdr).unwrap ();   // num params
 | ||||
| 	parse_byte (rdr).unwrap ();   // is_vararg
 | ||||
| 	parse_byte (rdr).unwrap ();   // maxstacksize, might be same as num slots?
 | ||||
| 	
 | ||||
| 	let inst_count = parse_int (rdr)?; | ||||
| 	let inst_count = parse_int (rdr).unwrap (); | ||||
| 	let mut instructions = Vec::with_capacity (inst_count as usize); | ||||
| 	
 | ||||
| 	for _ in 0..inst_count { | ||||
| 		let mut buf = [0u8; 4]; | ||||
| 		rdr.read_exact (&mut buf).ok ()?; | ||||
| 		rdr.read_exact (&mut buf).ok ().unwrap (); | ||||
| 		instructions.push (parse_inst (buf).expect (&format! ("{buf:?}"))); | ||||
| 	} | ||||
| 	
 | ||||
| 	let constant_count = parse_int (rdr)?; | ||||
| 	let constant_count = parse_int (rdr).unwrap (); | ||||
| 	
 | ||||
| 	let mut constants = Vec::with_capacity (constant_count as usize); | ||||
| 	
 | ||||
|  | @ -210,22 +237,26 @@ pub fn parse_block <R: Read> (rdr: &mut R, blocks: &mut Vec <Block>) | |||
| 		let const_type = parse_byte (rdr)?; | ||||
| 		
 | ||||
| 		let val = match const_type { | ||||
| 			 3 => Value::from (parse_i64 (rdr)?), | ||||
| 			 4 => parse_string (rdr)?.into (), | ||||
| 			19 => Value::from (parse_float (rdr)?), | ||||
| 			 3 => parse_i64 (rdr).unwrap ().into (), | ||||
| 			 4 => parse_string (rdr).unwrap ().into (), | ||||
| 			 
 | ||||
| 			// For LUA_TNUMBER, PUC Lua uses a macro that adds 16 to signify a float
 | ||||
| 			19 => parse_float (rdr).unwrap ().into (), | ||||
| 			// 0x10 + 4 = long string
 | ||||
| 			20 => parse_string (rdr).unwrap ().into (), | ||||
| 			x => panic! ("Constant {} has type {}", i, x), | ||||
| 		}; | ||||
| 		
 | ||||
| 		constants.push (val); | ||||
| 	} | ||||
| 	
 | ||||
| 	let upvalue_count = parse_int (rdr)? as usize; | ||||
| 	let upvalue_count = parse_int (rdr).unwrap () as usize; | ||||
| 	
 | ||||
| 	for _ in 0..upvalue_count { | ||||
| 		// Just ignore these
 | ||||
| 		
 | ||||
| 		for _ in 0..3 { | ||||
| 			parse_byte (rdr)?; | ||||
| 			parse_byte (rdr).unwrap (); | ||||
| 		} | ||||
| 	} | ||||
| 	
 | ||||
|  | @ -238,9 +269,9 @@ pub fn parse_block <R: Read> (rdr: &mut R, blocks: &mut Vec <Block>) | |||
| 	// Recursion
 | ||||
| 	
 | ||||
| 	// Subfunctions. PUC calls them protos.
 | ||||
| 	let protos_count = parse_int (rdr)?; | ||||
| 	let protos_count = parse_int (rdr).unwrap (); | ||||
| 	for _ in 0..protos_count { | ||||
| 		parse_block (rdr, blocks)?; | ||||
| 		parse_block (rdr, blocks).unwrap (); | ||||
| 	} | ||||
| 	
 | ||||
| 	// Skip over debug stuff
 | ||||
|  | @ -248,26 +279,26 @@ pub fn parse_block <R: Read> (rdr: &mut R, blocks: &mut Vec <Block>) | |||
| 	// I think this is delta line numbers, e.g. most instructions
 | ||||
| 	// have 0, but when you go to a new source line it's 1+.
 | ||||
| 	
 | ||||
| 	let lineinfo_count = parse_int (rdr)?; | ||||
| 	let lineinfo_count = parse_int (rdr).unwrap (); | ||||
| 	for _ in 0..lineinfo_count { | ||||
| 		parse_byte (rdr)?; | ||||
| 		parse_byte (rdr).unwrap (); | ||||
| 	} | ||||
| 	
 | ||||
| 	// Absolute line info, didn't see that in my test files
 | ||||
| 	
 | ||||
| 	let abslineinfo_count = parse_int (rdr)?; | ||||
| 	let abslineinfo_count = parse_int (rdr).unwrap (); | ||||
| 	assert_eq! (abslineinfo_count, 0); | ||||
| 	
 | ||||
| 	let local_count = parse_int (rdr)?; | ||||
| 	let local_count = parse_int (rdr).unwrap (); | ||||
| 	for _ in 0..local_count { | ||||
| 		parse_string(rdr)?; | ||||
| 		parse_int (rdr)?; | ||||
| 		parse_int (rdr)?; | ||||
| 		parse_string(rdr).unwrap (); | ||||
| 		parse_int (rdr).unwrap (); | ||||
| 		parse_int (rdr).unwrap (); | ||||
| 	} | ||||
| 	
 | ||||
| 	let upvalue_count = parse_int (rdr)?; | ||||
| 	let upvalue_count = parse_int (rdr).unwrap (); | ||||
| 	for _ in 0..upvalue_count { | ||||
| 		parse_string (rdr)?; | ||||
| 		parse_string (rdr).unwrap (); | ||||
| 	} | ||||
| 	
 | ||||
| 	Some (()) | ||||
|  | @ -281,11 +312,11 @@ pub fn parse_chunk <R: Read> (rdr: &mut R) -> Option <Chunk> { | |||
| 	let mut hdr = [0u8; 32]; | ||||
| 	rdr.read_exact (&mut hdr).ok ()?; | ||||
| 	
 | ||||
| 	assert_eq! (&hdr [0..8], &[0x1b, 0x4c, 0x75, 0x61, 0x54, 0x00, 0x19, 0x93], "This isn't a Lua 5.4 bytecode file"); | ||||
| 	
 | ||||
| 	let mut blocks = vec![]; | ||||
| 	
 | ||||
| 	while let Some (_) = parse_block (rdr, &mut blocks) { | ||||
| 		// 
 | ||||
| 	} | ||||
| 	parse_block (rdr, &mut blocks).unwrap (); | ||||
| 	
 | ||||
| 	Some (Chunk { | ||||
| 		blocks, | ||||
|  | @ -299,6 +330,28 @@ pub fn parse_chunk_from_bytes (b: &[u8]) -> Option <Chunk> { | |||
| 
 | ||||
| #[cfg (test)] | ||||
| mod tests { | ||||
| 	#[test] | ||||
| 	fn load_size () { | ||||
| 		let f = |input: &[u8]| { | ||||
| 			let mut cursor = std::io::Cursor::new (input); | ||||
| 			super::load_size (&mut cursor) | ||||
| 		}; | ||||
| 		
 | ||||
| 		assert_eq! (f (&[0x80]), 0); | ||||
| 		assert_eq! (f (&[0x81]), 1); | ||||
| 		assert_eq! (f (&[0x82]), 2); | ||||
| 		
 | ||||
| 		assert_eq! (f (&[0xff]), 127); | ||||
| 		
 | ||||
| 		assert_eq! (f (&[0x01, 0x80]), 128); | ||||
| 		assert_eq! (f (&[0x01, 0x81]), 129); | ||||
| 		assert_eq! (f (&[0x02, 0x80]), 256); | ||||
| 		
 | ||||
| 		assert_eq! (f (&[0x7f, 0xfe]), 16382); | ||||
| 		assert_eq! (f (&[0x7f, 0xff]), 16383); | ||||
| 		assert_eq! (f (&[0x01, 0x00, 0x80]), 16384); | ||||
| 	} | ||||
| 	
 | ||||
| 	#[test] | ||||
| 	fn parse_inst () { | ||||
| 		use super::Inst; | ||||
|  |  | |||
|  | @ -30,7 +30,6 @@ fn main () { | |||
| 	
 | ||||
| 	let chunk = if let Some (script) = script { | ||||
| 		let bytecode = loader::compile_bytecode_from_file (&script); | ||||
| 		dbg! (&bytecode [0..48]); | ||||
| 		let mut rdr = std::io::Cursor::new (bytecode); | ||||
| 		loader::parse_chunk (&mut rdr).unwrap () | ||||
| 	} | ||||
|  |  | |||
							
								
								
									
										14
									
								
								src/state.rs
								
								
								
								
							
							
						
						
									
										14
									
								
								src/state.rs
								
								
								
								
							|  | @ -157,10 +157,18 @@ impl State { | |||
| 			
 | ||||
| 			match instruction { | ||||
| 				Instruction::Add (a, b, c) => { | ||||
| 					let v_b = self.reg (*b).as_float ().unwrap (); | ||||
| 					let v_c = self.reg (*c).as_float ().unwrap (); | ||||
| 					let v_b = self.reg (*b); | ||||
| 					let v_c = self.reg (*c); | ||||
| 					
 | ||||
| 					*self.reg_mut (*a) = Value::from (v_b + v_c); | ||||
| 					let sum = if let (Some (v_b), Some (v_c)) = (v_b.as_int (), v_c.as_int ()) 
 | ||||
| 					{ | ||||
| 						Value::from (v_b + v_c) | ||||
| 					} | ||||
| 					else { | ||||
| 						Value::from (v_b.as_float ().unwrap () + v_c.as_float ().unwrap ()) | ||||
| 					}; | ||||
| 					
 | ||||
| 					*self.reg_mut (*a) = sum; | ||||
| 				}, | ||||
| 				Instruction::Call (a, b, _c) => { | ||||
| 					let b = usize::from (*b); | ||||
|  |  | |||
							
								
								
									
										19
									
								
								src/tests.rs
								
								
								
								
							
							
						
						
									
										19
									
								
								src/tests.rs
								
								
								
								
							|  | @ -128,22 +128,11 @@ fn bools () { | |||
| 
 | ||||
| #[test] | ||||
| fn closure () { | ||||
| 	let bytecode = include_bytes! ("../test_vectors/closure.luac"); | ||||
| 	let mut rdr = std::io::Cursor::new (bytecode); | ||||
| 	let file = crate::loader::parse_chunk (&mut rdr).unwrap (); | ||||
| 	let source = include_bytes! ("../test_vectors/closure.lua"); | ||||
| 	let bytecode = &crate::loader::compile_bytecode_from_stdin (source.to_vec ()); | ||||
| 	let chunk = crate::loader::parse_chunk_from_bytes (bytecode).unwrap (); | ||||
| 	
 | ||||
| 	for (arg, expected) in [ | ||||
| 		// Run the same test twice so clippy won't complain about a vec of 1 element
 | ||||
| 		(vec! ["_exe_name"], vec! [23.0.into ()]), | ||||
| 		(vec! ["_exe_name"], vec! [23.0.into ()]), | ||||
| 	] { | ||||
| 		let expected: Vec <Value> = expected; | ||||
| 		let mut vm = State::default (); | ||||
| 		let upvalues = State::upvalues_from_args (arg.into_iter ().map (|s| s.to_string ())); | ||||
| 		let actual = vm.execute_chunk (&file, &upvalues); | ||||
| 		
 | ||||
| 		assert_eq! (actual, expected); | ||||
| 	} | ||||
| 	assert_eq! (run_chunk (&["_exe_name"], &chunk), vec! [Value::from (23i64)]); | ||||
| } | ||||
| 
 | ||||
| #[test] | ||||
|  |  | |||
|  | @ -156,6 +156,8 @@ impl PartialEq <i64> for Value { | |||
| } | ||||
| 
 | ||||
| impl Value { | ||||
| 	/// Coerces ints to float
 | ||||
| 	
 | ||||
| 	pub fn as_float (&self) -> Option <f64> { | ||||
| 		match self { | ||||
| 			Self::Float (x) => Some (*x), | ||||
|  | @ -165,6 +167,8 @@ impl Value { | |||
| 		} | ||||
| 	} | ||||
| 	
 | ||||
| 	/// Does not coerce floats
 | ||||
| 	
 | ||||
| 	pub fn as_int (&self) -> Option <i64> { | ||||
| 		match self { | ||||
| 			Self::Integer (x) => Some (*x), | ||||
|  |  | |||
|  | @ -0,0 +1,2 @@ | |||
| local s = "a very long string with more than 128 characters, which will require 2 bytes to encode in Lua's bytecode format. This allows me to debug my bytecode loader, which doesn't seem to be handling string sizes properly." | ||||
| print (#s) | ||||
		Loading…
	
		Reference in New Issue
	
	 _
						_