Lua virtual machine in Rust, why the heck not.
						commit
						b07de4810d
					
				|  | @ -0,0 +1,2 @@ | |||
| /target | ||||
| /untracked | ||||
|  | @ -0,0 +1,7 @@ | |||
| # This file is automatically @generated by Cargo. | ||||
| # It is not intended for manual editing. | ||||
| version = 3 | ||||
| 
 | ||||
| [[package]] | ||||
| name = "lua_why_not" | ||||
| version = "0.1.0" | ||||
|  | @ -0,0 +1,8 @@ | |||
| [package] | ||||
| name = "lua_why_not" | ||||
| version = "0.1.0" | ||||
| edition = "2021" | ||||
| 
 | ||||
| # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html | ||||
| 
 | ||||
| [dependencies] | ||||
|  | @ -0,0 +1,176 @@ | |||
| Lua source code | ||||
| 
 | ||||
| `hello.lua` | ||||
| 
 | ||||
| ```lua | ||||
| print "Hello." | ||||
| ``` | ||||
| 
 | ||||
| `math.lua` | ||||
| 
 | ||||
| ```lua | ||||
| local function add (a, b) | ||||
| 	return a + b | ||||
| end | ||||
| 
 | ||||
| print (("1 + 2 = %i"):format (add (1, 2))) | ||||
| ``` | ||||
| 
 | ||||
| luac5.4 listing | ||||
| 
 | ||||
| ``` | ||||
| main <hello.lua:0,0> (5 instructions at 0x564f4fd74cc0) | ||||
| 0+ params, 2 slots, 1 upvalue, 0 locals, 2 constants, 0 functions | ||||
| 	1	[1]	VARARGPREP	0 | ||||
| 	2	[1]	GETTABUP 	0 0 0	; _ENV "print" | ||||
| 	3	[1]	LOADK    	1 1	; "Hello." | ||||
| 	4	[1]	CALL     	0 2 1	; 1 in 0 out | ||||
| 	5	[1]	RETURN   	0 1 1	; 0 out | ||||
| ``` | ||||
| 
 | ||||
| ``` | ||||
| main <math.lua:0,0> (12 instructions at 0x55ee2417acc0) | ||||
| 0+ params, 7 slots, 1 upvalue, 1 local, 3 constants, 1 function | ||||
|         1       [1]     VARARGPREP      0 | ||||
|         2       [3]     CLOSURE         0 0     ; 0x55ee2417b000 | ||||
|         3       [5]     GETTABUP        1 0 0   ; _ENV "print" | ||||
|         4       [5]     LOADK           2 1     ; "1 + 2 = %i" | ||||
|         5       [5]     SELF            2 2 2k  ; "format" | ||||
|         6       [5]     MOVE            4 0 | ||||
|         7       [5]     LOADI           5 1 | ||||
|         8       [5]     LOADI           6 2 | ||||
|         9       [5]     CALL            4 3 0   ; 2 in all out | ||||
|         10      [5]     CALL            2 0 0   ; all in all out | ||||
|         11      [5]     CALL            1 0 1   ; all in 0 out | ||||
|         12      [5]     RETURN          1 1 1   ; 0 out | ||||
| 
 | ||||
| function <math.lua:1,3> (4 instructions at 0x55ee2417b000) | ||||
| 2 params, 3 slots, 0 upvalues, 2 locals, 0 constants, 0 functions | ||||
|         1       [2]     ADD             2 0 1 | ||||
|         2       [2]     MMBIN           0 1 6   ; __add | ||||
|         3       [2]     RETURN1         2 | ||||
|         4       [3]     RETURN0   | ||||
| ``` | ||||
| 
 | ||||
| ``` | ||||
| main <test_vectors/is_93.lua:0,0> (14 instructions at 0x559f55e1ecd0) | ||||
| 0+ params, 2 slots, 1 upvalue, 1 local, 5 constants, 1 function | ||||
|         1       [1]     VARARGPREP      0 | ||||
|         2       [1]     GETTABUP        0 0 0   ; _ENV "arg" | ||||
|         3       [1]     GETI            0 0 1 | ||||
|         4       [1]     EQK             0 1 0   ; "93" | ||||
|         5       [1]     JMP             4       ; to 10 | ||||
|         6       [2]     GETTABUP        0 0 2   ; _ENV "print" | ||||
|         7       [2]     LOADK           1 3     ; "it's 93" | ||||
|         8       [2]     CALL            0 2 1   ; 1 in 0 out | ||||
|         9       [2]     JMP             3       ; to 13 | ||||
|         10      [4]     GETTABUP        0 0 2   ; _ENV "print" | ||||
|         11      [4]     LOADK           1 4     ; "it's not 93" | ||||
|         12      [4]     CALL            0 2 1   ; 1 in 0 out | ||||
|         13      [9]     CLOSURE         0 0     ; 0x559f55e1f3d0 | ||||
|         14      [9]     RETURN          1 1 1   ; 0 out | ||||
| constants (5) for 0x559f55e1ecd0: | ||||
|         0       S       "arg" | ||||
|         1       S       "93" | ||||
|         2       S       "print" | ||||
|         3       S       "it's 93" | ||||
|         4       S       "it's not 93" | ||||
| locals (1) for 0x559f55e1ecd0: | ||||
|         0       unused_fn       14      15 | ||||
| upvalues (1) for 0x559f55e1ecd0: | ||||
|         0       _ENV    1       0 | ||||
| 
 | ||||
| function <test_vectors/is_93.lua:7,9> (4 instructions at 0x559f55e1f3d0) | ||||
| 0 params, 2 slots, 1 upvalue, 0 locals, 2 constants, 0 functions | ||||
|         1       [8]     GETTABUP        0 0 0   ; _ENV "print" | ||||
|         2       [8]     LOADK           1 1     ; "unused" | ||||
|         3       [8]     CALL            0 2 1   ; 1 in 0 out | ||||
|         4       [9]     RETURN0   | ||||
| constants (2) for 0x559f55e1f3d0: | ||||
|         0       S       "print" | ||||
|         1       S       "unused" | ||||
| locals (0) for 0x559f55e1f3d0: | ||||
| upvalues (1) for 0x559f55e1f3d0: | ||||
|         0       _ENV    0       0 | ||||
| ``` | ||||
| 
 | ||||
| Hex dump of luac5.4 byte code (the offsets in the left column are octal, the bytes are hexadecimal) | ||||
| 
 | ||||
| ``` | ||||
| 0000000 1b 4c 75 61 54 00 19 93 0d 0a 1a 0a 04 08 08 78  >.LuaT..........x< | ||||
| 0000020 56 00 00 00 00 00 00 00 00 00 00 00 28 77 40 01  >V...........(w@.< | ||||
| 0000040 8b 40 68 65 6c 6c 6f 2e 6c 75 61 80 80 00 01 02  >.@hello.lua.....< | ||||
| 0000060 85 51 00 00 00 0b 00 00 00 83 80 00 00 44 00 02  >.Q...........D..< | ||||
| 0000100 01 46 00 01 01 82 04 86 70 72 69 6e 74 04 87 48  >.F......print..H< | ||||
| 0000120 65 6c 6c 6f 2e 81 01 00 00 80 85 01 00 00 00 00  >ello............< | ||||
| 0000140 80 80 81 85 5f 45 4e 56                          >...._ENV< | ||||
| 0000150 | ||||
| ``` | ||||
| 
 | ||||
| ``` | ||||
| 0000000 1b 4c 75 61 54 00 19 93 0d 0a 1a 0a 04 08 08 78  >.LuaT..........x< | ||||
| 0000020 56 00 00 00 00 00 00 00 00 00 00 00 28 77 40 01  >V...........(w@.< | ||||
| 0000040 8a 40 6d 61 74 68 2e 6c 75 61 80 80 00 01 07 8c  >.@math.lua......< | ||||
| 0000060 51 00 00 00 4f 00 00 00 8b 00 00 00 03 81 00 00  >Q...O...........< | ||||
| 0000100 14 81 02 02 00 02 00 00 81 02 00 80 01 83 00 80  >................< | ||||
| 0000120 44 02 03 00 44 01 00 00 c4 00 00 01 c6 00 01 01  >D...D...........< | ||||
| 0000140 83 04 86 70 72 69 6e 74 04 8b 31 20 2b 20 32 20  >...print..1 + 2 < | ||||
| 0000160 3d 20 25 69 04 87 66 6f 72 6d 61 74 81 01 00 00  >= %i..format....< | ||||
| 0000200 81 80 81 83 02 00 03 84 22 01 00 01 2e 00 01 06  >........".......< | ||||
| 0000220 48 01 02 00 47 01 01 00 80 80 80 84 01 00 00 01  >H...G...........< | ||||
| 0000240 80 82 82 61 80 84 82 62 80 84 80 8c 01 02 02 00  >...a...b........< | ||||
| 0000260 00 00 00 00 00 00 00 00 80 81 84 61 64 64 82 8c  >...........add..< | ||||
| 0000300 81 85 5f 45 4e 56                                >.._ENV< | ||||
| 0000306 | ||||
| 
 | ||||
| ``` | ||||
| 
 | ||||
| # Interpretation of byte code | ||||
| 
 | ||||
| Overall structure | ||||
| 
 | ||||
| - Roughly 32 byte header with magic number, version number, etc. | ||||
| - File name | ||||
| - `80 80 00 01 02 85` header for main function | ||||
| - Packed 4-byte instructions for main function | ||||
| - `82` or `83` length prefix for string table | ||||
| - String table for main function | ||||
| - `81 01 00 00 81 80 81 83 02 00 03 84` header for "add" function | ||||
| - Packed 4-byte instructions for "add" function | ||||
| - `80 80 80 84 01 00 00 01 80` Header for file-scope debug symbols? | ||||
| - File-scope debug symbols? | ||||
| - String table for entire file? | ||||
| 
 | ||||
| ## Bytecodes | ||||
| 
 | ||||
| Per lopcodes.h, instructions are 32 bits long, always. | ||||
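| The opcode is encoded in the low 7 bits of the first byte of each instruction; the top bit of that byte belongs to the operands (e.g. `c4` & `7f` = `44` = 68 = CALL). | ||||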
| 
 | ||||
| ``` | ||||
| 83 80 00 00 ; 0x03 =  3 = LOADK | ||||
| 0b 00 00 00 ; 0x0b = 11 = GETTABUP | ||||
| 22 01 00 01 ; 0x22 = 34 = ADD | ||||
| 2e 00 01 06 ; 0x2e = 46 = MMBIN | ||||
| c4 00 00 01 ; 0x44 = 68 = CALL (0xc4 with the top bit masked off) | ||||
| 44 00 02 01 ; 0x44 = 68 = CALL | ||||
| 46 00 01 01 ; 0x46 = 70 = RETURN | ||||
| 47 01 01 00 ; 0x47 = 71 = RETURN0 | ||||
| 48 01 02 00 ; 0x48 = 72 = RETURN1 | ||||
| 51 00 00 00 ; 0x51 = 81 = VARARGPREP | ||||
| ``` | ||||
| 
 | ||||
| ## Strings | ||||
| 
 | ||||
| Filenames are encoded at the top, and there's a string table at the bottom. | ||||
| 
 | ||||
| Strings appear to be prefixed with a variable-length length prefix. | ||||
| There is an extra byte before each string which I can't account for, | ||||
| and the lengths seem to be off by one, e.g. 0x84 is a length of 3, not 4. | ||||
| 
 | ||||
| | --- | | ||||
| | "add"        | 81 84 61 64 64 | ||||
| | "_ENV"       | 81 85 5f 45 4e 56 | ||||
| | "print"      | 04 86 70 72 69 6e 74 | ||||
| | "format"     | 04 87 66 6f 72 6d 61 74 | ||||
| | "@math.lua"  | 01 8a 40 6d 61 74 68 2e 6c 75 61 | ||||
| | "1 + 2 = %i" | 04 8b 31 20 2b 20 32 20 3d 20 25 69 | ||||
|  | @ -0,0 +1,171 @@ | |||
/// One decoded Lua 5.4 instruction, named after the opcodes in the luac listing.
///
/// Only the opcodes needed for the hand-assembled `is_93.lua` chunk are present.
/// Operand meanings below describe how the interpreter loop in `main` uses them.
enum Instruction {
	/// `VARARGPREP`: currently a no-op in the interpreter loop.
	VarArgPrep (i32),
	
	/// `GETTABUP a b c`: look up constant `c` in upvalue `b` and store the
	/// result in register `a`.
	GetTabUp (u8, u8, u8),
	
	/// `GETI a b c`: index the table in register `b` with the integer `c`
	/// and store the result in register `a`.
	GetI (u8, u8, u8),
	
	/// `EQK a b k`: compare register `a` with constant `b` and conditionally
	/// skip the next instruction depending on the flag `k`.
	EqK (u8, u8, u8),
	
	/// `JMP sJ`: add the signed offset to the program counter.
	Jmp (i32),
	
	/// `LOADK a bx`: copy constant `bx` into register `a`.
	LoadK (u8, i32),
	
	/// `CALL a b c`: call the function in register `a`.
	Call (u8, u8, u8),
	
	/// `CLOSURE a bx`: not interpreted yet.
	Closure (u8, i32),
	
	/// `RETURN a b c`: stops the interpreter loop.
	Return (u8, u8, u8),
}
| 
 | ||||
/// A Lua value as stored in a register or in a chunk's constant table.
#[derive (Clone, Debug, PartialEq)]
enum Value {
	/// Lua `nil`.
	Nil,
	/// Lua `false`.
	False,
	/// Lua `true`.
	True,
	/// A floating-point number.
	Float (f64),
	/// An owned string.
	String (String),
	
	// The remaining variants are placeholders: tables and function
	// pointers are not implemented yet, so the two globals the test
	// program needs are special-cased here.
	
	/// Stand-in for the global `arg` table.
	BogusArg,
	/// Stand-in for the global `print` function.
	BogusPrint,
}
| 
 | ||||
| impl Default for Value { | ||||
| 	fn default () -> Self { | ||||
| 		Self::Nil | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| impl From <String> for Value { | ||||
| 	fn from (x: String) -> Self { | ||||
| 		Self::String (x) | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| impl From <&str> for Value { | ||||
| 	fn from (x: &str) -> Self { | ||||
| 		Self::from (String::from (x)) | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| struct Chunk { | ||||
| 	instructions: Vec <Instruction>, | ||||
| 	constants: Vec <Value>, | ||||
| } | ||||
| 
 | ||||
| fn main() { | ||||
| 	let arg: Vec <_> = std::env::args ().collect (); | ||||
| 	
 | ||||
| 	let chunk = Chunk { | ||||
| 		instructions: vec! [ | ||||
| 			Instruction::VarArgPrep (0), | ||||
| 			Instruction::GetTabUp (0, 0, 0), | ||||
| 			Instruction::GetI (0, 0, 1), | ||||
| 			Instruction::EqK (0, 1, 0), | ||||
| 			Instruction::Jmp (4), | ||||
| 			Instruction::GetTabUp (0, 0, 2), | ||||
| 			Instruction::LoadK (1, 3), | ||||
| 			Instruction::Call (0, 2, 1), | ||||
| 			Instruction::Jmp (3), | ||||
| 			Instruction::GetTabUp (0, 0, 2), | ||||
| 			Instruction::LoadK (1, 4), | ||||
| 			Instruction::Call (0, 2, 1), | ||||
| 			Instruction::Return (1, 1, 1), | ||||
| 		], | ||||
| 		constants: vec! [ | ||||
| 			"arg", | ||||
| 			"93", | ||||
| 			"print", | ||||
| 			"it's 93", | ||||
| 			"it's not 93", | ||||
| 		].into_iter ().map (|s| Value::from (s)).collect (), | ||||
| 	}; | ||||
| 	
 | ||||
| 	let mut registers = vec! [Value::default (); 256]; | ||||
| 	
 | ||||
| 	
 | ||||
| 	let mut program_counter = 0i32; | ||||
| 	let max_iters = 2000; | ||||
| 	
 | ||||
| 	for _ in 0..max_iters { | ||||
| 		let instruction = chunk.instructions.get (usize::try_from (program_counter).unwrap ()).unwrap (); | ||||
| 		
 | ||||
| 		let r = &mut registers; | ||||
| 		let k = &chunk.constants; | ||||
| 		
 | ||||
| 		match instruction { | ||||
| 			Instruction::Call (a, b, c) => { | ||||
| 				// Take arguments from registers [a + 1, a + b)
 | ||||
| 				// Call the function in register [a]
 | ||||
| 				// Return values in registers [a, a + c - 1)
 | ||||
| 				// 
 | ||||
| 				// That is, call a with b - 1 arguments and expect c returns
 | ||||
| 				// 
 | ||||
| 				// e.g. CALL 0 2 1 mean "Call 0 with 1 argument, return 1 value", like for printing a constant
 | ||||
| 				
 | ||||
| 				// TODO: Only implement printing constants for now
 | ||||
| 				
 | ||||
| 				let a = usize::try_from (*a).unwrap (); | ||||
| 				
 | ||||
| 				assert_eq! (*b, 2); | ||||
| 				assert_eq! (*c, 1); | ||||
| 				
 | ||||
| 				println! ("{:?}", r [a + 1]); | ||||
| 			}, | ||||
| 			Instruction::EqK (a, b, c_k) => { | ||||
| 				let a = usize::try_from (*a).unwrap (); | ||||
| 				let b = usize::try_from (*b).unwrap (); | ||||
| 				
 | ||||
| 				let equal = r [a] == k [b]; | ||||
| 				
 | ||||
| 				match (equal, c_k) { | ||||
| 					(true, 0) => program_counter += 1, | ||||
| 					(false, 1) => program_counter += 1, | ||||
| 					_ => (), | ||||
| 				} | ||||
| 			}, | ||||
| 			Instruction::GetTabUp (a, b, c) => { | ||||
| 				let a = usize::try_from (*a).unwrap (); | ||||
| 				let b = usize::try_from (*b).unwrap (); | ||||
| 				let c = usize::try_from (*c).unwrap (); | ||||
| 				
 | ||||
| 				// Only supported upvalue is `_ENV`
 | ||||
| 				assert_eq! (b, 0); | ||||
| 				
 | ||||
| 				let key = k.get (c).unwrap (); | ||||
| 				let value = match key { | ||||
| 					Value::String (s) => match s.as_str() { | ||||
| 						"arg" => Value::BogusArg, | ||||
| 						"print" => Value::BogusPrint, | ||||
| 						_ => panic! ("key not in _ENV upvalue"), | ||||
| 					}, | ||||
| 					_ => unimplemented!(), | ||||
| 				}; | ||||
| 				
 | ||||
| 				r [a] = value; | ||||
| 			}, | ||||
| 			Instruction::GetI (a, b, c) => { | ||||
| 				let a = usize::try_from (*a).unwrap (); | ||||
| 				let b = usize::try_from (*b).unwrap (); | ||||
| 				let c = usize::try_from (*c).unwrap (); | ||||
| 				
 | ||||
| 				let table = r.get (b).unwrap (); | ||||
| 				let value = match table { | ||||
| 					Value::BogusArg => arg.get (c).unwrap ().as_str().into (), | ||||
| 					_ => unimplemented!(), | ||||
| 				}; | ||||
| 				
 | ||||
| 				r [a] = value; | ||||
| 			}, | ||||
| 			Instruction::Jmp (sJ) => program_counter += sJ, | ||||
| 			Instruction::LoadK (a, bx) => { | ||||
| 				let a  = usize::try_from  (*a).unwrap (); | ||||
| 				let bx = usize::try_from (*bx).unwrap (); | ||||
| 				
 | ||||
| 				r [a] = k [bx].clone (); | ||||
| 			}, | ||||
| 			Instruction::Return (_a, _b, _c) => { | ||||
| 				break; | ||||
| 			}, | ||||
| 			Instruction::VarArgPrep (_) => (), | ||||
| 			_ => (), | ||||
| 		} | ||||
| 		
 | ||||
| 		program_counter += 1; | ||||
| 	} | ||||
| } | ||||
										
											Binary file not shown.
										
									
								
							|  | @ -0,0 +1,15 @@ | |||
| -- Put some very long comment here so the error will | ||||
| -- have an interesting line number in the traceback | ||||
| --  | ||||
| --  | ||||
| --  | ||||
| --  | ||||
| --  | ||||
| --  | ||||
| --  | ||||
| --  | ||||
| --  | ||||
| --  | ||||
| --  | ||||
| 
 | ||||
| error ("bogus") | ||||
										
											Binary file not shown.
										
									
								
							|  | @ -0,0 +1 @@ | |||
| print "Hello." | ||||
										
											Binary file not shown.
										
									
								
							|  | @ -0,0 +1,9 @@ | |||
| if arg [1] == "93" then | ||||
| 	print "it's 93" | ||||
| else | ||||
| 	print "it's not 93" | ||||
| end | ||||
| 
 | ||||
| local function unused_fn () | ||||
| 	print "unused" | ||||
| end | ||||
										
											Binary file not shown.
										
									
								
							|  | @ -0,0 +1 @@ | |||
| return 0 | ||||
										
											Binary file not shown.
										
									
								
							|  | @ -0,0 +1,5 @@ | |||
| local function add (a, b) | ||||
| 	return a + b | ||||
| end | ||||
| 
 | ||||
| print (("1 + 2 = %i"):format (add (1, 2))) | ||||
										
											Binary file not shown.
										
									
								
							|  | @ -0,0 +1,9 @@ | |||
| local function add (a, b) | ||||
| 	return a + b | ||||
| end | ||||
| 
 | ||||
| local function sub (a, b) | ||||
| 	return a - b | ||||
| end | ||||
| 
 | ||||
| print (("1 + 2 = %i"):format (add (1, 2))) | ||||
										
											Binary file not shown.
										
									
								
							
		Loading…
	
		Reference in New Issue
	
	 _
						_