Lua virtual machine in Rust, why the heck not.
commit
b07de4810d
|
@ -0,0 +1,2 @@
|
||||||
|
/target
|
||||||
|
/untracked
|
|
@ -0,0 +1,7 @@
|
||||||
|
# This file is automatically @generated by Cargo.
|
||||||
|
# It is not intended for manual editing.
|
||||||
|
version = 3
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "lua_why_not"
|
||||||
|
version = "0.1.0"
|
|
@ -0,0 +1,8 @@
|
||||||
|
[package]
|
||||||
|
name = "lua_why_not"
|
||||||
|
version = "0.1.0"
|
||||||
|
edition = "2021"
|
||||||
|
|
||||||
|
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||||
|
|
||||||
|
[dependencies]
|
|
@ -0,0 +1,176 @@
|
||||||
|
Lua source code
|
||||||
|
|
||||||
|
`hello.lua`
|
||||||
|
|
||||||
|
```lua
|
||||||
|
print "Hello."
|
||||||
|
```
|
||||||
|
|
||||||
|
`math.lua`
|
||||||
|
|
||||||
|
```lua
|
||||||
|
local function add (a, b)
|
||||||
|
return a + b
|
||||||
|
end
|
||||||
|
|
||||||
|
print (("1 + 2 = %i"):format (add (1, 2)))
|
||||||
|
```
|
||||||
|
|
||||||
|
luac5.4 listing
|
||||||
|
|
||||||
|
```
|
||||||
|
main <hello.lua:0,0> (5 instructions at 0x564f4fd74cc0)
|
||||||
|
0+ params, 2 slots, 1 upvalue, 0 locals, 2 constants, 0 functions
|
||||||
|
1 [1] VARARGPREP 0
|
||||||
|
2 [1] GETTABUP 0 0 0 ; _ENV "print"
|
||||||
|
3 [1] LOADK 1 1 ; "Hello."
|
||||||
|
4 [1] CALL 0 2 1 ; 1 in 0 out
|
||||||
|
5 [1] RETURN 0 1 1 ; 0 out
|
||||||
|
```
|
||||||
|
|
||||||
|
```
|
||||||
|
main <math.lua:0,0> (12 instructions at 0x55ee2417acc0)
|
||||||
|
0+ params, 7 slots, 1 upvalue, 1 local, 3 constants, 1 function
|
||||||
|
1 [1] VARARGPREP 0
|
||||||
|
2 [3] CLOSURE 0 0 ; 0x55ee2417b000
|
||||||
|
3 [5] GETTABUP 1 0 0 ; _ENV "print"
|
||||||
|
4 [5] LOADK 2 1 ; "1 + 2 = %i"
|
||||||
|
5 [5] SELF 2 2 2k ; "format"
|
||||||
|
6 [5] MOVE 4 0
|
||||||
|
7 [5] LOADI 5 1
|
||||||
|
8 [5] LOADI 6 2
|
||||||
|
9 [5] CALL 4 3 0 ; 2 in all out
|
||||||
|
10 [5] CALL 2 0 0 ; all in all out
|
||||||
|
11 [5] CALL 1 0 1 ; all in 0 out
|
||||||
|
12 [5] RETURN 1 1 1 ; 0 out
|
||||||
|
|
||||||
|
function <math.lua:1,3> (4 instructions at 0x55ee2417b000)
|
||||||
|
2 params, 3 slots, 0 upvalues, 2 locals, 0 constants, 0 functions
|
||||||
|
1 [2] ADD 2 0 1
|
||||||
|
2 [2] MMBIN 0 1 6 ; __add
|
||||||
|
3 [2] RETURN1 2
|
||||||
|
4 [3] RETURN0
|
||||||
|
```
|
||||||
|
|
||||||
|
```
|
||||||
|
main <test_vectors/is_93.lua:0,0> (14 instructions at 0x559f55e1ecd0)
|
||||||
|
0+ params, 2 slots, 1 upvalue, 1 local, 5 constants, 1 function
|
||||||
|
1 [1] VARARGPREP 0
|
||||||
|
2 [1] GETTABUP 0 0 0 ; _ENV "arg"
|
||||||
|
3 [1] GETI 0 0 1
|
||||||
|
4 [1] EQK 0 1 0 ; "93"
|
||||||
|
5 [1] JMP 4 ; to 10
|
||||||
|
6 [2] GETTABUP 0 0 2 ; _ENV "print"
|
||||||
|
7 [2] LOADK 1 3 ; "it's 93"
|
||||||
|
8 [2] CALL 0 2 1 ; 1 in 0 out
|
||||||
|
9 [2] JMP 3 ; to 13
|
||||||
|
10 [4] GETTABUP 0 0 2 ; _ENV "print"
|
||||||
|
11 [4] LOADK 1 4 ; "it's not 93"
|
||||||
|
12 [4] CALL 0 2 1 ; 1 in 0 out
|
||||||
|
13 [9] CLOSURE 0 0 ; 0x559f55e1f3d0
|
||||||
|
14 [9] RETURN 1 1 1 ; 0 out
|
||||||
|
constants (5) for 0x559f55e1ecd0:
|
||||||
|
0 S "arg"
|
||||||
|
1 S "93"
|
||||||
|
2 S "print"
|
||||||
|
3 S "it's 93"
|
||||||
|
4 S "it's not 93"
|
||||||
|
locals (1) for 0x559f55e1ecd0:
|
||||||
|
0 unused_fn 14 15
|
||||||
|
upvalues (1) for 0x559f55e1ecd0:
|
||||||
|
0 _ENV 1 0
|
||||||
|
|
||||||
|
function <test_vectors/is_93.lua:7,9> (4 instructions at 0x559f55e1f3d0)
|
||||||
|
0 params, 2 slots, 1 upvalue, 0 locals, 2 constants, 0 functions
|
||||||
|
1 [8] GETTABUP 0 0 0 ; _ENV "print"
|
||||||
|
2 [8] LOADK 1 1 ; "unused"
|
||||||
|
3 [8] CALL 0 2 1 ; 1 in 0 out
|
||||||
|
4 [9] RETURN0
|
||||||
|
constants (2) for 0x559f55e1f3d0:
|
||||||
|
0 S "print"
|
||||||
|
1 S "unused"
|
||||||
|
locals (0) for 0x559f55e1f3d0:
|
||||||
|
upvalues (1) for 0x559f55e1f3d0:
|
||||||
|
0 _ENV 0 0
|
||||||
|
```
|
||||||
|
|
||||||
|
Hex dump of luac5.4 byte code (the offsets in the left column are octal)
|
||||||
|
|
||||||
|
```
|
||||||
|
0000000 1b 4c 75 61 54 00 19 93 0d 0a 1a 0a 04 08 08 78 >.LuaT..........x<
|
||||||
|
0000020 56 00 00 00 00 00 00 00 00 00 00 00 28 77 40 01 >V...........(w@.<
|
||||||
|
0000040 8b 40 68 65 6c 6c 6f 2e 6c 75 61 80 80 00 01 02 >.@hello.lua.....<
|
||||||
|
0000060 85 51 00 00 00 0b 00 00 00 83 80 00 00 44 00 02 >.Q...........D..<
|
||||||
|
0000100 01 46 00 01 01 82 04 86 70 72 69 6e 74 04 87 48 >.F......print..H<
|
||||||
|
0000120 65 6c 6c 6f 2e 81 01 00 00 80 85 01 00 00 00 00 >ello............<
|
||||||
|
0000140 80 80 81 85 5f 45 4e 56 >...._ENV<
|
||||||
|
0000150
|
||||||
|
```
|
||||||
|
|
||||||
|
```
|
||||||
|
0000000 1b 4c 75 61 54 00 19 93 0d 0a 1a 0a 04 08 08 78 >.LuaT..........x<
|
||||||
|
0000020 56 00 00 00 00 00 00 00 00 00 00 00 28 77 40 01 >V...........(w@.<
|
||||||
|
0000040 8a 40 6d 61 74 68 2e 6c 75 61 80 80 00 01 07 8c >.@math.lua......<
|
||||||
|
0000060 51 00 00 00 4f 00 00 00 8b 00 00 00 03 81 00 00 >Q...O...........<
|
||||||
|
0000100 14 81 02 02 00 02 00 00 81 02 00 80 01 83 00 80 >................<
|
||||||
|
0000120 44 02 03 00 44 01 00 00 c4 00 00 01 c6 00 01 01 >D...D...........<
|
||||||
|
0000140 83 04 86 70 72 69 6e 74 04 8b 31 20 2b 20 32 20 >...print..1 + 2 <
|
||||||
|
0000160 3d 20 25 69 04 87 66 6f 72 6d 61 74 81 01 00 00 >= %i..format....<
|
||||||
|
0000200 81 80 81 83 02 00 03 84 22 01 00 01 2e 00 01 06 >........".......<
|
||||||
|
0000220 48 01 02 00 47 01 01 00 80 80 80 84 01 00 00 01 >H...G...........<
|
||||||
|
0000240 80 82 82 61 80 84 82 62 80 84 80 8c 01 02 02 00 >...a...b........<
|
||||||
|
0000260 00 00 00 00 00 00 00 00 80 81 84 61 64 64 82 8c >...........add..<
|
||||||
|
0000300 81 85 5f 45 4e 56 >.._ENV<
|
||||||
|
0000306
|
||||||
|
|
||||||
|
```
|
||||||
|
|
||||||
|
# Interpretation of byte code
|
||||||
|
|
||||||
|
Overall structure
|
||||||
|
|
||||||
|
- Roughly 32 byte header with magic number, version number, etc.
|
||||||
|
- File name
|
||||||
|
- `80 80 00 01 02 85` header for main function
|
||||||
|
- Packed 4-byte instructions for main function
|
||||||
|
- `82` or `83` length prefix for string table
|
||||||
|
- String table for main function
|
||||||
|
- `81 01 00 00 81 80 81 83 02 00 03 84` header for "add" function
|
||||||
|
- Packed 4-byte instructions for "add" function
|
||||||
|
- `80 80 80 84 01 00 00 01 80` Header for file-scope debug symbols?
|
||||||
|
- File-scope debug symbols?
|
||||||
|
- String table for entire file?
|
||||||
|
|
||||||
|
## Bytecodes
|
||||||
|
|
||||||
|
Per lopcodes.h, instructions are 32 bits long, always.
|
||||||
|
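The opcode is encoded in the low 7 bits of the first byte (as dumped here the instruction word is little-endian, so these are the lowest 7 bits of the 32-bit word). The top bit of that byte already belongs to the next operand, so mask with `0x7f`.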
|
||||||
|
|
||||||
|
```
|
||||||
|
83 80 00 00 ; 0x83 & 0x7f = 0x03 = 3 = LOADK
|
||||||
|
0b 00 00 00 ; 0x0b = 11 = GETTABUP
|
||||||
|
22 01 00 01 ; 0x22 = 34 = ADD
|
||||||
|
2e 00 01 06 ; 0x2e = 46 = MMBIN
|
||||||
|
c4 00 00 01 ; 0xc4 & 0x7f = 0x44 = 68 = CALL
|
||||||
|
44 00 02 01 ; 0x44 = 68 = CALL
|
||||||
|
46 00 01 01 ; 0x46 = 70 = RETURN
|
||||||
|
47 01 01 00 ; 0x47 = 71 = RETURN0
|
||||||
|
48 01 02 00 ; 0x48 = 72 = RETURN1
|
||||||
|
51 00 00 00 ; 0x51 = 81 = VARARGPREP
|
||||||
|
```
|
||||||
|
|
||||||
|
## Strings
|
||||||
|
|
||||||
|
Filenames are encoded at the top, and there's a string table at the bottom.
|
||||||
|
|
||||||
|
Strings appear to be prefixed with a variable-length length prefix.
|
||||||
|
There is an extra byte before each string which I can't account for,
|
||||||
|
and the lengths seem to be off by one, e.g. 0x84 is a length of 3, not 4.
|
||||||
|
|
||||||
|
| --- |
|
||||||
|
| "add" | 81 84 61 64 64
|
||||||
|
| "_ENV" | 81 85 5f 45 4e 56
|
||||||
|
| "print" | 04 86 70 72 69 6e 74
|
||||||
|
| "format" | 04 87 66 6f 72 6d 61 74
|
||||||
|
| "@math.lua" | 01 8a 40 6d 61 74 68 2e 6c 75 61
|
||||||
|
| "1 + 2 = %i" | 04 8b 31 20 2b 20 32 20 3d 20 25 69
|
|
@ -0,0 +1,171 @@
|
||||||
|
/// One decoded VM instruction.
///
/// Only the opcodes that appear in the hand-assembled test chunk are
/// modelled. Operands are kept in the order the `luac` listing prints them.
enum Instruction {
    /// `VARARGPREP A`. The interpreter loop treats it as a no-op.
    VarArgPrep (i32),

    /// `GETTABUP A B C`: look up constant `C` as a key in upvalue `B`
    /// and store the result in register `A`.
    GetTabUp (u8, u8, u8),

    /// `GETI A B C`: index the table in register `B` with the integer `C`
    /// and store the result in register `A`.
    GetI (u8, u8, u8),

    /// `EQK A B k`: compare register `A` with constant `B`; the next
    /// instruction is skipped when the outcome differs from the `k` flag.
    EqK (u8, u8, u8),

    /// `JMP sJ`: add the signed offset to the program counter.
    Jmp (i32),

    /// `LOADK A Bx`: copy constant `Bx` into register `A`.
    LoadK (u8, i32),

    /// `CALL A B C`: call the function in register `A` with `B - 1`
    /// arguments taken from the registers that follow it.
    Call (u8, u8, u8),

    /// `CLOSURE A Bx`. Present in the listings but not executed yet.
    Closure (u8, i32),

    /// `RETURN A B C`: ends execution of the chunk.
    Return (u8, u8, u8),
}
|
||||||
|
|
||||||
|
/// A dynamically typed value as held in a register or a constant table.
///
/// `PartialEq` is derived because the interpreter compares registers
/// against constants directly.
#[derive (Clone, Debug, PartialEq)]
enum Value {
    /// Lua `nil`; also the default for a register that was never written.
    Nil,
    /// Lua `false`.
    False,
    /// Lua `true`.
    True,
    /// A number stored as a 64-bit float.
    Float (f64),
    /// An owned string.
    String (String),

    // The remaining variants are placeholders: tables and function
    // pointers are not implemented yet, so the two globals the test
    // program needs get a dedicated variant each.

    /// Stand-in for the global `arg` table.
    BogusArg,
    /// Stand-in for the global `print` function.
    BogusPrint,
}
|
||||||
|
|
||||||
|
impl Default for Value {
|
||||||
|
fn default () -> Self {
|
||||||
|
Self::Nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl From <String> for Value {
|
||||||
|
fn from (x: String) -> Self {
|
||||||
|
Self::String (x)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl From <&str> for Value {
|
||||||
|
fn from (x: &str) -> Self {
|
||||||
|
Self::from (String::from (x))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
struct Chunk {
|
||||||
|
instructions: Vec <Instruction>,
|
||||||
|
constants: Vec <Value>,
|
||||||
|
}
|
||||||
|
|
||||||
|
fn main() {
|
||||||
|
let arg: Vec <_> = std::env::args ().collect ();
|
||||||
|
|
||||||
|
let chunk = Chunk {
|
||||||
|
instructions: vec! [
|
||||||
|
Instruction::VarArgPrep (0),
|
||||||
|
Instruction::GetTabUp (0, 0, 0),
|
||||||
|
Instruction::GetI (0, 0, 1),
|
||||||
|
Instruction::EqK (0, 1, 0),
|
||||||
|
Instruction::Jmp (4),
|
||||||
|
Instruction::GetTabUp (0, 0, 2),
|
||||||
|
Instruction::LoadK (1, 3),
|
||||||
|
Instruction::Call (0, 2, 1),
|
||||||
|
Instruction::Jmp (3),
|
||||||
|
Instruction::GetTabUp (0, 0, 2),
|
||||||
|
Instruction::LoadK (1, 4),
|
||||||
|
Instruction::Call (0, 2, 1),
|
||||||
|
Instruction::Return (1, 1, 1),
|
||||||
|
],
|
||||||
|
constants: vec! [
|
||||||
|
"arg",
|
||||||
|
"93",
|
||||||
|
"print",
|
||||||
|
"it's 93",
|
||||||
|
"it's not 93",
|
||||||
|
].into_iter ().map (|s| Value::from (s)).collect (),
|
||||||
|
};
|
||||||
|
|
||||||
|
let mut registers = vec! [Value::default (); 256];
|
||||||
|
|
||||||
|
|
||||||
|
let mut program_counter = 0i32;
|
||||||
|
let max_iters = 2000;
|
||||||
|
|
||||||
|
for _ in 0..max_iters {
|
||||||
|
let instruction = chunk.instructions.get (usize::try_from (program_counter).unwrap ()).unwrap ();
|
||||||
|
|
||||||
|
let r = &mut registers;
|
||||||
|
let k = &chunk.constants;
|
||||||
|
|
||||||
|
match instruction {
|
||||||
|
Instruction::Call (a, b, c) => {
|
||||||
|
// Take arguments from registers [a + 1, a + b)
|
||||||
|
// Call the function in register [a]
|
||||||
|
// Return values in registers [a, a + c - 1)
|
||||||
|
//
|
||||||
|
// That is, call a with b - 1 arguments and expect c returns
|
||||||
|
//
|
||||||
|
// e.g. CALL 0 2 1 mean "Call 0 with 1 argument, return 1 value", like for printing a constant
|
||||||
|
|
||||||
|
// TODO: Only implement printing constants for now
|
||||||
|
|
||||||
|
let a = usize::try_from (*a).unwrap ();
|
||||||
|
|
||||||
|
assert_eq! (*b, 2);
|
||||||
|
assert_eq! (*c, 1);
|
||||||
|
|
||||||
|
println! ("{:?}", r [a + 1]);
|
||||||
|
},
|
||||||
|
Instruction::EqK (a, b, c_k) => {
|
||||||
|
let a = usize::try_from (*a).unwrap ();
|
||||||
|
let b = usize::try_from (*b).unwrap ();
|
||||||
|
|
||||||
|
let equal = r [a] == k [b];
|
||||||
|
|
||||||
|
match (equal, c_k) {
|
||||||
|
(true, 0) => program_counter += 1,
|
||||||
|
(false, 1) => program_counter += 1,
|
||||||
|
_ => (),
|
||||||
|
}
|
||||||
|
},
|
||||||
|
Instruction::GetTabUp (a, b, c) => {
|
||||||
|
let a = usize::try_from (*a).unwrap ();
|
||||||
|
let b = usize::try_from (*b).unwrap ();
|
||||||
|
let c = usize::try_from (*c).unwrap ();
|
||||||
|
|
||||||
|
// Only supported upvalue is `_ENV`
|
||||||
|
assert_eq! (b, 0);
|
||||||
|
|
||||||
|
let key = k.get (c).unwrap ();
|
||||||
|
let value = match key {
|
||||||
|
Value::String (s) => match s.as_str() {
|
||||||
|
"arg" => Value::BogusArg,
|
||||||
|
"print" => Value::BogusPrint,
|
||||||
|
_ => panic! ("key not in _ENV upvalue"),
|
||||||
|
},
|
||||||
|
_ => unimplemented!(),
|
||||||
|
};
|
||||||
|
|
||||||
|
r [a] = value;
|
||||||
|
},
|
||||||
|
Instruction::GetI (a, b, c) => {
|
||||||
|
let a = usize::try_from (*a).unwrap ();
|
||||||
|
let b = usize::try_from (*b).unwrap ();
|
||||||
|
let c = usize::try_from (*c).unwrap ();
|
||||||
|
|
||||||
|
let table = r.get (b).unwrap ();
|
||||||
|
let value = match table {
|
||||||
|
Value::BogusArg => arg.get (c).unwrap ().as_str().into (),
|
||||||
|
_ => unimplemented!(),
|
||||||
|
};
|
||||||
|
|
||||||
|
r [a] = value;
|
||||||
|
},
|
||||||
|
Instruction::Jmp (sJ) => program_counter += sJ,
|
||||||
|
Instruction::LoadK (a, bx) => {
|
||||||
|
let a = usize::try_from (*a).unwrap ();
|
||||||
|
let bx = usize::try_from (*bx).unwrap ();
|
||||||
|
|
||||||
|
r [a] = k [bx].clone ();
|
||||||
|
},
|
||||||
|
Instruction::Return (_a, _b, _c) => {
|
||||||
|
break;
|
||||||
|
},
|
||||||
|
Instruction::VarArgPrep (_) => (),
|
||||||
|
_ => (),
|
||||||
|
}
|
||||||
|
|
||||||
|
program_counter += 1;
|
||||||
|
}
|
||||||
|
}
|
Binary file not shown.
|
@ -0,0 +1,15 @@
|
||||||
|
-- Put some very long comment here so the error will
|
||||||
|
-- have an interesting line number in the traceback
|
||||||
|
--
|
||||||
|
--
|
||||||
|
--
|
||||||
|
--
|
||||||
|
--
|
||||||
|
--
|
||||||
|
--
|
||||||
|
--
|
||||||
|
--
|
||||||
|
--
|
||||||
|
--
|
||||||
|
|
||||||
|
error ("bogus")
|
Binary file not shown.
|
@ -0,0 +1 @@
|
||||||
|
print "Hello."
|
Binary file not shown.
|
@ -0,0 +1,9 @@
|
||||||
|
if arg [1] == "93" then
|
||||||
|
print "it's 93"
|
||||||
|
else
|
||||||
|
print "it's not 93"
|
||||||
|
end
|
||||||
|
|
||||||
|
local function unused_fn ()
|
||||||
|
print "unused"
|
||||||
|
end
|
Binary file not shown.
|
@ -0,0 +1 @@
|
||||||
|
return 0
|
Binary file not shown.
|
@ -0,0 +1,5 @@
|
||||||
|
local function add (a, b)
|
||||||
|
return a + b
|
||||||
|
end
|
||||||
|
|
||||||
|
print (("1 + 2 = %i"):format (add (1, 2)))
|
Binary file not shown.
|
@ -0,0 +1,9 @@
|
||||||
|
local function add (a, b)
|
||||||
|
return a + b
|
||||||
|
end
|
||||||
|
|
||||||
|
local function sub (a, b)
|
||||||
|
return a - b
|
||||||
|
end
|
||||||
|
|
||||||
|
print (("1 + 2 = %i"):format (add (1, 2)))
|
Binary file not shown.
Loading…
Reference in New Issue