From b07de4810dce3b6d0198e2c9151999cf555335c3 Mon Sep 17 00:00:00 2001 From: _ <_@_> Date: Sun, 24 Sep 2023 16:16:10 -0500 Subject: [PATCH] Lua virtual machine in Rust, why the heck not. --- .gitignore | 2 + Cargo.lock | 7 ++ Cargo.toml | 8 ++ notes.md | 176 ++++++++++++++++++++++++++++++ src/main.rs | 171 +++++++++++++++++++++++++++++ test_vectors/combined.luac | Bin 0 -> 323 bytes test_vectors/error.lua | 15 +++ test_vectors/error.luac | Bin 0 -> 103 bytes test_vectors/hello.lua | 1 + test_vectors/hello.luac | Bin 0 -> 104 bytes test_vectors/is_93.lua | 9 ++ test_vectors/is_93.luac | Bin 0 -> 180 bytes test_vectors/long_named_file.lua | 1 + test_vectors/long_named_file.luac | Bin 0 -> 94 bytes test_vectors/math.lua | 5 + test_vectors/math.luac | Bin 0 -> 198 bytes test_vectors/math_2.lua | 9 ++ test_vectors/math_2.luac | Bin 0 -> 253 bytes 18 files changed, 404 insertions(+) create mode 100644 .gitignore create mode 100644 Cargo.lock create mode 100644 Cargo.toml create mode 100644 notes.md create mode 100644 src/main.rs create mode 100644 test_vectors/combined.luac create mode 100644 test_vectors/error.lua create mode 100644 test_vectors/error.luac create mode 100644 test_vectors/hello.lua create mode 100644 test_vectors/hello.luac create mode 100644 test_vectors/is_93.lua create mode 100644 test_vectors/is_93.luac create mode 100644 test_vectors/long_named_file.lua create mode 100644 test_vectors/long_named_file.luac create mode 100644 test_vectors/math.lua create mode 100644 test_vectors/math.luac create mode 100644 test_vectors/math_2.lua create mode 100644 test_vectors/math_2.luac diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..350f73e --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +/target +/untracked diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 0000000..1c68fe3 --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,7 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
+version = 3 + +[[package]] +name = "lua_why_not" +version = "0.1.0" diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..87c9af0 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,8 @@ +[package] +name = "lua_why_not" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] diff --git a/notes.md b/notes.md new file mode 100644 index 0000000..1d23d4d --- /dev/null +++ b/notes.md @@ -0,0 +1,176 @@ +Lua source code + +`hello.lua` + +```lua +print "Hello." +``` + +`math.lua` + +```lua +local function add (a, b) + return a + b +end + +print (("1 + 2 = %i"):format (add (1, 2))) +``` + +luac5.4 listing + +``` +main (5 instructions at 0x564f4fd74cc0) +0+ params, 2 slots, 1 upvalue, 0 locals, 2 constants, 0 functions + 1 [1] VARARGPREP 0 + 2 [1] GETTABUP 0 0 0 ; _ENV "print" + 3 [1] LOADK 1 1 ; "Hello." + 4 [1] CALL 0 2 1 ; 1 in 0 out + 5 [1] RETURN 0 1 1 ; 0 out +``` + +``` +main (12 instructions at 0x55ee2417acc0) +0+ params, 7 slots, 1 upvalue, 1 local, 3 constants, 1 function + 1 [1] VARARGPREP 0 + 2 [3] CLOSURE 0 0 ; 0x55ee2417b000 + 3 [5] GETTABUP 1 0 0 ; _ENV "print" + 4 [5] LOADK 2 1 ; "1 + 2 = %i" + 5 [5] SELF 2 2 2k ; "format" + 6 [5] MOVE 4 0 + 7 [5] LOADI 5 1 + 8 [5] LOADI 6 2 + 9 [5] CALL 4 3 0 ; 2 in all out + 10 [5] CALL 2 0 0 ; all in all out + 11 [5] CALL 1 0 1 ; all in 0 out + 12 [5] RETURN 1 1 1 ; 0 out + +function (4 instructions at 0x55ee2417b000) +2 params, 3 slots, 0 upvalues, 2 locals, 0 constants, 0 functions + 1 [2] ADD 2 0 1 + 2 [2] MMBIN 0 1 6 ; __add + 3 [2] RETURN1 2 + 4 [3] RETURN0 +``` + +``` +main (14 instructions at 0x559f55e1ecd0) +0+ params, 2 slots, 1 upvalue, 1 local, 5 constants, 1 function + 1 [1] VARARGPREP 0 + 2 [1] GETTABUP 0 0 0 ; _ENV "arg" + 3 [1] GETI 0 0 1 + 4 [1] EQK 0 1 0 ; "93" + 5 [1] JMP 4 ; to 10 + 6 [2] GETTABUP 0 0 2 ; _ENV "print" + 7 [2] LOADK 1 3 ; "it's 93" + 8 [2] CALL 0 2 1 ; 1 in 0 out + 9 [2] JMP 3 ; to 
13 + 10 [4] GETTABUP 0 0 2 ; _ENV "print" + 11 [4] LOADK 1 4 ; "it's not 93" + 12 [4] CALL 0 2 1 ; 1 in 0 out + 13 [9] CLOSURE 0 0 ; 0x559f55e1f3d0 + 14 [9] RETURN 1 1 1 ; 0 out +constants (5) for 0x559f55e1ecd0: + 0 S "arg" + 1 S "93" + 2 S "print" + 3 S "it's 93" + 4 S "it's not 93" +locals (1) for 0x559f55e1ecd0: + 0 unused_fn 14 15 +upvalues (1) for 0x559f55e1ecd0: + 0 _ENV 1 0 + +function (4 instructions at 0x559f55e1f3d0) +0 params, 2 slots, 1 upvalue, 0 locals, 2 constants, 0 functions + 1 [8] GETTABUP 0 0 0 ; _ENV "print" + 2 [8] LOADK 1 1 ; "unused" + 3 [8] CALL 0 2 1 ; 1 in 0 out + 4 [9] RETURN0 +constants (2) for 0x559f55e1f3d0: + 0 S "print" + 1 S "unused" +locals (0) for 0x559f55e1f3d0: +upvalues (1) for 0x559f55e1f3d0: + 0 _ENV 0 0 +``` + +Octal dump of luac5.4 byte code + +``` +0000000 1b 4c 75 61 54 00 19 93 0d 0a 1a 0a 04 08 08 78 >.LuaT..........x< +0000020 56 00 00 00 00 00 00 00 00 00 00 00 28 77 40 01 >V...........(w@.< +0000040 8b 40 68 65 6c 6c 6f 2e 6c 75 61 80 80 00 01 02 >.@hello.lua.....< +0000060 85 51 00 00 00 0b 00 00 00 83 80 00 00 44 00 02 >.Q...........D..< +0000100 01 46 00 01 01 82 04 86 70 72 69 6e 74 04 87 48 >.F......print..H< +0000120 65 6c 6c 6f 2e 81 01 00 00 80 85 01 00 00 00 00 >ello............< +0000140 80 80 81 85 5f 45 4e 56 >...._ENV< +0000150 +``` + +``` +0000000 1b 4c 75 61 54 00 19 93 0d 0a 1a 0a 04 08 08 78 >.LuaT..........x< +0000020 56 00 00 00 00 00 00 00 00 00 00 00 28 77 40 01 >V...........(w@.< +0000040 8a 40 6d 61 74 68 2e 6c 75 61 80 80 00 01 07 8c >.@math.lua......< +0000060 51 00 00 00 4f 00 00 00 8b 00 00 00 03 81 00 00 >Q...O...........< +0000100 14 81 02 02 00 02 00 00 81 02 00 80 01 83 00 80 >................< +0000120 44 02 03 00 44 01 00 00 c4 00 00 01 c6 00 01 01 >D...D...........< +0000140 83 04 86 70 72 69 6e 74 04 8b 31 20 2b 20 32 20 >...print..1 + 2 < +0000160 3d 20 25 69 04 87 66 6f 72 6d 61 74 81 01 00 00 >= %i..format....< +0000200 81 80 81 83 02 00 03 84 22 01 00 01 2e 00 01 06 
>........".......< +0000220 48 01 02 00 47 01 01 00 80 80 80 84 01 00 00 01 >H...G...........< +0000240 80 82 82 61 80 84 82 62 80 84 80 8c 01 02 02 00 >...a...b........< +0000260 00 00 00 00 00 00 00 00 80 81 84 61 64 64 82 8c >...........add..< +0000300 81 85 5f 45 4e 56 >.._ENV< +0000306 + +``` + +# Interpretation of byte code + +Overall structure + +- Roughly 32 byte header with magic number, version number, etc. +- File name +- `80 80 00 01 02 85` header for main function +- Packed 4-byte instructions for main function +- `82` or `83` length prefix for string table +- String table for main function +- `81 01 00 00 81 80 81 83 02 00 03 84` header for "add" function +- Packed 4-byte instructions for "add" function +- `80 80 80 84 01 00 00 01 80` Header for file-scope debug symbols? +- File-scope debug symbols? +- String table for entire file? + +## Bytecodes + +Per lopcodes.h, instructions are 32 bits long, always. +The opcode is encoded in the lowest 7 bits; these dumps are little-endian, so that is the low 7 bits of the first byte (mask it with 0x7f). + +``` +83 80 00 00 ; 0x83 & 0x7f = 0x03 = 3 = LOADK +0b 00 00 00 ; 0x0b = 11 = GETTABUP +22 01 00 01 ; 0x22 = 34 = ADD +2e 00 01 06 ; 0x2e = 46 = MMBIN +c4 00 00 01 ; 0xc4 & 0x7f = 0x44 = 68 = CALL +44 00 02 01 ; 0x44 = 68 = CALL +46 00 01 01 ; 0x46 = 70 = RETURN +47 01 01 00 ; 0x47 = 71 = RETURN0 +48 01 02 00 ; 0x48 = 72 = RETURN1 +51 00 00 00 ; 0x51 = 81 = VARARGPREP +``` + +## Strings + +Filenames are encoded at the top, and there's a string table at the bottom. + +Strings are prefixed with a variable-length size (7 bits per byte; the final byte of the size has its high bit set). +There is an extra byte before each string which is not part of the string: for constants it is the type tag (0x04 = short string), elsewhere it is the preceding field, such as a count. +The stored size is the length plus one (0 is reserved for "no string"), so the lengths look off by one, e.g. 0x84 is a length of 3, not 4. 
+ +| --- | +| "add" | 81 84 61 64 64 +| "_ENV" | 81 85 5f 45 4e 56 +| "print" | 04 86 70 72 69 6e 74 +| "format" | 04 87 66 6f 72 6d 61 74 +| "@math.lua" | 01 8a 40 6d 61 74 68 2e 6c 75 61 +| "1 + 2 = %i" | 04 8b 31 20 2b 20 32 20 3d 20 25 69 \ No newline at end of file diff --git a/src/main.rs b/src/main.rs new file mode 100644 index 0000000..033655b --- /dev/null +++ b/src/main.rs @@ -0,0 +1,171 @@ +enum Instruction { + VarArgPrep (i32), + GetTabUp (u8, u8, u8), + GetI (u8, u8, u8), + EqK (u8, u8, u8), + Jmp (i32), + LoadK (u8, i32), + Call (u8, u8, u8), + Closure (u8, i32), + Return (u8, u8, u8), +} + +#[derive (Clone, Debug, PartialEq)] +enum Value { + Nil, + False, + True, + Float (f64), + String (String), + + // These are all bogus, I haven't figured out how to implement + // tables and function pointers yet + + BogusArg, + BogusPrint, +} + +impl Default for Value { + fn default () -> Self { + Self::Nil + } +} + +impl From <String> for Value { + fn from (x: String) -> Self { + Self::String (x) + } +} + +impl From <&str> for Value { + fn from (x: &str) -> Self { + Self::from (String::from (x)) + } +} + +struct Chunk { + instructions: Vec <Instruction>, + constants: Vec <Value>, +} + +fn main() { + let arg: Vec <_> = std::env::args ().collect (); + + let chunk = Chunk { + instructions: vec! [ + Instruction::VarArgPrep (0), + Instruction::GetTabUp (0, 0, 0), + Instruction::GetI (0, 0, 1), + Instruction::EqK (0, 1, 0), + Instruction::Jmp (4), + Instruction::GetTabUp (0, 0, 2), + Instruction::LoadK (1, 3), + Instruction::Call (0, 2, 1), + Instruction::Jmp (3), + Instruction::GetTabUp (0, 0, 2), + Instruction::LoadK (1, 4), + Instruction::Call (0, 2, 1), + Instruction::Return (1, 1, 1), + ], + constants: vec! [ + "arg", + "93", + "print", + "it's 93", + "it's not 93", + ].into_iter ().map (|s| Value::from (s)).collect (), + }; + + let mut registers = vec! 
[Value::default (); 256]; + + + let mut program_counter = 0i32; + let max_iters = 2000; + + for _ in 0..max_iters { + let instruction = chunk.instructions.get (usize::try_from (program_counter).unwrap ()).unwrap (); + + let r = &mut registers; + let k = &chunk.constants; + + match instruction { + Instruction::Call (a, b, c) => { + // Take arguments from registers [a + 1, a + b) + // Call the function in register [a] + // Return values in registers [a, a + c - 1) + // + // That is, call a with b - 1 arguments and expect c returns + // + // e.g. CALL 0 2 1 mean "Call 0 with 1 argument, return 1 value", like for printing a constant + + // TODO: Only implement printing constants for now + + let a = usize::try_from (*a).unwrap (); + + assert_eq! (*b, 2); + assert_eq! (*c, 1); + + println! ("{:?}", r [a + 1]); + }, + Instruction::EqK (a, b, c_k) => { + let a = usize::try_from (*a).unwrap (); + let b = usize::try_from (*b).unwrap (); + + let equal = r [a] == k [b]; + + match (equal, c_k) { + (true, 0) => program_counter += 1, + (false, 1) => program_counter += 1, + _ => (), + } + }, + Instruction::GetTabUp (a, b, c) => { + let a = usize::try_from (*a).unwrap (); + let b = usize::try_from (*b).unwrap (); + let c = usize::try_from (*c).unwrap (); + + // Only supported upvalue is `_ENV` + assert_eq! (b, 0); + + let key = k.get (c).unwrap (); + let value = match key { + Value::String (s) => match s.as_str() { + "arg" => Value::BogusArg, + "print" => Value::BogusPrint, + _ => panic! 
("key not in _ENV upvalue"), + }, + _ => unimplemented!(), + }; + + r [a] = value; + }, + Instruction::GetI (a, b, c) => { + let a = usize::try_from (*a).unwrap (); + let b = usize::try_from (*b).unwrap (); + let c = usize::try_from (*c).unwrap (); + + let table = r.get (b).unwrap (); + let value = match table { + Value::BogusArg => arg.get (c).unwrap ().as_str().into (), + _ => unimplemented!(), + }; + + r [a] = value; + }, + Instruction::Jmp (sJ) => program_counter += sJ, + Instruction::LoadK (a, bx) => { + let a = usize::try_from (*a).unwrap (); + let bx = usize::try_from (*bx).unwrap (); + + r [a] = k [bx].clone (); + }, + Instruction::Return (_a, _b, _c) => { + break; + }, + Instruction::VarArgPrep (_) => (), + _ => (), + } + + program_counter += 1; + } +} diff --git a/test_vectors/combined.luac b/test_vectors/combined.luac new file mode 100644 index 0000000000000000000000000000000000000000..97bbdf80ff36e45f2e7dd7782f71e80067cd4dc1 GIT binary patch literal 323 zcmYL@!AiqG5QhJm1T7VBUerqvgo;qmdm)FYJ%~k+US+jP3(-nx(c|o9(~8N>R}!Dg zNkVX7n1TJ~|Mu{DW$v*5z1uzLhMmsG82{CKkLh#JOIPOcL0n(3F(l3z@$Nyql%%S*#CkKCxU%`B(#`S*Oi@W3 literal 0 HcmV?d00001 diff --git a/test_vectors/error.lua b/test_vectors/error.lua new file mode 100644 index 0000000..79c6a04 --- /dev/null +++ b/test_vectors/error.lua @@ -0,0 +1,15 @@ +-- Put some very long comment here so the error will +-- have an interesting line number in the traceback +-- +-- +-- +-- +-- +-- +-- +-- +-- +-- +-- + +error ("bogus") \ No newline at end of file diff --git a/test_vectors/error.luac b/test_vectors/error.luac new file mode 100644 index 0000000000000000000000000000000000000000..86a70982f36cb5d126fd8aaea44de1e22d37d960 GIT binary patch literal 103 zcmb34DNPJvketlRCB?YnU;>%K#OY)0~^)rj(EsgbZN)sCz z7#Nv)1A&UUftZ(pfzgJ6kzofTLj#b{)ZDye=`78b y#w=|GMVWaeEFGC8>ct8`eh-+DmtO+pG%_+UH1q-qpwU3g1f&`o8e8LC{lWm+&LxHb literal 0 HcmV?d00001 diff --git a/test_vectors/long_named_file.lua b/test_vectors/long_named_file.lua new file mode 
100644 index 0000000..61c7ebf --- /dev/null +++ b/test_vectors/long_named_file.lua @@ -0,0 +1 @@ +return 0 diff --git a/test_vectors/long_named_file.luac b/test_vectors/long_named_file.luac new file mode 100644 index 0000000000000000000000000000000000000000..864430f5d89c563c1dd031d5d395613bae1d5293 GIT binary patch literal 94 zcmb34DNPJvketlRCB?^HZ(FaFf_CP35JG-#@2XOzc2vLxE4?V literal 0 HcmV?d00001 diff --git a/test_vectors/math.lua b/test_vectors/math.lua new file mode 100644 index 0000000..1e57079 --- /dev/null +++ b/test_vectors/math.lua @@ -0,0 +1,5 @@ +local function add (a, b) + return a + b +end + +print (("1 + 2 = %i"):format (add (1, 2))) diff --git a/test_vectors/math.luac b/test_vectors/math.luac new file mode 100644 index 0000000000000000000000000000000000000000..359c44b09bdc01287b448f797790bcdf49255c43 GIT binary patch literal 198 zcmYL?O$z~07=_P!=fQfi95vLF^BC0kudrhGM-SibkpMKL>nNq(2RkyFp= zIh|7-C2EH9t5~QMr0-wm_@{C1(xbamr%kv`6f-EFO`V=No(@j~B`pL5paqyxa8G#X z5%31+1EN&kj&Za<$!9BQ2JN5|)FXLcA7VGFT~D)41w30LPzcHmNnk((X0}cf8>85o VC0owOUH!t^s@2M5`nu?k=U?nMD#QQ) literal 0 HcmV?d00001 diff --git a/test_vectors/math_2.lua b/test_vectors/math_2.lua new file mode 100644 index 0000000..c9ee624 --- /dev/null +++ b/test_vectors/math_2.lua @@ -0,0 +1,9 @@ +local function add (a, b) + return a + b +end + +local function sub (a, b) + return a - b +end + +print (("1 + 2 = %i"):format (add (1, 2))) diff --git a/test_vectors/math_2.luac b/test_vectors/math_2.luac new file mode 100644 index 0000000000000000000000000000000000000000..24ec49aba27f1a4125058944528976c1c2649584 GIT binary patch literal 253 zcmb34DNPJvketlRCB?%q>bl^?D49Y#xkE4DO7K3=ItpEkJ3;hNh;( yhL)zJhL(ng)^?CeWr#|4JSuw`nOJ~sf`W#omc*2lrrws~(xm3z#@2XOzc2ue_d4zX literal 0 HcmV?d00001