From df36ce7d234f40cbf60443b12fba85c69e4d07e1 Mon Sep 17 00:00:00 2001 From: Ara Adkins Date: Wed, 23 Oct 2024 11:46:59 -0600 Subject: [PATCH] Introduce the basic skeleton for the compiler (#74) * Implement the skeleton of a pass infrastructure While it is incomplete, an initial skeleton for the pass management infrastructure ensures that we do not paint ourselves into any corners that might be hard to design our way out of later. It provides the infrastructure for creating and managing passes, as well as shuffling pass data between passes. Please note that we make use of self-referential structs via Ouroboros to better encapsulate the LLVM context and the modules that exist in it. Added `libffi` and `libxml2` as build and runtime dependencies to the project as these are required by inkwell now that it is actually in use. This fixes an issue preventing compilation in tests. * Implement the module mapping pass This pass runs an analysis of LLVM modules to resolve all top-level entities. The results are written out as pass data, and are intended for use as part of a consistency check during the compilation to FLO. As part of implementing this pass, this commit also implements: - A proxy for the portions of the LLVM type system that we support, enabling us to do pre- and during-compilation consistency checking where necessary. - A parser for LLVM's data-layout specifications, allowing us to ensure that the provided modules are not making any assumptions that would be unsafe for our target machine. * Miscellaneous fixes and updates This commit performs a number of small miscellaneous refactorings and fixes in response to looking at the code with a fresh set of eyes. It also accounts for feedback given during code review. --- Cargo.lock | 222 ++- Cargo.toml | 2 + crates/cli/Cargo.toml | 1 + crates/cli/src/main.rs | 2 +- crates/compiler/Cargo.toml | 8 +- crates/compiler/input/add.ll | 70 + crates/compiler/src/compile.rs | 54 - crates/compiler/src/constant.rs | 76 + crates/compiler/src/context/mod.rs | 123 ++ crates/compiler/src/context/module.rs | 42 + crates/compiler/src/lib.rs | 233 ++- crates/compiler/src/llvm/data_layout.rs | 1364 +++++++++++++++++ crates/compiler/src/llvm/mod.rs | 38 + .../compiler/src/llvm/special_intrinsics.rs | 123 ++ crates/compiler/src/llvm/typesystem.rs | 534 +++++++ crates/compiler/src/pass/analysis/mod.rs | 5 + .../compiler/src/pass/analysis/module_map.rs | 568 +++++++ crates/compiler/src/pass/data.rs | 263 ++++ crates/compiler/src/pass/mod.rs | 338 ++++ crates/compiler/src/polyfill.rs | 52 - crates/compiler/src/polyfill/mappings.rs | 14 + crates/compiler/src/polyfill/mod.rs | 185 +++ crates/error/Cargo.toml | 1 + crates/error/src/compile.rs | 68 + crates/error/src/lib.rs | 9 +- crates/error/src/llvm_compile.rs | 12 - crates/flo/README.md | 4 +- workspace.nix | 12 +- 28 files changed, 4280 insertions(+), 143 deletions(-) create mode 100644 crates/compiler/input/add.ll delete mode 100644 crates/compiler/src/compile.rs create mode 100644 crates/compiler/src/constant.rs create mode 100644 crates/compiler/src/context/mod.rs create mode 100644 crates/compiler/src/context/module.rs create mode 100644 crates/compiler/src/llvm/data_layout.rs create mode 100644 crates/compiler/src/llvm/mod.rs create mode 100644 crates/compiler/src/llvm/special_intrinsics.rs create mode 100644 crates/compiler/src/llvm/typesystem.rs create mode 100644 crates/compiler/src/pass/analysis/mod.rs create mode 100644 crates/compiler/src/pass/analysis/module_map.rs create mode 100644 
crates/compiler/src/pass/data.rs create mode 100644 crates/compiler/src/pass/mod.rs delete mode 100644 crates/compiler/src/polyfill.rs create mode 100644 crates/compiler/src/polyfill/mappings.rs create mode 100644 crates/compiler/src/polyfill/mod.rs create mode 100644 crates/error/src/compile.rs delete mode 100644 crates/error/src/llvm_compile.rs diff --git a/Cargo.lock b/Cargo.lock index 8cf3600..46f2857 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,6 +2,30 @@ # It is not intended for manual editing. version = 3 +[[package]] +name = "ahash" +version = "0.8.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011" +dependencies = [ + "cfg-if", + "once_cell", + "version_check 0.9.5", + "zerocopy", +] + +[[package]] +name = "aliasable" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "250f629c0161ad8107cf89319e990051fae62832fd343083bea452d93e2205fd" + +[[package]] +name = "allocator-api2" +version = "0.2.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c6cb57a04249c6480766f7f7cef5467412af1490f8d1e243141daddada3264f" + [[package]] name = "anstream" version = "0.6.15" @@ -38,7 +62,7 @@ version = "1.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6d36fc52c7f6c869915e99412912f22093507da8d9e942ceaf66fe4b7c14422a" dependencies = [ - "windows-sys", + "windows-sys 0.52.0", ] [[package]] @@ -48,7 +72,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5bf74e1b6e971609db8ca7a9ce79fd5768ab6ae46441c572e46cf596f59e57f8" dependencies = [ "anstyle", - "windows-sys", + "windows-sys 0.52.0", ] [[package]] @@ -85,6 +109,22 @@ dependencies = [ "shlex", ] +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "chumsky" +version = "0.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8eebd66744a15ded14960ab4ccdbfb51ad3b81f51f3f04a80adac98c985396c9" +dependencies = [ + "hashbrown", + "stacker", +] + [[package]] name = "clap" version = "4.5.20" @@ -118,12 +158,45 @@ version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d3fd119d74b830634cea2a0f58bbd0d54540518a14397557951e79340abc28c0" +[[package]] +name = "derivative" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fcc3dd5e9e9c0b295d6e1e4d811fb6f157d5ffd784b8d202fc62eac8035a770b" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", +] + +[[package]] +name = "downcast-rs" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75b325c5dbd37f80359721ad39aca5a29fb04c89279657cffdda8736d0c0b9d2" + [[package]] name = "either" version = "1.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0" +[[package]] +name = "hashbrown" +version = "0.14.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" +dependencies = [ + "ahash", + "allocator-api2", +] + +[[package]] +name = "heck" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" + [[package]] name = "inkwell" version = "0.5.0" @@ -146,7 +219,7 @@ checksum = "9dd28cfd4cfba665d47d31c08a6ba637eed16770abca2eccbbc3ca831fef1e44" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.82", ] [[package]] @@ -155,6 +228,15 @@ version = "1.70.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" +[[package]] +name = "itertools" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba291022dbbd398a455acf126c1e341954079855bc60dfdda641363bd6922569" +dependencies = [ + "either", +] + [[package]] name = "itertools" version = "0.13.0" @@ -196,7 +278,8 @@ version = "0.1.0" dependencies = [ "ariadne", "clap", - "itertools", + "itertools 0.13.0", + "ltc-compiler", "tracing", ] @@ -205,10 +288,16 @@ name = "ltc-compiler" version = "0.1.0" dependencies = [ "anyhow", + "bimap", + "chumsky", "clap", + "derivative", + "downcast-rs", "inkwell", - "itertools", + "itertools 0.13.0", "ltc-errors", + "ltc-flo", + "ouroboros", "tracing", ] @@ -217,7 +306,7 @@ name = "ltc-driver" version = "0.1.0" dependencies = [ "ariadne", - "itertools", + "itertools 0.13.0", "thiserror", "tracing", ] @@ -227,6 +316,7 @@ name = "ltc-errors" version = "0.1.0" dependencies = [ "ariadne", + "inkwell", "thiserror", ] @@ -256,7 +346,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2ad2a91a8e869eeb30b9cb3119ae87773a8f4ae617f41b1eb9c154b2905f7bd6" dependencies = [ "memchr", - "version_check", + "version_check 0.1.5", ] [[package]] @@ -265,6 +355,31 @@ version = "1.20.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1261fe7e33c73b354eab43b1273a57c8f967d0391e80353e51f764ac02cf6775" +[[package]] +name = "ouroboros" +version = "0.18.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "944fa20996a25aded6b4795c6d63f10014a7a83f8be9828a11860b08c5fc4a67" +dependencies = [ + "aliasable", + "ouroboros_macro", + "static_assertions", +] + +[[package]] +name = "ouroboros_macro" +version = "0.18.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39b0deead1528fd0e5947a8546a9642a9777c25f6e1e26f34c97b204bbb465bd" +dependencies = [ + "heck", + "itertools 0.12.1", + "proc-macro2", + "proc-macro2-diagnostics", + "quote", + "syn 2.0.82", +] + [[package]] name = "pin-project-lite" version = "0.2.14" @@ -280,6 +395,28 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "proc-macro2-diagnostics" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af066a9c399a26e020ada66a034357a868728e72cd426f3adcd35f80d88d88c8" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.82", + "version_check 0.9.5", + "yansi", +] + +[[package]] +name = "psm" +version = "0.1.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aa37f80ca58604976033fae9515a8a2989fc13797d953f7c04fb8fa36a11f205" +dependencies = [ + "cc", +] + [[package]] name = "quote" version = "1.0.37" @@ -318,7 +455,7 @@ checksum = "7e85ad2009c50b58e87caa8cd6dac16bdf511bbfb7af6c33df902396aa480fa5" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.82", ] [[package]] @@ -337,12 +474,42 @@ version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" +[[package]] +name = "stacker" 
+version = "0.1.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "799c883d55abdb5e98af1a7b3f23b9b6de8ecada0ecac058672d7635eb48ca7b" +dependencies = [ + "cc", + "cfg-if", + "libc", + "psm", + "windows-sys 0.59.0", +] + +[[package]] +name = "static_assertions" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" + [[package]] name = "strsim" version = "0.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" +[[package]] +name = "syn" +version = "1.0.109" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + [[package]] name = "syn" version = "2.0.82" @@ -371,7 +538,7 @@ checksum = "ae71770322cbd277e69d762a16c444af02aa0575ac0d174f0b9562d3b37f8602" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.82", ] [[package]] @@ -393,7 +560,7 @@ checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.82", ] [[package]] @@ -429,6 +596,12 @@ version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "914b1a6776c4c929a602fafd8bc742e06365d4bcbe48c30f9cca5824f70dc9dd" +[[package]] +name = "version_check" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" + [[package]] name = "windows-sys" version = "0.52.0" @@ -438,6 +611,15 @@ dependencies = [ "windows-targets", ] +[[package]] +name = "windows-sys" +version = "0.59.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" +dependencies = [ + "windows-targets", +] + [[package]] name = "windows-targets" version = "0.52.6" @@ -507,3 +689,23 @@ name = "yansi" version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cfe53a6657fd280eaa890a3bc59152892ffa3e30101319d168b781ed6529b049" + +[[package]] +name = "zerocopy" +version = "0.7.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b9b4fd18abc82b8136838da5d50bae7bdea537c574d8dc1a34ed098d6c166f0" +dependencies = [ + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.7.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.82", +] diff --git a/Cargo.toml b/Cargo.toml index 790596d..78bb2ac 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -32,8 +32,10 @@ anyhow = "1.0.89" ariadne = "0.4.1" bimap = { version = "0.6.3", features = ["serde"] } clap = "4.5.16" +inkwell = { version = "0.5.0", features = ["llvm18-0"] } itertools = "0.13.0" ltc-cli = { path = "crates/cli" } +ltc-compiler = { path = "crates/compiler" } ltc-driver = { path = "crates/driver" } ltc-errors = { path = "crates/error" } ltc-flo = { path = "crates/flo" } diff --git a/crates/cli/Cargo.toml b/crates/cli/Cargo.toml index 3cd0692..dbd0be0 100644 --- a/crates/cli/Cargo.toml +++ b/crates/cli/Cargo.toml @@ -17,6 +17,7 @@ rust-version.workspace = true ariadne.workspace = true clap.workspace = true 
itertools.workspace = true +ltc-compiler.workspace = true tracing.workspace = true [[bin]] diff --git a/crates/cli/src/main.rs b/crates/cli/src/main.rs index 0c954d2..725f119 100644 --- a/crates/cli/src/main.rs +++ b/crates/cli/src/main.rs @@ -1,5 +1,5 @@ //! This is the CLI driver for the compilation of LLVM IR to Cairo. For more -//! detail, please see the crate documentation [`llvm_to_cairo`]. +//! detail, please see the documentation for the [`ltc_compiler`] crate. #![warn(clippy::all, clippy::cargo, clippy::pedantic)] #![allow(clippy::module_name_repetitions)] // Allows for better API naming diff --git a/crates/compiler/Cargo.toml b/crates/compiler/Cargo.toml index 9462306..db3016d 100644 --- a/crates/compiler/Cargo.toml +++ b/crates/compiler/Cargo.toml @@ -14,10 +14,16 @@ edition.workspace = true rust-version.workspace = true [dependencies] +bimap.workspace = true +chumsky = "0.9.3" clap.workspace = true -inkwell = { version = "0.5.0", features = ["llvm18-0"] } +derivative = "2.2.0" +downcast-rs = "1.2.1" +inkwell.workspace = true itertools.workspace = true ltc-errors.workspace = true +ltc-flo.workspace = true +ouroboros = "0.18.4" tracing.workspace = true [dev-dependencies] diff --git a/crates/compiler/input/add.ll b/crates/compiler/input/add.ll new file mode 100644 index 0000000..77ddeab --- /dev/null +++ b/crates/compiler/input/add.ll @@ -0,0 +1,70 @@ +; ModuleID = '9ox3ykpp0gbrqxqlz7ajwa9w6' +source_filename = "9ox3ykpp0gbrqxqlz7ajwa9w6" +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" +target triple = "aarch64-unknown-none" + +@alloc_4190527422e5cc48a15bd1cb4f38f425 = private unnamed_addr constant <{ [33 x i8] }> <{ [33 x i8] c"crates/rust-test-input/src/lib.rs" }>, align 1 +@alloc_5b4544c775a23c08ca70c48dd7be27fc = private unnamed_addr constant <{ ptr, [16 x i8] }> <{ ptr @alloc_4190527422e5cc48a15bd1cb4f38f425, [16 x i8] c"!\00\00\00\00\00\00\00\05\00\00\00\05\00\00\00" }>, align 8 + +; ltc_rust_test_input::add +; Function Attrs: noredzone nounwind +define dso_local i64 @_ZN19ltc_rust_test_input3add17h828e50e9267cb510E(i64 %left, i64 %right) unnamed_addr #0 !dbg !5 { +start: + %right.dbg.spill = alloca [8 x i8], align 8 + %left.dbg.spill = alloca [8 x i8], align 8 + store i64 %left, ptr %left.dbg.spill, align 8 + call void @llvm.dbg.declare(metadata ptr %left.dbg.spill, metadata !12, metadata !DIExpression()), !dbg !15 + store i64 %right, ptr %right.dbg.spill, align 8 + call void @llvm.dbg.declare(metadata ptr %right.dbg.spill, metadata !13, metadata !DIExpression()), !dbg !16 + %0 = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %left, i64 %right), !dbg !17 + %_3.0 = extractvalue { i64, i1 } %0, 0, !dbg !17 + %_3.1 = extractvalue { i64, i1 } %0, 1, !dbg !17 + br i1 %_3.1, label %panic, label %bb1, !dbg !17 + +bb1: ; preds = %start + ret i64 %_3.0, !dbg !18 + +panic: ; preds = %start +; call core::panicking::panic_const::panic_const_add_overflow + call void @_ZN4core9panicking11panic_const24panic_const_add_overflow17he7771b1d81fa091aE(ptr align 8 @alloc_5b4544c775a23c08ca70c48dd7be27fc) #3, !dbg !17 + unreachable, !dbg !17 +} + +; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) +declare void @llvm.dbg.declare(metadata, metadata, metadata) #1 + +; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) +declare { i64, i1 } @llvm.uadd.with.overflow.i64(i64, i64) #1 + +; core::panicking::panic_const::panic_const_add_overflow +; Function Attrs: cold noinline noredzone noreturn 
nounwind +declare dso_local void @_ZN4core9panicking11panic_const24panic_const_add_overflow17he7771b1d81fa091aE(ptr align 8) unnamed_addr #2 + +attributes #0 = { noredzone nounwind "probe-stack"="inline-asm" "target-cpu"="generic" "target-features"="+v8a,+strict-align,-neon,-fp-armv8" } +attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } +attributes #2 = { cold noinline noredzone noreturn nounwind "probe-stack"="inline-asm" "target-cpu"="generic" "target-features"="+v8a,+strict-align,-neon,-fp-armv8" } +attributes #3 = { noreturn nounwind } + +!llvm.ident = !{!0} +!llvm.dbg.cu = !{!1} +!llvm.module.flags = !{!3, !4} + +!0 = !{!"rustc version 1.81.0 (eeb90cda1 2024-09-04)"} +!1 = distinct !DICompileUnit(language: DW_LANG_Rust, file: !2, producer: "clang LLVM (rustc version 1.81.0 (eeb90cda1 2024-09-04))", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, splitDebugInlining: false, nameTableKind: None) +!2 = !DIFile(filename: "crates/rust-test-input/src/lib.rs/@/9ox3ykpp0gbrqxqlz7ajwa9w6", directory: "/Users/starfire/Development/reilabs/starkware/llvm-to-cairo") +!3 = !{i32 2, !"Dwarf Version", i32 4} +!4 = !{i32 2, !"Debug Info Version", i32 3} +!5 = distinct !DISubprogram(name: "add", linkageName: "_ZN19ltc_rust_test_input3add17h828e50e9267cb510E", scope: !7, file: !6, line: 4, type: !8, scopeLine: 4, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !1, templateParams: !14, retainedNodes: !11) +!6 = !DIFile(filename: "crates/rust-test-input/src/lib.rs", directory: "/Users/starfire/Development/reilabs/starkware/llvm-to-cairo", checksumkind: CSK_MD5, checksum: "178b5b568f49bd1e17834a7529756af1") +!7 = !DINamespace(name: "ltc_rust_test_input", scope: null) +!8 = !DISubroutineType(types: !9) +!9 = !{!10, !10, !10} +!10 = !DIBasicType(name: "u64", size: 64, encoding: DW_ATE_unsigned) +!11 = !{!12, !13} +!12 = !DILocalVariable(name: "left", arg: 1, scope: !5, file: !6, line: 4, type: !10) +!13 = !DILocalVariable(name: "right", arg: 2, scope: !5, file: !6, line: 4, type: !10) +!14 = !{} +!15 = !DILocation(line: 4, column: 12, scope: !5) +!16 = !DILocation(line: 4, column: 23, scope: !5) +!17 = !DILocation(line: 5, column: 5, scope: !5) +!18 = !DILocation(line: 6, column: 2, scope: !5) diff --git a/crates/compiler/src/compile.rs b/crates/compiler/src/compile.rs deleted file mode 100644 index 475594d..0000000 --- a/crates/compiler/src/compile.rs +++ /dev/null @@ -1,54 +0,0 @@ -//! Handles the compilation of LLVM IR to Cairo's internal `FlatLowered` IR. -//! -//! In the context of LLVM to Cairo, compilation refers to the process of -//! translating from [LLVM IR](https://llvm.org/docs/LangRef.html) to Cairo's -//! internal -//! [`FlatLowered`](https://github.com/starkware-libs/cairo/blob/main/crates/cairo-lang-lowering/src/objects.rs#L135) -//! structure. -//! -//! LLVM IR is designed around a virtual processor model that is expected to -//! have a multitude of operations common to real CPUs. As we are compiling to -//! target the Cairo VM, we have to work out how to take each of these -//! operations, and represent them in our extremely restricted instruction set. -//! -//! Doing this involves two major approaches: -//! -//! 1. **Translation:** Where there is a good match between the structure of the -//! LLVM IR and the structure of `FlatLowered`, we can translate one to the -//! other. This is useful both in terms of code structure—as LLVM IR is still -//! a structured IR—and in terms of basic operations that are common to both -//! 
representations. -//! 2. **Polyfills:** Where LLVM expects an operation that we do not have an -//! equivalent for, we instead emit a call to an _implementation of that -//! operation_ in Cairo. We term these implementations _polyfills_ as an -//! analogy to the term used on the web, and they are _software_ -//! implementations of features and capabilities that our hardware is -//! missing. For more information on polyfills, see the [`crate::polyfill`] -//! module. -//! -//! We aim for this compilation process to both achieve a 1:1 semantic match to -//! the original LLVM IR—through use of translation and polyfills as needed—and -//! to retain as much context information as possible so to ensure the -//! possibility of a good user experience in the future. -//! -//! # Targeting `FlatLowered` instead of `Sierra` -//! -//! It might seem strange to target `FlatLowered` instead of something like -//! [Sierra](https://docs.starknet.io/architecture-and-concepts/smart-contracts/cairo-and-sierra/#why_do_we_need_sierra) -//! which is _intended_ as a target for compilation. -//! -//! While we definitely want the benefits of Sierra—particularly model checking -//! for the underlying machine, and the gas monitoring—we do not want to perform -//! all the necessary bookkeeping to make Sierra work on our own. By targeting -//! `FlatLowered` instead, we gain the benefits of the _already existing_ -//! [`sierragen`](https://github.com/starkware-libs/cairo/blob/main/crates/cairo-lang-sierra-generator/src/lib.rs) -//! functionality, which ingests `FlatLowered` and handles the required Sierra -//! bookkeeping for us. -//! -//! While this does give us less control—as we rely on the existing -//! translation—the benefits of not having to manually perform this additional -//! work far outweighs that downside. If we _do_ need any additional control, we -//! can always modify this process at a later date. - -#[cfg(test)] -mod test {} diff --git a/crates/compiler/src/constant.rs b/crates/compiler/src/constant.rs new file mode 100644 index 0000000..9e6d32c --- /dev/null +++ b/crates/compiler/src/constant.rs @@ -0,0 +1,76 @@ +//! Useful constants for use within the compiler. + +/// The size of a byte on our architecture. +pub const BYTE_SIZE: usize = 8; + +/// The default layout on LLVM for a 16-bit float. +/// +/// The numbers are, in order: the size, the ABI alignment, and the preferred +/// alignment. +pub const DEFAULT_FLOAT_16_LAYOUT: (usize, usize, usize) = (16, 16, 16); + +/// The default layout on LLVM for a 32-bit float. +/// +/// The numbers are, in order: the size, the ABI alignment, and the preferred +/// alignment. +pub const DEFAULT_FLOAT_32_LAYOUT: (usize, usize, usize) = (32, 32, 32); + +/// The default layout on LLVM for a 64-bit float. +/// +/// The numbers are, in order: the size, the ABI alignment, and the preferred +/// alignment. +pub const DEFAULT_FLOAT_64_LAYOUT: (usize, usize, usize) = (64, 64, 64); + +/// The default layout on LLVM for a 128-bit float. +/// +/// The numbers are, in order: the size, the ABI alignment, and the preferred +/// alignment. +pub const DEFAULT_FLOAT_128_LAYOUT: (usize, usize, usize) = (128, 128, 128); + +/// The default layout on LLVM for a 64-bit wide vector. +/// +/// The numbers are, in order: the size, the ABI alignment, and the preferred +/// alignment. +pub const DEFAULT_VECTOR_64_LAYOUT: (usize, usize, usize) = (64, 64, 64); + +/// The default layout on LLVM for a 128-bit wide vector. 
+/// +/// The numbers are, in order: the size, the ABI alignment, and the preferred +/// alignment. +pub const DEFAULT_VECTOR_128_LAYOUT: (usize, usize, usize) = (128, 128, 128); + +/// The default layout on LLVM for a 1-bit wide integer. +/// +/// The numbers are, in order: the size, the ABI alignment, and the preferred +/// alignment. +pub const DEFAULT_INTEGER_1_LAYOUT: (usize, usize, usize) = (1, 8, 8); + +/// The default layout on LLVM for an 8-bit wide integer. +/// +/// The numbers are, in order: the size, the ABI alignment, and the preferred +/// alignment. +pub const DEFAULT_INTEGER_8_LAYOUT: (usize, usize, usize) = (8, 8, 8); + +/// The default layout on LLVM for a 16-bit wide integer. +/// +/// The numbers are, in order: the size, the ABI alignment, and the preferred +/// alignment. +pub const DEFAULT_INTEGER_16_LAYOUT: (usize, usize, usize) = (16, 16, 16); + +/// The default layout on LLVM for a 32-bit wide integer. +/// +/// The numbers are, in order: the size, the ABI alignment, and the preferred +/// alignment. +pub const DEFAULT_INTEGER_32_LAYOUT: (usize, usize, usize) = (32, 32, 32); + +/// The default layout on LLVM for a 64-bit wide integer. +/// +/// The numbers are, in order: the size, the ABI alignment, and the preferred +/// alignment. +pub const DEFAULT_INTEGER_64_LAYOUT: (usize, usize, usize) = (64, 32, 64); + +/// The default layout for pointers in address space zero. +/// +/// The numbers are, in order: the address space, the size, the ABI alignment, +/// the preferred alignment, and the index size. +pub const DEFAULT_POINTER_0_LAYOUT: (usize, usize, usize, usize, usize) = (0, 64, 64, 64, 64); diff --git a/crates/compiler/src/context/mod.rs b/crates/compiler/src/context/mod.rs new file mode 100644 index 0000000..f33d004 --- /dev/null +++ b/crates/compiler/src/context/mod.rs @@ -0,0 +1,123 @@ +//! Contains the source compilation context, which is a way of tracking the +//! compilation units being processed by the compiler. + +use inkwell::{context::Context as LLVMContext, module::Module}; +use ltc_errors::compile::{Error, Result}; +use ouroboros::self_referencing; + +pub mod module; + +use module::SourceModule; + +/// The source compilation context manages the LLVM state across compiler +/// operations. +/// +/// It is intended to exist only throughout the compilation process, after which +/// it may be safely discarded. +/// +/// # Self-Referential Structure Definition +/// +/// Inkwell's [`Module`] (and many other structs returned from the Inkwell API) +/// are bound by lifetime to the LLVM context object. We don't want these +/// lifetimes to leak into our API and propagate throughout the compiler, so +/// instead we encapsulate them within this struct. +/// +/// In order to do this, we use the [`ouroboros`] crate to create a +/// self-referential struct. What this means is that the struct can contain an +/// object, and also have fields that _reference_ those objects. This is +/// disallowed by Rust's ownership model without `unsafe` code, so by using a +/// crate we encapsulate that unsafety and take advantage of the fact that it +/// has likely been looked at by more people than just us. +/// +/// As part of using this crate, @iamrecursion has checked it for correctness in +/// this use-case. +#[self_referencing] +#[derive(Debug)] +pub struct SourceContext { + /// The underlying context that contains the LLVM representation of the + /// input IR. + llvm_context: LLVMContext, + + /// The module in this LLVM context. 
This contains the objects that will be + /// directly compiled here. + #[borrows(llvm_context)] + #[not_covariant] + module: Module<'this>, +} + +impl SourceContext { + /// Creates a new, empty, source compilation context, wrapping the provided + /// `module` for compilation. + /// + /// Please note that this is named `create` as to avoid a naming conflict + /// with the `new` method generated by use of the [`self_referencing`] macro + /// from the [`ouroboros`] crate. + /// + /// # Errors + /// + /// - [`Error::UnableToAddModuleToContext`] if the provided `module` cannot + /// be added to the context. + pub fn create(module: impl TryInto) -> Result { + let llvm_context = LLVMContext::create(); + let module_source = module + .try_into() + .map_err(|e| Error::UnableToAddModuleToContext(e.to_string()))?; + + // Unfortunately we have to do this all in one to avoid really annoying type and + // lifetime annotations. Having it in one block like this guides type inference + // much more effectively. + let builder = SourceContextTryBuilder { + llvm_context, + module_builder: |llvm_context| { + let module = llvm_context + .create_module_from_ir(module_source.into()) + .map_err(|e| Error::UnableToAddModuleToContext(e.to_string()))?; + Ok(module) + }, + }; + + builder.try_build() + } + + /// Runs analysis on the module in the context using the provided function, + /// and returns the analysis results. + /// + /// It does not have the ability to modify the underlying module at all. + /// + /// # Function Mutability + /// + /// As we may want to pass methods with mutable receivers as the operation + /// here, we say it can be an instance of [`FnMut`]. Do note that this is a + /// super-trait of [`Fn`] and hence `op` is not _required_ to capture + /// anything mutably. + /// + /// # Errors + /// + /// - [`Error`] if the provided `op` returns an error. + pub fn analyze_module(&self, op: impl FnMut(&Module) -> Result) -> Result { + self.with_module(op) + } + + /// Runs a transformation on the module in the context using the provided + /// function, returning any results from the modification. + /// + /// # Function Mutability + /// + /// As we may want to pass methods with mutable receivers as the operation + /// here, we say it can be an instance of [`FnMut`]. Do note that this is a + /// super-trait of [`Fn`] and hence `op` is not _required_ to capture + /// anything mutably. + /// + /// # Errors + /// + /// - [`Error`] if the provided `op` returns an error. + pub fn modify_module(&mut self, op: impl FnMut(&mut Module) -> Result) -> Result { + self.with_module_mut(op) + } +} + +impl From for LLVMContext { + fn from(value: SourceContext) -> Self { + value.into_heads().llvm_context + } +} diff --git a/crates/compiler/src/context/module.rs b/crates/compiler/src/context/module.rs new file mode 100644 index 0000000..c0db115 --- /dev/null +++ b/crates/compiler/src/context/module.rs @@ -0,0 +1,42 @@ +//! There are many ways that we can add a module to the compilation context, so +//! rather than creating a proliferation of methods with subtly-different input +//! types, we instead take one type that can be created from many. + +use std::path::Path; + +use inkwell::{memory_buffer::MemoryBuffer, support::LLVMString}; + +/// A unified type for all the different ways that we support adding a module to +/// the compiler's [`crate::context::SourceContext`]. +pub struct SourceModule { + /// The underlying representation of the module to be passed to LLVM. 
+ memory_buffer: MemoryBuffer, +} + +impl TryFrom<(String, String)> for SourceModule { + type Error = LLVMString; + + /// Try to create a module source from the provided tuple of `name` and + /// `contents`. + fn try_from((name, contents): (String, String)) -> Result<Self, Self::Error> { + let memory_buffer = + MemoryBuffer::create_from_memory_range(contents.as_bytes(), name.as_str()); + Ok(Self { memory_buffer }) + } +} + +impl TryFrom<&Path> for SourceModule { + type Error = LLVMString; + + /// Try to create a module source from the LLVM IR at the provided `path`. + fn try_from(path: &Path) -> Result<Self, Self::Error> { + let memory_buffer = MemoryBuffer::create_from_file(path)?; + Ok(Self { memory_buffer }) + } +} + +impl From<SourceModule> for MemoryBuffer { + fn from(value: SourceModule) -> Self { + value.memory_buffer + } +} diff --git a/crates/compiler/src/lib.rs b/crates/compiler/src/lib.rs index 75a826f..c98f119 100644 --- a/crates/compiler/src/lib.rs +++ b/crates/compiler/src/lib.rs @@ -44,5 +44,236 @@ #![allow(clippy::module_name_repetitions)] // Allows for better API naming #![allow(clippy::multiple_crate_versions)] // Enforced by our dependencies -pub mod compile; +pub mod constant; +pub mod context; +pub mod llvm; +pub mod pass; pub mod polyfill; + +use ltc_errors::compile::{Error, Result}; +use ltc_flo::FlatLoweredObject; + +use crate::{ + context::SourceContext, + pass::{data::DynPassDataMap, PassManager, PassManagerReturnData}, + polyfill::PolyfillMap, +}; + +/// Handles the compilation of LLVM IR to our [`FlatLoweredObject`] object +/// format. +/// +/// In the context of LLVM to Cairo, compilation refers to the process of +/// translating from [LLVM IR](https://llvm.org/docs/LangRef.html) to our +/// internal `FLO` object file format. +/// +/// LLVM IR is designed around a virtual processor model that is expected to +/// have a multitude of operations common to real CPUs. As we are compiling to +/// target the Cairo VM, we have to work out how to take each of these +/// operations, and represent them in our extremely restricted instruction set. +/// +/// Doing this involves two major approaches: +/// +/// 1. **Translation:** Where there is a good match between the structure of the +/// LLVM IR and the structure of `FlatLowered`, we can translate one to the +/// other. This is useful both in terms of code structure—as LLVM IR is still +/// a structured IR—and in terms of basic operations that are common to both +/// representations. +/// 2. **Polyfills:** Where LLVM expects an operation that we do not have an +/// equivalent for, we instead emit a call to an _implementation of that +/// operation_ in Cairo. We term these implementations _polyfills_ as an +/// analogy to the term used on the web, and they are _software_ +/// implementations of features and capabilities that our hardware is +/// missing. For more information on polyfills, see the [`polyfill`] module. +/// +/// We aim for this compilation process to both achieve a 1:1 semantic match to +/// the original LLVM IR—through use of translation and polyfills as needed—and +/// to retain as much context information as possible so as to ensure the +/// possibility of a good user experience in the future. +/// +/// # Targeting `FlatLowered` instead of `Sierra` +/// +/// It might seem strange to target `FlatLowered` instead of something like +/// [Sierra](https://docs.starknet.io/architecture-and-concepts/smart-contracts/cairo-and-sierra/#why_do_we_need_sierra) +/// which is _intended_ as a target for compilation. 
+/// +/// While we definitely want the benefits of Sierra—particularly model checking +/// for the underlying machine, and the gas monitoring—we do not want to perform +/// all the necessary bookkeeping to make Sierra work on our own at the current +/// time. By targeting `FlatLowered` instead, we gain the benefits of the +/// _already existing_ [`sierragen`](https://github.com/starkware-libs/cairo/blob/main/crates/cairo-lang-sierra-generator/src/lib.rs) +/// functionality, which ingests `FlatLowered` and handles the required Sierra +/// bookkeeping for us, while also being able to iterate and design faster. +/// +/// While this does give us less control—as we rely on the existing +/// translation—the benefits of not having to manually perform this additional +/// work far outweigh that downside. +/// +/// We fully expect to modify the process in the future to target `Sierra` +/// directly, giving us more control as we need it. +#[allow(dead_code)] +pub struct Compiler { + /// The source context, containing references to the LLVM module to be + /// compiled. + pub context: SourceContext, + + /// The passes that this compiler is configured to run. + pub passes: PassManager, + + /// The mapping between LLVM names and polyfill names for the compiler to + /// use during compilation. + pub polyfill_map: PolyfillMap, +} + +impl Compiler { + /// Constructs a new compiler instance, wrapping the provided `context` + /// describing the LLVM module to compile, the `passes` to run, and the + /// `polyfill_map` from LLVM names to polyfill names. + #[must_use] + pub fn new(context: SourceContext, passes: PassManager, polyfill_map: PolyfillMap) -> Self { + Self { + context, + passes, + polyfill_map, + } + } + + /// Executes the compiler on the configured LLVM module. + /// + /// Note that this invokes a state transition that leaves the compiler in an + /// invalid state, and hence it consumes the compiler to prevent API misuse. + /// + /// # Errors + /// + /// - [`ltc_errors::compile::Error`] if the compilation process fails for + /// any reason. + pub fn run(mut self) -> Result<CompilationResult> { + let PassManagerReturnData { + context: _context, + data: _data, + } = self.passes.run(self.context)?; + + Err(Error::CompilationFailure( + "Compilation is not yet implemented".to_string(), + )) + } +} + +/// The result of compiling an LLVM IR module. +#[derive(Debug)] +pub struct CompilationResult { + /// The final state of the pass data after the compiler passes have been + /// executed. + pub pass_results: DynPassDataMap, + + /// The `FLO` module that results from compilation. + pub result_module: FlatLoweredObject, +} + +impl CompilationResult { + /// Constructs a new compilation result wrapping the final `FLO` module + /// and also containing the final output of any compiler passes. + #[must_use] + pub fn new(pass_results: DynPassDataMap) -> Self { + // TODO (#24) Actually compile to FLO. + let result_module = FlatLoweredObject::new(""); + Self { + pass_results, + result_module, + } + } +} + +/// Allows for building a [`Compiler`] instance while retaining the defaults for +/// fields that do not need to be customized. +pub struct CompilerBuilder { + /// The source context, containing references to the LLVM module to be + /// compiled. + context: SourceContext, + + /// The passes that this compiler is configured to run. + passes: Option<PassManager>, + + /// The mapping between LLVM names and polyfill names for the compiler to + /// use during compilation. 
+ polyfill_map: Option, +} + +impl CompilerBuilder { + /// Creates a new compiler builder wrapping the provided context. + /// + /// The compiler's passes configuration and polyfill configuration will be + /// left as default unless specified otherwise by calling + /// [`Self::with_passes`] and [`Self::with_polyfills`] respectively. + /// + /// # API Style + /// + /// Please note that the API for the builder consumes `self` and is hence + /// designed to have calls chained in the "fluent" API style. + #[must_use] + pub fn new(context: SourceContext) -> Self { + let passes = None; + let polyfill_map = None; + Self { + context, + passes, + polyfill_map, + } + } + + /// Specifies the pass configuration for the compiler. + /// + /// # API Style + /// + /// Please note that the API for the builder consumes `self` and is hence + /// designed to have calls chained in the "fluent" API style. + #[must_use] + pub fn with_passes(mut self, pass_manager: PassManager) -> Self { + self.passes = Some(pass_manager); + self + } + + /// Specifies the polyfill configuration for the compiler. + /// + /// # API Style + /// + /// Please note that the API for the builder consumes `self` and is hence + /// designed to have calls chained in the "fluent" API style. + #[must_use] + pub fn with_polyfills(mut self, polyfill_map: PolyfillMap) -> Self { + self.polyfill_map = Some(polyfill_map); + self + } + + /// Builds a compiler from the specified configuration. + /// + /// # API Style + /// + /// Please note that the API for the builder consumes `self` and is hence + /// designed to have calls chained in the "fluent" API style. + #[must_use] + pub fn build(self) -> Compiler { + Compiler::new( + self.context, + self.passes.unwrap_or_default(), + self.polyfill_map.unwrap_or_default(), + ) + } +} + +#[cfg(test)] +mod test { + use std::path::Path; + + use crate::{context::SourceContext, CompilerBuilder}; + + #[test] + fn compiler_runs_successfully() -> anyhow::Result<()> { + let test_input = r"input/add.ll"; + let ctx = SourceContext::create(Path::new(test_input))?; + + let compiler = CompilerBuilder::new(ctx).build(); + assert!(compiler.run().is_err()); + + Ok(()) + } +} diff --git a/crates/compiler/src/llvm/data_layout.rs b/crates/compiler/src/llvm/data_layout.rs new file mode 100644 index 0000000..d65a7c6 --- /dev/null +++ b/crates/compiler/src/llvm/data_layout.rs @@ -0,0 +1,1364 @@ +//! This module contains the definition of the [`DataLayout`] struct, as well as +//! utilities for querying and reasoning about said layouts. + +use chumsky::{ + error::Simple, + prelude::{choice, just}, + Parser, +}; +use ltc_errors::compile::{Error, Result}; + +use crate::constant::{ + BYTE_SIZE, + DEFAULT_FLOAT_128_LAYOUT, + DEFAULT_FLOAT_16_LAYOUT, + DEFAULT_FLOAT_32_LAYOUT, + DEFAULT_FLOAT_64_LAYOUT, + DEFAULT_INTEGER_16_LAYOUT, + DEFAULT_INTEGER_1_LAYOUT, + DEFAULT_INTEGER_32_LAYOUT, + DEFAULT_INTEGER_64_LAYOUT, + DEFAULT_INTEGER_8_LAYOUT, + DEFAULT_POINTER_0_LAYOUT, + DEFAULT_VECTOR_128_LAYOUT, + DEFAULT_VECTOR_64_LAYOUT, +}; + +/// Information about the expected data-layout for this module. +/// +/// # Defaulting +/// +/// LLVM starts with a default specification of the data-layout that is possibly +/// overridden by the data-layout string. This struct implements this behavior, +/// so if you want a defaulted layout, either call [`DataLayout::new`] with an +/// empty string, or use the [`Default`] instance. +#[derive(Clone, Debug, PartialEq)] +pub struct DataLayout { + /// The endianness used in this data layout. 
+ pub endianness: Endianness, + + /// The mangling scheme used by this data layout. + pub mangling: Mangling, + + /// The natural alignment of the stack in bits. + pub stack_alignment: usize, + + /// The index of the address space that corresponds to program memory. + pub program_address_space: usize, + + /// The index of the address space that corresponds to globals. + pub global_address_space: usize, + + /// The index of the address space for allocations. + pub alloc_address_space: usize, + + /// The layout of pointers. + pub pointer_layouts: Vec, + + /// The layout of the various integer types. + pub integer_layouts: Vec, + + /// The layout of the various vector types. + pub vector_layouts: Vec, + + /// The layout of the various floating-point types. + pub float_layouts: Vec, + + /// The layout of aggregate types. + pub aggregate_layout: AggregateLayout, + + /// The layout of function pointers. + pub function_pointer_layout: FunctionPointerLayout, + + /// The integer widths natively supported by the CPU in this layout. + pub native_integer_widths: NativeIntegerWidths, + + /// The address space numbers in which pointers should be treated as + /// non-integral. + pub nointptr_address_spaces: NonIntegralPointerAddressSpaces, +} + +impl DataLayout { + /// Constructs a new data layout description from the provided + /// `layout_string`. + /// + /// If any portion of the data layout specification is left unspecified, + /// then the default data layout specification is used in its place as + /// described [here](https://llvm.org/docs/LangRef.html#data-layout). In + /// addition, we: + /// + /// - Default to 32 and 64-bit for the native integer widths. + /// - Default to independent function pointers aligned to 64 bits. + /// - Default to the ELF mangling scheme if none is specified. + /// + /// # Errors + /// + /// - [`Error::InvalidDataLayoutSpecification`] if the provided + /// `layout_string` cannot be parsed as a data layout specification. + pub fn new(layout_string: &str) -> Result { + let parts = layout_string.split('-'); + + // Allocate a default that is KNOWINGLY INCOMPLETE. This is not a valid layout + // to return, but serves as a place to stick our specifications as we parse + // them. + let mut layout = DataLayout { + endianness: Endianness::Little, + mangling: Mangling::ELF, + stack_alignment: 0, + program_address_space: 0, + global_address_space: 0, + alloc_address_space: 0, + pointer_layouts: vec![], + integer_layouts: vec![], + vector_layouts: vec![], + float_layouts: vec![], + aggregate_layout: AggregateLayout { + abi_alignment: 0, + preferred_alignment: 64, + }, + function_pointer_layout: FunctionPointerLayout { + ptr_type: FunctionPointerType::Independent, + abi_alignment: 64, + }, + native_integer_widths: NativeIntegerWidths { + widths: vec![32, 64], + }, + nointptr_address_spaces: NonIntegralPointerAddressSpaces { + address_spaces: Vec::new(), + }, + }; + + // Parse out each specification from the data-layout string. 
+ for part in parts { + if let Ok(e) = Endianness::parser().parse(part) { + layout.endianness = e; + } else if let Ok(m) = Mangling::parser().parse(part) { + layout.mangling = m; + } else if let Ok(align) = parsing::stack_alignment().parse(part) { + layout.stack_alignment = align; + } else if let Ok(p_addr) = parsing::program_address_space().parse(part) { + layout.program_address_space = p_addr; + } else if let Ok(g_addr) = parsing::global_address_space().parse(part) { + layout.global_address_space = g_addr; + } else if let Ok(a_addr) = parsing::alloc_address_space().parse(part) { + layout.alloc_address_space = a_addr; + } else if let Ok(ptr_spec) = PointerLayout::parser().parse(part) { + layout.pointer_layouts.push(ptr_spec); + } else if let Ok(int_spec) = IntegerLayout::parser().parse(part) { + layout.integer_layouts.push(int_spec); + } else if let Ok(vec) = VectorLayout::parser().parse(part) { + layout.vector_layouts.push(vec); + } else if let Ok(float_spec) = FloatLayout::parser().parse(part) { + layout.float_layouts.push(float_spec); + } else if let Ok(agg) = AggregateLayout::parser().parse(part) { + layout.aggregate_layout = agg; + } else if let Ok(f_ptr) = FunctionPointerLayout::parser().parse(part) { + layout.function_pointer_layout = f_ptr; + } else if let Ok(iw) = NativeIntegerWidths::parser().parse(part) { + layout.native_integer_widths = iw; + } else if let Ok(npa) = NonIntegralPointerAddressSpaces::parser().parse(part) { + layout.nointptr_address_spaces = npa; + } else if part.is_empty() { + // We don't know if empty parts are allowed, so we just behave permissively + // here. It cannot introduce any bugs to be permissive in this case. + continue; + } else { + Err(Error::InvalidDataLayoutSpecification( + layout_string.to_string(), + part.to_string(), + ))?; + } + } + + // Finally we add the defaults for vector-typed fields as these have to be done + // after parsing. + layout.pointer_layouts = Self::pointer_specs_or_defaults(layout.pointer_layouts); + layout.integer_layouts = Self::int_specs_or_defaults(layout.integer_layouts); + layout.vector_layouts = Self::vec_specs_or_defaults(layout.vector_layouts); + layout.float_layouts = Self::float_specs_or_defaults(layout.float_layouts); + + // Finally we can build the data layout + Ok(layout) + } + + /// Augments the parsed floating-point layout specifications with any + /// missing information based on the defaults for LLVM's data layout. + fn float_specs_or_defaults(mut specs: Vec) -> Vec { + let float_defaults = [ + DEFAULT_FLOAT_16_LAYOUT, + DEFAULT_FLOAT_32_LAYOUT, + DEFAULT_FLOAT_64_LAYOUT, + DEFAULT_FLOAT_128_LAYOUT, + ]; + + for (size, abi_alignment, preferred_alignment) in float_defaults { + if !specs.iter().any(|f| f.size == size) { + specs.push(FloatLayout { + size, + abi_alignment, + preferred_alignment, + }); + } + } + + specs.sort(); + specs + } + + /// Augments the parsed vector layout specifications with any missing + /// information based on the defaults for LLVM's data layout. + fn vec_specs_or_defaults(mut specs: Vec) -> Vec { + let vector_layouts = [DEFAULT_VECTOR_64_LAYOUT, DEFAULT_VECTOR_128_LAYOUT]; + + for (size, abi_alignment, preferred_alignment) in vector_layouts { + if !specs.iter().any(|v| v.size == size) { + specs.push(VectorLayout { + size, + abi_alignment, + preferred_alignment, + }); + } + } + + specs.sort(); + specs + } + + /// Augments the parsed integer specifications with any missing information + /// based on the defaults for LLVM's data layout. 
+ fn int_specs_or_defaults(mut specs: Vec) -> Vec { + let integer_layouts = [ + DEFAULT_INTEGER_1_LAYOUT, + DEFAULT_INTEGER_8_LAYOUT, + DEFAULT_INTEGER_16_LAYOUT, + DEFAULT_INTEGER_32_LAYOUT, + DEFAULT_INTEGER_64_LAYOUT, + ]; + + for (size, abi_alignment, preferred_alignment) in integer_layouts { + if !specs.iter().any(|i| i.size == size) { + specs.push(IntegerLayout { + size, + abi_alignment, + preferred_alignment, + }); + } + } + + specs.sort(); + specs + } + + /// Augments the parsed pointer specifications with any missing information + /// based on the defaults for LLVM's data layout. + fn pointer_specs_or_defaults(mut specs: Vec) -> Vec { + let pointer_layouts = [DEFAULT_POINTER_0_LAYOUT]; + + for (space, size, abi, pref, index) in pointer_layouts { + if !specs.iter().any(|l| l.address_space == space) { + specs.push(PointerLayout { + address_space: space, + size, + abi_alignment: abi, + preferred_alignment: pref, + index_size: index, + }); + } + } + + specs.sort(); + specs + } +} + +impl Default for DataLayout { + fn default() -> Self { + Self::new("").expect("The empty string was not a valid data layout specification") + } +} + +impl TryFrom<&str> for DataLayout { + type Error = Error; + + fn try_from(value: &str) -> std::result::Result { + Self::new(value) + } +} + +impl TryFrom for DataLayout { + type Error = Error; + + fn try_from(value: String) -> std::result::Result { + Self::new(&value) + } +} + +/// A description of the endianness used when laying out data. +#[derive(Clone, Debug, Eq, Ord, PartialEq, PartialOrd)] +pub enum Endianness { + /// Little-endian (least-significant byte first). + Little, + + /// Big-endian (most-significant byte first). + Big, +} + +impl Endianness { + /// Parses the endianness specification part of the data-layout. + fn parser() -> impl parsing::DLParser { + choice(( + just("e").to(Endianness::Little), + just("E").to(Endianness::Big), + )) + } +} + +/// A description of the mangling scheme used by this module. +#[derive(Clone, Debug, Eq, Ord, PartialEq, PartialOrd)] +pub enum Mangling { + /// The Unix COFF mangling scheme that is still used by Windows' Portable + /// Executable format. + /// + /// Private symbols get the usual prefix. Functions with `__stdcall`, + /// `__fastcall`, and `__vectorcall` have custom mangling that appends + /// `@N` where `N` is the number of bytes used to pass parameters. C++ + /// symbols starting with `?` are not mangled in any way. + COFF, + + /// The Windows x86 COFF mangling scheme. + /// + /// Private symbols get the usual prefix. Regular C symbols get an `_` + /// prefix. Functions with `__stdcall`, `__fastcall`, and `__vectorcall` + /// have custom mangling that appends `@N` where `N` is the number of + /// bytes used to pass parameters. C++ symbols starting with `?` are not + /// mangled in any way. + COFF86, + + /// The ELF mangling scheme, where private symbols get a `.L` prefix. + ELF, + + /// The GOFF mangling scheme, where private symbols get an `@` prefix. + GOFF, + + /// The Mach-O mangling scheme, where private symbols get an `L` prefix and + /// other symbols get an `_` prefix. + MachO, + + /// The MIPS mangling scheme, where private symbols get a `$` prefix. + MIPS, + + /// The XCOFF mangling scheme, where private symbols get an `L..` prefix. + XCOFF, +} + +impl Mangling { + /// Parses the mangling specification part of the data-layout. 
+ fn parser() -> impl parsing::DLParser { + just("m:").ignore_then(choice(( + just("a").to(Mangling::XCOFF), + just("e").to(Mangling::ELF), + just("l").to(Mangling::GOFF), + just("m").to(Mangling::MIPS), + just("o").to(Mangling::MachO), + just("w").to(Mangling::COFF), + just("x").to(Mangling::COFF86), + ))) + } +} + +/// A specification of the pointer layout for this data-layout. +#[derive(Clone, Debug, Eq, Ord, PartialEq, PartialOrd)] +pub struct PointerLayout { + /// The address space for which the pointer is being specified. + pub address_space: usize, + + /// The size of the pointer. + pub size: usize, + + /// The required ABI alignment for the pointer. + pub abi_alignment: usize, + + /// The preferred alignment for the pointer. + pub preferred_alignment: usize, + + /// The size of the index used for address calculation. + pub index_size: usize, +} + +impl PointerLayout { + /// Parses the pointer layout specification as part of the data layout + /// string. + #[must_use] + pub fn parser() -> impl parsing::DLParser { + just("p") + .ignore_then(parsing::pos_int(10).delimited_by(just("["), just("]")).or_not()) + .then(parsing::field(parsing::pos_int(10))) + .then(parsing::field(parsing::pos_int(10))) + .then(parsing::field(parsing::pos_int(10)).or_not()) + .then(parsing::field(parsing::pos_int(10)).or_not()) + .try_map( + |((((address_space, size), abi_alignment), preferred_alignment), index_size), + span| { + let address_space = address_space.unwrap_or(0); + let preferred_alignment = preferred_alignment.unwrap_or(abi_alignment); + let index_size = index_size.unwrap_or(size); + if index_size > size { + Err(Simple::custom( + span, + format!( + "The requested index size {index_size} is larger than the pointer \ + size {size}" + ), + ))?; + }; + + Ok(Self { + address_space, + size, + abi_alignment, + preferred_alignment, + index_size, + }) + }, + ) + } +} + +/// A specification of an integer layout for this data-layout. +#[derive(Clone, Debug, Eq, Ord, PartialEq, PartialOrd)] +pub struct IntegerLayout { + /// The size of the integer. + pub size: usize, + + /// The required ABI alignment for the integer. + pub abi_alignment: usize, + + /// The preferred alignment for the integer. + pub preferred_alignment: usize, +} + +impl IntegerLayout { + /// Parses an integer layout specification as part of the data layout + /// string. + #[must_use] + pub fn parser() -> impl parsing::DLParser { + just("i") + .ignore_then(parsing::pos_int(10)) + .then(parsing::field(parsing::pos_int(10))) + .then(parsing::field(parsing::pos_int(10)).or_not()) + .try_map(|((size, abi_alignment), preferred_alignment), span| { + let preferred_alignment = preferred_alignment.unwrap_or(abi_alignment); + if size == BYTE_SIZE && abi_alignment != size { + Err(Simple::custom( + span, + "i8 was not aligned to a byte boundary", + ))?; + } + + Ok(Self { + size, + abi_alignment, + preferred_alignment, + }) + }) + } +} + +/// A specification of a vector layout for this data-layout. +#[derive(Clone, Debug, Eq, Ord, PartialEq, PartialOrd)] +pub struct VectorLayout { + /// The size of the vector. + pub size: usize, + + /// The required ABI alignment for the vector. + pub abi_alignment: usize, + + /// The preferred alignment for the vector. + pub preferred_alignment: usize, +} + +impl VectorLayout { + /// Parses a vector layout specification as part of the data layout + /// string. 
+ #[must_use] + pub fn parser() -> impl parsing::DLParser { + just("v") + .ignore_then(parsing::pos_int(10)) + .then(parsing::field(parsing::pos_int(10))) + .then(parsing::field(parsing::pos_int(10)).or_not()) + .map(|((size, abi_alignment), preferred_alignment)| { + let preferred_alignment = preferred_alignment.unwrap_or(abi_alignment); + + Self { + size, + abi_alignment, + preferred_alignment, + } + }) + } +} + +/// A specification of a floating-point layout for this data-layout. +#[derive(Clone, Debug, Eq, Ord, PartialEq, PartialOrd)] +pub struct FloatLayout { + /// The size of the floating-point number. + pub size: usize, + + /// The required ABI alignment for the floating-point number. + pub abi_alignment: usize, + + /// The preferred alignment for the floating-point number. + pub preferred_alignment: usize, +} + +impl FloatLayout { + /// Parses a floating-point layout specification as part of the data layout + /// string. + #[must_use] + pub fn parser() -> impl parsing::DLParser { + just("f") + .ignore_then(parsing::pos_int(10)) + .then(parsing::field(parsing::pos_int(10))) + .then(parsing::field(parsing::pos_int(10)).or_not()) + .try_map(|((size, abi_alignment), preferred_alignment), span| { + let preferred_alignment = preferred_alignment.unwrap_or(abi_alignment); + if !&[16, 32, 64, 80, 128].contains(&size) { + Err(Simple::custom( + span, + format!("{size} is not a valid floating-point size"), + ))?; + } + + Ok(Self { + size, + abi_alignment, + preferred_alignment, + }) + }) + } +} + +/// A specification of the aggregate layout for this data-layout. +#[derive(Clone, Debug, Eq, Ord, PartialEq, PartialOrd)] +pub struct AggregateLayout { + /// The required ABI alignment for an aggregate. + pub abi_alignment: usize, + + /// The preferred alignment for an aggregate. + pub preferred_alignment: usize, +} + +impl AggregateLayout { + /// Parses the aggregate layout specification as part of the data layout + /// string. + #[must_use] + pub fn parser() -> impl parsing::DLParser { + just("a") + .ignore_then(parsing::pos_int(10)) + .then(parsing::field(parsing::pos_int(10)).or_not()) + .map(|(abi_alignment, preferred_alignment)| { + let preferred_alignment = preferred_alignment.unwrap_or(abi_alignment); + + Self { + abi_alignment, + preferred_alignment, + } + }) + } +} + +/// A specification of the way function pointers are treated as part of this +/// data-layout. +#[derive(Clone, Debug, Eq, Ord, PartialEq, PartialOrd)] +pub enum FunctionPointerType { + /// The alignment of function pointers is independent of the alignment of + /// functions, and is a multiple of the ABI alignment. + Independent, + + /// The alignment of function pointers is a multiple of the explicit + /// alignment specified on the function, and is a multiple of the ABI + /// alignment. + Multiple, +} + +impl FunctionPointerType { + /// Parses the function pointer type as part of the data layout string. + #[must_use] + pub fn parser() -> impl parsing::DLParser { + choice(( + just("i").to(FunctionPointerType::Independent), + just("n").to(FunctionPointerType::Multiple), + )) + } +} + +/// A specification of the function pointer layout for this data-layout. +#[derive(Clone, Debug, Eq, Ord, PartialEq, PartialOrd)] +pub struct FunctionPointerLayout { + /// The way that the function pointer is treated in the data layout. + pub ptr_type: FunctionPointerType, + + /// The alignment of function pointers in this data layout. 
+ pub abi_alignment: usize, +} + +impl FunctionPointerLayout { + /// Parses the function pointer layout specification as part of this data + /// layout. + #[must_use] + pub fn parser() -> impl parsing::DLParser { + just("F") + .ignore_then(FunctionPointerType::parser()) + .then(parsing::pos_int(10)) + .map(|(ptr_type, abi_alignment)| Self { + ptr_type, + abi_alignment, + }) + } +} + +/// A specification of the native integer widths for this data-layout. +/// +/// The CPU must have _at least one_ native integer width. +#[derive(Clone, Debug, Eq, Ord, PartialEq, PartialOrd)] +pub struct NativeIntegerWidths { + /// The integer widths that are natively supported on the CPU. + pub widths: Vec, +} + +impl NativeIntegerWidths { + /// Parses the specification of native integer widths for the target CPU. + #[must_use] + pub fn parser() -> impl parsing::DLParser { + just("n") + .ignore_then(parsing::pos_int(10)) + .then(parsing::field(parsing::pos_int(10)).repeated()) + .map(|(first, mut rest)| { + rest.insert(0, first); + Self { widths: rest } + }) + } +} + +/// A specification of the address spaces in which the pointers should be +/// treated as [non-integral](https://llvm.org/docs/LangRef.html#nointptrtype). +#[derive(Clone, Debug, Eq, Ord, PartialEq, PartialOrd)] +pub struct NonIntegralPointerAddressSpaces { + /// The address spaces in which pointers should be treated as non-integral. + pub address_spaces: Vec, +} + +impl NonIntegralPointerAddressSpaces { + /// Parses the specification of address-spaces in which pointers are + /// non-integral. + #[must_use] + pub fn parser() -> impl parsing::DLParser { + just("ni") + .ignore_then(parsing::field(parsing::pos_int(10)).repeated().at_least(1)) + .try_map(|address_spaces, span| { + if address_spaces.contains(&0) { + Err(Simple::custom( + span, + "The 0 address space cannot be specified as using non-integral pointers", + ))?; + } + + Ok(Self { address_spaces }) + }) + } +} + +/// Utility parsing functions to aid in the parsing of data-layouts but that are +/// not associated directly with any type. +pub mod parsing { + use chumsky::{error::Simple, prelude::just, text::int, Parser}; + + use crate::{constant::BYTE_SIZE, llvm::data_layout::parsing}; + + /// Simply to avoid typing out the whole parser type parameter specification + /// every single time given it only varies in one parameter. + pub trait DLParser: Parser> {} + + /// A blanket impl to make this work, because yay. + impl DLParser for U where U: Parser> {} + + /// Parses an element separator. + #[must_use] + pub fn elem_sep<'a>() -> impl DLParser<&'a str> { + just("-") + } + + /// Parses a field separator. + #[must_use] + pub fn field_sep<'a>() -> impl DLParser<&'a str> { + just(":") + } + + /// Parses a field, namely a colon followed by something as given by the + /// `then` parser. + pub fn field(then: impl DLParser) -> impl DLParser { + field_sep().ignore_then(then) + } + + /// Parses a positive integer in the specified `radix`. + #[must_use] + pub fn pos_int(radix: u32) -> impl DLParser { + int(radix).try_map(|num: String, span| { + num.parse::().map_err(|_| { + Simple::custom(span, format!("Could not parse {num} as a positive integer")) + }) + }) + } + + /// Parses the stack alignment specification part of the data-layout. 
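The `DLParser` trait in the `parsing` module above (its generic parameters are elided in this rendering) is the usual stand-in for a trait alias: an empty trait whose supertrait is the long bound, plus a blanket impl so every conforming type picks it up automatically. Before the stack-alignment helper that follows, here is a self-contained sketch of that pattern with purely hypothetical names.

```rust
/// The "alias": an empty trait whose only job is to abbreviate a longer bound.
trait Parse<T>: Fn(&str) -> Result<T, String> {}

/// The blanket impl: anything satisfying the bound is automatically `Parse<T>`.
impl<T, F> Parse<T> for F where F: Fn(&str) -> Result<T, String> {}

/// Callers can now spell the short alias instead of the full closure bound.
fn parse_pair<T>(p: impl Parse<T>, a: &str, b: &str) -> Result<(T, T), String> {
    Ok((p(a)?, p(b)?))
}

fn main() {
    let int = |s: &str| s.parse::<u32>().map_err(|e| e.to_string());
    assert_eq!(parse_pair(int, "3", "4"), Ok((3, 4)));
}
```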
+ #[must_use] + pub fn stack_alignment() -> impl DLParser { + just("S").ignore_then(pos_int(10)).validate(|alignment, span, emit| { + if alignment % BYTE_SIZE != 0 { + emit(Simple::custom( + span, + format!("{alignment} must be aligned to a byte offset"), + )); + } + alignment + }) + } + + /// Parses the address space specification part of the data-layout. + fn address_space(space: &str) -> impl DLParser + '_ { + just(space).ignore_then(parsing::pos_int(10)) + } + + #[must_use] + pub fn program_address_space() -> impl DLParser { + address_space("P") + } + + #[must_use] + pub fn global_address_space() -> impl DLParser { + address_space("G") + } + + #[must_use] + pub fn alloc_address_space() -> impl DLParser { + address_space("A") + } +} + +#[cfg(test)] +mod test { + use chumsky::Parser; + + use crate::llvm::data_layout::{ + parsing, + AggregateLayout, + DataLayout, + Endianness, + FloatLayout, + FunctionPointerLayout, + FunctionPointerType, + IntegerLayout, + Mangling, + NativeIntegerWidths, + NonIntegralPointerAddressSpaces, + PointerLayout, + VectorLayout, + }; + + #[test] + fn can_parse_data_layout() { + let dl_string = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"; + + // It should parse correctly + let parsed_layout = DataLayout::new(dl_string); + assert!(parsed_layout.is_ok()); + + // Now we can check that the fields have their proper values. + let layout = parsed_layout.unwrap(); + + // Little endian with ELF mangling + assert_eq!(layout.endianness, Endianness::Little); + assert_eq!(layout.mangling, Mangling::ELF); + + // Stack aligned to 128 bits, with all address spaces in zero. + assert_eq!(layout.stack_alignment, 128); + assert_eq!(layout.program_address_space, 0); + assert_eq!(layout.global_address_space, 0); + assert_eq!(layout.alloc_address_space, 0); + + // Pointers in address space zero are aligned to 64 bits. 
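Before the pointer-layout assertion that follows, a usage-level aside: downstream code is expected to query the parsed structure directly rather than re-parse the string. The sketch below assumes the compiler crate is importable as `ltc_compiler` (the crate name is not confirmed by this diff); the fields it reads are the ones asserted in this test.

```rust
// A sketch only: the `ltc_compiler` crate path is an assumption.
use ltc_compiler::llvm::data_layout::DataLayout;

fn main() {
    let layout = DataLayout::new("e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128")
        .expect("data layout string should parse");

    // Look up the ABI alignment the module expects for 64-bit integers.
    let i64_abi = layout
        .integer_layouts
        .iter()
        .find(|l| l.size == 64)
        .map(|l| l.abi_alignment);
    assert_eq!(i64_abi, Some(64));

    // Reject modules that assume a larger stack alignment than we can honour.
    assert!(layout.stack_alignment <= 128);
}
```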
+ assert_eq!( + layout.pointer_layouts, + vec![PointerLayout { + address_space: 0, + size: 64, + abi_alignment: 64, + preferred_alignment: 64, + index_size: 64, + }] + ); + + // Integers are semi-customized, with 8, 16, 64, and 128 using layouts specified + // in the string + assert!(layout.integer_layouts.contains(&IntegerLayout { + size: 1, + abi_alignment: 8, + preferred_alignment: 8, + })); + assert!(layout.integer_layouts.contains(&IntegerLayout { + size: 8, + abi_alignment: 8, + preferred_alignment: 32, + })); + assert!(layout.integer_layouts.contains(&IntegerLayout { + size: 16, + abi_alignment: 16, + preferred_alignment: 32, + })); + assert!(layout.integer_layouts.contains(&IntegerLayout { + size: 32, + abi_alignment: 32, + preferred_alignment: 32, + })); + assert!(layout.integer_layouts.contains(&IntegerLayout { + size: 64, + abi_alignment: 64, + preferred_alignment: 64, + })); + assert!(layout.integer_layouts.contains(&IntegerLayout { + size: 128, + abi_alignment: 128, + preferred_alignment: 128, + })); + + // For vector layouts we only have the defaults + assert!(layout.vector_layouts.contains(&VectorLayout { + size: 64, + abi_alignment: 64, + preferred_alignment: 64, + })); + assert!(layout.vector_layouts.contains(&VectorLayout { + size: 128, + abi_alignment: 128, + preferred_alignment: 128, + })); + + // For float layouts we also use the defaults + assert!(layout.float_layouts.contains(&FloatLayout { + size: 16, + abi_alignment: 16, + preferred_alignment: 16, + })); + assert!(layout.float_layouts.contains(&FloatLayout { + size: 32, + abi_alignment: 32, + preferred_alignment: 32, + })); + assert!(layout.float_layouts.contains(&FloatLayout { + size: 64, + abi_alignment: 64, + preferred_alignment: 64, + })); + assert!(layout.float_layouts.contains(&FloatLayout { + size: 128, + abi_alignment: 128, + preferred_alignment: 128, + })); + + // For the aggregate layout we have the default + assert_eq!( + layout.aggregate_layout, + AggregateLayout { + abi_alignment: 0, + preferred_alignment: 64, + } + ); + + // For the function pointer layout we also have our default + assert_eq!( + layout.function_pointer_layout, + FunctionPointerLayout { + ptr_type: FunctionPointerType::Independent, + abi_alignment: 64, + } + ); + + // For native integer widths this string specifies 32, 64 + assert_eq!( + layout.native_integer_widths, + NativeIntegerWidths { + widths: vec![32, 64], + } + ); + + // And no address spaces should be using non-integral pointers + assert_eq!( + layout.nointptr_address_spaces, + NonIntegralPointerAddressSpaces { + address_spaces: Vec::new(), + } + ); + } + + #[test] + fn can_parse_data_layout_to_default() { + let dl_string = ""; + + // It should parse correctly + let parsed_layout = DataLayout::new(dl_string); + assert!(parsed_layout.is_ok()); + + // Now we can check that the fields have their proper values. + let layout = parsed_layout.unwrap(); + + // Little endian with ELF mangling + assert_eq!(layout.endianness, Endianness::Little); + assert_eq!(layout.mangling, Mangling::ELF); + + // Stack alignment is arbitrary, with all address spaces in zero. + assert_eq!(layout.stack_alignment, 0); + assert_eq!(layout.program_address_space, 0); + assert_eq!(layout.global_address_space, 0); + assert_eq!(layout.alloc_address_space, 0); + + // Pointers in address space zero are aligned to 64 bits. 
+ assert_eq!( + layout.pointer_layouts, + vec![PointerLayout { + address_space: 0, + size: 64, + abi_alignment: 64, + preferred_alignment: 64, + index_size: 64, + }] + ); + + // All the integer layouts should be default + assert!(layout.integer_layouts.contains(&IntegerLayout { + size: 1, + abi_alignment: 8, + preferred_alignment: 8, + })); + assert!(layout.integer_layouts.contains(&IntegerLayout { + size: 8, + abi_alignment: 8, + preferred_alignment: 8, + })); + assert!(layout.integer_layouts.contains(&IntegerLayout { + size: 16, + abi_alignment: 16, + preferred_alignment: 16, + })); + assert!(layout.integer_layouts.contains(&IntegerLayout { + size: 32, + abi_alignment: 32, + preferred_alignment: 32, + })); + assert!(layout.integer_layouts.contains(&IntegerLayout { + size: 64, + abi_alignment: 32, + preferred_alignment: 64, + })); + + // For vector layouts we only have the defaults + assert!(layout.vector_layouts.contains(&VectorLayout { + size: 64, + abi_alignment: 64, + preferred_alignment: 64, + })); + assert!(layout.vector_layouts.contains(&VectorLayout { + size: 128, + abi_alignment: 128, + preferred_alignment: 128, + })); + + // For float layouts we also use the defaults + assert!(layout.float_layouts.contains(&FloatLayout { + size: 16, + abi_alignment: 16, + preferred_alignment: 16, + })); + assert!(layout.float_layouts.contains(&FloatLayout { + size: 32, + abi_alignment: 32, + preferred_alignment: 32, + })); + assert!(layout.float_layouts.contains(&FloatLayout { + size: 64, + abi_alignment: 64, + preferred_alignment: 64, + })); + assert!(layout.float_layouts.contains(&FloatLayout { + size: 128, + abi_alignment: 128, + preferred_alignment: 128, + })); + + // For the aggregate layout we have the default + assert_eq!( + layout.aggregate_layout, + AggregateLayout { + abi_alignment: 0, + preferred_alignment: 64, + } + ); + + // For the function pointer layout we also have our default + assert_eq!( + layout.function_pointer_layout, + FunctionPointerLayout { + ptr_type: FunctionPointerType::Independent, + abi_alignment: 64, + } + ); + + // For native integer widths we should have the default + assert_eq!( + layout.native_integer_widths, + NativeIntegerWidths { + widths: vec![32, 64], + } + ); + + // And no address spaces should be using non-integral pointers + assert_eq!( + layout.nointptr_address_spaces, + NonIntegralPointerAddressSpaces { + address_spaces: Vec::new(), + } + ); + } + + #[test] + fn can_parse_endianness_segment() { + // Failures + assert!(Endianness::parser().parse("foo").is_err()); + + // Successes + assert_eq!( + Endianness::parser() + .parse("e") + .expect("Little endian spec did not parse"), + Endianness::Little + ); + assert_eq!( + Endianness::parser() + .parse("E") + .expect("Big endian spec did not parse"), + Endianness::Big + ); + } + + #[test] + fn can_parse_mangling_segment() { + // Failures + assert!(Mangling::parser().parse("m:").is_err()); + assert!(Mangling::parser().parse("m:f").is_err()); + assert!(Mangling::parser().parse("f").is_err()); + + // Successes + assert_eq!(Mangling::parser().parse("m:a"), Ok(Mangling::XCOFF)); + assert_eq!(Mangling::parser().parse("m:e"), Ok(Mangling::ELF)); + assert_eq!(Mangling::parser().parse("m:l"), Ok(Mangling::GOFF)); + assert_eq!(Mangling::parser().parse("m:m"), Ok(Mangling::MIPS)); + assert_eq!(Mangling::parser().parse("m:o"), Ok(Mangling::MachO)); + assert_eq!(Mangling::parser().parse("m:w"), Ok(Mangling::COFF)); + assert_eq!(Mangling::parser().parse("m:x"), Ok(Mangling::COFF86)); + } + + #[test] + fn 
can_parse_stack_alignment_segment() { + // Failures + assert!(parsing::stack_alignment().parse("m:").is_err()); + assert!(parsing::stack_alignment().parse("S").is_err()); + assert!(parsing::stack_alignment().parse("S15").is_err()); + + // Successes + assert_eq!(parsing::stack_alignment().parse("S8"), Ok(8)); + assert_eq!(parsing::stack_alignment().parse("S32"), Ok(32)); + assert_eq!(parsing::stack_alignment().parse("S64"), Ok(64)); + assert_eq!(parsing::stack_alignment().parse("S128"), Ok(128)); + assert_eq!(parsing::stack_alignment().parse("S256"), Ok(256)); + } + + #[test] + fn can_parse_program_address_space() { + // Failures + assert!(parsing::program_address_space().parse("PA").is_err()); + assert!(parsing::program_address_space().parse("P").is_err()); + + // Successes + assert_eq!(parsing::program_address_space().parse("P1"), Ok(1)); + assert_eq!(parsing::program_address_space().parse("P0"), Ok(0)); + } + + #[test] + fn can_parse_global_address_space() { + // Failures + assert!(parsing::global_address_space().parse("GA").is_err()); + assert!(parsing::global_address_space().parse("G").is_err()); + + // Successes + assert_eq!(parsing::global_address_space().parse("G1"), Ok(1)); + assert_eq!(parsing::global_address_space().parse("G0"), Ok(0)); + } + + #[test] + fn can_parse_alloc_address_space() { + // Failures + assert!(parsing::alloc_address_space().parse("AA").is_err()); + assert!(parsing::alloc_address_space().parse("A").is_err()); + + // Successes + assert_eq!(parsing::alloc_address_space().parse("A1"), Ok(1)); + assert_eq!(parsing::alloc_address_space().parse("A0"), Ok(0)); + } + + #[test] + fn can_parse_pointer_spec() { + // Failures + assert!(PointerLayout::parser().parse("p[1]:64:128:128:68").is_err()); + assert!(PointerLayout::parser().parse("p[]:64:128:128:32").is_err()); + + // Successes + assert_eq!( + PointerLayout::parser().parse("p[1]:64:128:128:64"), + Ok(PointerLayout { + address_space: 1, + size: 64, + abi_alignment: 128, + preferred_alignment: 128, + index_size: 64, + }) + ); + assert_eq!( + PointerLayout::parser().parse("p:64:128:128:64"), + Ok(PointerLayout { + address_space: 0, + size: 64, + abi_alignment: 128, + preferred_alignment: 128, + index_size: 64, + }) + ); + assert_eq!( + PointerLayout::parser().parse("p:64:128"), + Ok(PointerLayout { + address_space: 0, + size: 64, + abi_alignment: 128, + preferred_alignment: 128, + index_size: 64, + }) + ); + } + + #[test] + fn can_parse_integer_spec() { + // Failures + assert!(IntegerLayout::parser().parse("i").is_err()); + assert!(IntegerLayout::parser().parse("i8:16").is_err()); + + // Successes + assert_eq!( + IntegerLayout::parser().parse("i8:8"), + Ok(IntegerLayout { + size: 8, + abi_alignment: 8, + preferred_alignment: 8, + }) + ); + assert_eq!( + IntegerLayout::parser().parse("i32:64"), + Ok(IntegerLayout { + size: 32, + abi_alignment: 64, + preferred_alignment: 64, + }) + ); + assert_eq!( + IntegerLayout::parser().parse("i32:64:128"), + Ok(IntegerLayout { + size: 32, + abi_alignment: 64, + preferred_alignment: 128, + }) + ); + } + + #[test] + fn can_parse_vector_spec() { + // Failures + assert!(VectorLayout::parser().parse("v").is_err()); + assert!(VectorLayout::parser().parse("v8").is_err()); + + // Successes + assert_eq!( + VectorLayout::parser().parse("v8:8"), + Ok(VectorLayout { + size: 8, + abi_alignment: 8, + preferred_alignment: 8, + }) + ); + assert_eq!( + VectorLayout::parser().parse("v32:64"), + Ok(VectorLayout { + size: 32, + abi_alignment: 64, + preferred_alignment: 64, + }) + ); + assert_eq!( + 
VectorLayout::parser().parse("v32:64:128"), + Ok(VectorLayout { + size: 32, + abi_alignment: 64, + preferred_alignment: 128, + }) + ); + } + + #[test] + fn can_parse_float_spec() { + // Failures + assert!(FloatLayout::parser().parse("f").is_err()); + assert!(FloatLayout::parser().parse("f8:16").is_err()); + assert!(FloatLayout::parser().parse("f96:128").is_err()); + + // Successes + assert_eq!( + FloatLayout::parser().parse("f16:16"), + Ok(FloatLayout { + size: 16, + abi_alignment: 16, + preferred_alignment: 16, + }) + ); + assert_eq!( + FloatLayout::parser().parse("f32:64"), + Ok(FloatLayout { + size: 32, + abi_alignment: 64, + preferred_alignment: 64, + }) + ); + assert_eq!( + FloatLayout::parser().parse("f32:64:128"), + Ok(FloatLayout { + size: 32, + abi_alignment: 64, + preferred_alignment: 128, + }) + ); + } + + #[test] + fn can_parse_aggregate_spec() { + // Failures + assert!(FloatLayout::parser().parse("a").is_err()); + + // Successes + assert_eq!( + AggregateLayout::parser().parse("a64"), + Ok(AggregateLayout { + abi_alignment: 64, + preferred_alignment: 64, + }) + ); + assert_eq!( + AggregateLayout::parser().parse("a64:128"), + Ok(AggregateLayout { + abi_alignment: 64, + preferred_alignment: 128, + }) + ); + } + + #[test] + fn can_parse_function_pointer_type() { + // Failures + assert!(FunctionPointerType::parser().parse("a").is_err()); + + // Successes + assert_eq!( + FunctionPointerType::parser().parse("i"), + Ok(FunctionPointerType::Independent) + ); + assert_eq!( + FunctionPointerType::parser().parse("n"), + Ok(FunctionPointerType::Multiple) + ); + } + + #[test] + fn can_parse_function_pointer_spec() { + // Failures + assert!(FunctionPointerLayout::parser().parse("Fi").is_err()); + assert!(FunctionPointerLayout::parser().parse("Fb64").is_err()); + + // Successes + assert_eq!( + FunctionPointerLayout::parser().parse("Fi64"), + Ok(FunctionPointerLayout { + ptr_type: FunctionPointerType::Independent, + abi_alignment: 64, + }) + ); + assert_eq!( + FunctionPointerLayout::parser().parse("Fi128"), + Ok(FunctionPointerLayout { + ptr_type: FunctionPointerType::Independent, + abi_alignment: 128, + }) + ); + assert_eq!( + FunctionPointerLayout::parser().parse("Fn32"), + Ok(FunctionPointerLayout { + ptr_type: FunctionPointerType::Multiple, + abi_alignment: 32, + }) + ); + } + + #[test] + fn can_parse_native_integer_widths_spec() { + // Failures + assert!(NativeIntegerWidths::parser().parse("Fi").is_err()); + assert!(NativeIntegerWidths::parser().parse("n").is_err()); + + // Successes + assert_eq!( + NativeIntegerWidths::parser().parse("n64"), + Ok(NativeIntegerWidths { widths: vec![64] }) + ); + assert_eq!( + NativeIntegerWidths::parser().parse("n16:32:64"), + Ok(NativeIntegerWidths { + widths: vec![16, 32, 64], + }) + ); + } + + #[test] + fn can_parse_nointptr_address_spaces_spec() { + // Failures + assert!(NonIntegralPointerAddressSpaces::parser().parse("ni").is_err()); + assert!(NonIntegralPointerAddressSpaces::parser().parse("ni:").is_err()); + assert!(NonIntegralPointerAddressSpaces::parser().parse("ni:0").is_err()); + + // Successes + assert_eq!( + NonIntegralPointerAddressSpaces::parser().parse("ni:1"), + Ok(NonIntegralPointerAddressSpaces { + address_spaces: vec![1], + }) + ); + assert_eq!( + NonIntegralPointerAddressSpaces::parser().parse("ni:1:3:5"), + Ok(NonIntegralPointerAddressSpaces { + address_spaces: vec![1, 3, 5], + }) + ); + } +} diff --git a/crates/compiler/src/llvm/mod.rs b/crates/compiler/src/llvm/mod.rs new file mode 100644 index 0000000..d558e0a --- /dev/null +++ 
b/crates/compiler/src/llvm/mod.rs @@ -0,0 +1,38 @@ +//! Utilities for working with LLVM concepts inside the codebase. They are +//! intended to bridge between the worlds of LLVM and the worlds of our compiler +//! itself, and hence aid in analysis and transformation of the LLVM IR. + +use crate::llvm::typesystem::LLVMType; + +pub mod data_layout; +pub mod special_intrinsics; +pub mod typesystem; + +/// The type of top-level entry that is encountered in the module. +#[derive(Clone, Copy, Debug, PartialEq, PartialOrd)] +pub enum TopLevelEntryKind { + /// A declaration of an external symbol including the name, attributes, and + /// signature. + Declaration, + + /// A definition of symbol including the name, attributes, signature, and + /// **body** (consisting of basic blocks). + Definition, +} + +/// A trait representing objects that have an [`LLVMType`] ascribed to them. +/// +/// This is to enable a uniform interface for richer compilation and metadata +/// structures to easily provide their type to a caller. +pub trait HasLLVMType { + /// Gets the LLVM type for the implementing object. + fn get_type(&self) -> LLVMType; +} + +/// An `LLVMType` _obviously_ has an LLVM type, so we provide a blanket +/// implementation here. +impl HasLLVMType for LLVMType { + fn get_type(&self) -> LLVMType { + self.clone() + } +} diff --git a/crates/compiler/src/llvm/special_intrinsics.rs b/crates/compiler/src/llvm/special_intrinsics.rs new file mode 100644 index 0000000..3fd1562 --- /dev/null +++ b/crates/compiler/src/llvm/special_intrinsics.rs @@ -0,0 +1,123 @@ +//! Unfortunately [`inkwell`] does not deal well with `metadata`-typed function +//! arguments, despite them being valid argument types for function-typed values +//! in LLVM IR. For now, we handle them by delegating to known signatures for +//! these functions, rather than trying to introspect the functions themselves. +//! +//! See [this issue](https://github.com/TheDan64/inkwell/issues/546) for more +//! information. + +use std::collections::HashMap; + +use inkwell::{module::Linkage, GlobalVisibility}; + +use crate::{ + llvm::{typesystem::LLVMType, TopLevelEntryKind}, + pass::analysis::module_map::FunctionInfo, +}; + +/// A registry of LLVM intrinsic functions that need to be handled specially. +#[derive(Clone, Debug, PartialEq)] +pub struct SpecialIntrinsics { + /// The intrinsics that need to be handled specially. + intrinsics: HashMap, +} + +impl SpecialIntrinsics { + /// Constructs the special intrinsics mapping, providing the appropriate + /// [`FunctionInfo`] metadata for the intrinsics that we insert. 
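The `HasLLVMType` trait introduced in `llvm/mod.rs` above gives richer compilation and metadata structures one uniform way to report their type; the intrinsics-registry constructor continues immediately after this aside. The sketch below implements the trait for a hypothetical wrapper; the `TypedSlot` struct and the `ltc_compiler` crate path are assumptions.

```rust
use ltc_compiler::llvm::{typesystem::LLVMType, HasLLVMType}; // crate path assumed

/// A hypothetical metadata structure that carries its own LLVM type.
struct TypedSlot {
    name: String,
    typ: LLVMType,
}

impl HasLLVMType for TypedSlot {
    fn get_type(&self) -> LLVMType {
        // `LLVMType` is used with value semantics, so handing out a clone is
        // the expected shape rather than returning a reference.
        self.typ.clone()
    }
}

/// Works uniformly over anything that knows its LLVM type.
fn is_pointer_like(item: &impl HasLLVMType) -> bool {
    item.get_type() == LLVMType::ptr
}

fn main() {
    let slot = TypedSlot { name: "retval".into(), typ: LLVMType::ptr };
    assert!(is_pointer_like(&slot));
    println!("{} is pointer-like", slot.name);
}
```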
+ #[must_use] + pub fn new() -> Self { + let mut intrinsics = HashMap::new(); + intrinsics.insert( + "llvm.dbg.declare".to_string(), + FunctionInfo { + kind: TopLevelEntryKind::Declaration, + intrinsic: true, + typ: LLVMType::make_function( + LLVMType::void, + &[LLVMType::Metadata, LLVMType::Metadata, LLVMType::Metadata], + ), + linkage: Linkage::External, + visibility: GlobalVisibility::Default, + }, + ); + intrinsics.insert( + "llvm.dbg.value".to_string(), + FunctionInfo { + kind: TopLevelEntryKind::Declaration, + intrinsic: true, + typ: LLVMType::make_function( + LLVMType::void, + &[LLVMType::Metadata, LLVMType::Metadata, LLVMType::Metadata], + ), + linkage: Linkage::External, + visibility: GlobalVisibility::Default, + }, + ); + intrinsics.insert( + "llvm.dbg.assign".to_string(), + FunctionInfo { + kind: TopLevelEntryKind::Declaration, + intrinsic: true, + typ: LLVMType::make_function( + LLVMType::void, + &[ + LLVMType::Metadata, + LLVMType::Metadata, + LLVMType::Metadata, + LLVMType::Metadata, + LLVMType::Metadata, + ], + ), + linkage: Linkage::External, + visibility: GlobalVisibility::Default, + }, + ); + + Self { intrinsics } + } + + /// Gets the function information for `function_name` if it exists, and + /// returns [`None`] otherwise. + #[must_use] + pub fn info_for(&self, function_name: &str) -> Option { + self.intrinsics.get(function_name).cloned() + } + + /// Gets the function information for `function_name` if it exists. + /// + /// # Panics + /// + /// If `function_name` does not exist in the special intrinsics container. + #[must_use] + pub fn info_for_unchecked(&self, function_name: &str) -> FunctionInfo { + self.info_for(function_name) + .unwrap_or_else(|| panic!("No information found for {function_name}")) + } +} + +impl Default for SpecialIntrinsics { + fn default() -> Self { + Self::new() + } +} + +#[cfg(test)] +mod test { + use crate::llvm::special_intrinsics::SpecialIntrinsics; + + #[test] + fn contains_dbg_declare() { + assert!(SpecialIntrinsics::new().intrinsics.contains_key("llvm.dbg.declare")) + } + + #[test] + fn contains_dbg_value() { + assert!(SpecialIntrinsics::new().intrinsics.contains_key("llvm.dbg.value")) + } + + #[test] + fn contains_dbg_assign() { + assert!(SpecialIntrinsics::new().intrinsics.contains_key("llvm.dbg.assign")) + } +} diff --git a/crates/compiler/src/llvm/typesystem.rs b/crates/compiler/src/llvm/typesystem.rs new file mode 100644 index 0000000..16d0159 --- /dev/null +++ b/crates/compiler/src/llvm/typesystem.rs @@ -0,0 +1,534 @@ +//! The compiler's internal representation of LLVM types, without being tied to +//! the context as the [`BasicTypeEnum`] is. + +use std::fmt::{Display, Formatter}; + +use inkwell::types::{ + AnyTypeEnum, + ArrayType, + BasicTypeEnum, + FloatType, + FunctionType, + IntType, + PointerType, + StructType, + VectorType, + VoidType, +}; +use itertools::Itertools; +use ltc_errors::{compile, compile::Error}; + +use crate::constant::BYTE_SIZE; + +/// A representation of the LLVM [types](https://llvm.org/docs/LangRef.html#type-system) +/// for use within the compiler. +/// +/// # Why Not Use `BasicTypeEnum`? +/// +/// The definition of Inkwell's [`BasicTypeEnum`] and [`AnyTypeEnum`] depends on +/// being tied directly to the host LLVM context. This is not something we want +/// for metadata that is likely to be passed around liberally within this +/// compiler and potentially even cross program boundaries. 
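Stepping back to the intrinsics registry above for a moment (the type-system discussion resumes below): the intended lookup shape is to consult the registry first and only introspect the LLVM function when it is not one of the specially-handled debug intrinsics. A small sketch, with the `ltc_compiler` crate path assumed:

```rust
use ltc_compiler::llvm::special_intrinsics::SpecialIntrinsics; // crate path assumed

fn main() {
    let registry = SpecialIntrinsics::new();

    // A known debug intrinsic: the hand-written signature is returned.
    assert!(registry.info_for("llvm.dbg.value").is_some());

    // Anything else misses, so the caller falls back to normal introspection.
    assert!(registry.info_for("llvm.memcpy.p0.p0.i64").is_none());
}
```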
+/// +/// To that end, we convert it to our own internal representation with the +/// knowledge that this static and does not update if the internal LLVM +/// representation changes. +/// +/// We additionally want to restrict the allowable types in our use-case. This +/// enum **does not** match LLVM IR's type system 1:1, instead restricting the +/// allowable types—particularly the integers—to be the ones that we care about. +/// +/// # Value Semantics +/// +/// It is intended that this type is used as having value semantics, and not +/// ever have a reference returned to it. +#[derive(Clone, Debug, Eq, PartialEq, PartialOrd, Ord)] +#[allow(non_camel_case_types)] // To better match the LLVM internal convention +pub enum LLVMType { + /// The boolean type, represented inside LLVM by the `i1` + /// [integer type](https://llvm.org/docs/LangRef.html#integer-type). + bool, + + /// The 8-bit wide [integer type](https://llvm.org/docs/LangRef.html#integer-type). + i8, + + /// The 16-bit wide [integer type](https://llvm.org/docs/LangRef.html#integer-type). + i16, + + /// The 32-bit wide [integer type](https://llvm.org/docs/LangRef.html#integer-type). + i32, + + /// The 64-bit wide [integer type](https://llvm.org/docs/LangRef.html#integer-type). + i64, + + /// The 128-bit wide [integer type](https://llvm.org/docs/LangRef.html#integer-type). + i128, + + /// The IEEE-754 `binary16` [floating point type](https://llvm.org/docs/LangRef.html#floating-point-types). + half, + + /// The IEEE-754 `binary32` [floating point type](https://llvm.org/docs/LangRef.html#floating-point-types). + float, + + /// The IEEE-754 `binary64` [floating point type](https://llvm.org/docs/LangRef.html#floating-point-types). + double, + + /// Used to specify locations in memory as described in the + /// [LLVM IR reference](https://llvm.org/docs/LangRef.html#pointer-type). + /// + /// Note that pointers in our use only support the base address space, and + /// do not specify the corresponding pointee type as was available in + /// earlier versions of LLVM. + ptr, + + /// A [type](https://llvm.org/docs/LangRef.html#void-type) that does not + /// represent any value and has no size. + void, + + /// An [array](https://llvm.org/docs/LangRef.html#array-type) is a + /// sequential arrangement of a number of elements of the given type + /// linearly in memory. + Array { + /// The number of elements in the array type. + count: usize, + + /// The type of elements in the array type. + typ: Box, + }, + + /// A [structure](https://llvm.org/docs/LangRef.html#structure-type) + /// represents a number of elements together in memory. + /// + /// Note that struct elements do not have names, and can only be accessed by + /// index. This makes LLVM struct types far more akin to what we call a + /// Tuple in most languages. + Structure { + /// If the structure is packed, it has one-byte alignment with no + /// padding between elements. + /// + /// If it is not packed, then the padding and alignment of struct + /// elements is given by the module's data-layout string. + packed: bool, + + /// The element types in the structure type. + /// + /// The order is semantically meaninful here. + elements: Vec, + }, + + /// A [function](https://llvm.org/docs/LangRef.html#function-type) is akin + /// to a function signature. + Function { + /// The type returned from the function. + return_type: Box, + + /// The types of the parameters to the function. + /// + /// Note that these are never named, and are purely matched + /// positionally. 
+ parameter_types: Vec, + }, + + /// Embedded [metadata](https://llvm.org/docs/LangRef.html#metadata-type) + /// used as a value has this type. + Metadata, +} + +/// Additional utility constructors for creating the compound types without +/// having to manage boxing manually. +impl LLVMType { + /// Builds an array type containing the provided `elem_count` number of + /// elements of type `elem_type`. + #[must_use] + pub fn make_array(elem_count: usize, elem_type: LLVMType) -> Self { + Self::Array { + count: elem_count, + typ: Box::new(elem_type), + } + } + + /// Creates a struct type from the provided `elem_types` and whether it is + /// `packed`. + #[must_use] + pub fn make_struct(packed: bool, elem_types: &[LLVMType]) -> Self { + Self::Structure { + packed, + elements: Vec::from(elem_types), + } + } + + /// Creates a function type from the provided `return_type` and + /// `param_types`. + #[must_use] + pub fn make_function(return_type: LLVMType, param_types: &[LLVMType]) -> Self { + Self::Function { + return_type: Box::new(return_type), + parameter_types: Vec::from(param_types), + } + } +} + +/// Operations for working with LLVM types, such as asserting properties on +/// them, or processing them. +impl LLVMType { + /// Checks if the LLVM type represented by `self` unifies with the type + /// represented by `other`. + /// + /// Please note that this is currently purely an equality check. It exists + /// so that in the future we can seamlessly implement more complex + /// unification rules if needed. + #[must_use] + pub fn unifies_with(&self, other: &LLVMType) -> bool { + self == other + } + + /// Returns `true` if `self` is a primitive type, and `false` otherwise. + #[must_use] + pub fn is_primitive(&self) -> bool { + matches!( + self, + Self::bool + | Self::i8 + | Self::i32 + | Self::i64 + | Self::i128 + | Self::half + | Self::float + | Self::double + | Self::ptr + | Self::void + | Self::Metadata + ) + } + + /// Returns `true` if `self` is a compound type, and `false` otherwise. + #[must_use] + pub fn is_compound(&self) -> bool { + !self.is_primitive() + } + + /// Returns `true` if `self` is an integral type, and `false` otherwise. + #[must_use] + pub fn is_integral(&self) -> bool { + matches!( + self, + Self::bool | Self::i8 | Self::i16 | Self::i32 | Self::i64 | Self::i128 + ) + } + + /// Returns `true` if `self` is a floating-point type, and `false` + /// otherwise. + #[must_use] + pub fn is_float(&self) -> bool { + matches!(self, Self::half | Self::float | Self::double) + } +} + +/// This attempts to match the LLVM representations for these types where it is +/// reasonable. +/// +/// For Array types we currently use the Rust syntax as that is clearer to read +/// than the LLVM product-style syntax. 
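Before the `Display` implementation that follows, a short sketch of how the constructors and predicates above combine. Everything in it uses only the API defined in this file; the `ltc_compiler` crate path is assumed.

```rust
use ltc_compiler::llvm::typesystem::LLVMType; // crate path assumed

fn main() {
    // fn(i64, ptr) -> { i64, bool }, built without any manual boxing.
    let signature = LLVMType::make_function(
        LLVMType::make_struct(false, &[LLVMType::i64, LLVMType::bool]),
        &[LLVMType::i64, LLVMType::ptr],
    );

    // Compound versus primitive classification.
    assert!(signature.is_compound());
    assert!(LLVMType::i64.is_integral());
    assert!(LLVMType::double.is_float());

    // Unification is currently plain structural equality.
    let same = LLVMType::make_function(
        LLVMType::make_struct(false, &[LLVMType::i64, LLVMType::bool]),
        &[LLVMType::i64, LLVMType::ptr],
    );
    assert!(signature.unifies_with(&same));
}
```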
+impl Display for LLVMType { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + let result = match self { + LLVMType::bool => "bool".to_string(), + LLVMType::i8 => "i8".to_string(), + LLVMType::i16 => "i16".to_string(), + LLVMType::i32 => "i32".to_string(), + LLVMType::i64 => "i64".to_string(), + LLVMType::i128 => "i128".to_string(), + LLVMType::half => "half".to_string(), + LLVMType::float => "float".to_string(), + LLVMType::double => "double".to_string(), + LLVMType::ptr => "ptr".to_string(), + LLVMType::void => "void".to_string(), + LLVMType::Metadata => "metadata".to_string(), + LLVMType::Array { count, typ: ty } => { + let ty_str = ty.to_string(); + format!("[{ty_str}; {count}]") + } + LLVMType::Structure { packed, elements } => { + let elem_strs = elements.iter().map(std::string::ToString::to_string).join(", "); + if *packed { + format!("<{{ {elem_strs} }}>") + } else { + format!("{{ {elem_strs} }}") + } + } + LLVMType::Function { + return_type, + parameter_types, + } => { + let params_string = parameter_types + .iter() + .map(std::string::ToString::to_string) + .join(", "); + format!("({params_string}) -> {return_type}") + } + }; + + writeln!(f, "{result}") + } +} + +/// Conversion from Inkwell's generic type enum to our type language. +impl<'ctx> TryFrom> for LLVMType { + type Error = compile::Error; + + fn try_from(value: AnyTypeEnum<'ctx>) -> Result { + Self::try_from(&value) + } +} + +/// Conversion from Inkwell's generic type enum to our type language. +impl<'ctx> TryFrom<&AnyTypeEnum<'ctx>> for LLVMType { + type Error = compile::Error; + + fn try_from(value: &AnyTypeEnum<'ctx>) -> Result { + match value { + AnyTypeEnum::ArrayType(array_type) => Self::try_from(array_type), + AnyTypeEnum::FloatType(float_type) => Self::try_from(float_type), + AnyTypeEnum::FunctionType(fn_ty) => Self::try_from(fn_ty), + AnyTypeEnum::IntType(int_type) => Self::try_from(int_type), + AnyTypeEnum::PointerType(ptr_type) => Self::try_from(ptr_type), + AnyTypeEnum::StructType(struct_type) => Self::try_from(struct_type), + AnyTypeEnum::VoidType(void_type) => Self::try_from(void_type), + AnyTypeEnum::VectorType(vector_type) => Self::try_from(vector_type), + } + } +} + +/// Conversion from Inkwell's basic type enum to our type language. +impl<'ctx> TryFrom> for LLVMType { + type Error = compile::Error; + + fn try_from(value: BasicTypeEnum<'ctx>) -> Result { + Self::try_from(&value) + } +} + +/// Conversion from Inkwell's basic type enum to our type language. +impl<'ctx> TryFrom<&BasicTypeEnum<'ctx>> for LLVMType { + type Error = compile::Error; + + fn try_from(value: &BasicTypeEnum<'ctx>) -> Result { + match value { + BasicTypeEnum::ArrayType(array_type) => Self::try_from(array_type), + BasicTypeEnum::FloatType(float_type) => Self::try_from(float_type), + BasicTypeEnum::IntType(int_type) => Self::try_from(int_type), + BasicTypeEnum::PointerType(ptr_type) => Self::try_from(ptr_type), + BasicTypeEnum::StructType(struct_type) => Self::try_from(struct_type), + BasicTypeEnum::VectorType(vector_type) => Self::try_from(vector_type), + } + } +} + +/// Conversion from Inkwell's array type to our type language. +impl<'ctx> TryFrom> for LLVMType { + type Error = compile::Error; + + fn try_from(value: ArrayType<'ctx>) -> Result { + Self::try_from(&value) + } +} + +/// Conversion from Inkwell's array type to our type language. 
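The `TryFrom` conversions in this file (the integer, float, and pointer cases appear just below) are how the compiler lifts Inkwell's context-bound types into this context-free representation. A sketch of the conversion, assuming the `ltc_compiler` crate path and an Inkwell context created on the spot:

```rust
use inkwell::context::Context;
use ltc_compiler::llvm::typesystem::LLVMType; // crate path assumed

fn main() {
    let ctx = Context::create();

    // Scalar conversions.
    assert_eq!(
        LLVMType::try_from(ctx.i64_type()).expect("i64 is supported"),
        LLVMType::i64
    );
    assert_eq!(
        LLVMType::try_from(ctx.f64_type()).expect("f64 is supported"),
        LLVMType::double
    );

    // Arrays convert element-wise into the boxed representation.
    assert_eq!(
        LLVMType::try_from(ctx.i8_type().array_type(4)).expect("array of i8"),
        LLVMType::make_array(4, LLVMType::i8)
    );
}
```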
+impl<'ctx> TryFrom<&ArrayType<'ctx>> for LLVMType { + type Error = compile::Error; + + fn try_from(value: &ArrayType<'ctx>) -> Result { + let length = value.len() as usize; + let elem_type = Self::try_from(value.get_element_type())?; + Ok(Self::make_array(length, elem_type)) + } +} + +/// Conversion from Inkwell's generic float type to our specific float types. +impl<'ctx> TryFrom> for LLVMType { + type Error = compile::Error; + + fn try_from(value: FloatType<'ctx>) -> Result { + Self::try_from(&value) + } +} + +/// Conversion from Inkwell's generic float type to our specific float types. +impl<'ctx> TryFrom<&FloatType<'ctx>> for LLVMType { + type Error = compile::Error; + + fn try_from(value: &FloatType<'ctx>) -> Result { + #[allow(clippy::cast_possible_wrap)] // Our byte size should never be large enough + let float_size_bits = value + .size_of() + .get_sign_extended_constant() + .ok_or(Error::UnsupportedType(value.to_string()))? + * BYTE_SIZE as i64; + let ret_val = match float_size_bits { + 16 => Self::half, + 32 => Self::float, + 64 => Self::double, + _ => Err(Error::UnsupportedType(value.to_string()))?, + }; + Ok(ret_val) + } +} + +/// Conversion from Inkwell's generic integer type to our specific integer +/// types. +impl<'ctx> TryFrom> for LLVMType { + type Error = compile::Error; + + fn try_from(value: IntType<'ctx>) -> Result { + Self::try_from(&value) + } +} + +/// Conversion from Inkwell's generic integer type to our specific integer +/// types. +impl<'ctx> TryFrom<&IntType<'ctx>> for LLVMType { + type Error = compile::Error; + + fn try_from(value: &IntType<'ctx>) -> Result { + let res = match value.get_bit_width() { + 1 => Self::bool, + 8 => Self::i8, + 16 => Self::i16, + 32 => Self::i32, + 64 => Self::i64, + 128 => Self::i128, + _ => Err(Error::UnsupportedType(value.to_string()))?, + }; + + Ok(res) + } +} + +/// Conversion from Inkwell's pointer type to our type language. +/// +/// We centralize it here despite it being trivial as this gives us one place to +/// potentially need to change if we ever add type system support for typed +/// pointers. Otherwise, we would have to change every site performing +/// conversion of pointer types. +impl<'ctx> TryFrom> for LLVMType { + type Error = compile::Error; + + fn try_from(value: PointerType<'ctx>) -> Result { + Self::try_from(&value) + } +} + +/// Conversion from Inkwell's pointer type to our type language. +/// +/// We centralize it here despite it being trivial as this gives us one place to +/// potentially need to change if we ever add type system support for typed +/// pointers. Otherwise, we would have to change every site performing +/// conversion of pointer types. +impl<'ctx> TryFrom<&PointerType<'ctx>> for LLVMType { + type Error = compile::Error; + + fn try_from(_: &PointerType<'ctx>) -> Result { + Ok(Self::ptr) + } +} + +/// Conversion from Inkwell's struct type to our type language. +impl<'ctx> TryFrom> for LLVMType { + type Error = compile::Error; + + fn try_from(value: StructType<'ctx>) -> Result { + Self::try_from(&value) + } +} + +/// Conversion from Inkwell's struct type to our type language. +impl<'ctx> TryFrom<&StructType<'ctx>> for LLVMType { + type Error = compile::Error; + + fn try_from(value: &StructType<'ctx>) -> Result { + let field_types: Vec = value + .get_field_types() + .iter() + .map(Self::try_from) + .collect::, Error>>()?; + let packed = value.is_packed(); + Ok(Self::make_struct(packed, &field_types)) + } +} + +/// Conversion from Inkwell's vector type to our type language. 
+/// +/// Currently, our type language **cannot represent** the SIMD vector types, so +/// this operation will error. It exists to ensure that in the future we can +/// seamlessly add support without having to change multiple conversion sites +/// that would currently need to produce errors. +impl<'ctx> TryFrom> for LLVMType { + type Error = compile::Error; + + fn try_from(value: VectorType<'ctx>) -> Result { + Self::try_from(&value) + } +} + +/// Conversion from Inkwell's vector type to our type language. +/// +/// Currently, our type language **cannot represent** the SIMD vector types, so +/// this operation will error. It exists to ensure that in the future we can +/// seamlessly add support without having to change multiple conversion sites +/// that would currently need to produce errors. +impl<'ctx> TryFrom<&VectorType<'ctx>> for LLVMType { + type Error = compile::Error; + + fn try_from(value: &VectorType<'ctx>) -> Result { + Err(Error::UnsupportedType(value.to_string()))? + } +} + +/// Conversion from Inkwell's function type to our type language. +impl<'ctx> TryFrom> for LLVMType { + type Error = compile::Error; + + fn try_from(value: FunctionType<'ctx>) -> Result { + Self::try_from(&value) + } +} + +/// Conversion from Inkwell's function type to our type language. +impl<'ctx> TryFrom<&FunctionType<'ctx>> for LLVMType { + type Error = compile::Error; + + fn try_from(value: &FunctionType<'ctx>) -> Result { + let return_type = value.get_return_type().map_or(Ok(LLVMType::void), Self::try_from)?; + let param_types = value + .get_param_types() + .iter() + .map(Self::try_from) + .collect::, Error>>()?; + + Ok(Self::make_function(return_type, ¶m_types)) + } +} + +/// Conversion from Inkwell's void type to our type language. +/// +/// We centralize this in a conversion to ensure that it is consistent at all +/// use sites. +impl<'ctx> TryFrom> for LLVMType { + type Error = compile::Error; + + fn try_from(value: VoidType<'ctx>) -> Result { + Self::try_from(&value) + } +} + +/// Conversion from Inkwell's void type to our type language. +/// +/// We centralize this in a conversion to ensure that it is consistent at all +/// use sites. +impl<'ctx> TryFrom<&VoidType<'ctx>> for LLVMType { + type Error = compile::Error; + + fn try_from(_: &VoidType<'ctx>) -> Result { + Ok(Self::void) + } +} diff --git a/crates/compiler/src/pass/analysis/mod.rs b/crates/compiler/src/pass/analysis/mod.rs new file mode 100644 index 0000000..e922cb3 --- /dev/null +++ b/crates/compiler/src/pass/analysis/mod.rs @@ -0,0 +1,5 @@ +//! Analysis passes are those that do not change the underlying IR structure, +//! but instead generate some kind of data that can be read by downstream +//! functionality to make decisions on the basis of. + +pub mod module_map; diff --git a/crates/compiler/src/pass/analysis/module_map.rs b/crates/compiler/src/pass/analysis/module_map.rs new file mode 100644 index 0000000..345dc04 --- /dev/null +++ b/crates/compiler/src/pass/analysis/module_map.rs @@ -0,0 +1,568 @@ +//! This pass is responsible for generating a map of the top-level structure of +//! an LLVM IR module as described by an `.ll` file. This map encompasses both +//! function and global entries at the module level, as well as data layout +//! description for the module. +//! +//! The [`ModuleMap`] that results from this pass is intended for downstream +//! usage during the compilation step, primarily for consistency checking. 
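As the note above says, the resulting map is aimed at downstream consistency checking. The sketch below shows what such a check could look like against the `ModuleMap` defined later in this file; the helper names and the `ltc_compiler` crate path are assumptions, not part of the patch.

```rust
use ltc_compiler::{
    llvm::typesystem::LLVMType,
    pass::analysis::module_map::ModuleMap,
}; // crate path assumed

/// Returns `true` when the module declares `name` with a type that unifies
/// with the signature the caller intends to emit a call against.
fn call_is_consistent(map: &ModuleMap, name: &str, expected: &LLVMType) -> bool {
    map.functions
        .get(name)
        .map_or(false, |info| info.typ.unifies_with(expected))
}

/// Example: check an `fn(i64, i64) -> i64` call target before lowering it.
fn add_call_is_consistent(map: &ModuleMap) -> bool {
    let expected = LLVMType::make_function(LLVMType::i64, &[LLVMType::i64, LLVMType::i64]);
    call_is_consistent(map, "_ZN19ltc_rust_test_input3add17h828e50e9267cb510E", &expected)
}
```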
+ +use std::collections::HashMap; + +use inkwell::{ + module::{Linkage, Module}, + values::{FunctionValue, GlobalValue}, + GlobalVisibility, +}; +use ltc_errors::compile::{Error, Result}; + +use crate::{ + context::SourceContext, + llvm::{ + data_layout::DataLayout, + special_intrinsics::SpecialIntrinsics, + typesystem::LLVMType, + TopLevelEntryKind, + }, + pass::{ + data::{ConcretePassData, DynPassDataMap, DynPassReturnData, PassDataOps}, + ConcretePass, + Pass, + PassKey, + PassOps, + }, +}; + +/// Generates a map of the top-level structure of an LLVM module. +/// +/// This map includes both functions and globals, as well as the [`DataLayout`] +/// definition for the module. +#[derive(Clone, Debug, PartialEq)] +pub struct BuildModuleMap { + /// The passes that this pass depends upon the results of for its execution. + depends: Vec, + + /// The passes that this pass invalidates the results of by executing. + invalidates: Vec, + + /// LLVM intrinsics that need to be handled specially. + special_intrinsics: SpecialIntrinsics, +} + +impl Default for BuildModuleMap { + fn default() -> Self { + Self::new() + } +} + +/// Constructors that provide ways to create an instance of the +/// [`BuildModuleMap`] pass. +impl BuildModuleMap { + /// Creates a new instance of the module mapping pass. + #[must_use] + pub fn new() -> Self { + // This pass depends on the results of no other passes. + let depends = vec![]; + + // This pass's operation is purely analytical and hence it does not invalidate + // any other passes. + let invalidates = vec![]; + + let special_intrinsics = SpecialIntrinsics::new(); + Self { + depends, + invalidates, + special_intrinsics, + } + } + + /// Creates a new trait object of the module mapping pass. + #[must_use] + pub fn new_dyn() -> Box { + Box::new(Self::new()) + } +} + +/// Functionality that the [`BuildModuleMap`] pass implements. +impl BuildModuleMap { + /// Generates a module map for the provided module in the source context, + /// returning the module map if successful. + /// + /// # Errors + /// + /// - [`Error`] if the module cannot be mapped successfully. + pub fn map_module(&mut self, module: &Module) -> Result { + // We start by analyzing the data-layout of the module, which is important to + // ensure that things match later on and that we are not being asked for things + // that we do not or cannot support. This _may_ currently return errors due to + // unsupported data layouts, but this could potentially be moved into the + // compilation step in the future. + let data_layout = self.process_data_layout(module.get_data_layout().as_str().to_str()?)?; + + // With our data layout obtained successfully, we can build our module map and + // start adding top-level entries to it. + let mut mod_map = ModuleMap::new(data_layout); + + // We then process the global definitions in scope and gather the relevant + // information about them. + module + .get_globals() + .map(|g| self.map_global(&g, &mut mod_map)) + .collect::>>()?; + + // Finally we use the top-level information about functions to create a map of + // the remaining symbols that occur in the module. + module + .get_functions() + .map(|f| self.map_function(&f, &mut mod_map)) + .collect::>>()?; + + // Our map is complete, so we can just return it. + Ok(mod_map) + } + + /// Processes the data layout declaration from the module. 
+ /// + /// # Future-Gazing + /// + /// In the future we may well want to treat our target as a proper Harvard + /// architecture with the separate program address space and allocation + /// address space that it actually has. For now, we are relying on a stopgap + /// target (`aarch64-unknown-none-softfloat`) which does not give us this + /// control, and so we are raising an error if the configuration is + /// incorrect. + /// + /// # Errors + /// + /// - [`Error::UnsupportedAdditionalAddressSpaces`] if the data layout + /// declares pointers in any address space other than the default 0. + /// - [`Error::UnsupportedNonIntegralPointerConfiguration`] if the data + /// layout requests non-integral pointers for any address space. + pub fn process_data_layout(&mut self, layout_string: &str) -> Result { + // We start by analyzing the data-layout of the module, which is important to + // ensure that things match later on and that we are not being asked for things + // that we do not or cannot support. + let data_layout = DataLayout::new(layout_string)?; + + // We do not support split address spaces (for now). Later we may want to use + // this to properly state that our target architecture is a Harvard one rather + // than a Von-Neumann one, but we are leaving it for now. + if data_layout.pointer_layouts.iter().any(|p| p.address_space != 0) + || data_layout.alloc_address_space != 0 + || data_layout.global_address_space != 0 + || data_layout.program_address_space != 0 + { + Err(Error::UnsupportedAdditionalAddressSpaces)?; + } + + // We do not support non-integral pointers in any address space. + if !data_layout.nointptr_address_spaces.address_spaces.is_empty() { + Err(Error::UnsupportedNonIntegralPointerConfiguration)?; + } + + Ok(data_layout) + } + + /// Gathers the data for a module-level global and writes it into the + /// `mod_map`. + /// + /// # Errors + /// + /// - [`Error`] if the global information cannot be gathered successfully. + pub fn map_global(&mut self, global: &GlobalValue, mod_map: &mut ModuleMap) -> Result<()> { + let name = global.get_name().to_str()?.to_string(); + + let kind = if global.is_declaration() { + TopLevelEntryKind::Declaration + } else { + TopLevelEntryKind::Definition + }; + + let typ = global.get_value_type().try_into()?; + let is_const = global.is_constant(); + let alignment = global.get_alignment() as usize; + let linkage = global.get_linkage(); + let visibility = global.get_visibility(); + let is_initialized = global.get_initializer().is_some(); + + let global_info = GlobalInfo { + kind, + typ, + linkage, + visibility, + alignment, + is_const, + is_initialized, + }; + + mod_map.globals.insert(name, global_info); + + Ok(()) + } + + /// Gathers the data for a module-level function and writes it into the + /// `mod_map`. + /// + /// # Errors + /// + /// - [`Error`] if the function information cannot be gathered successfully. 
+ pub fn map_function(&mut self, func: &FunctionValue, mod_map: &mut ModuleMap) -> Result<()> { + let name = func.get_name().to_str()?.to_string(); + + if let Some(intrinsic) = self.special_intrinsics.info_for(&name) { + mod_map.functions.insert(name, intrinsic); + } else { + let kind = if func.as_global_value().is_declaration() { + TopLevelEntryKind::Declaration + } else { + TopLevelEntryKind::Definition + }; + let typ = LLVMType::try_from(func.get_type())?; + let linkage = func.get_linkage(); + let intrinsic = func.get_intrinsic_id() != 0; + let visibility = func.as_global_value().get_visibility(); + let f_info = FunctionInfo { + kind, + intrinsic, + typ, + linkage, + visibility, + }; + + mod_map.functions.insert(name, f_info); + } + + Ok(()) + } +} + +/// We need to be able to run this pass using the pass manager, so we are +/// obligated to implement `PassOps` for it to make this possible. +impl PassOps for BuildModuleMap { + fn run( + &mut self, + context: SourceContext, + _pass_data: &DynPassDataMap, + ) -> Result { + let analysis_result = context.analyze_module(|module| self.map_module(module))?; + Ok(DynPassReturnData::new(context, Box::new(analysis_result))) + } + + fn depends(&self) -> &[PassKey] { + self.depends.as_slice() + } + + fn invalidates(&self) -> &[PassKey] { + self.invalidates.as_slice() + } + + fn dupe(&self) -> Pass { + Box::new(self.clone()) + } +} + +/// We also want to be able to work with the pass when it is not type-erased to +/// `dyn PassOps`, so we are obliged to implement the concrete pass operations +/// trait here too. +impl ConcretePass for BuildModuleMap { + type Data = ModuleMap; +} + +/// The module map that results from executing this analysis pass on an LLVM IR +/// module. +/// +/// It contains information on the module's: +/// +/// - Data layout, as given by the embedded data layout string. +/// - Functions, as given by the function definitions and declarations. +/// - Globals, as given by the global definitions and declarations. +#[derive(Clone, Debug, PartialEq)] +pub struct ModuleMap { + /// The data layout provided for this module. + pub data_layout: DataLayout, + + /// The globals that are contained within the module. + pub globals: HashMap, + + /// The functions that are contained within the module. + pub functions: HashMap, +} + +impl ModuleMap { + /// Creates a new instance of the output data for the module mapping pass. + #[must_use] + pub fn new(data_layout: DataLayout) -> Self { + let functions = HashMap::new(); + let globals = HashMap::new(); + Self { + data_layout, + globals, + functions, + } + } + + /// Creates a new trait object of the output data for the module mapping + /// pass. + #[must_use] + pub fn new_dyn(data_layout: DataLayout) -> Box { + Box::new(Self::new(data_layout)) + } +} + +/// We need to work with this type as a generic piece of pass data. +impl PassDataOps for ModuleMap {} + +/// We also need to work with this type as a piece of _concrete_ pass data for +/// non type-erased workflows. +impl ConcretePassData for ModuleMap { + type Pass = BuildModuleMap; +} + +/// The information necessary to describe the conventions and operations +/// necessary to call a function. +#[derive(Clone, Debug, PartialEq)] +pub struct FunctionInfo { + /// The type of function entity that was encountered here. + pub kind: TopLevelEntryKind, + + /// Set if this function is an LLVM intrinsic, and unset otherwise. + pub intrinsic: bool, + + /// The LLVM type of our function. + pub typ: LLVMType, + + /// The linkage for our function. 
+ pub linkage: Linkage, + + /// The visibility of our function. + pub visibility: GlobalVisibility, +} + +/// The information necessary to describe the conventions and operations +/// necessary to access a global. +#[derive(Clone, Debug, PartialEq)] +pub struct GlobalInfo { + /// The type of global entity that was encountered here. + pub kind: TopLevelEntryKind, + + /// The LLVM type of our global. + pub typ: LLVMType, + + /// The linkage for our global. + pub linkage: Linkage, + + /// The visibility of our global. + pub visibility: GlobalVisibility, + + /// The alignment of the global value. + pub alignment: usize, + + /// `true` if this global is constant, and `false` otherwise. + pub is_const: bool, + + /// `true` if this global is initialized, and `false` otherwise. + pub is_initialized: bool, +} + +#[cfg(test)] +mod test { + use std::path::Path; + + use inkwell::{module::Linkage, GlobalVisibility}; + + use crate::{ + context::SourceContext, + llvm::{data_layout::DataLayout, typesystem::LLVMType, TopLevelEntryKind}, + pass::{analysis::module_map::BuildModuleMap, data::DynPassDataMap, ConcretePass, PassOps}, + }; + + /// A utility function to make it easy to load the testing context in all + /// the tests. + fn get_text_context() -> SourceContext { + SourceContext::create(Path::new(r"input/add.ll")) + .expect("Unable to construct testing source context") + } + + #[test] + fn returns_correct_data_type() -> anyhow::Result<()> { + // Setup + let ctx = get_text_context(); + let data = DynPassDataMap::new(); + let mut pass = BuildModuleMap::new_dyn(); + let dyn_return_data = pass.run(ctx, &data)?; + + // We should be able to get the pass data as the correct associated type. + assert!( + dyn_return_data + .data + .view_as::<::Data>() + .is_some() + ); + + Ok(()) + } + + #[test] + fn discovers_correct_data_layout() -> anyhow::Result<()> { + // Setup + let ctx = get_text_context(); + let data = DynPassDataMap::new(); + let mut pass = BuildModuleMap::new_dyn(); + + let dyn_return_data = pass.run(ctx, &data)?; + let map = dyn_return_data + .data + .view_as::<::Data>() + .unwrap(); + + // The data layout should have been picked up correctly from the module, and we + // know that parsing works, so we check equality + let data_layout = &map.data_layout; + let expected_data_layout = + DataLayout::new("e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128")?; + assert_eq!(data_layout, &expected_data_layout); + + Ok(()) + } + + #[test] + fn discovers_correct_globals() -> anyhow::Result<()> { + let ctx = get_text_context(); + let data = DynPassDataMap::new(); + let mut pass = BuildModuleMap::new_dyn(); + + let dyn_return_data = pass.run(ctx, &data)?; + let map = dyn_return_data + .data + .view_as::<::Data>() + .unwrap(); + let globals = &map.globals; + + // Functions, though technically globals, should not be seen + assert!( + !globals.contains_key(&"_ZN19ltc_rust_test_input3add17h828e50e9267cb510E".to_string()) + ); + assert!(!globals.contains_key(&"llvm.dbg.declare".to_string())); + assert!(!globals.contains_key(&"llvm.uadd.with.overflow.i64".to_string())); + assert!( + !globals.contains_key( + &"_ZN4core9panicking11panic_const24panic_const_add_overflow17he7771b1d81fa091aE" + .to_string() + ) + ); + + // The first global + assert!(globals.contains_key(&"alloc_4190527422e5cc48a15bd1cb4f38f425".to_string())); + let global_1 = globals + .get(&"alloc_4190527422e5cc48a15bd1cb4f38f425".to_string()) + .unwrap(); + assert!(global_1.is_initialized); + assert_eq!(global_1.visibility, GlobalVisibility::Default); + 
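The remaining assertions on this global continue directly below; as an aside, downstream consumers would read the same `GlobalInfo` fields to make placement decisions. A hedged sketch (helper names and the `ltc_compiler` crate path are assumptions):

```rust
use ltc_compiler::{
    llvm::TopLevelEntryKind,
    pass::analysis::module_map::{GlobalInfo, ModuleMap},
}; // crate path assumed

/// A global is a candidate for read-only placement when it is a fully-defined,
/// initialized constant in this module.
fn is_read_only_candidate(global: &GlobalInfo) -> bool {
    global.kind == TopLevelEntryKind::Definition && global.is_const && global.is_initialized
}

/// Collects the names of all such globals in the mapped module.
fn read_only_globals(map: &ModuleMap) -> Vec<&str> {
    map.globals
        .iter()
        .filter(|(_, g)| is_read_only_candidate(g))
        .map(|(name, _)| name.as_str())
        .collect()
}
```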
assert_eq!(global_1.alignment, 1); + assert!(global_1.is_const); + assert_eq!(global_1.linkage, Linkage::Private); + assert_eq!(global_1.kind, TopLevelEntryKind::Definition); + assert_eq!( + global_1.typ, + LLVMType::make_struct(true, &[LLVMType::make_array(33, LLVMType::i8)]) + ); + + // The second global + assert!(globals.contains_key(&"alloc_5b4544c775a23c08ca70c48dd7be27fc".to_string())); + let global_2 = globals + .get(&"alloc_5b4544c775a23c08ca70c48dd7be27fc".to_string()) + .unwrap(); + assert!(global_2.is_initialized); + assert_eq!(global_2.visibility, GlobalVisibility::Default); + assert_eq!(global_2.alignment, 8); + assert!(global_2.is_const); + assert_eq!(global_2.linkage, Linkage::Private); + assert_eq!(global_2.kind, TopLevelEntryKind::Definition); + assert_eq!( + global_2.typ, + LLVMType::make_struct( + true, + &[LLVMType::ptr, LLVMType::make_array(16, LLVMType::i8)] + ) + ); + + Ok(()) + } + + #[test] + fn discovers_correct_functions() -> anyhow::Result<()> { + let ctx = get_text_context(); + let data = DynPassDataMap::new(); + let mut pass = BuildModuleMap::new_dyn(); + + let dyn_return_data = pass.run(ctx, &data)?; + let map = dyn_return_data + .data + .view_as::<::Data>() + .unwrap(); + let functions = &map.functions; + + // First we check that the globals have avoided somehow being recorded as + // functions. + assert!(!functions.contains_key(&"alloc_4190527422e5cc48a15bd1cb4f38f425".to_string())); + assert!(!functions.contains_key(&"alloc_5b4544c775a23c08ca70c48dd7be27fc".to_string())); + + // _ZN19ltc_rust_test_input3add17h828e50e9267cb510E + let rust_test_input = functions + .get(&"_ZN19ltc_rust_test_input3add17h828e50e9267cb510E".to_string()) + .unwrap(); + assert!(!rust_test_input.intrinsic); + assert_eq!(rust_test_input.kind, TopLevelEntryKind::Definition); + assert_eq!(rust_test_input.linkage, Linkage::External); + assert_eq!(rust_test_input.visibility, GlobalVisibility::Default); + assert_eq!( + rust_test_input.typ, + LLVMType::make_function(LLVMType::i64, &[LLVMType::i64, LLVMType::i64]) + ); + + // llvm.dbg.declare + let rust_test_input = functions.get(&"llvm.dbg.declare".to_string()).unwrap(); + assert!(rust_test_input.intrinsic); + assert_eq!(rust_test_input.kind, TopLevelEntryKind::Declaration); + assert_eq!(rust_test_input.linkage, Linkage::External); + assert_eq!(rust_test_input.visibility, GlobalVisibility::Default); + assert_eq!( + rust_test_input.typ, + LLVMType::make_function( + LLVMType::void, + &[LLVMType::Metadata, LLVMType::Metadata, LLVMType::Metadata] + ) + ); + + // llvm.uadd.with.overflow.i64 + let rust_test_input = functions.get(&"llvm.uadd.with.overflow.i64".to_string()).unwrap(); + assert!(rust_test_input.intrinsic); + assert_eq!(rust_test_input.kind, TopLevelEntryKind::Declaration); + assert_eq!(rust_test_input.linkage, Linkage::External); + assert_eq!(rust_test_input.visibility, GlobalVisibility::Default); + assert_eq!( + rust_test_input.typ, + LLVMType::make_function( + LLVMType::make_struct(false, &[LLVMType::i64, LLVMType::bool]), + &[LLVMType::i64, LLVMType::i64] + ) + ); + + // _ZN4core9panicking11panic_const24panic_const_add_overflow17he7771b1d81fa091aE + let rust_test_input = functions + .get( + &"_ZN4core9panicking11panic_const24panic_const_add_overflow17he7771b1d81fa091aE" + .to_string(), + ) + .unwrap(); + assert!(!rust_test_input.intrinsic); + assert_eq!(rust_test_input.kind, TopLevelEntryKind::Declaration); + assert_eq!(rust_test_input.linkage, Linkage::External); + assert_eq!(rust_test_input.visibility, 
GlobalVisibility::Default); + assert_eq!( + rust_test_input.typ, + LLVMType::make_function(LLVMType::void, &[LLVMType::ptr]) + ); + + Ok(()) + } +} diff --git a/crates/compiler/src/pass/data.rs b/crates/compiler/src/pass/data.rs new file mode 100644 index 0000000..0fae12d --- /dev/null +++ b/crates/compiler/src/pass/data.rs @@ -0,0 +1,263 @@ +//! Pass data is data that results from the operation of some pass that _cannot_ +//! be represented in the standard output of the pass. + +use std::{ + any::{Any, TypeId}, + collections::HashMap, + fmt::Debug, +}; + +use derivative::Derivative; +use downcast_rs::Downcast; + +use crate::{ + context::SourceContext, + pass::{ConcretePass, PassKey}, +}; + +/// Pass data is output by any given pass +pub type PassData = Box; + +/// The operations that we expect one of our pass data objects to have. +/// +/// The implementation is designed to be used via dynamic dispatch, and hence +/// can provide the requisite operations however it is able. +/// +/// # Recommended Functions +/// +/// On the concrete type that implements this trait, it is recommended to +/// implement: +/// +/// - A `new(...) -> Self` associated function. +/// - A `new_dyn(...) -> PassData` associated function. This one can usually +/// simply call `Box::new(Self::new(...))`. +/// +/// These aid in providing a uniform way to construct pass data. +/// +/// # Self Bounds +/// +/// The bounds on `Self` are required by these traits for the following reasons: +/// +/// - [`Any`] allows downcasting to concrete implementations of `Opcode` if +/// needed. +/// - [`Debug`] to provide representations to aid in debugging. It is +/// recommended to use the derive feature for this. +/// - [`Downcast`] for easy conversions _to_ [`Any`] for downcasting. +/// +/// In addition, it is required but not enforced that implementors of this +/// trait also implement [`ConcretePassData`]. +pub trait PassDataOps +where + Self: Any + Debug + Downcast, +{ +} + +/// Operations implemented on `dyn PassDataOps` are **only** available on the +/// concrete trait object and hence not equivalent to a blanket implementation +/// of a method for `trait PassDataOps`. +impl dyn PassDataOps { + /// Checks if the pass is an instance of the concrete pass `T`, returning + /// `true` if it is and `false` otherwise. + pub fn is(&self) -> bool { + self.as_any().is::() + } + + /// Allows you to view the dynamic pass `self` as the concrete pass type + /// `T`, returning a `&T` if possible and `None` otherwise. + pub fn view_as(&self) -> Option<&T> { + self.as_any().downcast_ref::() + } + + /// Allows you to view the dynamic pass `self` as the concrete pass type + /// `T`, returning a `&mut T` if possible and `None` otherwise. + pub fn view_as_mut(&mut self) -> Option<&mut T> { + self.as_any_mut().downcast_mut::() + } + + /// Allows you to view the dynamic pass `self` as the concrete pass type + /// `T`, returning a `&T` if possible. + /// + /// # Panics + /// + /// If `self` is not `T`. + pub fn unwrap_as(&self) -> &T { + self.view_as() + .unwrap_or_else(|| panic!("self was not a {:?}", TypeId::of::())) + } + + /// Allows you to view the dynamic pass `self` as the concrete pass type + /// `T`, returning a `&mut T` if possible. + /// + /// # Panics + /// + /// If `self` is not `T`. 
+    pub fn unwrap_as_mut<T: ConcretePassData>(&mut self) -> &mut T {
+        self.view_as_mut()
+            .unwrap_or_else(|| panic!("self was not a {:?}", TypeId::of::<T>()))
+    }
+}
+
+/// Provides additional operations that can be called when operating on a
+/// concrete instance of a specific pass's data, rather than on any pass data
+/// instance.
+///
+/// # Recommended Functions
+///
+/// On the concrete type that implements this trait, it is recommended to
+/// implement:
+///
+/// - A `new(...) -> Self` associated function.
+/// - A `new_dyn(...) -> PassData` associated function. This one can usually
+///   simply call `Box::new(Self::new(...))`.
+///
+/// These aid in providing a uniform way to construct pass data.
+pub trait ConcretePassData
+where
+    Self: Clone + Debug + PassDataOps,
+{
+    /// The pass with which the data is associated.
+    type Pass: ConcretePass;
+}
+
+/// Pass return data that returns a dynamic [`PassData`].
+pub type DynPassReturnData = PassReturnData<PassData>;
+
+/// The data returned when executing a pass.
+#[derive(Derivative)]
+#[derivative(Debug(bound = "T: Debug"))]
+pub struct PassReturnData<T> {
+    /// The newly-modified source context.
+    pub source_context: SourceContext,
+
+    /// The data returned by the pass.
+    pub data: T,
+}
+impl<T> PassReturnData<T> {
+    /// Creates a new instance of the pass return data.
+    pub fn new(source_context: SourceContext, data: T) -> Self {
+        Self {
+            source_context,
+            data,
+        }
+    }
+}
+
+impl PassReturnData<PassData> {
+    /// Allows you to get the returned pass data as the concrete data type `T`,
+    /// returning `&T` if possible and `None` otherwise.
+    #[must_use]
+    pub fn data_as<T: ConcretePassData>(&self) -> Option<&T> {
+        self.data.as_any().downcast_ref::<T>()
+    }
+
+    /// Allows you to get the returned pass data as the concrete data type `T`,
+    /// returning `&mut T` if possible and `None` otherwise.
+    pub fn data_as_mut<T: ConcretePassData>(&mut self) -> Option<&mut T> {
+        self.data.as_any_mut().downcast_mut::<T>()
+    }
+
+    /// Allows you to get the returned pass data as the concrete data type `T`,
+    /// returning `&T` if possible.
+    ///
+    /// # Panics
+    ///
+    /// If `self.data` is not an instance of `T`.
+    #[must_use]
+    pub fn unwrap_data_as<T: ConcretePassData>(&self) -> &T {
+        self.data_as::<T>().unwrap()
+    }
+
+    /// Allows you to get the returned pass data as the concrete data type `T`,
+    /// returning `&mut T` if possible.
+    ///
+    /// # Panics
+    ///
+    /// If `self.data` is not an instance of `T`.
+    pub fn unwrap_data_as_mut<T: ConcretePassData>(&mut self) -> &mut T {
+        self.data_as_mut::<T>().unwrap()
+    }
+}
+
+/// A mapping from pass keys to the associated pass data.
+///
+/// It will always contain the latest pass data, as there is no need to re-run a
+/// pass unless it was invalidated by a subsequent pass.
+pub type DynPassDataMap = PassDataMap<PassData>;
+
+/// A mapping from pass keys to the associated pass data.
+///
+/// It will always contain the latest pass data, as there is no need to re-run a
+/// pass unless it was invalidated by a subsequent pass.
+#[derive(Derivative)]
+#[derivative(
+    Clone(bound = "T: Clone"),
+    Debug(bound = "T: Debug"),
+    PartialEq(bound = "T: PartialEq")
+)]
+pub struct PassDataMap<T> {
+    /// The mapping from pass keys to pass data.
+    mapping: HashMap<PassKey, T>,
+}
+
+impl<T> PassDataMap<T> {
+    /// Constructs a new, empty mapping from pass keys to pass data.
+    #[must_use]
+    pub fn new() -> Self {
+        let mapping = HashMap::new();
+        Self { mapping }
+    }
+
+    /// Clears all pass data.
+    pub fn clear_all(&mut self) {
+        self.mapping.clear();
+    }
+
+    /// Gets a reference to the last-written data for the pass given by the
+    /// provided `key` if it exists, and returns `None` otherwise.
+ #[must_use] + pub fn get_key(&self, key: PassKey) -> Option<&T> { + self.mapping.get(&key) + } + + /// Writes the provided `data` into the container associating it with the + /// pass described by `key`, overwriting any existing data for that pass. + pub fn put_key(&mut self, key: PassKey, data: T) { + self.mapping.insert(key, data); + } + + /// Clears the data for the pass given by the provided `key`, if it exists. + pub fn clear_key(&mut self, key: PassKey) { + self.mapping.remove(&key); + } +} + +impl PassDataMap { + /// Gets a reference to the last-written data for the pass `P` if it exists, + /// and returns `None` otherwise. + /// + /// The data returned is returned as the concrete type. + #[must_use] + pub fn get(&self) -> Option<&P::Data> { + self.mapping.get(&P::key())?.view_as::() + } + + /// Writes the provided `data` into the container associating it with the + /// pass `P`, overwriting any existing data for that pass. + /// + /// This expects the data to be the concrete pass data type for the pass in + /// question. + pub fn put(&mut self, data: P::Data) { + let data = Box::new(data); + self.mapping.insert(P::key(), data); + } + + /// Clears the data for the pass `P` if it exists. + pub fn clear(&mut self) { + self.mapping.remove(&P::key()); + } +} + +impl Default for PassDataMap { + fn default() -> Self { + Self::new() + } +} diff --git a/crates/compiler/src/pass/mod.rs b/crates/compiler/src/pass/mod.rs new file mode 100644 index 0000000..b5cc60a --- /dev/null +++ b/crates/compiler/src/pass/mod.rs @@ -0,0 +1,338 @@ +//! This module contains both the definition of the [`Pass`] type and the +//! [`PassManager`] object. +//! +//! Every Pass should implement [`ConcretePass`], as this provides the full set +//! of features required of a pass. It is also expected that each pass provide a +//! type-specific constructor function called `new`. +//! +//! This compiler will take advantage of passes defined in LLVM—that we can use +//! via [`inkwell`]—and also custom passes tailored to the `CairoVM` CPU model +//! that may depend on LLVM-defined analyses. +//! +//! # Passes +//! +//! A pass is a self-contained unit of functionality that performs some +//! operation over the LLVM IR. They tend to fall into the following three +//! categories: +//! +//! - **Analysis:** These examine the structure of the IR to infer information +//! about it without changing its structure. The information produced by +//! analysis passes can be invalidated by transformation passes. +//! - **Transformation:** Transformation passes use either information from +//! analysis passes or structural information about the IR to change the +//! structure of the IR. These structural changes can happen for many reasons, +//! but usually involve optimizing some metric (e.g. runtime or code size). +//! - **Utility:** These passes do not fall neatly into either of the above +//! categories. +//! +//! # Note: Skeleton +//! +//! The implementations in this file are deliberately left incomplete, and exist +//! only as skeletons to serve the purposes of correctly designing the compiler +//! state. A proper implementation will take place later in the project, as +//! tracked by [#56](https://github.com/reilabs/llvm-to-cairo/issues/56). 
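+//!
+//! # Example
+//!
+//! As a purely illustrative sketch (the API here is still a skeleton, and the
+//! `ltc_compiler::context` and `ltc_compiler::pass` paths are assumed
+//! re-exports that may change), a caller might run the default pass pipeline
+//! over a source context and then query the resulting pass data:
+//!
+//! ```ignore
+//! use std::path::Path;
+//!
+//! use ltc_compiler::{
+//!     context::SourceContext,
+//!     pass::{analysis::module_map::BuildModuleMap, PassManager},
+//! };
+//!
+//! // Load an LLVM IR module into a fresh source context.
+//! let context = SourceContext::create(Path::new("input/add.ll"))?;
+//!
+//! // Run the default pass ordering; this threads the (possibly modified)
+//! // context and the accumulated pass data through each pass in turn.
+//! let mut manager = PassManager::default();
+//! let results = manager.run(context)?;
+//!
+//! // Pass data is keyed by pass type, so it can be retrieved as the concrete
+//! // data type associated with the pass that produced it.
+//! let module_map = results.data.get::<BuildModuleMap>();
+//! ```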
+
+pub mod analysis;
+pub mod data;
+
+use std::{
+    any::{Any, TypeId},
+    fmt::Debug,
+};
+
+use downcast_rs::Downcast;
+use ltc_errors::compile::{Error, Result};
+
+use crate::{
+    context::SourceContext,
+    pass::data::{ConcretePassData, DynPassDataMap, DynPassReturnData},
+};
+
+/// A pass is a self-contained unit of functionality that performs some
+/// operation over the LLVM IR.
+pub type Pass = Box<dyn PassOps>;
+
+/// A handle that uniquely identifies the pass.
+pub type PassKey = TypeId;
+
+/// The operations that we expect one of our passes to have.
+///
+/// The implementation is designed to be used via dynamic dispatch, and hence
+/// can provide the requisite operations however it is able.
+///
+/// # Recommended Functions
+///
+/// On the concrete type that implements this trait, it is recommended to
+/// implement:
+///
+/// - A `new(...) -> Self` associated function.
+/// - A `new_dyn(...) -> Pass` associated function. This one can usually
+///   simply call `Box::new(Self::new(...))`.
+///
+/// These aid in providing a uniform way to construct passes.
+///
+/// # Self Bounds
+///
+/// The bounds on `Self` are required by these traits for the following reasons:
+///
+/// - [`Any`] allows downcasting to concrete implementations of this trait if
+///   needed.
+/// - [`Debug`] to provide representations to aid in debugging. It is
+///   recommended to use the derive feature for this.
+/// - [`Downcast`] for easy conversions _to_ [`Any`] for downcasting.
+///
+/// In addition, it is required but not enforced that implementors of this
+/// trait also implement [`ConcretePass`].
+pub trait PassOps
+where
+    Self: Any + Debug + Downcast,
+{
+    /// Executes the pass on the provided `context`, returning both the
+    /// potentially-modified context and any data returned by the pass.
+    ///
+    /// It takes a map of `pass_data` that allows the running pass to get at the
+    /// data produced by the passes that it depends on.
+    ///
+    /// # Errors
+    ///
+    /// - [`Error`] if pass execution fails for any reason.
+    fn run(
+        &mut self,
+        context: SourceContext,
+        pass_data: &DynPassDataMap,
+    ) -> Result<DynPassReturnData>;
+
+    /// Gets a slice containing the keys of the passes whose output this pass
+    /// depends on.
+    fn depends(&self) -> &[PassKey];
+
+    /// Gets a slice containing the keys of the passes that are invalidated by
+    /// this pass.
+    ///
+    /// # Future-Gazing
+    ///
+    /// In the future (#56), we may instead want passes to declare what _kinds
+    /// of data_ they change in the context (e.g. `Structure`, `Alias`, and so
+    /// on). This would make it far less brittle, as pass implementers would
+    /// not need to know the details of passes that they might accidentally
+    /// invalidate.
+    ///
+    /// This is for the future, as the current pass infrastructure is more of a
+    /// framework to ensure we do not paint ourselves into a corner.
+    fn invalidates(&self) -> &[PassKey];
+
+    /// Returns a duplicate of this pass.
+    fn dupe(&self) -> Pass;
+
+    /// Gets a key that uniquely represents the pass.
+    ///
+    /// This **must** return the same value as [`ConcretePass::key`].
+    fn key_dyn(&self) -> PassKey {
+        self.type_id()
+    }
+}
+
+/// Operations implemented on `dyn PassOps` are **only** available on the
+/// concrete trait object and hence not equivalent to a blanket implementation
+/// of a method for `trait PassOps`.
+impl dyn PassOps {
+    /// Checks if the pass is an instance of the concrete pass `T`, returning
+    /// `true` if it is and `false` otherwise.
+ pub fn is(&self) -> bool { + self.as_any().is::() + } + + /// Allows you to view the dynamic pass `self` as the concrete pass type + /// `T`, returning a `&T` if possible and `None` otherwise. + pub fn view_as(&self) -> Option<&T> { + self.as_any().downcast_ref::() + } + + /// Allows you to view the dynamic pass `self` as the concrete pass type + /// `T`, returning a `&mut T` if possible and `None` otherwise. + pub fn view_as_mut(&mut self) -> Option<&mut T> { + self.as_any_mut().downcast_mut::() + } + + /// Allows you to view the dynamic pass `self` as the concrete pass type + /// `T`, returning a `&T` if possible. + /// + /// # Panics + /// + /// If `self` is not an instance of `T`. + pub fn unwrap_as(&self) -> &T { + self.view_as::() + .unwrap_or_else(|| panic!("self was not a {:?}", TypeId::of::())) + } + + /// Allows you to view the dynamic pass `self` as the concrete pass type + /// `T`, returning a `&mut T` if possible. + /// + /// # Panics + /// + /// If `self` is not an instance of `T`. + pub fn unwrap_as_mut(&mut self) -> &mut T { + self.view_as_mut::() + .unwrap_or_else(|| panic!("self was not a {:?}", TypeId::of::())) + } +} + +/// Provides extra operations that can be called when operating on a concrete +/// instance of a specific pass, rather than on any instance of a pass. +/// +/// # Recommended Functions +/// +/// On the concrete type that implements this trait, it is recommended to +/// implement: +/// +/// - A `new(...) -> Self` associated function. +/// - A `new_dyn(...) -> PassData` associated function. This one can usually +/// simply call `Box::new(Self::new(...))`. +/// +/// These aid in providing a uniform way to construct pass data. +pub trait ConcretePass +where + Self: Clone + Debug + PassOps, +{ + /// The type of data returned by the pass. + type Data: ConcretePassData; + + /// Gets a key that uniquely represents the pass. + /// + /// This **must** return the same value as [`PassOps::key_dyn`]. + #[must_use] + fn key() -> PassKey { + TypeId::of::() + } +} + +/// The data returned when executing all passes via the pass manager. +#[derive(Debug)] +pub struct PassManagerReturnData { + /// The newly-modified source context. + pub context: SourceContext, + + /// A mapping from pass key to the data returned by the pass. + pub data: DynPassDataMap, +} + +impl PassManagerReturnData { + /// Creates a new pass manager return data element wrapping the transformed + /// source `context` and the result `data` from all the passes. + #[must_use] + pub fn new(context: SourceContext, data: DynPassDataMap) -> Self { + Self { context, data } + } +} + +/// A manager for passes within the compiler. +/// +/// The primary task of this pass manager is to automatically resolve a pass +/// ordering based on dependencies between passes. This ensures that pass +/// orderings are correct, without the need for costly manual validation. +pub struct PassManager { + /// The set of passes organized into the order in which they will be + /// executed. + /// + /// Note that this `pass_ordering` may contain passes more than once, + /// depending on the dependencies and invalidations expressed between the + /// passes. + pass_ordering: Vec, +} + +impl PassManager { + /// Creates a new pass manager wrapping the provided passes. + /// + /// # Errors + /// + /// - [`Error::InvalidPassOrdering`] if no valid pass ordering can be + /// generated from the provided `passes`. 
+ pub fn new(passes: Vec) -> Result { + let pass_ordering = Self::generate_pass_ordering(passes)?; + Ok(Self { pass_ordering }) + } + + /// Executes the pass ordering on the provided `context`. + /// + /// # Errors + /// + /// - [`Error`] if any pass fails. + pub fn run(&mut self, mut context: SourceContext) -> Result { + let mut pass_data_map = DynPassDataMap::new(); + + for pass in &mut self.pass_ordering { + // Execute the pass and grab both the potentially-modified source context and + // the data returned by the pass. + let DynPassReturnData { + source_context, + data, + } = pass.run(context, &pass_data_map)?; + + // After this pass runs, we have to ensure that anything that it invalidates has + // the data removed from the pass data mapping. + let invalidated_passes = pass.invalidates(); + invalidated_passes + .iter() + .for_each(|p_key| pass_data_map.clear_key(*p_key)); + + // Finally, we can assign the new data to the pass data mapping. + pass_data_map.put_key(pass.key_dyn(), data); + + // Our context gets overwritten with the new context. + context = source_context; + } + + let result = PassManagerReturnData::new(context, pass_data_map); + Ok(result) + } + + /// Gets the current pass ordering. + /// + /// This method is always guaranteed to return a valid pass ordering that + /// respects the requirements of the passes. + #[must_use] + pub fn passes(&self) -> &[Pass] { + &self.pass_ordering + } + + /// Generates a valid pass ordering from `passes` wherever possible. + /// + /// # Errors + /// + /// - [`Error::InvalidPassOrdering`] if no valid pass ordering can be + /// generated from the provided `passes`. This will usually occur due to + /// circular dependencies between passes. + pub fn generate_pass_ordering(passes: Vec) -> Result> { + // TODO Actually implement this (#56). The current constraint is silly for the + // future but sane for now as we only have the one pass. + // + // In future it should actually construct a topological ordering of passes based + // on their declared dependencies and invalidations, only returning an error if + // there is an unbreakable topological cycle. + let no_deps = passes.iter().all(|p| p.depends().is_empty()); + if no_deps { + Ok(passes) + } else { + Err(Error::InvalidPassOrdering( + "Passes had dependencies where they should not".to_string(), + )) + } + } +} + +impl Default for PassManager { + /// Returns a pass manager with the default set of passes associated with + /// it. + /// + /// # Default Passes + /// + /// The list of default passes is as follows. Please note that they will be + /// assembled into a correct ordering, and will not necessarily be executed + /// in the order in which they are presented here. + /// + /// - [`analysis::module_map::BuildModuleMap`] + fn default() -> Self { + Self::new(vec![analysis::module_map::BuildModuleMap::new_dyn()]) + .expect("Default pass ordering was invalid") + } +} diff --git a/crates/compiler/src/polyfill.rs b/crates/compiler/src/polyfill.rs deleted file mode 100644 index ee57585..0000000 --- a/crates/compiler/src/polyfill.rs +++ /dev/null @@ -1,52 +0,0 @@ -//! In the context of this project, a polyfill is an implementation of some -//! functionality that is _not_ supported by our target CPU in terms of -//! functionality that _is_ supported by our target. -//! -//! By way of example, consider that our CPU does not support floating point -//! arithmetic, so to compile LLVM code that uses such a thing we need to -//! implement it and call _our_ functions where it needs to perform these -//! 
operations. -//! -//! Our polyfill mechanism aims to be generic, such that we can implement and -//! improve our polyfills without requiring invasive changes to the code-base. -//! In order to do this, we have created a _library_ of polyfills that the -//! compilation process (see [`crate::compiler`]) can select from dynamically. -//! -//! # Polyfills and Optimization -//! -//! We are implementing our polyfills in Cairo-the-language, thereby enabling us -//! to have them in the same form as our compiled LLVM IR: `FlatLowered`. This -//! means that we can combine the polyfills and source into a compilation unit -//! seamlessly. -//! -//! While implementing these in Cairo means that they are amenable to rapid -//! iteration and experimentation, the polyfill is not the _end goal_ of this -//! process. -//! -//! 1. **Polyfills:** Implemented in Cairo, these implement functionality that -//! our CPU is missing using functionality that it is not. They are slow in -//! that they take more steps than the other options to perform an operation, -//! but are much easier to experiment with and iterate on. -//! 2. **Builtins:** Builtins are units of functionality written in Rust that -//! act as coprocessors using a DMA-like mechanism to receive operands and -//! provide results back to the executing code. These are much faster to -//! execute, taking few steps at most, but are more invasive to experiment -//! with and change. They may also require more memory than an equivalent -//! polyfill, which would increase the verification time. -//! 3. **AIR Instructions:** AIR instructions are the fastest option here, but -//! adding a new instruction has the downside of increasing the width of the -//! trace table. Any increase in table width increases the size of the table -//! and also the time to prove the execution. -//! -//! Starting with the polyfills, however, allows us to experiment and iterate -//! rapidly to arrive at a design that we are happy with. This would be far more -//! complex for a builtin, and more complex still for an AIR instruction. -//! -//! Perhaps more importantly, the polyfills allow us to examine and profile to -//! find which operations will be most effective to "upgrade". Rather than a -//! scattershot approach based on hunches, the polyfills allow us to base these -//! decisions on real-world data. -//! -//! To that end, there are certainly polyfills that will still exist. It is very -//! unlikely that every single operation is beneficial to implement as a builtin -//! or AIR instruction. diff --git a/crates/compiler/src/polyfill/mappings.rs b/crates/compiler/src/polyfill/mappings.rs new file mode 100644 index 0000000..fefa793 --- /dev/null +++ b/crates/compiler/src/polyfill/mappings.rs @@ -0,0 +1,14 @@ +//! A set of polyfill mappings that provide the default mappings used by the +//! compiler. +//! +//! These constants are left undocumented as they have extremely self-describing +//! names. + +/// A pair where the left element is the LLVM-side name, and the right side is +/// the expected name for the polyfill. +type PolyPair<'a> = (&'a str, &'a str); + +pub const LLVM_UADD_WITH_OVERFLOW_I64: PolyPair<'static> = ( + "llvm.uadd.with.overflow.i64", + "__llvm_uadd_with_overflow_i64_i64", +); diff --git a/crates/compiler/src/polyfill/mod.rs b/crates/compiler/src/polyfill/mod.rs new file mode 100644 index 0000000..48c6777 --- /dev/null +++ b/crates/compiler/src/polyfill/mod.rs @@ -0,0 +1,185 @@ +//! In the context of this project, a polyfill is an implementation of some +//! 
functionality that is _not_ supported by our target CPU in terms of +//! functionality that _is_ supported by our target. +//! +//! By way of example, consider that our CPU does not support floating point +//! arithmetic, so to compile LLVM code that uses such a thing we need to +//! implement it and call _our_ functions where it needs to perform these +//! operations. +//! +//! Our polyfill mechanism aims to be generic, such that we can implement and +//! improve our polyfills without requiring invasive changes to the code-base. +//! In order to do this, we have created a _library_ of polyfills that the +//! compilation process (see [`crate::Compiler`]) can select from dynamically. +//! +//! # Polyfills and Optimization +//! +//! We are implementing our polyfills in Cairo-the-language, thereby enabling us +//! to have them in the same form as our compiled LLVM IR: `FlatLowered`. This +//! means that we can combine the polyfills and source into a compilation unit +//! seamlessly. +//! +//! While implementing these in Cairo means that they are amenable to rapid +//! iteration and experimentation, the polyfill is not the _end goal_ of this +//! process. +//! +//! 1. **Polyfills:** Implemented in Cairo, these implement functionality that +//! our CPU is missing using functionality that it is not. They are slow in +//! that they take more steps than the other options to perform an operation, +//! but are much easier to experiment with and iterate on. +//! 2. **Builtins:** Builtins are units of functionality written in Rust that +//! act as coprocessors using a DMA-like mechanism to receive operands and +//! provide results back to the executing code. These are much faster to +//! execute, taking few steps at most, but are more invasive to experiment +//! with and change. They may also require more memory than an equivalent +//! polyfill, which would increase the verification time. +//! 3. **AIR Instructions:** AIR instructions are the fastest option here, but +//! adding a new instruction has the downside of increasing the width of the +//! trace table. Any increase in table width increases the size of the table +//! and also the time to prove the execution. +//! +//! Starting with the polyfills, however, allows us to experiment and iterate +//! rapidly to arrive at a design that we are happy with. This would be far more +//! complex for a builtin, and more complex still for an AIR instruction. +//! +//! Perhaps more importantly, the polyfills allow us to examine and profile to +//! find which operations will be most effective to "upgrade". Rather than a +//! scattershot approach based on hunches, the polyfills allow us to base these +//! decisions on real-world data. +//! +//! To that end, there are certainly polyfills that will still exist. It is very +//! unlikely that every single operation is beneficial to implement as a builtin +//! or AIR instruction. + +pub mod mappings; + +use bimap::{BiHashMap, BiMap}; + +use crate::polyfill::mappings::LLVM_UADD_WITH_OVERFLOW_I64; + +/// A bidirectional mapping from the builtin names for LLVM to the internal +/// names for the corresponding polyfills. +/// +/// This exists in order to enable external linkage of symbols not part of the +/// current translation unit. +/// +/// # LLVM Opcodes +/// +/// Note that some LLVM opcodes (e.g. `add`) map to potentially multiple +/// implementations. For such opcodes, the expected LLVM name is given by the +/// [`Self::of_opcode`] function. 
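+///
+/// # Example
+///
+/// As an illustration of the bidirectional lookup (mirroring the unit tests
+/// below, and assuming only the default mapping is registered), both
+/// directions resolve the `llvm.uadd.with.overflow.i64` entry:
+///
+/// ```
+/// use ltc_compiler::polyfill::PolyfillMap;
+///
+/// let map = PolyfillMap::default();
+///
+/// // LLVM-side name to polyfill name.
+/// assert_eq!(
+///     map.polyfill("llvm.uadd.with.overflow.i64").unwrap(),
+///     "__llvm_uadd_with_overflow_i64_i64"
+/// );
+///
+/// // Polyfill name back to the LLVM-side name.
+/// assert_eq!(
+///     map.llvm("__llvm_uadd_with_overflow_i64_i64").unwrap(),
+///     "llvm.uadd.with.overflow.i64"
+/// );
+/// ```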
+#[derive(Clone, Debug, PartialEq)] +pub struct PolyfillMap { + /// A mapping from the LLVM-side names to the corresponding polyfill names. + mapping: BiMap, +} + +impl PolyfillMap { + /// Constructs a new polyfill map from the provided `mapping`. + #[must_use] + pub fn new(mapping: BiHashMap) -> Self { + Self { mapping } + } + + /// Queries for the polyfill name that corresponds to the provided + /// `llvm_name`, returning it if it exists or returning [`None`] otherwise. + pub fn polyfill(&self, llvm_name: impl Into) -> Option<&String> { + self.mapping.get_by_left(&llvm_name.into()) + } + + /// Queries for the LLVM opcode (as modified by [`Self::of_opcode`]) that + /// corresponds to the provided `polyfill_name`, returning it if it exists + /// or returning [`None`] otherwise. + pub fn llvm(&self, polyfill_name: impl Into) -> Option<&String> { + self.mapping.get_by_right(&polyfill_name.into()) + } + + /// Provides more information to assist in resolving the correct polyfill + /// based on the types associated with the particular opcode invocation. + /// + /// Note that this is a purely _syntactic_ transformation, and does not + /// account for type aliases and the like. Please ensure that any types are + /// fully resolved before calling this. + /// + /// ``` + /// use ltc_compiler::polyfill::PolyfillMap; + /// + /// let opcode_name = "add"; + /// let arg_types = vec!["i8", "i64"]; + /// + /// assert_eq!( + /// PolyfillMap::of_opcode(opcode_name, arg_types.as_slice()), + /// "__llvm_add_i8_i64" + /// ); + /// ``` + #[must_use] + pub fn of_opcode(opcode: &str, types: &[&str]) -> String { + let types_str = if types.is_empty() { + "void".to_string() + } else { + types.join("_") + }; + format!("__llvm_{opcode}_{types_str}") + } +} + +impl Default for PolyfillMap { + /// Contains the default mapping from opcodes and builtins to the + /// corresponding polyfill names. + fn default() -> Self { + let defaults = [LLVM_UADD_WITH_OVERFLOW_I64]; + + Self::new( + defaults + .into_iter() + .map(|(l, r)| (l.to_string(), r.to_string())) + .collect(), + ) + } +} + +#[cfg(test)] +mod test { + use crate::polyfill::PolyfillMap; + + #[test] + fn llvm_lookup_works() { + let map = PolyfillMap::default(); + + assert_eq!( + map.llvm("__llvm_uadd_with_overflow_i64_i64").unwrap(), + "llvm.uadd.with.overflow.i64" + ); + } + + #[test] + fn polyfill_lookup_works() { + let map = PolyfillMap::default(); + + assert_eq!( + map.polyfill("llvm.uadd.with.overflow.i64").unwrap(), + "__llvm_uadd_with_overflow_i64_i64" + ); + } + + #[test] + fn of_opcode_works() { + let opcode_name = "my_opcode"; + let tys_1 = vec!["i8", "i64"]; + let tys_2 = vec!["i1"]; + let tys_3 = vec![]; + + assert_eq!( + PolyfillMap::of_opcode(opcode_name, tys_1.as_slice()), + "__llvm_my_opcode_i8_i64" + ); + assert_eq!( + PolyfillMap::of_opcode(opcode_name, tys_2.as_slice()), + "__llvm_my_opcode_i1" + ); + assert_eq!( + PolyfillMap::of_opcode(opcode_name, tys_3.as_slice()), + "__llvm_my_opcode_void" + ); + } +} diff --git a/crates/error/Cargo.toml b/crates/error/Cargo.toml index f4db351..34aa8bc 100644 --- a/crates/error/Cargo.toml +++ b/crates/error/Cargo.toml @@ -15,4 +15,5 @@ rust-version.workspace = true [dependencies] ariadne.workspace = true +inkwell.workspace = true thiserror.workspace = true diff --git a/crates/error/src/compile.rs b/crates/error/src/compile.rs new file mode 100644 index 0000000..504ef22 --- /dev/null +++ b/crates/error/src/compile.rs @@ -0,0 +1,68 @@ +//! 
Error types and utilities to do with the compilation from LLVM IR to Cairo
+//! IR.
+
+use std::str::Utf8Error;
+
+use inkwell::support::LLVMString;
+use thiserror::Error;
+
+/// The result type for use in the compiler.
+pub type Result<T> = std::result::Result<T, Error>;
+
+/// This error type is for use during the process of compilation from LLVM IR to
+/// the Cairo IR.
+#[derive(Debug, Error)]
+pub enum Error {
+    /// A generic compilation failure with a string message, used as a catch-all
+    /// for cases that are uncommon enough to not have specific error variants
+    /// for them.
+    #[error("Compilation failed: {_0}")]
+    CompilationFailure(String),
+
+    /// An error that occurs when trying to convert from the LLVM string
+    /// representation used by Inkwell to the UTF-8 string representation used
+    /// by Rust.
+    #[error("Could not create Rust string from C string: {_0}")]
+    CStrConversionError(#[from] Utf8Error),
+
+    /// Emitted when a data-layout string cannot be parsed, carrying the full
+    /// specification and the offending segment.
+    #[error("`{_0}` with invalid segment `{_1}` could not be parsed as an LLVM data layout")]
+    InvalidDataLayoutSpecification(String, String),
+
+    /// Emitted when code tries to construct an invalid ordering of compiler
+    /// passes.
+    #[error("Invalid Pass Ordering: {_0}")]
+    InvalidPassOrdering(String),
+
+    /// An error when doing IO during compilation.
+    #[error(transparent)]
+    IOError(#[from] std::io::Error),
+
+    /// An error coming from LLVM.
+    ///
+    /// Unfortunately this does not directly contain an `LLVMString` as we want
+    /// our error types to be [`Send`] and `LLVMString` is not.
+    #[error("LLVM Error: {_0}")]
+    LLVMError(String),
+
+    /// Emitted when an attempt is made to add a module to the compilation
+    /// context, but this cannot be done.
+    #[error("Unable to add module to context: {_0}")]
+    UnableToAddModuleToContext(String),
+
+    /// Emitted when a module's data layout declares address spaces other than
+    /// the single default address space numbered 0.
+    #[error("We only support targets that use a single address space numbered 0")]
+    UnsupportedAdditionalAddressSpaces,
+
+    /// Emitted when a module's data layout declares non-integral pointers,
+    /// which we do not support.
+    #[error("We do not support targets with non-integral pointers configured.")]
+    UnsupportedNonIntegralPointerConfiguration,
+
+    /// Emitted when we encounter an LLVM type that we do not support.
+    #[error("The LLVM basic type {_0} is not supported")]
+    UnsupportedType(String),
+}
+
+impl From<LLVMString> for Error {
+    /// Wrap an error from LLVM into our error type.
+    fn from(value: LLVMString) -> Self {
+        Self::LLVMError(value.to_string())
+    }
+}
diff --git a/crates/error/src/lib.rs b/crates/error/src/lib.rs
index d306600..61f15f6 100644
--- a/crates/error/src/lib.rs
+++ b/crates/error/src/lib.rs
@@ -10,7 +10,7 @@
 //! specific errors in library code. To that end, we make sure that our errors
 //! are kept strongly typed within the library as much as is possible.
 
-pub mod llvm_compile;
+pub mod compile;
 
 use thiserror::Error;
 
@@ -23,11 +23,8 @@ pub type Result<T> = std::result::Result<T, Error>;
 /// this is the type that is used at the boundaries of the library. Though we do
 /// not make a habit of hiding things, any function intended to be part of the
 /// _truly_ public interface of this library should return this error type.
-#[derive(Clone, Debug, Error)]
+#[derive(Debug, Error)]
 pub enum Error {
     #[error(transparent)]
-    LlvmCompile(#[from] llvm_compile::Error),
-
-    #[error("An unknown error occurred: {_0}")]
-    Miscellaneous(String),
+    Compile(#[from] compile::Error),
 }
diff --git a/crates/error/src/llvm_compile.rs b/crates/error/src/llvm_compile.rs
deleted file mode 100644
index 004c866..0000000
--- a/crates/error/src/llvm_compile.rs
+++ /dev/null
@@ -1,12 +0,0 @@
-//!
Error types and utilities to do with the compilation from LLVM IR to Cairo -//! IR. - -use thiserror::Error; - -/// This error type is for use during the process of compilation from LLVM IR to -/// the Cairo IR. -#[derive(Clone, Debug, Error)] -pub enum Error { - #[error("Miscellaneous compilation error: {_0}")] - Miscellaneous(String), -} diff --git a/crates/flo/README.md b/crates/flo/README.md index dc9f60e..549e923 100644 --- a/crates/flo/README.md +++ b/crates/flo/README.md @@ -1,7 +1,7 @@ # `FlatLowered` Intermediate Representation -The `FlatLowered` Intermediate Representation (`FLIR`) is an intermediate representation for the -LLVM to Cairo project that is based on Cairo's `FlatLowered` but tailored for our use-case. +The `FlatLowered` Object format (`FLO`) is an intermediate representation for the LLVM to Cairo +project that is based on Cairo's `FlatLowered` but tailored for our use-case. In particular, it removes any dependency on the [Salsa](https://github.com/salsa-rs/salsa) database structures, as well as: diff --git a/workspace.nix b/workspace.nix index 4d017c1..9c961c0 100644 --- a/workspace.nix +++ b/workspace.nix @@ -3,11 +3,13 @@ # We want to be able to run commands from a nix shell using this package # definition. { - craneLib, lib, - llvmPackages_18, - libiconv, stdenv, + craneLib, + libffi, + libiconv, + libxml2, + llvmPackages_18, }: let workspaceToml = lib.importTOML ./Cargo.toml; @@ -34,11 +36,13 @@ # Things that are needed at build time on the system doing building. nativeBuildInputs = [ - llvmPackages_18.llvm + llvmPackages_18.llvm ]; # The things that we need available at build and runtime on the target system. buildInputs = [ + libffi + libxml2 llvmPackages_18.llvm ] ++ lib.optionals stdenv.hostPlatform.isDarwin [ libiconv