# Cross-compilation settings for the statically linked musl Linux targets.
# Linking goes through the repository's ./zigcc wrapper, which forwards its
# arguments to `zig cc`; the zig target triple is injected ahead of the other
# linker arguments via -Zpre-link-args.
[target.aarch64-unknown-linux-musl]
rustflags = [
  "-Zpre-link-args=-target aarch64-linux-musl",
  "-Ctarget-feature=+lse,+crt-static",
  "-Ctarget-cpu=neoverse-n1",
]
linker = "./zigcc"
ar = "zig ar"

[target.x86_64-unknown-linux-musl]
rustflags = [
  "-Zpre-link-args=-target x86_64-linux-musl",
  "-Ctarget-feature=+crt-static",
]
linker = "./zigcc"
ar = "zig ar"

[unstable]
build-std = ["core", "compiler_builtins", "alloc", "std", "panic_abort"]
build-std-features = ["panic_immediate_abort"]

# Compilers picked up by build scripts for each target. The C++ entries use
# the `zig c++` driver rather than `zig cc`, so C++ sources are compiled and
# linked with the C++ driver instead of the C one.
[env]
CC_aarch64_unknown_linux_musl = "zig cc -target aarch64-linux-musl"
CXX_aarch64_unknown_linux_musl = "zig c++ -target aarch64-linux-musl"
CC_x86_64_unknown_linux_musl = "zig cc -target x86_64-linux-musl"
CXX_x86_64_unknown_linux_musl = "zig c++ -target x86_64-linux-musl"
11.911375ms - stringify: 3.925875ms vs. 10.853125ms - Size 5170901: - parse: 56.0855ms vs. 126.783833ms - stringify: 38.671083ms vs. 107.312875ms - Size 51718901: - parse: 731.7205ms vs. 1.285825541s - stringify: 395.82225ms vs. 1.39267225s - Size 517288901: - parse: 6.886183416s vs. 14.985707583s - stringify: 3.957781167s vs. 10.885577917s - -* 7x faster integer and float toString(): - - Benchmark 1: target/release/llrt - Time (mean ± σ): 1.568 s ± 0.016 s [User: 1.555 s, System: 0.007 s] - Range (min … max): 1.558 s … 1.610 s 10 runs - - Benchmark 2: target/release/llrt-next - Time (mean ± σ): 205.1 ms ± 3.1 ms [User: 196.9 ms, System: 2.2 ms] - Range (min … max): 200.0 ms … 213.1 ms 14 runs - - Summary - target/release/llrt-next ran - 7.65 ± 0.14 times faster than target/release/llrt - -* Improved logging: - * LLRT now supports [advanced logging controls](https://aws.amazon.com/blogs/compute/introducing-advanced-logging-controls-for-aws-lambda-functions/) for AWS Lambda - * `requestId` is now captured and outputted with logging - * Console has some performance improvements by reusing String and avoiding allocations +* Use MiMalloc allocator for improved performance. 
+* Switch libc from gnu to musl +* Bug fixes +* Dependency upgrades \ No newline at end of file diff --git a/Cargo.lock b/Cargo.lock index eaa9e37c33..da7cd37a23 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -739,9 +739,19 @@ version = "0.2.152" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "13e3bf6590cbc649f4d1a3eefc9d5d6eb746f5200ffb04e5e142700b8faa56e7" +[[package]] +name = "libmimalloc-sys" +version = "0.1.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3979b5c37ece694f1f5e51e7ecc871fdb0f517ed04ee45f88d15d6d553cb9664" +dependencies = [ + "cc", + "libc", +] + [[package]] name = "llrt" -version = "0.1.4-beta" +version = "0.1.5-beta" dependencies = [ "async-trait", "base64-simd", @@ -758,6 +768,7 @@ dependencies = [ "itoa", "jwalk", "libc", + "libmimalloc-sys", "md-5", "nanoid", "once_cell", diff --git a/Cargo.toml b/Cargo.toml index 5648162583..8633bec9ef 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "llrt" -version = "0.1.4-beta" +version = "0.1.5-beta" edition = "2021" [features] @@ -19,6 +19,7 @@ md-5 = { version = "0.10.6", features = ["asm"] } rquickjs = { git = "https://github.com/DelSkayn/rquickjs", rev = "60696e88dfb903d8f5cd81b2667fb3f64f9e9f67", features = [ "full-async", "parallel", + "rust-alloc", ], default-features = false } tokio = { version = "1", features = ["full"] } tracing = { version = "0.1.40", features = ["log"] } @@ -66,10 +67,12 @@ tokio-rustls = { version = "0.25.0", features = [ "ring", ], default-features = false } ring = "0.17.7" +libmimalloc-sys = "0.1.35" [build-dependencies] rquickjs = { git = "https://github.com/DelSkayn/rquickjs", rev = "60696e88dfb903d8f5cd81b2667fb3f64f9e9f67", features = [ "full-async", + "rust-alloc", ], default-features = false } relative-path = "1.9.0" tokio = { version = "1", features = ["full"] } diff --git a/Makefile b/Makefile index 6724879c8a..6867134f7c 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,5 @@ 
# Arguments shared by both static libzstd builds (lib/arm64 and lib/x64).
ZSTD_LIB_ARGS = -j lib-nomt UNAME=Linux ZSTD_LIB_COMPRESSION=0 ZSTD_LIB_DICTBUILDER=0 AR="zig ar"
# Compiler flags appended to every per-architecture CC definition below.
ZSTD_LIB_CC_ARGS = -s -O3 -flto
# Per-architecture compiler. Each entry must name its own zig target triple:
# lib/arm64/libzstd.a is built with ZSTD_LIB_CC_arm64 and lib/x64/libzstd.a
# with ZSTD_LIB_CC_x64, so a wrong triple here yields an archive for the wrong
# CPU architecture.
ZSTD_LIB_CC_arm64 = CC="zig cc -target aarch64-linux-musl $(ZSTD_LIB_CC_ARGS)"
ZSTD_LIB_CC_x64 = CC="zig cc -target x86_64-linux-musl $(ZSTD_LIB_CC_ARGS)"
CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_LINKER = aarch64-linux-gnu-gcc - -endif - CURRENT_TARGET ?= $(TARGET_$(DETECTED_OS)_$(ARCH)) -export COMPILE_TARGET = $(CURRENT_TARGET) - lambda-all: libs $(RELEASE_ZIPS) release-all: | lambda-all llrt-linux-x64.zip llrt-linux-arm64.zip llrt-darwin-x64.zip llrt-darwin-arm64.zip release: llrt-$(DETECTED_OS)-$(ARCH).zip @@ -73,8 +55,8 @@ llrt-linux-arm64.zip: | clean-js js zip -j $@ target/$(TARGET_linux_arm64)/release/llrt define release_template -release-${1}: js - COMPILE_TARGET=$$(TARGET_linux_$$(RELEASE_ARCH_NAME_${1})) cargo $$(BUILD_ARG) --target $$(TARGET_linux_$$(RELEASE_ARCH_NAME_${1})) --features lambda -vv +release-${1}: | clean-js js + cargo $$(BUILD_ARG) --target $$(TARGET_linux_$$(RELEASE_ARCH_NAME_${1})) --features lambda -vv ./pack target/$$(TARGET_linux_$$(RELEASE_ARCH_NAME_${1}))/release/llrt target/$$(TARGET_linux_$$(RELEASE_ARCH_NAME_${1}))/release/bootstrap @rm -rf llrt-lambda-${1}.zip zip -j llrt-lambda-${1}.zip target/$$(TARGET_linux_$$(RELEASE_ARCH_NAME_${1}))/release/bootstrap @@ -132,13 +114,6 @@ run: export _HANDLER = index.handler run: | clean-js js cargo run -r -vv -run-release: export _HANDLER = fixtures/local.handler -run-release: js - cargo build - time target/release/llrt - time target/release/llrt - time target/release/llrt - run-ssr: export AWS_LAMBDA_RUNTIME_API = localhost:3000 run-ssr: export TABLE_NAME=quickjs-table run-ssr: export AWS_REGION = us-east-1 @@ -175,13 +150,13 @@ lib/zstd.h: lib/arm64/libzstd.a: mkdir -p $(dir $@) rm -f zstd/lib/-.o - cd zstd/lib && make clean && COMPILE_TARGET="aarch64-unknown-linux-musl" make $(ZSTD_LIB_ARGS) + cd zstd/lib && make clean && make $(ZSTD_LIB_ARGS) $(ZSTD_LIB_CC_arm64) cp zstd/lib/libzstd.a $@ lib/x64/libzstd.a: mkdir -p $(dir $@) rm -f zstd/lib/-.o - cd zstd/lib && make clean && COMPILE_TARGET="x86_64-unknown-linux-musl" make $(ZSTD_LIB_ARGS) + cd zstd/lib && make clean && make $(ZSTD_LIB_ARGS) $(ZSTD_LIB_CC_x64) cp zstd/lib/libzstd.a $@ bench: 
diff --git a/README.md b/README.md index 9fdf83768e..d41dc6dce1 100644 --- a/README.md +++ b/README.md @@ -178,6 +178,11 @@ B) Without the JIT overhead, LLRT conserves both CPU and memory resources that c ## Building from source +Clone code and cd to directory + + git clone git@github.com:awslabs/llrt.git --recursive + cd llrt + Install rust curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | bash -s -- -y @@ -189,17 +194,9 @@ Install dependencies brew install zig make zstd node corepack # Ubuntu - sudo apt -y install make zstd gcc \ - libc6-arm64-cross \ - libc6-dev-arm64-cross \ - crossbuild-essential-arm64 + sudo apt -y install make zstd sudo snap install zig --classic --beta -Clone code and cd to directory - - git clone git@github.com:awslabs/llrt.git --recursive - cd llrt - Install Node.js packages corepack enable @@ -209,12 +206,16 @@ Install generate libs and setup rust targets & toolchains make stdlib && make libs -Build release +Build release for Lambda make release-arm64 # or for x86, use make release-x86 +Optionally build for your local machine (Mac or Linux) + + make release + You should now have a `llrt-lambda-arm64.zip` or `llrt-lambda-x86.zip`. 
You can manually upload this as a Lambda layer or use it via your Infrastructure-as-code pipeline ## Running Lambda emulator diff --git a/example/functions/src/ssr.ts b/example/functions/src/ssr.ts index ff09515b7d..a1dc0275d6 100644 --- a/example/functions/src/ssr.ts +++ b/example/functions/src/ssr.ts @@ -147,7 +147,8 @@ const assetResponse = async (path: string) => { }; export const handler = async (event: any) => { - const { method, path: eventPath } = event?.requestContext?.http || {}; + const { method = "GET", path: eventPath = "/" } = + event?.requestContext?.http || {}; try { const reqSegments: string[] = (eventPath as string) diff --git a/src/allocator.rs b/src/allocator.rs new file mode 100644 index 0000000000..b3b41f1f69 --- /dev/null +++ b/src/allocator.rs @@ -0,0 +1,44 @@ +use libmimalloc_sys as ffi; + +use core::alloc::{GlobalAlloc, Layout}; +use core::ffi::c_void; +use core::sync::atomic::AtomicUsize; +use ffi::*; +use std::sync::atomic::Ordering; + +pub static USED_MEM: AtomicUsize = AtomicUsize::new(0); + +pub struct TrackingMiMalloc; + +unsafe impl GlobalAlloc for TrackingMiMalloc { + #[inline] + unsafe fn alloc(&self, layout: Layout) -> *mut u8 { + let size = layout.size(); + USED_MEM.fetch_add(size, Ordering::Relaxed); + mi_malloc_aligned(size, layout.align()) as *mut u8 + } + + #[inline] + unsafe fn alloc_zeroed(&self, layout: Layout) -> *mut u8 { + let size = layout.size(); + USED_MEM.fetch_add(size, Ordering::Relaxed); + mi_zalloc_aligned(size, layout.align()) as *mut u8 + } + + #[inline] + unsafe fn dealloc(&self, ptr: *mut u8, layout: Layout) { + USED_MEM.fetch_sub(layout.size(), Ordering::Relaxed); + mi_free(ptr as *mut c_void); + } + + #[inline] + unsafe fn realloc(&self, ptr: *mut u8, layout: Layout, new_size: usize) -> *mut u8 { + let size = layout.size(); + if new_size > size { + USED_MEM.fetch_add(new_size - size, Ordering::Relaxed); + } else { + USED_MEM.fetch_sub(size - new_size, Ordering::Relaxed); + } + mi_realloc_aligned(ptr as 
*mut c_void, new_size, layout.align()) as *mut u8 + } +} diff --git a/src/js/@llrt/runtime.ts b/src/js/@llrt/runtime.ts index 9856e0ed4e..1ef892a95b 100644 --- a/src/js/@llrt/runtime.ts +++ b/src/js/@llrt/runtime.ts @@ -198,6 +198,7 @@ const startProcessEvents = async ( } iterations++; } + global.__gc(); } }; diff --git a/src/main.c b/src/main.c index 1437eb8f6f..81cb1b2628 100644 --- a/src/main.c +++ b/src/main.c @@ -298,8 +298,27 @@ int main(int argc, char *argv[]) char startTimeStr[16]; sprintf(startTimeStr, "%lu", startTime); + char *memorySizeStr = getenv("AWS_LAMBDA_FUNCTION_MEMORY_SIZE"); + int memorySize = memorySizeStr ? atoi(memorySizeStr) : 128; + double memoryFactor = 0.8; + if (memorySize > 512) + { + memoryFactor = 0.9; + } + if (memorySize > 1024) + { + memoryFactor = 0.92; + } + if (memorySize > 2048) + { + memoryFactor = 0.95; + } + + char mimallocReserveMemoryMb[16]; + sprintf(mimallocReserveMemoryMb, "%iMiB", (int)(memorySize * memoryFactor)); + setenv("_START_TIME", startTimeStr, false); - setenv("MIMALLOC_RESERVE_OS_MEMORY", "120m", false); + setenv("MIMALLOC_RESERVE_OS_MEMORY", mimallocReserveMemoryMb, false); setenv("MIMALLOC_LIMIT_OS_ALLOC", "1", false); logInfo("Starting app\n"); diff --git a/src/main.rs b/src/main.rs index 7e68b1099c..d18d7a35af 100644 --- a/src/main.rs +++ b/src/main.rs @@ -4,6 +4,7 @@ #[macro_use] mod macros; +mod allocator; mod buffer; mod bytearray_buffer; mod child_process; @@ -32,6 +33,7 @@ mod uuid; mod vm; mod xml; +use allocator::TrackingMiMalloc; use minimal_tracer::MinimalTracer; use rquickjs::{AsyncContext, Module}; use std::{ @@ -57,6 +59,9 @@ use crate::{ const VERSION: &str = env!("CARGO_PKG_VERSION"); +#[global_allocator] +pub static ALLOCATOR: TrackingMiMalloc = TrackingMiMalloc; + #[tokio::main] async fn main() -> Result<(), Box> { MinimalTracer::register()?; diff --git a/src/vm.rs b/src/vm.rs index b57426323c..92ddbcbff5 100644 --- a/src/vm.rs +++ b/src/vm.rs @@ -8,7 +8,7 @@ use std::{ path::{Component, 
Path, PathBuf}, process::exit, result::Result as StdResult, - sync::{Arc, Mutex}, + sync::{atomic::Ordering, Arc, Mutex}, }; use once_cell::sync::Lazy; @@ -33,6 +33,7 @@ use zstd::{bulk::Decompressor, dict::DecoderDictionary}; include!("./bytecode_cache.rs"); use crate::{ + allocator::USED_MEM, buffer::BufferModule, child_process::ChildProcessModule, console, @@ -94,6 +95,14 @@ create_modules!( "uuid" => UuidModule ); +static GC_THRESHOLD: Lazy = Lazy::new(|| { + let gc_threshold_mb: usize = env::var("LLRT_GC_THRESHOLD_MB") + .unwrap_or("20".into()) + .parse() + .unwrap_or(20); + gc_threshold_mb * 1024 * 1024 +}); + struct ModuleInfo { name: &'static str, module: T, @@ -344,6 +353,9 @@ impl Vm { .fill(&mut [0; 8]) .expect("Failed to initialize SystemRandom"); + //inited lazy + let _ = *GC_THRESHOLD > 0; + let mut file_resolver = FileResolver::default(); let mut binary_resolver = BinaryResolver::default(); let mut paths: Vec<&str> = Vec::with_capacity(10); @@ -467,9 +479,24 @@ fn json_parse_string<'js>(ctx: Ctx<'js>, value: Value<'js>) -> Result json_parse(&ctx, bytes) } +fn run_gc(ctx: Ctx<'_>) { + if USED_MEM.load(Ordering::Relaxed) < *GC_THRESHOLD { + return; + } + + trace!("Running GC"); + + unsafe { + let rt = qjs::JS_GetRuntime(ctx.as_raw().as_ptr()); + qjs::JS_RunGC(rt); + }; +} + fn init(ctx: &Ctx<'_>, module_names: HashSet<&'static str>) -> Result<()> { let globals = ctx.globals(); + globals.set("__gc", Func::from(run_gc))?; + let number: Function = globals.get(PredefinedAtom::Number)?; let number_proto: Object = number.get(PredefinedAtom::Prototype)?; number_proto.set(PredefinedAtom::ToString, Func::from(number_to_string))?; diff --git a/zigar b/zigar deleted file mode 100755 index 4a5011d578..0000000000 --- a/zigar +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -set -e - -zig ar "$@" \ No newline at end of file diff --git a/zigcc b/zigcc index 7b274676b1..e024d48882 100755 --- a/zigcc +++ b/zigcc @@ -1,26 +1,10 @@ #!/bin/bash set -e -if [ -z 
# Forward every argument to `zig cc`, except paths matching
# *self-contained/*crt* (presumably the CRT objects rustc adds for
# self-contained musl linking, which zig provides itself; confirm).
new_array=()
for value in "$@"
do
  if [[ $value != *self-contained/*crt* ]]; then
    # Quoted so an argument containing spaces or glob characters is passed
    # through as exactly one unmodified argument.
    new_array+=("$value")
  fi
done

# The target triple is no longer chosen here; callers pass `-target ...`
# themselves as part of the forwarded arguments.
zig cc "${new_array[@]}"