From 20dc20fafeca763e0fa9b808200ecd223c1b8b53 Mon Sep 17 00:00:00 2001 From: Robert Bastian Date: Thu, 20 Jan 2022 13:45:41 +0100 Subject: [PATCH 1/4] Datagen key file support and test scaffolding --- .gitignore | 1 + Makefile.toml | 4 +- tools/datagen/README.md | 36 +++++----- tools/datagen/src/bin/datagen.rs | 29 +++++--- tools/datagen/src/main.rs | 68 +++++++++---------- .../datagen/tests/testdata/work_log+keys.txt | 4 ++ tools/scripts/data.toml | 44 ++++++++---- tools/scripts/wasm.toml | 56 +++++++++++++++ 8 files changed, 166 insertions(+), 76 deletions(-) create mode 100644 tools/datagen/tests/testdata/work_log+keys.txt diff --git a/.gitignore b/.gitignore index 799786906a2..3c63338f000 100644 --- a/.gitignore +++ b/.gitignore @@ -23,6 +23,7 @@ dhat-heap.json # Do not check-in bincode test data provider/testdata/data/bincode +tools/datagen/tests/testdata/work_log_bincode # Ignore irrelevant files that get generated on macOS **/.DS_Store diff --git a/Makefile.toml b/Makefile.toml index 2f66eb57583..b643b027119 100644 --- a/Makefile.toml +++ b/Makefile.toml @@ -48,7 +48,7 @@ dependencies = [ "test-all-features", "test-docs-default", "test-docs", - "testdata-build-bincode-all", + "testdata-build-worklog-bincode", "testdata-check", ] @@ -91,6 +91,8 @@ dependencies = [ # we have to set up the environment for the emscripten job separately # Instead, each of these is called individually. "wasm-release", + "wasm-test-release", + "wasm-compare-worklog-keys", "wasm-cpp-emscripten", ] diff --git a/tools/datagen/README.md b/tools/datagen/README.md index 5337abcc59d..ca9e08dcac6 100644 --- a/tools/datagen/README.md +++ b/tools/datagen/README.md @@ -11,39 +11,39 @@ More details on each tool can be found by running `--help`. ## Examples -Generate ICU4X JSON file tree: +Generate ICU4X Postcard blob (single file) for all keys and all locales: ```bash # Run from the icu4x project folder $ cargo run --bin icu4x-datagen -- \ - --cldr-tag 39.0.0 \ - --all-keys \ - --all-locales \ - --out /tmp/icu4x_data/json + --cldr-tag 39.0.0 \ + --all-keys \ + --all-locales \ + --format blob \ + --out /tmp/icu4x_data/icu4x_data.postcard ``` -Generate ICU4X Postcard blob (single file): +Extract the keys used by an executable into a key file: ```bash # Run from the icu4x project folder -$ cargo run --bin icu4x-datagen -- \ - --cldr-tag 39.0.0 \ - --all-keys \ - --all-locales \ - --format blob \ - --out /tmp/icu4x_data/icu4x_data.postcard +$ cargo build --example work_log --release +$ cargo make icu4x-key-extract \ + target/release/examples/work_log \ + /tmp/icu4x_data/work_log+keys.txt +$ cat /tmp/icu4x_data/work_log+keys.txt ``` -Generate ICU4X Bincode file tree: +Generate ICU4X JSON file tree from the key file for Spanish and German: ```bash # Run from the icu4x project folder $ cargo run --bin icu4x-datagen -- \ - --cldr-tag 39.0.0 \ - --all-keys \ - --all-locales \ - --syntax bincode \ - --out /tmp/icu4x_data/bincode + --cldr-tag 39.0.0 \ + --key-file /tmp/icu4x_data/work_log+keys.txt \ + --locales es \ + --locales de \ + --out /tmp/icu4x_data/work_log_json ``` ## More Information diff --git a/tools/datagen/src/bin/datagen.rs b/tools/datagen/src/bin/datagen.rs index f22653d4616..9d2e3efaec4 100644 --- a/tools/datagen/src/bin/datagen.rs +++ b/tools/datagen/src/bin/datagen.rs @@ -24,7 +24,11 @@ use icu_provider_fs::export::FilesystemExporter; use icu_provider_fs::manifest; use icu_provider_uprops::{EnumeratedPropertyCodePointTrieProvider, PropertiesDataProvider}; use simple_logger::SimpleLogger; +use std::borrow::Cow; use std::collections::HashSet; +use std::fs::File; +use std::io; +use std::io::BufRead; use std::path::PathBuf; use std::str::FromStr; use writeable::Writeable; @@ -240,10 +244,6 @@ fn main() -> eyre::Result<()> { .unwrap() } - if matches.is_present("KEY_FILE") { - eyre::bail!("Key file is not yet supported (see #192)",); - } - if matches.is_present("DRY_RUN") { eyre::bail!("Dry-run is not yet supported"); } @@ -282,9 +282,22 @@ fn main() -> eyre::Result<()> { if matches.is_present("ALL_KEYS") || matches.is_present("KEYS") + || matches.is_present("KEY_FILE") || matches.is_present("TEST_KEYS") { - let keys = matches.values_of("KEYS").map(|values| values.collect()); + let mut keys = matches + .values_of("KEYS") + .map(|keys| keys.map(Cow::Borrowed).collect::>()); + if let Some(key_file_path) = matches.value_of_os("KEY_FILE") { + let keys = keys.get_or_insert_with(Default::default); + let file = File::open(key_file_path) + .with_context(|| key_file_path.to_string_lossy().into_owned())?; + for line in io::BufReader::new(file).lines() { + let line_string = + line.with_context(|| key_file_path.to_string_lossy().into_owned())?; + keys.insert(Cow::Owned(line_string)); + } + } export_cldr(&matches, exporter, locales_vec.as_deref(), keys.as_ref())?; export_set_props(&matches, exporter, keys.as_ref())?; export_map_props(&matches, exporter, keys.as_ref())?; @@ -379,7 +392,7 @@ fn export_cldr( matches: &ArgMatches, exporter: &mut (impl DataExporter + ?Sized), allowed_locales: Option<&[LanguageIdentifier]>, - allowed_keys: Option<&HashSet<&str>>, + allowed_keys: Option<&HashSet>>, ) -> eyre::Result<()> { let locale_subset = matches.value_of("CLDR_LOCALE_SUBSET").unwrap_or("full"); let cldr_paths: Box = if let Some(tag) = matches.value_of("CLDR_TAG") { @@ -434,7 +447,7 @@ fn export_cldr( fn export_set_props( matches: &ArgMatches, exporter: &mut (impl DataExporter + ?Sized), - allowed_keys: Option<&HashSet<&str>>, + allowed_keys: Option<&HashSet>>, ) -> eyre::Result<()> { log::trace!("Loading data for binary properties..."); @@ -482,7 +495,7 @@ fn export_set_props( fn export_map_props( matches: &ArgMatches, exporter: &mut (impl DataExporter + ?Sized), - allowed_keys: Option<&HashSet<&str>>, + allowed_keys: Option<&HashSet>>, ) -> eyre::Result<()> { log::trace!("Loading data for enumerated properties..."); diff --git a/tools/datagen/src/main.rs b/tools/datagen/src/main.rs index 0b70db1c863..79497d59572 100644 --- a/tools/datagen/src/main.rs +++ b/tools/datagen/src/main.rs @@ -13,40 +13,40 @@ //! //! # Examples //! -//! Generate ICU4X JSON file tree: -//! -//!```bash -//!# Run from the icu4x project folder -//!$ cargo run --bin icu4x-datagen -- \ -//! --cldr-tag 39.0.0 \ -//! --all-keys \ -//! --all-locales \ -//! --out /tmp/icu4x_data/json -//!``` -//! -//! Generate ICU4X Postcard blob (single file): -//! -//!```bash -//!# Run from the icu4x project folder -//!$ cargo run --bin icu4x-datagen -- \ -//! --cldr-tag 39.0.0 \ -//! --all-keys \ -//! --all-locales \ -//! --format blob \ -//! --out /tmp/icu4x_data/icu4x_data.postcard -//!``` -//! -//! Generate ICU4X Bincode file tree: -//! -//!```bash -//!# Run from the icu4x project folder -//!$ cargo run --bin icu4x-datagen -- \ -//! --cldr-tag 39.0.0 \ -//! --all-keys \ -//! --all-locales \ -//! --syntax bincode \ -//! --out /tmp/icu4x_data/bincode -//!``` +//! Generate ICU4X Postcard blob (single file) for all keys and all locales: +//! +//! ```bash +//! # Run from the icu4x project folder +//! $ cargo run --bin icu4x-datagen -- \ +//! --cldr-tag 39.0.0 \ +//! --all-keys \ +//! --all-locales \ +//! --format blob \ +//! --out /tmp/icu4x_data/icu4x_data.postcard +//! ``` +//! +//! Extract the keys used by an executable into a key file: +//! +//! ```bash +//! # Run from the icu4x project folder +//! $ cargo build --example work_log --release +//! $ cargo make icu4x-key-extract \ +//! target/release/examples/work_log \ +//! /tmp/icu4x_data/work_log+keys.txt +//! $ cat /tmp/icu4x_data/work_log+keys.txt +//! ``` +//! +//! Generate ICU4X JSON file tree from the key file for Spanish and German: +//! +//! ```bash +//! # Run from the icu4x project folder +//! $ cargo run --bin icu4x-datagen -- \ +//! --cldr-tag 39.0.0 \ +//! --key-file /tmp/icu4x_data/work_log+keys.txt \ +//! --locales es \ +//! --locales de \ +//! --out /tmp/icu4x_data/work_log_json +//! ``` fn main() { panic!("Please run a more specific binary") diff --git a/tools/datagen/tests/testdata/work_log+keys.txt b/tools/datagen/tests/testdata/work_log+keys.txt new file mode 100644 index 00000000000..9f03fd10b11 --- /dev/null +++ b/tools/datagen/tests/testdata/work_log+keys.txt @@ -0,0 +1,4 @@ +datetime/lengths@1 +datetime/skeletons@1 +datetime/symbols@1 +plurals/ordinal@1 diff --git a/tools/scripts/data.toml b/tools/scripts/data.toml index 5605734b84e..f07fe11b55b 100644 --- a/tools/scripts/data.toml +++ b/tools/scripts/data.toml @@ -15,6 +15,18 @@ args = [ "-v", ] +[tasks.icu4x-key-extract] +description = "Extracts ICU4X resource keys used by a binary." +category = "ICU4X Data" +script_runner = "bash" +script = """ + # TODO(#1106): Implement this + echo "datetime/lengths@1" > ${2} + echo "datetime/skeletons@1" >> ${2} + echo "datetime/symbols@1" >> ${2} + echo "plurals/ordinal@1" >> ${2} +""" + [tasks.testdata-build-json] description = "Build ICU4X JSON from the downloaded CLDR JSON, overwriting the existing ICU4X JSON." category = "ICU4X Data" @@ -48,21 +60,6 @@ args = [ "--overwrite", ] -[tasks.testdata-build-hello-world] -description = "Build the Hello World postcard testdata file." -category = "ICU4X Data" -command = "cargo" -args = [ - "run", - "--bin=icu4x-datagen", - "--", - "--format=blob", - "--hello-world-key", - "--all-locales", - "--out=provider/blob/tests/data/hello_world.postcard", - "--overwrite", -] - [tasks.testdata-build-blob-smaller] description = "Build a small subset of ICU4X data as a Postcard blob." category = "ICU4X Data" @@ -142,3 +139,20 @@ args = [ "--syntax=bincode", "--overwrite", ] + +[tasks.testdata-build-worklog-bincode] +description = "Build Bincode files for the work_log example" +category = "ICU4X Data" +command = "cargo" +args = [ + "run", + "--bin=icu4x-datagen", + "--", + "--input-from-testdata", + "--out=tools/datagen/tests/testdata/work_log_bincode", + "--key-file=tools/datagen/tests/testdata/work_log+keys.txt", + "--locales=de", + "--locales=es", + "--syntax=bincode", + "--overwrite", +] diff --git a/tools/scripts/wasm.toml b/tools/scripts/wasm.toml index f1717a37bef..ad4a6afd4c2 100644 --- a/tools/scripts/wasm.toml +++ b/tools/scripts/wasm.toml @@ -334,6 +334,62 @@ end ''' dependencies = ["wasm-wasm-examples"] +[tasks.wasm-key-extract-examples] +description = "Run icu4x-key-extract on WASM files" +category = "ICU4X WASM" +script_runner = "@duckscript" +script = ''' +exit_on_error true + +mkdir wasmpkg/keys + +handle = glob_array wasmpkg/*.wasm +for src_path in ${handle} + path_no_extension = substring ${src_path} -5 + basename = substring ${path_no_extension} 8 + out_path = concat wasmpkg/keys/ ${basename} "+keys.txt" + + out_exists = is_path_exists ${out_path} + up_to_date = set false + if ${out_exists} + src_time = get_last_modified_time ${src_path} + out_time = get_last_modified_time ${out_path} + up_to_date = less_than ${src_time} ${out_time} + end + + if not ${up_to_date} + echo Writing ${out_path} + exec --fail-on-error cargo make icu4x-key-extract ${src_path} ${out_path} + end +end +''' +dependencies = ["wasm-wasm-examples"] + +[tasks.wasm-compare-worklog-keys] +description = "Compare the generated worklog key file with the golden version" +category = "ICU4X WASM" +script_runner = "@duckscript" +script = ''' +exit_on_error true + +expected = readfile tools/datagen/tests/testdata/work_log+keys.txt +actual = readfile wasmpkg/keys/work_log+keys.txt +are_equal = eq ${expected} ${actual} + +if ${are_equal} + exit 0 +else + echo "*****" + echo "work_log+keys.txt do not match! Actual generated output:" + echo "" + echo ${actual} + echo "If this is expected, copy the above output into tools/datagen/tests/testdata/work_log+keys.txt" + echo "*****" + exit 1 +end +''' +dependencies = ["wasm-key-extract-examples"] + [tasks.wasm-dev] description = "All-in-one command to build dev-mode WASM FFI to wasmpkg" category = "ICU4X WASM" From c8d21601d18cab7f1b01424366eb5775dd41cea5 Mon Sep 17 00:00:00 2001 From: Robert Bastian Date: Thu, 20 Jan 2022 21:06:06 +0100 Subject: [PATCH 2/4] comments --- tools/datagen/README.md | 1 + tools/datagen/src/main.rs | 1 + tools/scripts/data.toml | 15 +++++++++++++++ 3 files changed, 17 insertions(+) diff --git a/tools/datagen/README.md b/tools/datagen/README.md index ca9e08dcac6..faf4a03c82a 100644 --- a/tools/datagen/README.md +++ b/tools/datagen/README.md @@ -6,6 +6,7 @@ The tools include: 1. `icu4x-datagen`: Read source data (CLDR JSON) and dump ICU4X-format data. 2. `icu4x-testdata-download`: Download fresh CLDR JSON for testdata. +3. `icu4x-key-extract`: Extract `ResourceKey` objects present in a compiled executable. More details on each tool can be found by running `--help`. diff --git a/tools/datagen/src/main.rs b/tools/datagen/src/main.rs index 79497d59572..b891e07f76a 100644 --- a/tools/datagen/src/main.rs +++ b/tools/datagen/src/main.rs @@ -8,6 +8,7 @@ //! //! 1. `icu4x-datagen`: Read source data (CLDR JSON) and dump ICU4X-format data. //! 2. `icu4x-testdata-download`: Download fresh CLDR JSON for testdata. +//! 3. `icu4x-key-extract`: Extract `ResourceKey` objects present in a compiled executable. //! //! More details on each tool can be found by running `--help`. //! diff --git a/tools/scripts/data.toml b/tools/scripts/data.toml index f07fe11b55b..7eb86d87ca2 100644 --- a/tools/scripts/data.toml +++ b/tools/scripts/data.toml @@ -60,6 +60,21 @@ args = [ "--overwrite", ] +[tasks.testdata-build-hello-world] +description = "Build the Hello World postcard testdata file." +category = "ICU4X Data" +command = "cargo" +args = [ + "run", + "--bin=icu4x-datagen", + "--", + "--format=blob", + "--hello-world-key", + "--all-locales", + "--out=provider/blob/tests/data/hello_world.postcard", + "--overwrite", +] + [tasks.testdata-build-blob-smaller] description = "Build a small subset of ICU4X data as a Postcard blob." category = "ICU4X Data" From 61e7077b3068b6494b7390d114b89b374de873a0 Mon Sep 17 00:00:00 2001 From: Robert Bastian Date: Thu, 20 Jan 2022 21:28:11 +0100 Subject: [PATCH 3/4] CI --- .github/workflows/build-test.yml | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/.github/workflows/build-test.yml b/.github/workflows/build-test.yml index 414e1a3fbe9..dfbf13e8052 100644 --- a/.github/workflows/build-test.yml +++ b/.github/workflows/build-test.yml @@ -231,6 +231,10 @@ jobs: steps: - uses: actions/checkout@v2 + - name: Install Node.js v14.17.0 + uses: actions/setup-node@v1 + with: + node-version: 14.17.0 - name: Load nightly Rust toolchain for WASM. run: | rustup install nightly-2021-12-22 @@ -320,15 +324,16 @@ jobs: with: command: make args: wasm-release - - name: Install Node.js v14.17.0 - uses: actions/setup-node@v1 - with: - node-version: 14.17.0 - - name: Build + - name: Test uses: actions-rs/cargo@v1.0.1 with: command: make args: wasm-test-release + - name: Build Examples and test icu4x-key-extract + uses: actions-rs/cargo@v1.0.1 + with: + command: make + args: wasm-compare-worklog-keys # This has to be a separate test since the emscripten sdk # will otherwise interfere with other node-using tests - name: Run emscripten test @@ -453,7 +458,7 @@ jobs: with: token: ${{ secrets.GITHUB_TOKEN }} # keep args in sync with `clippy-all` in .cargo/config.toml - args: --all-targets --all-features -- -D warnings + args: --all-targets --all-features -- -D warnings -Aclippy::unknown-clippy-lints -Aclippy::field-reassign-with-default # Benchmarking & dashboards job @@ -759,10 +764,8 @@ jobs: rustup component add rust-src rustup toolchain list rustup toolchain install nightly-2021-12-22 - cd ffi/diplomat/wasm - npm ci - echo "$HOME/work/icu4x/icu4x/ffi/diplomat/wasm/node_modules/wasm-opt/bin" >> $GITHUB_PATH - echo "$HOME/work/icu4x/icu4x/ffi/diplomat/wasm/node_modules/wabt/bin" >> $GITHUB_PATH + sudo npm install -g wasm-opt --unsafe-perm + sudo npm install -g wabt - name: Setup output data directory run: | From c0c4b9bc04c8b9a19e3696ffbd2385b7c24b4760 Mon Sep 17 00:00:00 2001 From: Robert Bastian Date: Fri, 21 Jan 2022 11:55:47 +0100 Subject: [PATCH 4/4] actions --- .github/workflows/build-test.yml | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/.github/workflows/build-test.yml b/.github/workflows/build-test.yml index dfbf13e8052..298860565a4 100644 --- a/.github/workflows/build-test.yml +++ b/.github/workflows/build-test.yml @@ -231,10 +231,6 @@ jobs: steps: - uses: actions/checkout@v2 - - name: Install Node.js v14.17.0 - uses: actions/setup-node@v1 - with: - node-version: 14.17.0 - name: Load nightly Rust toolchain for WASM. run: | rustup install nightly-2021-12-22 @@ -324,7 +320,11 @@ jobs: with: command: make args: wasm-release - - name: Test + - name: Install Node.js v14.17.0 + uses: actions/setup-node@v1 + with: + node-version: 14.17.0 + - name: Build uses: actions-rs/cargo@v1.0.1 with: command: make @@ -458,7 +458,7 @@ jobs: with: token: ${{ secrets.GITHUB_TOKEN }} # keep args in sync with `clippy-all` in .cargo/config.toml - args: --all-targets --all-features -- -D warnings -Aclippy::unknown-clippy-lints -Aclippy::field-reassign-with-default + args: --all-targets --all-features -- -D warnings # Benchmarking & dashboards job @@ -764,8 +764,10 @@ jobs: rustup component add rust-src rustup toolchain list rustup toolchain install nightly-2021-12-22 - sudo npm install -g wasm-opt --unsafe-perm - sudo npm install -g wabt + cd ffi/diplomat/wasm + npm ci + echo "$HOME/work/icu4x/icu4x/ffi/diplomat/wasm/node_modules/wasm-opt/bin" >> $GITHUB_PATH + echo "$HOME/work/icu4x/icu4x/ffi/diplomat/wasm/node_modules/wabt/bin" >> $GITHUB_PATH - name: Setup output data directory run: |