diff --git a/.github/workflows/build-test.yml b/.github/workflows/build-test.yml index 414e1a3fbe9..298860565a4 100644 --- a/.github/workflows/build-test.yml +++ b/.github/workflows/build-test.yml @@ -329,6 +329,11 @@ jobs: with: command: make args: wasm-test-release + - name: Build Examples and test icu4x-key-extract + uses: actions-rs/cargo@v1.0.1 + with: + command: make + args: wasm-compare-worklog-keys # This has to be a separate test since the emscripten sdk # will otherwise interfere with other node-using tests - name: Run emscripten test diff --git a/.gitignore b/.gitignore index 799786906a2..3c63338f000 100644 --- a/.gitignore +++ b/.gitignore @@ -23,6 +23,7 @@ dhat-heap.json # Do not check-in bincode test data provider/testdata/data/bincode +tools/datagen/tests/testdata/work_log_bincode # Ignore irrelevant files that get generated on macOS **/.DS_Store diff --git a/Makefile.toml b/Makefile.toml index 2f66eb57583..b643b027119 100644 --- a/Makefile.toml +++ b/Makefile.toml @@ -48,7 +48,7 @@ dependencies = [ "test-all-features", "test-docs-default", "test-docs", - "testdata-build-bincode-all", + "testdata-build-worklog-bincode", "testdata-check", ] @@ -91,6 +91,8 @@ dependencies = [ # we have to set up the environment for the emscripten job separately # Instead, each of these is called individually. "wasm-release", + "wasm-test-release", + "wasm-compare-worklog-keys", "wasm-cpp-emscripten", ] diff --git a/tools/datagen/README.md b/tools/datagen/README.md index 5337abcc59d..faf4a03c82a 100644 --- a/tools/datagen/README.md +++ b/tools/datagen/README.md @@ -6,44 +6,45 @@ The tools include: 1. `icu4x-datagen`: Read source data (CLDR JSON) and dump ICU4X-format data. 2. `icu4x-testdata-download`: Download fresh CLDR JSON for testdata. +3. `icu4x-key-extract`: Extract `ResourceKey` objects present in a compiled executable. More details on each tool can be found by running `--help`. 
## Examples -Generate ICU4X JSON file tree: +Generate ICU4X Postcard blob (single file) for all keys and all locales: ```bash # Run from the icu4x project folder $ cargo run --bin icu4x-datagen -- \ - --cldr-tag 39.0.0 \ - --all-keys \ - --all-locales \ - --out /tmp/icu4x_data/json + --cldr-tag 39.0.0 \ + --all-keys \ + --all-locales \ + --format blob \ + --out /tmp/icu4x_data/icu4x_data.postcard ``` -Generate ICU4X Postcard blob (single file): +Extract the keys used by an executable into a key file: ```bash # Run from the icu4x project folder -$ cargo run --bin icu4x-datagen -- \ - --cldr-tag 39.0.0 \ - --all-keys \ - --all-locales \ - --format blob \ - --out /tmp/icu4x_data/icu4x_data.postcard +$ cargo build --example work_log --release +$ cargo make icu4x-key-extract \ + target/release/examples/work_log \ + /tmp/icu4x_data/work_log+keys.txt +$ cat /tmp/icu4x_data/work_log+keys.txt ``` -Generate ICU4X Bincode file tree: +Generate ICU4X JSON file tree from the key file for Spanish and German: ```bash # Run from the icu4x project folder $ cargo run --bin icu4x-datagen -- \ - --cldr-tag 39.0.0 \ - --all-keys \ - --all-locales \ - --syntax bincode \ - --out /tmp/icu4x_data/bincode + --cldr-tag 39.0.0 \ + --key-file /tmp/icu4x_data/work_log+keys.txt \ + --locales es \ + --locales de \ + --out /tmp/icu4x_data/work_log_json ``` ## More Information diff --git a/tools/datagen/src/bin/datagen.rs b/tools/datagen/src/bin/datagen.rs index f22653d4616..9d2e3efaec4 100644 --- a/tools/datagen/src/bin/datagen.rs +++ b/tools/datagen/src/bin/datagen.rs @@ -24,7 +24,11 @@ use icu_provider_fs::export::FilesystemExporter; use icu_provider_fs::manifest; use icu_provider_uprops::{EnumeratedPropertyCodePointTrieProvider, PropertiesDataProvider}; use simple_logger::SimpleLogger; +use std::borrow::Cow; use std::collections::HashSet; +use std::fs::File; +use std::io; +use std::io::BufRead; use std::path::PathBuf; use std::str::FromStr; use writeable::Writeable; @@ -240,10 +244,6 @@ fn 
main() -> eyre::Result<()> { .unwrap() } - if matches.is_present("KEY_FILE") { - eyre::bail!("Key file is not yet supported (see #192)",); - } - if matches.is_present("DRY_RUN") { eyre::bail!("Dry-run is not yet supported"); } @@ -282,9 +282,22 @@ fn main() -> eyre::Result<()> { if matches.is_present("ALL_KEYS") || matches.is_present("KEYS") + || matches.is_present("KEY_FILE") || matches.is_present("TEST_KEYS") { - let keys = matches.values_of("KEYS").map(|values| values.collect()); + let mut keys = matches + .values_of("KEYS") + .map(|keys| keys.map(Cow::Borrowed).collect::<HashSet<_>>()); + if let Some(key_file_path) = matches.value_of_os("KEY_FILE") { + let keys = keys.get_or_insert_with(Default::default); + let file = File::open(key_file_path) + .with_context(|| key_file_path.to_string_lossy().into_owned())?; + for line in io::BufReader::new(file).lines() { + let line_string = + line.with_context(|| key_file_path.to_string_lossy().into_owned())?; + keys.insert(Cow::Owned(line_string)); + } + } export_cldr(&matches, exporter, locales_vec.as_deref(), keys.as_ref())?; export_set_props(&matches, exporter, keys.as_ref())?; export_map_props(&matches, exporter, keys.as_ref())?; @@ -379,7 +392,7 @@ fn export_cldr( matches: &ArgMatches, exporter: &mut (impl DataExporter + ?Sized), allowed_locales: Option<&[LanguageIdentifier]>, - allowed_keys: Option<&HashSet<&str>>, + allowed_keys: Option<&HashSet<Cow<str>>>, ) -> eyre::Result<()> { let locale_subset = matches.value_of("CLDR_LOCALE_SUBSET").unwrap_or("full"); let cldr_paths: Box = if let Some(tag) = matches.value_of("CLDR_TAG") { @@ -434,7 +447,7 @@ fn export_cldr( fn export_set_props( matches: &ArgMatches, exporter: &mut (impl DataExporter + ?Sized), - allowed_keys: Option<&HashSet<&str>>, + allowed_keys: Option<&HashSet<Cow<str>>>, ) -> eyre::Result<()> { log::trace!("Loading data for binary properties..."); @@ -482,7 +495,7 @@ fn export_set_props( fn export_map_props( matches: &ArgMatches, exporter: &mut (impl DataExporter + ?Sized), - 
allowed_keys: Option<&HashSet<&str>>, + allowed_keys: Option<&HashSet<Cow<str>>>, ) -> eyre::Result<()> { log::trace!("Loading data for enumerated properties..."); diff --git a/tools/datagen/src/main.rs b/tools/datagen/src/main.rs index 0b70db1c863..b891e07f76a 100644 --- a/tools/datagen/src/main.rs +++ b/tools/datagen/src/main.rs @@ -8,45 +8,46 @@ //! //! 1. `icu4x-datagen`: Read source data (CLDR JSON) and dump ICU4X-format data. //! 2. `icu4x-testdata-download`: Download fresh CLDR JSON for testdata. +//! 3. `icu4x-key-extract`: Extract `ResourceKey` objects present in a compiled executable. //! //! More details on each tool can be found by running `--help`. //! //! # Examples //! -//! Generate ICU4X JSON file tree: -//! -//!```bash -//!# Run from the icu4x project folder -//!$ cargo run --bin icu4x-datagen -- \ -//! --cldr-tag 39.0.0 \ -//! --all-keys \ -//! --all-locales \ -//! --out /tmp/icu4x_data/json -//!``` -//! -//! Generate ICU4X Postcard blob (single file): -//! -//!```bash -//!# Run from the icu4x project folder -//!$ cargo run --bin icu4x-datagen -- \ -//! --cldr-tag 39.0.0 \ -//! --all-keys \ -//! --all-locales \ -//! --format blob \ -//! --out /tmp/icu4x_data/icu4x_data.postcard -//!``` -//! -//! Generate ICU4X Bincode file tree: -//! -//!```bash -//!# Run from the icu4x project folder -//!$ cargo run --bin icu4x-datagen -- \ -//! --cldr-tag 39.0.0 \ -//! --all-keys \ -//! --all-locales \ -//! --syntax bincode \ -//! --out /tmp/icu4x_data/bincode -//!``` +//! Generate ICU4X Postcard blob (single file) for all keys and all locales: +//! +//! ```bash +//! # Run from the icu4x project folder +//! $ cargo run --bin icu4x-datagen -- \ +//! --cldr-tag 39.0.0 \ +//! --all-keys \ +//! --all-locales \ +//! --format blob \ +//! --out /tmp/icu4x_data/icu4x_data.postcard +//! ``` +//! +//! Extract the keys used by an executable into a key file: +//! +//! ```bash +//! # Run from the icu4x project folder +//! $ cargo build --example work_log --release +//! 
$ cargo make icu4x-key-extract \ +//! target/release/examples/work_log \ +//! /tmp/icu4x_data/work_log+keys.txt +//! $ cat /tmp/icu4x_data/work_log+keys.txt +//! ``` +//! +//! Generate ICU4X JSON file tree from the key file for Spanish and German: +//! +//! ```bash +//! # Run from the icu4x project folder +//! $ cargo run --bin icu4x-datagen -- \ +//! --cldr-tag 39.0.0 \ +//! --key-file /tmp/icu4x_data/work_log+keys.txt \ +//! --locales es \ +//! --locales de \ +//! --out /tmp/icu4x_data/work_log_json +//! ``` fn main() { panic!("Please run a more specific binary") diff --git a/tools/datagen/tests/testdata/work_log+keys.txt b/tools/datagen/tests/testdata/work_log+keys.txt new file mode 100644 index 00000000000..9f03fd10b11 --- /dev/null +++ b/tools/datagen/tests/testdata/work_log+keys.txt @@ -0,0 +1,4 @@ +datetime/lengths@1 +datetime/skeletons@1 +datetime/symbols@1 +plurals/ordinal@1 diff --git a/tools/scripts/data.toml b/tools/scripts/data.toml index 5605734b84e..7eb86d87ca2 100644 --- a/tools/scripts/data.toml +++ b/tools/scripts/data.toml @@ -15,6 +15,18 @@ args = [ "-v", ] +[tasks.icu4x-key-extract] +description = "Extracts ICU4X resource keys used by a binary." +category = "ICU4X Data" +script_runner = "bash" +script = """ + # TODO(#1106): Implement this + echo "datetime/lengths@1" > ${2} + echo "datetime/skeletons@1" >> ${2} + echo "datetime/symbols@1" >> ${2} + echo "plurals/ordinal@1" >> ${2} +""" + [tasks.testdata-build-json] description = "Build ICU4X JSON from the downloaded CLDR JSON, overwriting the existing ICU4X JSON." 
category = "ICU4X Data" @@ -142,3 +154,20 @@ args = [ "--syntax=bincode", "--overwrite", ] + +[tasks.testdata-build-worklog-bincode] +description = "Build Bincode files for the work_log example" +category = "ICU4X Data" +command = "cargo" +args = [ + "run", + "--bin=icu4x-datagen", + "--", + "--input-from-testdata", + "--out=tools/datagen/tests/testdata/work_log_bincode", + "--key-file=tools/datagen/tests/testdata/work_log+keys.txt", + "--locales=de", + "--locales=es", + "--syntax=bincode", + "--overwrite", +] diff --git a/tools/scripts/wasm.toml b/tools/scripts/wasm.toml index f1717a37bef..ad4a6afd4c2 100644 --- a/tools/scripts/wasm.toml +++ b/tools/scripts/wasm.toml @@ -334,6 +334,62 @@ end ''' dependencies = ["wasm-wasm-examples"] +[tasks.wasm-key-extract-examples] +description = "Run icu4x-key-extract on WASM files" +category = "ICU4X WASM" +script_runner = "@duckscript" +script = ''' +exit_on_error true + +mkdir wasmpkg/keys + +handle = glob_array wasmpkg/*.wasm +for src_path in ${handle} + path_no_extension = substring ${src_path} -5 + basename = substring ${path_no_extension} 8 + out_path = concat wasmpkg/keys/ ${basename} "+keys.txt" + + out_exists = is_path_exists ${out_path} + up_to_date = set false + if ${out_exists} + src_time = get_last_modified_time ${src_path} + out_time = get_last_modified_time ${out_path} + up_to_date = less_than ${src_time} ${out_time} + end + + if not ${up_to_date} + echo Writing ${out_path} + exec --fail-on-error cargo make icu4x-key-extract ${src_path} ${out_path} + end +end +''' +dependencies = ["wasm-wasm-examples"] + +[tasks.wasm-compare-worklog-keys] +description = "Compare the generated worklog key file with the golden version" +category = "ICU4X WASM" +script_runner = "@duckscript" +script = ''' +exit_on_error true + +expected = readfile tools/datagen/tests/testdata/work_log+keys.txt +actual = readfile wasmpkg/keys/work_log+keys.txt +are_equal = eq ${expected} ${actual} + +if ${are_equal} + exit 0 +else + echo "*****" + 
echo "work_log+keys.txt do not match! Actual generated output:" + echo "" + echo ${actual} + echo "If this is expected, copy the above output into tools/datagen/tests/testdata/work_log+keys.txt" + echo "*****" + exit 1 +end +''' +dependencies = ["wasm-key-extract-examples"] + [tasks.wasm-dev] description = "All-in-one command to build dev-mode WASM FFI to wasmpkg" category = "ICU4X WASM"