diff --git a/provider/cldr/src/transform/decimal/mod.rs b/provider/cldr/src/transform/decimal/mod.rs index 9d02543c433..15fefa4cf2e 100644 --- a/provider/cldr/src/transform/decimal/mod.rs +++ b/provider/cldr/src/transform/decimal/mod.rs @@ -62,10 +62,7 @@ impl TryFrom<&dyn CldrPaths> for NumbersProvider { impl KeyedDataProvider for NumbersProvider { fn supports_key(resc_key: &ResourceKey) -> Result<(), DataError> { - if resc_key.category != ResourceCategory::Decimal || resc_key.version != 1 { - return Err(DataErrorKind::MissingResourceKey.with_key(*resc_key)); - } - Ok(()) + resc_key.match_key(key::SYMBOLS_V1) } } diff --git a/provider/cldr/src/transform/plurals/mod.rs b/provider/cldr/src/transform/plurals/mod.rs index 97a99267d64..6fccd9a9c3c 100644 --- a/provider/cldr/src/transform/plurals/mod.rs +++ b/provider/cldr/src/transform/plurals/mod.rs @@ -56,10 +56,12 @@ impl TryFrom<&dyn CldrPaths> for PluralsProvider { impl KeyedDataProvider for PluralsProvider { fn supports_key(resc_key: &ResourceKey) -> Result<(), DataError> { - if resc_key.category != ResourceCategory::Plurals || resc_key.version != 1 { - return Err(DataErrorKind::MissingResourceKey.with_key(*resc_key)); + // TODO(#442): Clean up KeyedDataProvider + match *resc_key { + key::CARDINAL_V1 => Ok(()), + key::ORDINAL_V1 => Ok(()), + _ => Err(DataErrorKind::MissingResourceKey.with_key(*resc_key)), } - Ok(()) } } diff --git a/provider/cldr/src/transform/time_zones/mod.rs b/provider/cldr/src/transform/time_zones/mod.rs index a7dbb6004a6..c12e1ecf596 100644 --- a/provider/cldr/src/transform/time_zones/mod.rs +++ b/provider/cldr/src/transform/time_zones/mod.rs @@ -67,10 +67,16 @@ impl TryFrom<&str> for TimeZonesProvider { impl KeyedDataProvider for TimeZonesProvider { fn supports_key(resc_key: &ResourceKey) -> Result<(), DataError> { - if resc_key.category != ResourceCategory::TimeZone || resc_key.version != 1 { - return Err(DataErrorKind::MissingResourceKey.with_key(*resc_key)); + // TODO(#442): Clean up KeyedDataProvider + match *resc_key { + key::TIMEZONE_FORMATS_V1 => Ok(()), + key::TIMEZONE_EXEMPLAR_CITIES_V1 => Ok(()), + key::TIMEZONE_GENERIC_NAMES_LONG_V1 => Ok(()), + key::TIMEZONE_GENERIC_NAMES_SHORT_V1 => Ok(()), + key::TIMEZONE_SPECIFIC_NAMES_LONG_V1 => Ok(()), + key::TIMEZONE_SPECIFIC_NAMES_SHORT_V1 => Ok(()), + _ => Err(DataErrorKind::MissingResourceKey.with_key(*resc_key)), } - Ok(()) } } diff --git a/provider/core/src/data_provider.rs b/provider/core/src/data_provider.rs index 3ccf908c0fc..f57f6d866df 100644 --- a/provider/core/src/data_provider.rs +++ b/provider/core/src/data_provider.rs @@ -58,7 +58,7 @@ impl DataRequest { /// ``` /// use icu_provider::prelude::*; /// - /// const FOO_BAR: ResourceKey = icu_provider::resource_key!(x, "foo", "bar", 1); + /// const FOO_BAR: ResourceKey = icu_provider::resource_key!("foo/bar@1"); /// /// let req_no_langid = DataRequest { /// resource_path: ResourcePath { diff --git a/provider/core/src/data_provider/test.rs b/provider/core/src/data_provider/test.rs index 8c59b0ef893..1c467801ffd 100644 --- a/provider/core/src/data_provider/test.rs +++ b/provider/core/src/data_provider/test.rs @@ -16,7 +16,7 @@ use crate::yoke; // JSON string. It also exercises most of the data provider code paths. /// Key for HelloAlt, used for testing mismatched types -const HELLO_ALT_KEY: ResourceKey = crate::resource_key!(Core, "helloalt", 1); +const HELLO_ALT_KEY: ResourceKey = crate::resource_key!("core/helloalt@1"); /// A data struct serialization-compatible with HelloWorldV1 used for testing mismatched types #[derive(Serialize, Deserialize, Debug, Clone, Default, PartialEq, Yokeable, ZeroCopyFrom)] diff --git a/provider/core/src/dynutil.rs b/provider/core/src/dynutil.rs index 5be8f777d6f..56e2a023ad2 100644 --- a/provider/core/src/dynutil.rs +++ b/provider/core/src/dynutil.rs @@ -65,7 +65,7 @@ where /// use icu_provider::prelude::*; /// use icu_provider::marker::CowStrMarker; /// use std::borrow::Cow; -/// const DEMO_KEY: ResourceKey = icu_provider::resource_key!(x, "foo", "bar", 1); +/// const DEMO_KEY: ResourceKey = icu_provider::resource_key!("foo/bar@1"); /// /// // A small DataProvider that returns owned strings /// struct MyProvider(pub String); diff --git a/provider/core/src/hello_world.rs b/provider/core/src/hello_world.rs index 79b04f9f296..9274222b0eb 100644 --- a/provider/core/src/hello_world.rs +++ b/provider/core/src/hello_world.rs @@ -22,8 +22,7 @@ use litemap::LiteMap; pub mod key { use crate::resource::ResourceKey; - use crate::resource_key; - pub const HELLO_WORLD_V1: ResourceKey = resource_key!(Core, "helloworld", 1); + pub const HELLO_WORLD_V1: ResourceKey = crate::resource_key!("core/helloworld@1"); } /// A struct containing "Hello World" in the requested language. diff --git a/provider/core/src/helpers.rs b/provider/core/src/helpers.rs index 9cd9b0539b5..bf545546af9 100644 --- a/provider/core/src/helpers.rs +++ b/provider/core/src/helpers.rs @@ -92,7 +92,6 @@ fn test_escape_for_json() { /// 4. FxHash is designed to output 32-bit or 64-bit values, whereas SHA outputs more bits, /// such that truncation would be required in order to fit into a u32, partially reducing /// the benefit of a cryptographically secure algorithm -#[allow(dead_code)] pub const fn fxhash_32(bytes: &[u8]) -> u32 { // This code is adapted from https://github.com/rust-lang/rustc-hash, // whose license text is reproduced below. diff --git a/provider/core/src/lib.rs b/provider/core/src/lib.rs index c4b1ceb0e77..1bad8ee4cf0 100644 --- a/provider/core/src/lib.rs +++ b/provider/core/src/lib.rs @@ -146,7 +146,6 @@ pub mod prelude { pub use crate::error::DataError; pub use crate::error::DataErrorKind; pub use crate::marker::DataMarker; - pub use crate::resource::ResourceCategory; pub use crate::resource::ResourceKey; pub use crate::resource::ResourceOptions; pub use crate::resource::ResourcePath; @@ -164,11 +163,3 @@ pub use yoke; // Also include the same symbols at the top level for selective inclusion pub use prelude::*; - -pub mod internal { - //! Macro dependencies; not intended to be used directly. - /// Re-export tinystr16 for macro resource_key!() - pub use tinystr::tinystr16; - /// Re-export tinystr4 for macro resource_key!() - pub use tinystr::tinystr4; -} diff --git a/provider/core/src/resource.rs b/provider/core/src/resource.rs index c37efa2bf42..fd0a9dc4982 100644 --- a/provider/core/src/resource.rs +++ b/provider/core/src/resource.rs @@ -5,128 +5,209 @@ //! Resource paths and related types. use alloc::borrow::Cow; -use alloc::format; -use alloc::string::String; use alloc::string::ToString; use crate::error::{DataError, DataErrorKind}; -use core::borrow::Borrow; +use crate::helpers; use core::default::Default; use core::fmt; use core::fmt::Write; use icu_locid::LanguageIdentifier; -use tinystr::{TinyStr16, TinyStr4}; use writeable::{LengthHint, Writeable}; -/// A top-level collection of related resource keys. -#[non_exhaustive] -#[derive(PartialEq, Eq, PartialOrd, Ord, Copy, Clone, Debug)] -pub enum ResourceCategory { - Core, - Calendar, - DateTime, - Decimal, - LocaleCanonicalizer, - Plurals, - TimeZone, - Properties, - ListFormatter, - Segmenter, - PrivateUse(TinyStr4), -} +/// A compact hash of a [`ResourceKey`]. Useful for keys in maps. +#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Copy, Clone, Hash)] +#[repr(transparent)] +pub struct ResourceKeyHash([u8; 4]); -impl ResourceCategory { - /// Gets or builds a string form of this [`ResourceCategory`]. - pub fn as_str(&self) -> Cow<'static, str> { - match self { - Self::Core => Cow::Borrowed("core"), - Self::Calendar => Cow::Borrowed("calendar"), - Self::DateTime => Cow::Borrowed("datetime"), - Self::Decimal => Cow::Borrowed("decimal"), - Self::LocaleCanonicalizer => Cow::Borrowed("locale_canonicalizer"), - Self::Plurals => Cow::Borrowed("plurals"), - Self::TimeZone => Cow::Borrowed("time_zone"), - Self::Properties => Cow::Borrowed("props"), - Self::ListFormatter => Cow::Borrowed("list_formatter"), - Self::Segmenter => Cow::Borrowed("segmenter"), - Self::PrivateUse(id) => { - let mut result = String::from("x-"); - result.push_str(id.as_str()); - Cow::Owned(result) - } - } +impl ResourceKeyHash { + const fn compute_from_str(path: &str) -> Self { + Self(helpers::fxhash_32(path.as_bytes()).to_le_bytes()) } } -impl fmt::Display for ResourceCategory { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - f.write_str(&self.as_str()) - } +/// The resource key used for loading data from an ICU4X data provider. +/// +/// A resource key is tightly coupled with the code that uses it to load data at runtime. +/// Executables can be searched for ResourceKey instances to produce optimized data files. +/// Therefore, users should not generally create ResourceKey instances; they should instead use +/// the ones exported by a component. +#[derive(PartialEq, Eq, Copy, Clone)] +pub struct ResourceKey { + path: &'static str, + hash: ResourceKeyHash, } -impl writeable::Writeable for ResourceCategory { - fn write_to(&self, sink: &mut W) -> core::fmt::Result { - sink.write_str(&self.as_str()) +impl ResourceKey { + /// Gets a human-readable representation of a [`ResourceKey`]. + /// + /// The human-readable path string always contains at least one '/', and it ends with '@' + /// followed by one or more digits. Paths do not contain characters other than ASCII, + /// '_', '/', '=', and '@'. + /// + /// Useful for reading and writing data to a file system. + #[inline] + pub const fn get_path(&self) -> &str { + self.path } - fn write_len(&self) -> writeable::LengthHint { - writeable::LengthHint::exact(self.as_str().len()) + /// Gets a machine-readable representation of a [`ResourceKey`]. + /// + /// The machine-readable hash is 4 bytes and can be used as the key in a map. + /// + /// The hash is a 32-bit FxHash of the path, computed as if on a little-endian platform. + #[inline] + pub const fn get_hash(&self) -> ResourceKeyHash { + self.hash + } + + /// Creates a new ResourceKey from a path, returning an error if the path is invalid. + /// + /// It is intended that `ResourceKey` objects are const-constructed. To force construction + /// into a const context, use [`resource_key!()`]. Doing so ensures that compile-time key + /// extraction functions as expected. + /// + /// # Example + /// + /// ``` + /// use icu_provider::prelude::*; + /// + /// // Const constructed (preferred): + /// const k1: ResourceKey = icu_provider::resource_key!("foo/bar@1"); + /// + /// // Runtime constructed: + /// let k2: ResourceKey = ResourceKey::try_new("foo/bar@1").unwrap(); + /// + /// assert_eq!(k1, k2); + /// ``` + #[inline] + pub const fn try_new(path: &'static str) -> Result { + match Self::check_path_syntax(path) { + Ok(_) => Ok(Self { + path, + hash: ResourceKeyHash::compute_from_str(path), + }), + Err(_) => Err(DataError::custom("resource key syntax error")), + } + } + + const fn check_path_syntax(path: &str) -> Result<(), ()> { + // Approximate regex: \w+(/\w+)*@\d+ + // State 0 = start of string + // State 1 = after first character + // State 2 = after a slash + // State 3 = after a character after a slash + // State 4 = after @ + // State 5 = after a digit after @ + let mut i = 0; + let mut state = 0; + let path_bytes = path.as_bytes(); + while i < path_bytes.len() { + let c = path_bytes[i]; + state = match (state, c) { + (0 | 1, b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' | b'_' | b'=') => 1, + (1, b'/') => 2, + (2 | 3, b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' | b'_' | b'=') => 3, + (3, b'/') => 2, + (3, b'@') => 4, + (4 | 5, b'0'..=b'9') => 5, + _ => return Err(()), + }; + i += 1; + } + if state != 5 { + return Err(()); + } + Ok(()) } } -/// A category, subcategory, and version, used for requesting data from a -/// [`DataProvider`](crate::DataProvider). -/// -/// The fields in a [`ResourceKey`] should generally be known at compile time. -/// -/// Use [`resource_key!`] as a shortcut to create resource keys in code. -#[derive(PartialEq, Eq, PartialOrd, Ord, Copy, Clone)] -pub struct ResourceKey { - pub category: ResourceCategory, - pub sub_category: TinyStr16, - pub version: u16, +#[test] +fn test_path_syntax() { + // Valid keys: + assert!(matches!(ResourceKey::try_new("hello/world@1"), Ok(_))); + assert!(matches!(ResourceKey::try_new("hello/world/foo@1"), Ok(_))); + assert!(matches!(ResourceKey::try_new("hello/world@999"), Ok(_))); + assert!(matches!(ResourceKey::try_new("hello_world/foo@1"), Ok(_))); + assert!(matches!(ResourceKey::try_new("hello_458/world@1"), Ok(_))); + + // No slash: + assert!(matches!(ResourceKey::try_new("hello_world@1"), Err(_))); + + // No version: + assert!(matches!(ResourceKey::try_new("hello/world"), Err(_))); + assert!(matches!(ResourceKey::try_new("hello/world@"), Err(_))); + assert!(matches!(ResourceKey::try_new("hello/world@foo"), Err(_))); + + // Invalid characters: + assert!(matches!(ResourceKey::try_new("你好/世界@1"), Err(_))); } /// Shortcut to construct a const resource identifier. /// -/// # Examples -/// -/// Create a private-use ResourceKey: -/// -/// ``` -/// use icu_provider::prelude::*; -/// -/// const MY_PRIVATE_USE_KEY: ResourceKey = icu_provider::resource_key!(x, "foo", "bar", 1); -/// assert_eq!("x-foo/bar@1", format!("{}", MY_PRIVATE_USE_KEY)); -/// ``` -/// -/// Create a ResourceKey for a specific [`ResourceCategory`] (for ICU4X library code only): -/// -/// ``` -/// use icu_provider::prelude::*; -/// -/// const MY_PRIVATE_USE_KEY: ResourceKey = icu_provider::resource_key!(Plurals, "ordinal", 1); -/// assert_eq!("plurals/ordinal@1", format!("{}", MY_PRIVATE_USE_KEY)); -/// ``` +/// For example, see [`ResourceKey::try_new()`]. #[macro_export] macro_rules! resource_key { - ($category:ident, $sub_category:literal, $version:tt) => { - $crate::resource_key!($crate::ResourceCategory::$category, $sub_category, $version) + ($path:literal) => {{ + // Force the ResourceKey into a const context + const RESOURCE_KEY_MACRO_CONST: $crate::ResourceKey = { + match $crate::ResourceKey::try_new($path) { + Ok(v) => v, + Err(_) => panic!(concat!("Invalid resource key: ", $path)), + } + }; + RESOURCE_KEY_MACRO_CONST + }}; + // TODO(#570): Migrate call sites to the all-in-one string version of the macro above, + // and then delete all of the macro branches that follow. + (Core, $sub_category:literal, $version:tt) => { + $crate::resource_key!("core", $sub_category, $version) }; - (x, $pu:literal, $sub_category:literal, $version:tt) => { - $crate::resource_key!( - $crate::ResourceCategory::PrivateUse($crate::internal::tinystr4!($pu)), - $sub_category, - $version - ) + (Calendar, $sub_category:literal, $version:tt) => { + $crate::resource_key!("calendar", $sub_category, $version) }; - ($category:expr, $sub_category:literal, $version:tt) => { - $crate::ResourceKey { - category: $category, - sub_category: $crate::internal::tinystr16!($sub_category), - version: $version, - } + (DateTime, $sub_category:literal, $version:tt) => { + $crate::resource_key!("datetime", $sub_category, $version) }; + (Decimal, $sub_category:literal, $version:tt) => { + $crate::resource_key!("decimal", $sub_category, $version) + }; + (LocaleCanonicalizer, $sub_category:literal, $version:tt) => { + $crate::resource_key!("locale_canonicalizer", $sub_category, $version) + }; + (Plurals, $sub_category:literal, $version:tt) => { + $crate::resource_key!("plurals", $sub_category, $version) + }; + (TimeZone, $sub_category:literal, $version:tt) => { + $crate::resource_key!("time_zone", $sub_category, $version) + }; + (Properties, $sub_category:literal, $version:tt) => { + $crate::resource_key!("props", $sub_category, $version) + }; + (ListFormatter, $sub_category:literal, $version:tt) => { + $crate::resource_key!("list_formatter", $sub_category, $version) + }; + (Segmenter, $sub_category:literal, $version:tt) => { + $crate::resource_key!("segmenter", $sub_category, $version) + }; + ($category:literal, $sub_category:literal, $version:tt) => {{ + // Force the ResourceKey into a const context + const RESOURCE_KEY_MACRO_CONST: $crate::ResourceKey = { + // Note: concat!() does not seem to work as a literal argument to another macro call. + // This branch will be deleted anyway in #570. + match $crate::ResourceKey::try_new(concat!( + $category, + "/", + $sub_category, + "@", + $version + )) { + Ok(v) => v, + Err(_) => panic!(concat!("Invalid resource key")), + } + }; + RESOURCE_KEY_MACRO_CONST + }}; } impl fmt::Debug for ResourceKey { @@ -146,19 +227,11 @@ impl fmt::Display for ResourceKey { impl Writeable for ResourceKey { fn write_to(&self, sink: &mut W) -> core::fmt::Result { - sink.write_str(&self.category.as_str())?; - sink.write_char('/')?; - sink.write_str(self.sub_category.as_str())?; - sink.write_char('@')?; - self.version.write_to(sink)?; - Ok(()) + self.path.write_to(sink) } fn write_len(&self) -> LengthHint { - LengthHint::exact(2) - + self.category.as_str().len() - + self.sub_category.len() - + self.version.write_len() + self.path.write_len() } } @@ -172,15 +245,39 @@ impl ResourceKey { /// use icu_provider::prelude::*; /// /// let resc_key = icu_provider::hello_world::key::HELLO_WORLD_V1; - /// let components = resc_key.get_components(); + /// let components: Vec<&str> = resc_key + /// .iter_components() + /// .collect(); /// /// assert_eq!( /// ["core", "helloworld@1"], - /// components.iter().collect::>()[..] + /// components[..] /// ); /// ``` - pub fn get_components(&self) -> ResourceKeyComponents { - self.into() + pub fn iter_components(&self) -> impl Iterator { + // TODO(#1516): Consider alternatives to this method. + self.get_path().split('/') + } + + /// Gets the last path component of a [`ResourceKey`] without the version suffix. + /// + /// # Examples + /// + /// ``` + /// use icu_provider::prelude::*; + /// + /// let resc_key = icu_provider::hello_world::key::HELLO_WORLD_V1; + /// assert_eq!("helloworld", resc_key.get_last_component_no_version()); + /// ``` + pub fn get_last_component_no_version(&self) -> &str { + // This cannot fail because of the preconditions on path (at least one '/' and '@') + // TODO(#1515): Consider deleting this method. + self.iter_components() + .last() + .unwrap() + .split('@') + .next() + .unwrap() } /// Returns [`Ok`] if this data key matches the argument, or the appropriate error. @@ -192,9 +289,9 @@ impl ResourceKey { /// ``` /// use icu_provider::prelude::*; /// - /// const FOO_BAR: ResourceKey = icu_provider::resource_key!(x, "foo", "bar", 1); - /// const FOO_BAZ: ResourceKey = icu_provider::resource_key!(x, "foo", "baz", 1); - /// const BAR_BAZ: ResourceKey = icu_provider::resource_key!(x, "bar", "baz", 1); + /// const FOO_BAR: ResourceKey = icu_provider::resource_key!("foo/bar@1"); + /// const FOO_BAZ: ResourceKey = icu_provider::resource_key!("foo/baz@1"); + /// const BAR_BAZ: ResourceKey = icu_provider::resource_key!("bar/baz@1"); /// /// assert!(matches!( /// FOO_BAR.match_key(FOO_BAR), @@ -224,33 +321,6 @@ impl ResourceKey { } } -/// The standard components of a [`ResourceKey`] path. -pub struct ResourceKeyComponents { - components: [Cow<'static, str>; 2], -} - -impl ResourceKeyComponents { - pub fn iter(&self) -> impl Iterator { - self.components.iter().map(|cow| cow.borrow()) - } -} - -impl From<&ResourceKey> for ResourceKeyComponents { - fn from(resc_key: &ResourceKey) -> Self { - Self { - components: [ - resc_key.category.as_str(), - // TODO: Evalute the size penalty of this format! - Cow::Owned(format!( - "{}@{}", - resc_key.sub_category.as_str(), - resc_key.version - )), - ], - } - } -} - /// A variant and language identifier, used for requesting data from a /// [`DataProvider`](crate::DataProvider). /// @@ -332,15 +402,23 @@ impl ResourceOptions { /// variant: Some(Cow::Borrowed("GBP")), /// langid: Some(langid!("pt_BR")), /// }; - /// let components = resc_options.get_components(); + /// let components: Vec = resc_options + /// .iter_components() + /// .map(|s| s.into_owned()) + /// .collect(); /// /// assert_eq!( /// ["GBP", "pt-BR"], - /// components.iter().collect::>()[..] + /// components[..] /// ); /// ``` - pub fn get_components(&self) -> ResourceOptionsComponents { - self.into() + pub fn iter_components(&self) -> impl Iterator> { + // TODO(#1516): Consider alternatives to this method. + let components_array: [Option>; 2] = [ + self.variant.clone(), + self.langid.as_ref().map(|s| Cow::Owned(s.to_string())), + ]; + IntoIterator::into_iter(components_array).flatten() } /// Returns whether this [`ResourceOptions`] has all empty fields (no components). @@ -349,33 +427,6 @@ impl ResourceOptions { } } -/// The standard components of a [`ResourceOptions`] path. -pub struct ResourceOptionsComponents { - components: [Option>; 2], -} - -impl ResourceOptionsComponents { - pub fn iter(&self) -> impl Iterator { - self.components - .iter() - .filter_map(|option| option.as_ref().map(|cow| cow.borrow())) - } -} - -impl From<&ResourceOptions> for ResourceOptionsComponents { - fn from(resc_options: &ResourceOptions) -> Self { - Self { - components: [ - resc_options.variant.as_ref().cloned(), - resc_options - .langid - .as_ref() - .map(|s| Cow::Owned(s.to_string())), - ], - } - } -} - #[derive(Clone, PartialEq)] pub struct ResourcePath { pub key: ResourceKey, @@ -416,33 +467,24 @@ impl writeable::Writeable for ResourcePath { #[cfg(test)] mod tests { use super::*; - use tinystr::tinystr4; struct KeyTestCase { pub resc_key: ResourceKey, pub expected: &'static str, } - fn get_key_test_cases() -> [KeyTestCase; 4] { + fn get_key_test_cases() -> [KeyTestCase; 3] { [ KeyTestCase { - resc_key: resource_key!(Core, "cardinal", 1), + resc_key: resource_key!("core/cardinal@1"), expected: "core/cardinal@1", }, KeyTestCase { - resc_key: ResourceKey { - category: ResourceCategory::PrivateUse(tinystr4!("priv")), - sub_category: tinystr::tinystr16!("cardinal"), - version: 1, - }, - expected: "x-priv/cardinal@1", - }, - KeyTestCase { - resc_key: resource_key!(Core, "maxlengthsubcatg", 1), + resc_key: resource_key!("core/maxlengthsubcatg@1"), expected: "core/maxlengthsubcatg@1", }, KeyTestCase { - resc_key: resource_key!(Core, "cardinal", 65535), + resc_key: resource_key!("core/cardinal@65535"), expected: "core/cardinal@65535", }, ] @@ -456,8 +498,7 @@ mod tests { assert_eq!( cas.expected, cas.resc_key - .get_components() - .iter() + .iter_components() .collect::>() .join("/") ); @@ -504,9 +545,8 @@ mod tests { assert_eq!( cas.expected, cas.resc_options - .get_components() - .iter() - .collect::>() + .iter_components() + .collect::>>() .join("/") ); } diff --git a/provider/core/src/struct_provider.rs b/provider/core/src/struct_provider.rs index 11ac7609bf8..c8ce55529ce 100644 --- a/provider/core/src/struct_provider.rs +++ b/provider/core/src/struct_provider.rs @@ -21,7 +21,7 @@ use crate::prelude::*; /// }; /// /// // A placeholder key to use to serve the data struct -/// const SAMPLE_KEY: ResourceKey = icu_provider::resource_key!(x, "xyz", "example", 1); +/// const SAMPLE_KEY: ResourceKey = icu_provider::resource_key!("xyz/example@1"); /// /// let provider = AnyPayloadProvider { /// key: SAMPLE_KEY, diff --git a/provider/core/tests/sizes.rs b/provider/core/tests/sizes.rs index f2377eea994..1c4a77c537c 100644 --- a/provider/core/tests/sizes.rs +++ b/provider/core/tests/sizes.rs @@ -5,7 +5,6 @@ use icu_provider::prelude::*; use static_assertions::const_assert_eq; -const_assert_eq!(8, core::mem::size_of::()); const_assert_eq!(16, core::mem::size_of::()); const_assert_eq!(4, core::mem::size_of::()); -const_assert_eq!(32, core::mem::size_of::()); +const_assert_eq!(24, core::mem::size_of::()); diff --git a/provider/fs/src/export/fs_exporter.rs b/provider/fs/src/export/fs_exporter.rs index d3ffeac95d0..0082808252a 100644 --- a/provider/fs/src/export/fs_exporter.rs +++ b/provider/fs/src/export/fs_exporter.rs @@ -73,8 +73,13 @@ impl DataExporter for FilesystemExporter { obj: DataPayload, ) -> Result<(), DataError> { let mut path_buf = self.root.clone(); - path_buf.extend(req.resource_path.key.get_components().iter()); - path_buf.extend(req.resource_path.options.get_components().iter()); + path_buf.extend(req.resource_path.key.iter_components()); + path_buf.extend( + req.resource_path + .options + .iter_components() + .map(|s| PathBuf::from(&*s)), + ); log::trace!("Writing: {}", req); self.write_to_path(path_buf, obj.get().deref())?; Ok(()) diff --git a/provider/fs/src/fs_data_provider.rs b/provider/fs/src/fs_data_provider.rs index 51dd2b5dbc2..64b2e760189 100644 --- a/provider/fs/src/fs_data_provider.rs +++ b/provider/fs/src/fs_data_provider.rs @@ -61,7 +61,7 @@ impl FsDataProvider { fn get_reader(&self, req: &DataRequest) -> Result<(impl Read, PathBuf), DataError> { let mut path_buf = self.res_root.clone(); - path_buf.extend(req.resource_path.key.get_components().iter()); + path_buf.extend(req.resource_path.key.iter_components()); if req.resource_path.options.is_empty() { path_buf.set_extension(self.manifest.get_file_extension()); } @@ -70,7 +70,12 @@ impl FsDataProvider { } if !req.resource_path.options.is_empty() { // TODO: Implement proper locale fallback - path_buf.extend(req.resource_path.options.get_components().iter()); + path_buf.extend( + req.resource_path + .options + .iter_components() + .map(|s| PathBuf::from(&*s)), + ); path_buf.set_extension(self.manifest.get_file_extension()); } if !path_buf.exists() { diff --git a/provider/uprops/src/bin_uniset.rs b/provider/uprops/src/bin_uniset.rs index 59cb2d77b05..c9d479769af 100644 --- a/provider/uprops/src/bin_uniset.rs +++ b/provider/uprops/src/bin_uniset.rs @@ -30,7 +30,7 @@ impl DataProvider for BinaryPropertyUnicodeSetDataProvi ) -> Result, DataError> { let data = &self .data - .get(&req.resource_path.key.sub_category) + .get(req.resource_path.key.get_last_component_no_version()) .ok_or_else(|| DataErrorKind::MissingResourceKey.with_req(req))?; let mut builder = UnicodeSetBuilder::new(); diff --git a/provider/uprops/src/enum_codepointtrie.rs b/provider/uprops/src/enum_codepointtrie.rs index b2926a288b9..d7cd91543fa 100644 --- a/provider/uprops/src/enum_codepointtrie.rs +++ b/provider/uprops/src/enum_codepointtrie.rs @@ -102,7 +102,7 @@ impl DataProvider> // For data resource keys that represent the CodePointTrie data for an enumerated // property, the ResourceKey sub-category string will just be the short alias // for the property. - let prop_name = &req.resource_path.key.sub_category; + let prop_name = req.resource_path.key.get_last_component_no_version(); let source_cpt_data = &self .data .get(prop_name) diff --git a/provider/uprops/src/enum_uniset.rs b/provider/uprops/src/enum_uniset.rs index 67ebdb004d3..aa923ddf70a 100644 --- a/provider/uprops/src/enum_uniset.rs +++ b/provider/uprops/src/enum_uniset.rs @@ -61,7 +61,7 @@ impl DataProvider for EnumeratedPropertyUnicodeSetDataP &self, req: &DataRequest, ) -> Result, DataError> { - let key = &req.resource_path.key.sub_category; + let key = &req.resource_path.key.get_last_component_no_version(); // ResourceKey subcategory strings for enumerated properties are // of the form "name=value", using the short name for both. @@ -70,21 +70,15 @@ impl DataProvider for EnumeratedPropertyUnicodeSetDataP if parts.len() != 2 { return Err(DataErrorKind::MissingResourceKey.with_req(req)); } - ( - parts[0].parse().map_err(|e| { - DataError::custom("Could not parse data request into a Unicode property name") - .with_error_context(&e) - })?, - parts[1], - ) + (parts[0], parts[1]) }; let toml_data = &self .data - .get(&prop_name) + .get(prop_name) .ok_or_else(|| DataErrorKind::MissingResourceKey.with_req(req))?; - let valid_names = expand_groupings(&prop_name, prop_value); + let valid_names = expand_groupings(prop_name, prop_value); let mut builder = UnicodeSetBuilder::new(); for range in &toml_data.ranges { diff --git a/provider/uprops/src/provider.rs b/provider/uprops/src/provider.rs index e305c641d88..4b4ee9b5533 100644 --- a/provider/uprops/src/provider.rs +++ b/provider/uprops/src/provider.rs @@ -34,7 +34,12 @@ impl DataProvider for PropertiesDataProvider { &self, req: &DataRequest, ) -> Result, DataError> { - if req.resource_path.key.sub_category.contains('=') { + if req + .resource_path + .key + .get_last_component_no_version() + .contains('=') + { self.enumerated.load_payload(req) } else { self.binary.load_payload(req) diff --git a/provider/uprops/src/script.rs b/provider/uprops/src/script.rs index c6a9f22997f..fe9be5de01c 100644 --- a/provider/uprops/src/script.rs +++ b/provider/uprops/src/script.rs @@ -11,7 +11,6 @@ use icu_properties::Script; use icu_provider::prelude::*; use std::convert::TryFrom; use std::path::Path; -use tinystr::tinystr16; use zerovec::{VarZeroVec, ZeroSlice, ZeroVec}; /// This data provider returns a [`crate::script::ScriptExtensions`] instance, @@ -81,7 +80,7 @@ impl DataProvider for ScriptExtensionsProperty &self, req: &DataRequest, ) -> Result, DataError> { - if req.resource_path.key.sub_category != tinystr16!("scx") { + if req.resource_path.key.get_last_component_no_version() != "scx" { return Err(DataErrorKind::MissingResourceKey.with_req(req)); } diff --git a/provider/uprops/src/uprops_helpers.rs b/provider/uprops/src/uprops_helpers.rs index aa29d761892..2a4351a1589 100644 --- a/provider/uprops/src/uprops_helpers.rs +++ b/provider/uprops/src/uprops_helpers.rs @@ -8,27 +8,18 @@ use crate::uprops_serde; use eyre::{eyre, WrapErr}; use std::collections::HashMap; use std::path::Path; -use tinystr::TinyStr16; -pub type TomlEnumerated = HashMap; -pub type TomlBinary = HashMap; +pub type TomlEnumerated = HashMap; +pub type TomlBinary = HashMap; pub fn load_binary_from_dir(root_dir: &Path) -> eyre::Result { let mut result = HashMap::new(); for path in get_dir_contents(root_dir)? { - let option_key: Option = path + let key: String = path .file_stem() .and_then(|p| p.to_str()) .ok_or_else(|| eyre::eyre!("Invalid file name: {:?}", path))? - .parse() - .ok(); - let key = if let Some(k) = option_key { - k - } else { - #[cfg(feature = "log")] - log::trace!("Filename does not fit in TinyStr16: {:?}", path); - continue; - }; + .to_string(); let toml_str = read_path_to_string(&path)?; let toml_obj: uprops_serde::binary::Main = toml::from_str(&toml_str) .wrap_err_with(|| format!("Could not parse TOML: {:?}", path))?; @@ -42,19 +33,11 @@ pub fn load_binary_from_dir(root_dir: &Path) -> eyre::Result { pub fn load_enumerated_from_dir(root_dir: &Path) -> eyre::Result { let mut result = HashMap::new(); for path in get_dir_contents(root_dir)? { - let option_key: Option = path + let key: String = path .file_stem() .and_then(|p| p.to_str()) .ok_or_else(|| eyre::eyre!("Invalid file name: {:?}", path))? - .parse() - .ok(); - let key = if let Some(k) = option_key { - k - } else { - #[cfg(feature = "log")] - log::trace!("Filename does not fit in TinyStr16: {:?}", path); - continue; - }; + .to_string(); let toml_str = read_path_to_string(&path)?; let toml_obj: uprops_serde::enumerated::Main = toml::from_str(&toml_str) .wrap_err_with(|| format!("Could not parse TOML: {:?}", path))?;