-
Notifications
You must be signed in to change notification settings - Fork 185
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add tinystr-neo to experimental/ #1508
Changes from all commits
8c13b98
ad88484
3044449
1361678
510c57a
73a799d
6bfbfc3
2cdeab3
347c955
601787d
d009c19
2d9e801
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
# This file is part of ICU4X. For terms of use, please see the file | ||
# called LICENSE at the top level of the ICU4X source tree | ||
# (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). | ||
|
||
[package] | ||
name = "tinystr-neo" | ||
version = "0.3.1" | ||
description = "A small ASCII-only bounded length string representation." | ||
authors = ["Manish Goregaokar <manishsmail@gmail.com>"] | ||
edition = "2021" | ||
repository = "https://github.com/unicode-org/icu4x" | ||
license-file = "LICENSE" | ||
keywords = ["string", "str", "small", "tiny", "no_std"] | ||
categories = ["data-structures"] | ||
include = [ | ||
"src/**/*", | ||
"examples/**/*", | ||
"benches/**/*", | ||
"Cargo.toml", | ||
"LICENSE", | ||
"README.md" | ||
] | ||
|
||
[package.metadata.docs.rs] | ||
all-features = true | ||
|
||
[dependencies] | ||
displaydoc = { version = "0.2.3", default-features = false } | ||
serde = { version = "1.0.123", optional = true, default-features = false, features = ["alloc"] } | ||
|
||
[dev-dependencies] | ||
serde_json = { version = "1.0", default-features = false, features = ["alloc"] } | ||
bincode = "1.3" | ||
postcard = { version = "0.7", features = ["use-std"] } |
Large diffs are not rendered by default.
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
# tinystr-neo [data:image/s3,"s3://crabby-images/cc746/cc746fe82f20a0d5d7f4dde8b2bd6012c0d06a19" alt="crates.io"](https://crates.io/crates/tinystr-neo) | ||
|
||
|
||
|
||
## More Information | ||
|
||
For more information on development, authorship, contributing etc. please visit [`ICU4X home page`](https://github.com/unicode-org/icu4x). |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,80 @@ | ||
// This file is part of ICU4X. For terms of use, please see the file | ||
// called LICENSE at the top level of the ICU4X source tree | ||
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). | ||
|
||
use crate::TinyStrError; | ||
use core::ops::Deref; | ||
use core::str::{self, FromStr}; | ||
|
||
#[repr(transparent)] | ||
#[derive(PartialEq, Eq, Ord, PartialOrd, Copy, Clone, Debug, Hash)] | ||
pub struct TinyAsciiStr<const N: usize> { | ||
bytes: [u8; N], | ||
} | ||
|
||
impl<const N: usize> TinyAsciiStr<N> { | ||
pub const fn from_bytes(bytes: &[u8]) -> Result<Self, TinyStrError> { | ||
if bytes.len() > N { | ||
return Err(TinyStrError::TooLarge { | ||
max: N, | ||
found: bytes.len(), | ||
}); | ||
} | ||
|
||
let mut out = [0; N]; | ||
let mut i = 0; | ||
while i < bytes.len() { | ||
if bytes[i] == 0 { | ||
return Err(TinyStrError::ContainsNull); | ||
} else if bytes[i] >= 0x80 { | ||
return Err(TinyStrError::NonAscii); | ||
} | ||
out[i] = bytes[i]; | ||
|
||
i += 1; | ||
} | ||
|
||
Ok(Self { bytes: out }) | ||
} | ||
|
||
pub const fn from_str(s: &str) -> Result<Self, TinyStrError> { | ||
Self::from_bytes(s.as_bytes()) | ||
} | ||
|
||
pub fn len(&self) -> usize { | ||
self.bytes.iter().position(|x| *x == 0).unwrap_or(N) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. note: previously we were able to use There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah; this is one of the things I was wondering how we would implement efficiently. We should definitely test. I think this would be a good thing for IAI as opposed to Clippy. |
||
} | ||
|
||
pub fn is_empty(&self) -> bool { | ||
self.bytes[0] == 0 | ||
} | ||
|
||
pub fn as_bytes(&self) -> &[u8] { | ||
&self.bytes[0..self.len()] | ||
} | ||
|
||
pub fn all_bytes(&self) -> &[u8; N] { | ||
&self.bytes | ||
} | ||
|
||
/// # Safety | ||
/// Must be called with a bytes array made of valid ASCII bytes, with no null bytes | ||
/// between ASCII characters | ||
pub const unsafe fn from_bytes_unchecked(bytes: [u8; N]) -> Self { | ||
Self { bytes } | ||
} | ||
} | ||
|
||
impl<const N: usize> Deref for TinyAsciiStr<N> { | ||
type Target = str; | ||
fn deref(&self) -> &str { | ||
unsafe { str::from_utf8_unchecked(self.as_bytes()) } | ||
} | ||
} | ||
|
||
impl<const N: usize> FromStr for TinyAsciiStr<N> { | ||
type Err = TinyStrError; | ||
fn from_str(s: &str) -> Result<Self, TinyStrError> { | ||
Self::from_str(s) | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
// This file is part of ICU4X. For terms of use, please see the file | ||
// called LICENSE at the top level of the ICU4X source tree | ||
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). | ||
|
||
use displaydoc::Display; | ||
|
||
#[derive(Display, Debug)] | ||
pub enum TinyStrError { | ||
#[displaydoc("found string of larger length {found} when constructing string of length {max}")] | ||
TooLarge { max: usize, found: usize }, | ||
#[displaydoc("tinystr types do not support strings with null bytes")] | ||
ContainsNull, | ||
#[displaydoc("attempted to construct TinyStrAuto from a non-ascii string")] | ||
NonAscii, | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
// This file is part of ICU4X. For terms of use, please see the file | ||
// called LICENSE at the top level of the ICU4X source tree | ||
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). | ||
|
||
#![cfg_attr(not(test), no_std)] | ||
|
||
mod macros; | ||
|
||
mod ascii; | ||
mod error; | ||
|
||
#[cfg(feature = "serde")] | ||
mod serde; | ||
|
||
#[cfg(feature = "serde")] | ||
extern crate alloc; | ||
|
||
pub use ascii::TinyAsciiStr; | ||
pub use error::TinyStrError; | ||
|
||
// /// Allows unit tests to use the macro | ||
// #[cfg(test)] | ||
// mod tinystr { | ||
// pub use super::{TinyAsciiStr, TinyStrError}; | ||
// } |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
// This file is part of ICU4X. For terms of use, please see the file | ||
// called LICENSE at the top level of the ICU4X source tree | ||
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). | ||
|
||
#[macro_export] | ||
macro_rules! tinystr { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thought: We maybe don't need the macro any more. People can just use the const function constructors directly. Fewer imports and friendlier to the compiler. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. So as mentioned in #1511 (comment) , I think it's important to give users a convenient way to call this in a const context, given that it's rare and unusual for Rust programmers to MBEs (non proc macros, like this one) are pretty friendly to the compiler; and this is going to be even smaller once I rebase over the rust update. I don't find that argument that compelling. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah, |
||
($n:literal, $s:literal) => {{ | ||
// Force it into a const context; otherwise it may get evaluated at runtime instead. | ||
const TINYSTR_MACRO_CONST: $crate::TinyAsciiStr<$n> = { | ||
match $crate::TinyAsciiStr::from_bytes($s.as_bytes()) { | ||
Ok(s) => s, | ||
// Cannot format the error since formatting isn't const yet | ||
Err(_) => panic!(concat!("Failed to construct tinystr from ", $s)), | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Praise: Good idea to use |
||
} | ||
}; | ||
TINYSTR_MACRO_CONST | ||
}}; | ||
} | ||
|
||
#[cfg(test)] | ||
mod tests { | ||
#[test] | ||
fn test_macro_construction() { | ||
let s1 = tinystr!(8, "foobar"); | ||
assert_eq!(&*s1, "foobar"); | ||
|
||
let s1 = tinystr!(12, "foobarbaz"); | ||
assert_eq!(&*s1, "foobarbaz"); | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,91 @@ | ||
// This file is part of ICU4X. For terms of use, please see the file | ||
// called LICENSE at the top level of the ICU4X source tree | ||
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). | ||
|
||
use crate::TinyAsciiStr; | ||
use alloc::borrow::Cow; | ||
use alloc::string::ToString; | ||
use core::fmt; | ||
use core::marker::PhantomData; | ||
use core::ops::Deref; | ||
use serde::de::{Error, SeqAccess, Visitor}; | ||
use serde::ser::SerializeTuple; | ||
use serde::{Deserialize, Deserializer, Serialize, Serializer}; | ||
|
||
impl<const N: usize> Serialize for TinyAsciiStr<N> { | ||
#[inline] | ||
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> | ||
where | ||
S: Serializer, | ||
{ | ||
if serializer.is_human_readable() { | ||
self.deref().serialize(serializer) | ||
} else { | ||
let mut seq = serializer.serialize_tuple(N)?; | ||
for byte in self.all_bytes() { | ||
seq.serialize_element(byte)?; | ||
} | ||
seq.end() | ||
} | ||
} | ||
} | ||
|
||
struct TinyAsciiStrVisitor<const N: usize> { | ||
marker: PhantomData<TinyAsciiStr<N>>, | ||
} | ||
|
||
impl<const N: usize> TinyAsciiStrVisitor<N> { | ||
fn new() -> Self { | ||
TinyAsciiStrVisitor { | ||
marker: PhantomData, | ||
} | ||
} | ||
} | ||
|
||
impl<'de, const N: usize> Visitor<'de> for TinyAsciiStrVisitor<N> { | ||
type Value = TinyAsciiStr<N>; | ||
|
||
fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { | ||
write!(formatter, "a TinyAsciiStr<{}>", N) | ||
} | ||
|
||
#[inline] | ||
fn visit_seq<A>(self, mut seq: A) -> Result<Self::Value, A::Error> | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. note: this is likely not as efficient as the deserialization from the tinystr crate. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We could add TinyStr4/etc as aligned wrappers that serialize to little-endian numbers. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm not too worried about |
||
where | ||
A: SeqAccess<'de>, | ||
{ | ||
let mut bytes = [0u8; N]; | ||
let mut zeroes = false; | ||
for out in &mut bytes.iter_mut().take(N) { | ||
let byte = seq | ||
.next_element()? | ||
.ok_or_else(|| Error::invalid_length(N, &self))?; | ||
if byte == 0 { | ||
zeroes = true; | ||
} else if zeroes { | ||
return Err(Error::custom("TinyAsciiStr cannot contain null bytes")); | ||
} | ||
|
||
if byte >= 0x80 { | ||
return Err(Error::custom("TinyAsciiStr cannot contain non-ascii bytes")); | ||
} | ||
*out = byte; | ||
} | ||
|
||
Ok(unsafe { TinyAsciiStr::from_bytes_unchecked(bytes) }) | ||
} | ||
} | ||
|
||
impl<'de, const N: usize> Deserialize<'de> for TinyAsciiStr<N> { | ||
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> | ||
where | ||
D: Deserializer<'de>, | ||
{ | ||
if deserializer.is_human_readable() { | ||
let x: Cow<'de, str> = Deserialize::deserialize(deserializer)?; | ||
TinyAsciiStr::from_str(&x).map_err(|e| Error::custom(e.to_string())) | ||
} else { | ||
deserializer.deserialize_tuple(N, TinyAsciiStrVisitor::<N>::new()) | ||
} | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
// This file is part of ICU4X. For terms of use, please see the file | ||
// called LICENSE at the top level of the ICU4X source tree | ||
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). | ||
|
||
use tinystr_neo::*; | ||
|
||
// Tests largely adapted from `tinystr` crate | ||
// https://github.com/zbraniecki/tinystr/blob/4e4eab55dd6bded7f29a18b41452c506c461716c/tests/serde.rs | ||
|
||
macro_rules! test_roundtrip { | ||
($f:ident, $n:literal, $val:expr) => { | ||
#[test] | ||
fn $f() { | ||
let tiny: TinyAsciiStr<$n> = $val.parse().unwrap(); | ||
let json_string = serde_json::to_string(&tiny).unwrap(); | ||
let expected_json = concat!("\"", $val, "\""); | ||
assert_eq!(json_string, expected_json); | ||
let recover: TinyAsciiStr<$n> = serde_json::from_str(&json_string).unwrap(); | ||
assert_eq!(&*tiny, &*recover); | ||
|
||
let bin = bincode::serialize(&tiny).unwrap(); | ||
assert_eq!(bin, &tiny.all_bytes()[..]); | ||
let debin: TinyAsciiStr<$n> = bincode::deserialize(&bin).unwrap(); | ||
assert_eq!(&*tiny, &*debin); | ||
|
||
let post = postcard::to_stdvec(&tiny).unwrap(); | ||
assert_eq!(post, &tiny.all_bytes()[..]); | ||
let unpost: TinyAsciiStr<$n> = postcard::from_bytes(&post).unwrap(); | ||
sffc marked this conversation as resolved.
Show resolved
Hide resolved
|
||
assert_eq!(&*tiny, &*unpost); | ||
} | ||
}; | ||
} | ||
|
||
test_roundtrip!(test_roundtrip4_1, 4, "en"); | ||
test_roundtrip!(test_roundtrip4_2, 4, "Latn"); | ||
test_roundtrip!(test_roundtrip8, 8, "calendar"); | ||
test_roundtrip!(test_roundtrip16, 16, "verylongstring"); | ||
test_roundtrip!(test_roundtrip10, 11, "shortstring"); | ||
test_roundtrip!(test_roundtrip30, 24, "veryveryverylongstring"); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
note: can't be a for loop in const context