-
Notifications
You must be signed in to change notification settings - Fork 185
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add tinystr-neo to experimental/ (#1508)
* Add tinystr_neo experimental crate * Basic TinyAsciiStr type * Add serde impls * Add tests for serde * fix var name * rm bytes * fmt * tidy * Make macro panic at compile time * clippy * move panic * clip
- Loading branch information
1 parent
521ac66
commit 30a3f07
Showing
11 changed files
with
664 additions
and
0 deletions.
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
# This file is part of ICU4X. For terms of use, please see the file | ||
# called LICENSE at the top level of the ICU4X source tree | ||
# (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). | ||
|
||
[package] | ||
name = "tinystr-neo" | ||
version = "0.3.1" | ||
description = "A small ASCII-only bounded length string representation." | ||
authors = ["Manish Goregaokar <manishsmail@gmail.com>"] | ||
edition = "2021" | ||
repository = "https://github.com/unicode-org/icu4x" | ||
license-file = "LICENSE" | ||
keywords = ["string", "str", "small", "tiny", "no_std"] | ||
categories = ["data-structures"] | ||
include = [ | ||
"src/**/*", | ||
"examples/**/*", | ||
"benches/**/*", | ||
"Cargo.toml", | ||
"LICENSE", | ||
"README.md" | ||
] | ||
|
||
[package.metadata.docs.rs] | ||
all-features = true | ||
|
||
[dependencies] | ||
displaydoc = { version = "0.2.3", default-features = false } | ||
serde = { version = "1.0.123", optional = true, default-features = false, features = ["alloc"] } | ||
|
||
[dev-dependencies] | ||
serde_json = { version = "1.0", default-features = false, features = ["alloc"] } | ||
bincode = "1.3" | ||
postcard = { version = "0.7", features = ["use-std"] } |
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
# tinystr-neo [![crates.io](https://img.shields.io/crates/v/tinystr-neo)](https://crates.io/crates/tinystr-neo) | ||
|
||
|
||
|
||
## More Information | ||
|
||
For more information on development, authorship, contributing etc. please visit [`ICU4X home page`](https://github.com/unicode-org/icu4x). |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,80 @@ | ||
// This file is part of ICU4X. For terms of use, please see the file | ||
// called LICENSE at the top level of the ICU4X source tree | ||
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). | ||
|
||
use crate::TinyStrError; | ||
use core::ops::Deref; | ||
use core::str::{self, FromStr}; | ||
|
||
/// A bounded-length ASCII string stored inline in an `N`-byte array. | ||
/// | ||
/// Strings shorter than `N` are padded with trailing zero bytes, which is why the | ||
/// checked constructor rejects input containing a null byte. | ||
#[repr(transparent)] | ||
#[derive(PartialEq, Eq, Ord, PartialOrd, Copy, Clone, Debug, Hash)] | ||
pub struct TinyAsciiStr<const N: usize> { | ||
    // String bytes followed by zero padding up to `N`. | ||
    bytes: [u8; N], | ||
} | ||
|
||
impl<const N: usize> TinyAsciiStr<N> { | ||
    /// Builds a string from `bytes`, validating every byte. | ||
    /// | ||
    /// # Errors | ||
    /// | ||
    /// * `TinyStrError::TooLarge` if `bytes` is longer than `N`. | ||
    /// * `TinyStrError::ContainsNull` if any byte is `0`. | ||
    /// * `TinyStrError::NonAscii` if any byte is `0x80` or above. | ||
    pub const fn from_bytes(bytes: &[u8]) -> Result<Self, TinyStrError> { | ||
        if bytes.len() > N { | ||
            return Err(TinyStrError::TooLarge { | ||
                max: N, | ||
                found: bytes.len(), | ||
            }); | ||
        } | ||
|
||
        // Unused trailing slots stay zero and act as padding. | ||
        let mut out = [0; N]; | ||
        let mut i = 0; | ||
        // Index-based `while` loop because iterators cannot be used in a `const fn`. | ||
        while i < bytes.len() { | ||
            if bytes[i] == 0 { | ||
                return Err(TinyStrError::ContainsNull); | ||
            } else if bytes[i] >= 0x80 { | ||
                return Err(TinyStrError::NonAscii); | ||
            } | ||
            out[i] = bytes[i]; | ||
|
||
            i += 1; | ||
        } | ||
|
||
        Ok(Self { bytes: out }) | ||
    } | ||
|
||
    /// Builds a string from `s`; see [`Self::from_bytes`] for the validation rules and errors. | ||
    pub const fn from_str(s: &str) -> Result<Self, TinyStrError> { | ||
        Self::from_bytes(s.as_bytes()) | ||
    } | ||
|
||
    /// Returns the number of bytes before the first zero byte, or `N` if there is none. | ||
    pub fn len(&self) -> usize { | ||
        self.bytes.iter().position(|x| *x == 0).unwrap_or(N) | ||
    } | ||
|
||
    /// Returns `true` if the string holds no characters. | ||
    pub fn is_empty(&self) -> bool { | ||
        // `first()` rather than `bytes[0]`: indexing would panic for `TinyAsciiStr<0>`, | ||
        // which can be constructed from the empty string. | ||
        self.bytes.first().map_or(true, |byte| *byte == 0) | ||
    } | ||
|
||
    /// Returns the string's bytes without the trailing zero padding. | ||
    pub fn as_bytes(&self) -> &[u8] { | ||
        &self.bytes[0..self.len()] | ||
    } | ||
|
||
    /// Returns the whole backing array, including any trailing zero padding. | ||
    pub fn all_bytes(&self) -> &[u8; N] { | ||
        &self.bytes | ||
    } | ||
|
||
    /// Wraps `bytes` without performing any validation. | ||
    /// | ||
    /// # Safety | ||
    /// Must be called with a bytes array made of valid ASCII bytes, with no null bytes | ||
    /// between ASCII characters | ||
    pub const unsafe fn from_bytes_unchecked(bytes: [u8; N]) -> Self { | ||
        Self { bytes } | ||
    } | ||
} | ||
|
||
impl<const N: usize> Deref for TinyAsciiStr<N> { | ||
    type Target = str; | ||
|
||
    /// Views the non-padding bytes as a `&str`. | ||
    fn deref(&self) -> &str { | ||
        let ascii = self.as_bytes(); | ||
        // SAFETY: `from_bytes` only admits bytes below 0x80 and `from_bytes_unchecked` | ||
        // requires the caller to supply ASCII, and ASCII is always valid UTF-8. | ||
        unsafe { str::from_utf8_unchecked(ascii) } | ||
    } | ||
} | ||
|
||
impl<const N: usize> FromStr for TinyAsciiStr<N> { | ||
    type Err = TinyStrError; | ||
|
||
    /// Parses `s` with the same validation as the inherent constructors. | ||
    fn from_str(s: &str) -> Result<Self, TinyStrError> { | ||
        // Goes straight to `from_bytes`, which is all the inherent `from_str` does. | ||
        Self::from_bytes(s.as_bytes()) | ||
    } | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
// This file is part of ICU4X. For terms of use, please see the file | ||
// called LICENSE at the top level of the ICU4X source tree | ||
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). | ||
|
||
use displaydoc::Display; | ||
|
||
/// Reasons a byte sequence can be rejected when constructing a tinystr type. | ||
#[derive(Display, Debug)] | ||
pub enum TinyStrError { | ||
    // Input is longer than the target capacity; `max` is the capacity, `found` the input length. | ||
    #[displaydoc("found string of larger length {found} when constructing string of length {max}")] | ||
    TooLarge { max: usize, found: usize }, | ||
    // Input contains a zero byte, which the representation reserves for padding. | ||
    #[displaydoc("tinystr types do not support strings with null bytes")] | ||
    ContainsNull, | ||
    // Input contains a byte of 0x80 or above. The message names `TinyAsciiStr`, | ||
    // the type whose constructor produces this error. | ||
    #[displaydoc("attempted to construct TinyAsciiStr from a non-ascii string")] | ||
    NonAscii, | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
// This file is part of ICU4X. For terms of use, please see the file | ||
// called LICENSE at the top level of the ICU4X source tree | ||
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). | ||
|
||
// Build without `std`, except when compiling the test harness. | ||
#![cfg_attr(not(test), no_std)] | ||
|
||
mod macros; | ||
|
||
mod ascii; | ||
mod error; | ||
|
||
// Serialization support is compiled only when the `serde` feature is enabled. | ||
#[cfg(feature = "serde")] | ||
mod serde; | ||
|
||
// `alloc` is used by the `serde` module (`Cow`, `ToString`), so it is gated the same way. | ||
#[cfg(feature = "serde")] | ||
extern crate alloc; | ||
|
||
// Re-export the public types at the crate root. | ||
pub use ascii::TinyAsciiStr; | ||
pub use error::TinyStrError; | ||
|
||
// /// Allows unit tests to use the macro | ||
// #[cfg(test)] | ||
// mod tinystr { | ||
// pub use super::{TinyAsciiStr, TinyStrError}; | ||
// } |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
// This file is part of ICU4X. For terms of use, please see the file | ||
// called LICENSE at the top level of the ICU4X source tree | ||
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). | ||
|
||
#[macro_export] | ||
macro_rules! tinystr { | ||
($n:literal, $s:literal) => {{ | ||
// Force it into a const context; otherwise it may get evaluated at runtime instead. | ||
const TINYSTR_MACRO_CONST: $crate::TinyAsciiStr<$n> = { | ||
match $crate::TinyAsciiStr::from_bytes($s.as_bytes()) { | ||
Ok(s) => s, | ||
// Cannot format the error since formatting isn't const yet | ||
Err(_) => panic!(concat!("Failed to construct tinystr from ", $s)), | ||
} | ||
}; | ||
TINYSTR_MACRO_CONST | ||
}}; | ||
} | ||
|
||
#[cfg(test)] | ||
mod tests { | ||
    /// Strings built by the macro deref to the literal they were built from. | ||
    #[test] | ||
    fn test_macro_construction() { | ||
        let in_eight = tinystr!(8, "foobar"); | ||
        assert_eq!(&*in_eight, "foobar"); | ||
|
||
        let in_twelve = tinystr!(12, "foobarbaz"); | ||
        assert_eq!(&*in_twelve, "foobarbaz"); | ||
    } | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,91 @@ | ||
// This file is part of ICU4X. For terms of use, please see the file | ||
// called LICENSE at the top level of the ICU4X source tree | ||
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). | ||
|
||
use crate::TinyAsciiStr; | ||
use alloc::borrow::Cow; | ||
use alloc::string::ToString; | ||
use core::fmt; | ||
use core::marker::PhantomData; | ||
use core::ops::Deref; | ||
use serde::de::{Error, SeqAccess, Visitor}; | ||
use serde::ser::SerializeTuple; | ||
use serde::{Deserialize, Deserializer, Serialize, Serializer}; | ||
|
||
impl<const N: usize> Serialize for TinyAsciiStr<N> { | ||
    /// Human-readable formats receive the string itself; other formats receive all | ||
    /// `N` backing bytes, padding included, as a fixed-size tuple. | ||
    #[inline] | ||
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> | ||
    where | ||
        S: Serializer, | ||
    { | ||
        if !serializer.is_human_readable() { | ||
            let mut tuple = serializer.serialize_tuple(N)?; | ||
            self.all_bytes() | ||
                .iter() | ||
                .try_for_each(|byte| tuple.serialize_element(byte))?; | ||
            return tuple.end(); | ||
        } | ||
|
||
        self.deref().serialize(serializer) | ||
    } | ||
} | ||
|
||
// Visitor used for the non-human-readable (tuple of bytes) deserialization path. | ||
struct TinyAsciiStrVisitor<const N: usize> { | ||
    // Carries the const parameter `N` only; no data is stored. | ||
    marker: PhantomData<TinyAsciiStr<N>>, | ||
} | ||
|
||
impl<const N: usize> TinyAsciiStrVisitor<N> { | ||
    /// Creates a visitor for a string of capacity `N`. | ||
    fn new() -> Self { | ||
        Self { marker: PhantomData } | ||
    } | ||
} | ||
|
||
impl<'de, const N: usize> Visitor<'de> for TinyAsciiStrVisitor<N> { | ||
    type Value = TinyAsciiStr<N>; | ||
|
||
    fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { | ||
        write!(formatter, "a TinyAsciiStr<{}>", N) | ||
    } | ||
|
||
    /// Reads exactly `N` bytes from the sequence and validates them. | ||
    /// | ||
    /// Fails if the sequence ends early, if a non-zero byte follows a zero byte, | ||
    /// or if any byte is `0x80` or above. | ||
    #[inline] | ||
    fn visit_seq<A>(self, mut seq: A) -> Result<Self::Value, A::Error> | ||
    where | ||
        A: SeqAccess<'de>, | ||
    { | ||
        let mut bytes = [0u8; N]; | ||
        // Set once the first zero byte is seen; only zeros may follow it. | ||
        let mut in_padding = false; | ||
        for (index, slot) in bytes.iter_mut().enumerate() { | ||
            let byte: u8 = match seq.next_element()? { | ||
                Some(byte) => byte, | ||
                // Report how many elements were actually read, not the expected count. | ||
                None => return Err(Error::invalid_length(index, &self)), | ||
            }; | ||
            if byte == 0 { | ||
                in_padding = true; | ||
            } else if in_padding { | ||
                return Err(Error::custom("TinyAsciiStr cannot contain null bytes")); | ||
            } | ||
|
||
            if byte >= 0x80 { | ||
                return Err(Error::custom("TinyAsciiStr cannot contain non-ascii bytes")); | ||
            } | ||
            *slot = byte; | ||
        } | ||
|
||
        // SAFETY: the loop above rejected every byte of 0x80 or above and every | ||
        // non-zero byte that followed a zero byte. | ||
        Ok(unsafe { TinyAsciiStr::from_bytes_unchecked(bytes) }) | ||
    } | ||
} | ||
|
||
impl<'de, const N: usize> Deserialize<'de> for TinyAsciiStr<N> { | ||
    /// Human-readable formats are parsed from a string; other formats are read as | ||
    /// an `N`-element tuple of bytes. | ||
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> | ||
    where | ||
        D: Deserializer<'de>, | ||
    { | ||
        if !deserializer.is_human_readable() { | ||
            return deserializer.deserialize_tuple(N, TinyAsciiStrVisitor::<N>::new()); | ||
        } | ||
|
||
        let text: Cow<'de, str> = Deserialize::deserialize(deserializer)?; | ||
        // The validation error is converted to a string for the deserializer's error type. | ||
        TinyAsciiStr::from_str(&text).map_err(|err| Error::custom(err.to_string())) | ||
    } | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
// This file is part of ICU4X. For terms of use, please see the file | ||
// called LICENSE at the top level of the ICU4X source tree | ||
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). | ||
|
||
use tinystr_neo::*; | ||
|
||
// Tests largely adapted from `tinystr` crate | ||
// https://github.com/zbraniecki/tinystr/blob/4e4eab55dd6bded7f29a18b41452c506c461716c/tests/serde.rs | ||
|
||
// Generates a test `$f` that round-trips `$val` through JSON, bincode and postcard | ||
// as a `TinyAsciiStr<$n>`. | ||
macro_rules! test_roundtrip { | ||
    ($f:ident, $n:literal, $val:expr) => { | ||
        #[test] | ||
        fn $f() { | ||
            let original: TinyAsciiStr<$n> = $val.parse().unwrap(); | ||
|
||
            // JSON is human-readable: expect a plain quoted string. | ||
            let json = serde_json::to_string(&original).unwrap(); | ||
            let quoted = concat!("\"", $val, "\""); | ||
            assert_eq!(json, quoted); | ||
            let from_json: TinyAsciiStr<$n> = serde_json::from_str(&json).unwrap(); | ||
            assert_eq!(&*original, &*from_json); | ||
|
||
            // bincode: expect exactly the backing bytes, padding included. | ||
            let bincode_bytes = bincode::serialize(&original).unwrap(); | ||
            assert_eq!(bincode_bytes, &original.all_bytes()[..]); | ||
            let from_bincode: TinyAsciiStr<$n> = bincode::deserialize(&bincode_bytes).unwrap(); | ||
            assert_eq!(&*original, &*from_bincode); | ||
|
||
            // postcard: same expectation as bincode. | ||
            let postcard_bytes = postcard::to_stdvec(&original).unwrap(); | ||
            assert_eq!(postcard_bytes, &original.all_bytes()[..]); | ||
            let from_postcard: TinyAsciiStr<$n> = postcard::from_bytes(&postcard_bytes).unwrap(); | ||
            assert_eq!(&*original, &*from_postcard); | ||
        } | ||
    }; | ||
} | ||
|
||
// Each test name carries the capacity `N` it exercises. | ||
test_roundtrip!(test_roundtrip4_1, 4, "en"); | ||
test_roundtrip!(test_roundtrip4_2, 4, "Latn"); | ||
test_roundtrip!(test_roundtrip8, 8, "calendar"); | ||
test_roundtrip!(test_roundtrip16, 16, "verylongstring"); | ||
test_roundtrip!(test_roundtrip11, 11, "shortstring"); | ||
test_roundtrip!(test_roundtrip24, 24, "veryveryverylongstring"); |