Skip to content

Commit

Permalink
Add tinystr-neo to experimental/ (#1508)
Browse files Browse the repository at this point in the history
* Add tinystr_neo experimental crate

* Basic TinyAsciiStr type

* Add serde impls

* Add tests for serde

* fix var name

* rm bytes

* fmt

* tidy

* Make macro panic at compile time

* clippy

* move panic

* clip
  • Loading branch information
Manishearth authored Jan 18, 2022
1 parent 521ac66 commit 30a3f07
Show file tree
Hide file tree
Showing 11 changed files with 664 additions and 0 deletions.
11 changes: 11 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ members = [
"experimental/list_formatter",
"experimental/segmenter",
"experimental/segmenter_lstm",
"experimental/tinystr_neo",
"ffi/diplomat",
"ffi/ecma402",
"provider/blob",
Expand Down
34 changes: 34 additions & 0 deletions experimental/tinystr_neo/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# This file is part of ICU4X. For terms of use, please see the file
# called LICENSE at the top level of the ICU4X source tree
# (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).

[package]
name = "tinystr-neo"
version = "0.3.1"
description = "A small ASCII-only bounded length string representation."
authors = ["Manish Goregaokar <manishsmail@gmail.com>"]
edition = "2021"
repository = "https://github.com/unicode-org/icu4x"
license-file = "LICENSE"
keywords = ["string", "str", "small", "tiny", "no_std"]
categories = ["data-structures"]
include = [
"src/**/*",
"examples/**/*",
"benches/**/*",
"Cargo.toml",
"LICENSE",
"README.md"
]

[package.metadata.docs.rs]
all-features = true

[dependencies]
displaydoc = { version = "0.2.3", default-features = false }
serde = { version = "1.0.123", optional = true, default-features = false, features = ["alloc"] }

[dev-dependencies]
serde_json = { version = "1.0", default-features = false, features = ["alloc"] }
bincode = "1.3"
postcard = { version = "0.7", features = ["use-std"] }
331 changes: 331 additions & 0 deletions experimental/tinystr_neo/LICENSE

Large diffs are not rendered by default.

7 changes: 7 additions & 0 deletions experimental/tinystr_neo/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# tinystr-neo [![crates.io](https://img.shields.io/crates/v/tinystr-neo)](https://crates.io/crates/tinystr-neo)



## More Information

For more information on development, authorship, contributing etc. please visit [`ICU4X home page`](https://github.com/unicode-org/icu4x).
80 changes: 80 additions & 0 deletions experimental/tinystr_neo/src/ascii.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
// This file is part of ICU4X. For terms of use, please see the file
// called LICENSE at the top level of the ICU4X source tree
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).

use crate::TinyStrError;
use core::ops::Deref;
use core::str::{self, FromStr};

#[repr(transparent)]
#[derive(PartialEq, Eq, Ord, PartialOrd, Copy, Clone, Debug, Hash)]
pub struct TinyAsciiStr<const N: usize> {
bytes: [u8; N],
}

impl<const N: usize> TinyAsciiStr<N> {
pub const fn from_bytes(bytes: &[u8]) -> Result<Self, TinyStrError> {
if bytes.len() > N {
return Err(TinyStrError::TooLarge {
max: N,
found: bytes.len(),
});
}

let mut out = [0; N];
let mut i = 0;
while i < bytes.len() {
if bytes[i] == 0 {
return Err(TinyStrError::ContainsNull);
} else if bytes[i] >= 0x80 {
return Err(TinyStrError::NonAscii);
}
out[i] = bytes[i];

i += 1;
}

Ok(Self { bytes: out })
}

pub const fn from_str(s: &str) -> Result<Self, TinyStrError> {
Self::from_bytes(s.as_bytes())
}

pub fn len(&self) -> usize {
self.bytes.iter().position(|x| *x == 0).unwrap_or(N)
}

pub fn is_empty(&self) -> bool {
self.bytes[0] == 0
}

pub fn as_bytes(&self) -> &[u8] {
&self.bytes[0..self.len()]
}

pub fn all_bytes(&self) -> &[u8; N] {
&self.bytes
}

/// # Safety
/// Must be called with a bytes array made of valid ASCII bytes, with no null bytes
/// between ASCII characters
pub const unsafe fn from_bytes_unchecked(bytes: [u8; N]) -> Self {
Self { bytes }
}
}

impl<const N: usize> Deref for TinyAsciiStr<N> {
type Target = str;
fn deref(&self) -> &str {
unsafe { str::from_utf8_unchecked(self.as_bytes()) }
}
}

impl<const N: usize> FromStr for TinyAsciiStr<N> {
type Err = TinyStrError;
fn from_str(s: &str) -> Result<Self, TinyStrError> {
Self::from_str(s)
}
}
15 changes: 15 additions & 0 deletions experimental/tinystr_neo/src/error.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
// This file is part of ICU4X. For terms of use, please see the file
// called LICENSE at the top level of the ICU4X source tree
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).

use displaydoc::Display;

#[derive(Display, Debug)]
pub enum TinyStrError {
#[displaydoc("found string of larger length {found} when constructing string of length {max}")]
TooLarge { max: usize, found: usize },
#[displaydoc("tinystr types do not support strings with null bytes")]
ContainsNull,
#[displaydoc("attempted to construct TinyStrAuto from a non-ascii string")]
NonAscii,
}
25 changes: 25 additions & 0 deletions experimental/tinystr_neo/src/lib.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
// This file is part of ICU4X. For terms of use, please see the file
// called LICENSE at the top level of the ICU4X source tree
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).

#![cfg_attr(not(test), no_std)]

mod macros;

mod ascii;
mod error;

#[cfg(feature = "serde")]
mod serde;

#[cfg(feature = "serde")]
extern crate alloc;

pub use ascii::TinyAsciiStr;
pub use error::TinyStrError;

// /// Allows unit tests to use the macro
// #[cfg(test)]
// mod tinystr {
// pub use super::{TinyAsciiStr, TinyStrError};
// }
30 changes: 30 additions & 0 deletions experimental/tinystr_neo/src/macros.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
// This file is part of ICU4X. For terms of use, please see the file
// called LICENSE at the top level of the ICU4X source tree
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).

#[macro_export]
macro_rules! tinystr {
($n:literal, $s:literal) => {{
// Force it into a const context; otherwise it may get evaluated at runtime instead.
const TINYSTR_MACRO_CONST: $crate::TinyAsciiStr<$n> = {
match $crate::TinyAsciiStr::from_bytes($s.as_bytes()) {
Ok(s) => s,
// Cannot format the error since formatting isn't const yet
Err(_) => panic!(concat!("Failed to construct tinystr from ", $s)),
}
};
TINYSTR_MACRO_CONST
}};
}

#[cfg(test)]
mod tests {
#[test]
fn test_macro_construction() {
let s1 = tinystr!(8, "foobar");
assert_eq!(&*s1, "foobar");

let s1 = tinystr!(12, "foobarbaz");
assert_eq!(&*s1, "foobarbaz");
}
}
91 changes: 91 additions & 0 deletions experimental/tinystr_neo/src/serde.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
// This file is part of ICU4X. For terms of use, please see the file
// called LICENSE at the top level of the ICU4X source tree
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).

use crate::TinyAsciiStr;
use alloc::borrow::Cow;
use alloc::string::ToString;
use core::fmt;
use core::marker::PhantomData;
use core::ops::Deref;
use serde::de::{Error, SeqAccess, Visitor};
use serde::ser::SerializeTuple;
use serde::{Deserialize, Deserializer, Serialize, Serializer};

impl<const N: usize> Serialize for TinyAsciiStr<N> {
#[inline]
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: Serializer,
{
if serializer.is_human_readable() {
self.deref().serialize(serializer)
} else {
let mut seq = serializer.serialize_tuple(N)?;
for byte in self.all_bytes() {
seq.serialize_element(byte)?;
}
seq.end()
}
}
}

struct TinyAsciiStrVisitor<const N: usize> {
marker: PhantomData<TinyAsciiStr<N>>,
}

impl<const N: usize> TinyAsciiStrVisitor<N> {
fn new() -> Self {
TinyAsciiStrVisitor {
marker: PhantomData,
}
}
}

impl<'de, const N: usize> Visitor<'de> for TinyAsciiStrVisitor<N> {
type Value = TinyAsciiStr<N>;

fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
write!(formatter, "a TinyAsciiStr<{}>", N)
}

#[inline]
fn visit_seq<A>(self, mut seq: A) -> Result<Self::Value, A::Error>
where
A: SeqAccess<'de>,
{
let mut bytes = [0u8; N];
let mut zeroes = false;
for out in &mut bytes.iter_mut().take(N) {
let byte = seq
.next_element()?
.ok_or_else(|| Error::invalid_length(N, &self))?;
if byte == 0 {
zeroes = true;
} else if zeroes {
return Err(Error::custom("TinyAsciiStr cannot contain null bytes"));
}

if byte >= 0x80 {
return Err(Error::custom("TinyAsciiStr cannot contain non-ascii bytes"));
}
*out = byte;
}

Ok(unsafe { TinyAsciiStr::from_bytes_unchecked(bytes) })
}
}

impl<'de, const N: usize> Deserialize<'de> for TinyAsciiStr<N> {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: Deserializer<'de>,
{
if deserializer.is_human_readable() {
let x: Cow<'de, str> = Deserialize::deserialize(deserializer)?;
TinyAsciiStr::from_str(&x).map_err(|e| Error::custom(e.to_string()))
} else {
deserializer.deserialize_tuple(N, TinyAsciiStrVisitor::<N>::new())
}
}
}
39 changes: 39 additions & 0 deletions experimental/tinystr_neo/tests/serde.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
// This file is part of ICU4X. For terms of use, please see the file
// called LICENSE at the top level of the ICU4X source tree
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).

use tinystr_neo::*;

// Tests largely adapted from `tinystr` crate
// https://github.com/zbraniecki/tinystr/blob/4e4eab55dd6bded7f29a18b41452c506c461716c/tests/serde.rs

macro_rules! test_roundtrip {
($f:ident, $n:literal, $val:expr) => {
#[test]
fn $f() {
let tiny: TinyAsciiStr<$n> = $val.parse().unwrap();
let json_string = serde_json::to_string(&tiny).unwrap();
let expected_json = concat!("\"", $val, "\"");
assert_eq!(json_string, expected_json);
let recover: TinyAsciiStr<$n> = serde_json::from_str(&json_string).unwrap();
assert_eq!(&*tiny, &*recover);

let bin = bincode::serialize(&tiny).unwrap();
assert_eq!(bin, &tiny.all_bytes()[..]);
let debin: TinyAsciiStr<$n> = bincode::deserialize(&bin).unwrap();
assert_eq!(&*tiny, &*debin);

let post = postcard::to_stdvec(&tiny).unwrap();
assert_eq!(post, &tiny.all_bytes()[..]);
let unpost: TinyAsciiStr<$n> = postcard::from_bytes(&post).unwrap();
assert_eq!(&*tiny, &*unpost);
}
};
}

test_roundtrip!(test_roundtrip4_1, 4, "en");
test_roundtrip!(test_roundtrip4_2, 4, "Latn");
test_roundtrip!(test_roundtrip8, 8, "calendar");
test_roundtrip!(test_roundtrip16, 16, "verylongstring");
test_roundtrip!(test_roundtrip10, 11, "shortstring");
test_roundtrip!(test_roundtrip30, 24, "veryveryverylongstring");

0 comments on commit 30a3f07

Please sign in to comment.