diff --git a/.github/workflows/coverage.yml b/.github/workflows/coverage.yml index 8bfec7e4..83d0181e 100644 --- a/.github/workflows/coverage.yml +++ b/.github/workflows/coverage.yml @@ -23,7 +23,7 @@ jobs: with: profile: minimal # nightly can be very volatile--pin this to a version we know works well... - toolchain: nightly-2022-05-01 + toolchain: nightly-2022-10-12 override: true - name: Cargo Test uses: actions-rs/cargo@v1 diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 6e3d4bf8..a41fedc7 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -34,7 +34,11 @@ jobs: uses: actions-rs/toolchain@v1 with: profile: minimal - toolchain: stable + # We're temporarily testing against `beta` to have access to GATs. When Rust v1.65 is released in + # 2022-11, we should change this back to `stable`. + # See: https://github.com/amzn/ion-rust/issues/443 + toolchain: beta + components: rustfmt, clippy override: true - name: Cargo Build uses: actions-rs/cargo@v1 diff --git a/Cargo.toml b/Cargo.toml index f3001863..a3c561c2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -38,7 +38,6 @@ num-bigint = "0.3" num-integer = "0.1.44" num-traits = "0.2" arrayvec = "0.7" -hashlink = "0.8.1" smallvec = "1.9.0" [dev-dependencies] diff --git a/examples/read_all_values.rs b/examples/read_all_values.rs index d20ef267..ad3503b4 100644 --- a/examples/read_all_values.rs +++ b/examples/read_all_values.rs @@ -62,7 +62,7 @@ fn read_all_values(reader: &mut R) -> IonResult { match ion_type { Struct | List | SExpression => reader.step_in()?, String => { - let _text = reader.map_string(|_s| ())?; + reader.map_string(|_s| ())?; } Symbol => { let _symbol_id = reader.read_symbol()?; @@ -83,10 +83,10 @@ fn read_all_values(reader: &mut R) -> IonResult { let _boolean = reader.read_bool()?; } Blob => { - let _blob = reader.map_blob(|_b| ())?; + reader.map_blob(|_b| ())?; } Clob => { - let _clob = reader.map_clob(|_c| ())?; + reader.map_clob(|_c| ())?; } Null => {} } diff --git a/src/binary/int.rs b/src/binary/int.rs index bb7fce64..acc5d1ab 100644 --- a/src/binary/int.rs +++ b/src/binary/int.rs @@ -125,7 +125,7 @@ impl DecodedInt { let empty_leading_bytes: u32 = (magnitude.leading_zeros() - 1) >> 3; let first_occupied_byte = empty_leading_bytes as usize; - let mut magnitude_bytes: [u8; mem::size_of::()] = (magnitude as u64).to_be_bytes(); + let mut magnitude_bytes: [u8; mem::size_of::()] = magnitude.to_be_bytes(); let bytes_to_write: &mut [u8] = &mut magnitude_bytes[first_occupied_byte..]; if value < 0 { bytes_to_write[0] |= 0b1000_0000; diff --git a/src/binary/non_blocking/raw_binary_reader.rs b/src/binary/non_blocking/raw_binary_reader.rs index ba03ae71..5be7008a 100644 --- a/src/binary/non_blocking/raw_binary_reader.rs +++ b/src/binary/non_blocking/raw_binary_reader.rs @@ -86,7 +86,7 @@ struct EncodedValue { impl EncodedValue { /// Returns the offset of the current value's type descriptor byte. fn header_offset(&self) -> usize { - self.header_offset as usize + self.header_offset } /// Returns the length of this value's header, including the type descriptor byte and any @@ -734,7 +734,7 @@ impl> IonReader for RawBinaryBufferReader { let coefficient_size_in_bytes = encoded_value.value_length() - exponent_var_int.size_in_bytes(); - let exponent = exponent_var_int.value() as i64; + let exponent = exponent_var_int.value(); let coefficient = buffer.read_int(coefficient_size_in_bytes)?; if coefficient.is_negative_zero() { diff --git a/src/binary/raw_binary_reader.rs b/src/binary/raw_binary_reader.rs index d9351270..03eb003b 100644 --- a/src/binary/raw_binary_reader.rs +++ b/src/binary/raw_binary_reader.rs @@ -91,7 +91,7 @@ struct EncodedValue { impl EncodedValue { /// Returns the offset of the current value's type descriptor byte. fn header_offset(&self) -> usize { - self.header_offset as usize + self.header_offset } /// Returns the length of this value's header, including the type descriptor byte and any @@ -526,7 +526,7 @@ impl IonReader for RawBinaryReader { let coefficient_size_in_bytes = self.cursor.value.value_length - exponent_var_int.size_in_bytes(); - let exponent = exponent_var_int.value() as i64; + let exponent = exponent_var_int.value(); let coefficient = self.read_int(coefficient_size_in_bytes)?; if coefficient.is_negative_zero() { @@ -938,7 +938,7 @@ where #[inline(always)] fn read_var_int(&mut self) -> IonResult { let var_int = VarInt::read(&mut self.data_source)?; - self.cursor.bytes_read += var_int.size_in_bytes() as usize; + self.cursor.bytes_read += var_int.size_in_bytes(); Ok(var_int) } diff --git a/src/binary/var_uint.rs b/src/binary/var_uint.rs index d51d1a8e..c36f64f5 100644 --- a/src/binary/var_uint.rs +++ b/src/binary/var_uint.rs @@ -117,7 +117,7 @@ impl VarUInt { /// unsigned integer #[inline(always)] pub fn size_in_bytes(&self) -> usize { - self.size_in_bytes as usize + self.size_in_bytes } } diff --git a/src/symbol_ref.rs b/src/symbol_ref.rs index 65957e5d..eccdcd98 100644 --- a/src/symbol_ref.rs +++ b/src/symbol_ref.rs @@ -75,6 +75,21 @@ impl AsSymbolRef for Symbol { } } +impl AsSymbolRef for &Symbol { + fn as_symbol_ref(&self) -> SymbolRef { + self.text() + .map(SymbolRef::with_text) + .unwrap_or_else(SymbolRef::with_unknown_text) + } +} + +impl<'borrow, 'data> AsSymbolRef for &'borrow SymbolRef<'data> { + fn as_symbol_ref(&self) -> SymbolRef<'data> { + // This is essentially free; the only data inside is an Option<&str> + (*self).clone() + } +} + #[cfg(test)] mod tests { use super::*; diff --git a/src/value/borrowed.rs b/src/value/borrowed.rs index e51b4741..d0415779 100644 --- a/src/value/borrowed.rs +++ b/src/value/borrowed.rs @@ -13,12 +13,15 @@ use crate::symbol_ref::{AsSymbolRef, SymbolRef}; use crate::types::decimal::Decimal; use crate::types::integer::Integer; use crate::types::timestamp::Timestamp; +use crate::value::iterators::{ + ElementRefIterator, FieldRefIterator, FieldValueRefsIterator, IndexVec, SymbolRefIterator, +}; use crate::value::Builder; use crate::IonType; -use hashlink::LinkedHashMap; use num_bigint::BigInt; -use smallvec::SmallVec; +use std::collections::HashMap; use std::iter::FromIterator; +use std::rc::Rc; impl<'a> IonSymbolToken for SymbolRef<'a> { fn text(&self) -> Option<&str> { @@ -137,12 +140,16 @@ impl<'val> Builder for ElementRef<'val> { /// A borrowed implementation of [`Sequence`]. #[derive(Debug, Clone)] pub struct SequenceRef<'val> { - children: Vec>, + // TODO: Since we've moved the elements Vec to the heap, we could consider replacing it with a + // SmallVec that can store some number of elements (4? 8?) inline. We'd need to benchmark. + children: Rc>>, } impl<'val> SequenceRef<'val> { pub fn new(children: Vec>) -> Self { - Self { children } + Self { + children: Rc::new(children), + } } } @@ -153,15 +160,18 @@ impl<'val> FromIterator> for SequenceRef<'val> { for elem in iter { children.push(elem); } - Self { children } + Self { + children: Rc::new(children), + } } } impl<'val> IonSequence for SequenceRef<'val> { type Element = ElementRef<'val>; + type ElementsIterator<'a> = ElementRefIterator<'a, 'val> where 'val: 'a; - fn iter<'a>(&'a self) -> Box + 'a> { - Box::new(self.children.iter()) + fn iter(&self) -> Self::ElementsIterator<'_> { + ElementRefIterator::new(&self.children) } fn get(&self, index: usize) -> Option<&Self::Element> { @@ -188,42 +198,33 @@ impl<'val> Eq for SequenceRef<'val> {} /// A borrowed implementation of [`Struct`] #[derive(Debug, Clone)] pub struct StructRef<'val> { - // A mapping of field name to any values associated with that name. - // If a field name is repeated, each value will be in the associated SmallVec. - // Since repeated field names are not common, we store the values in a SmallVec; - // the first value will be stored directly in the map while additional values will - // be stored elsewhere on the heap. - fields: LinkedHashMap, SmallVec<[ElementRef<'val>; 1]>>, - // `fields.len()` will only tell us the number of *distinct* field names. If the struct - // contains any repeated field names, it will be an under-count. Therefore, we track the number - // of fields separately. - number_of_fields: usize, + fields: Rc>, } impl<'val> StructRef<'val> { fn fields_eq(&self, other: &Self) -> bool { - // For each (field name, field value list) in `self`... - for (field_name, field_values) in &self.fields { - // ...get the corresponding field value list from `other`. - let other_values = match other.fields.get(field_name) { - // If there's no such list, they're not equal. + // For each field name in `self`, get the list of indexes that contain a value with that name. + for (field_name, field_value_indexes) in &self.fields.by_name { + let other_value_indexes = match other.fields.get_indexes(field_name) { + Some(indexes) => indexes, + // The other struct doesn't have a field with this name so they're not equal. None => return false, - Some(values) => values, }; - // If `other` has a corresponding list but it's a different length, they're not equal. - if field_values.len() != other_values.len() { + if field_value_indexes.len() != other_value_indexes.len() { + // The other struct has fields with the same name, but a different number of them. return false; } - // If any of the values in `self`'s value list are not also in `other`'s value list, - // they're not equal. - if field_values.iter().any(|value| { - other_values - .iter() - .all(|other_value| !value.ion_eq(other_value)) - }) { - return false; + for field_value in self.fields.get_values_at_indexes(field_value_indexes) { + if other + .fields + .get_values_at_indexes(other_value_indexes) + .all(|other_value| !field_value.ion_eq(other_value)) + { + // Couldn't find an equivalent field in the other struct + return false; + } } } @@ -233,7 +234,7 @@ impl<'val> StructRef<'val> { /// Returns the number of fields in this Struct. pub fn len(&self) -> usize { - self.number_of_fields + self.fields.by_index.len() } /// Returns `true` if this struct has zero fields. @@ -249,84 +250,126 @@ where { /// Returns a borrowed struct from the given iterator of field names/values. fn from_iter>(iter: I) -> Self { - let mut fields: LinkedHashMap, SmallVec<[ElementRef<'val>; 1]>> = - LinkedHashMap::new(); - let mut number_of_fields: usize = 0; + let mut by_index = Vec::new(); + let mut by_name = HashMap::new(); + for (field_name, field_value) in iter { + let field_name = field_name.into(); + let field_value = field_value.into(); + + by_name + .entry(field_name.clone()) + .or_insert_with(IndexVec::new) + .push(by_index.len()); + by_index.push((field_name, field_value)); + } - for (k, v) in iter { - let key = k.into(); - let val = v.into(); + let fields = Rc::new(FieldRefs { by_index, by_name }); + Self { fields } + } +} - fields.entry(key).or_insert_with(SmallVec::new).push(val); +// This collection is broken out into its own type to allow instances of it to be shared with Rc. +#[derive(Debug)] +struct FieldRefs<'a> { + // Key/value pairs in the order they were inserted + by_index: Vec<(SymbolRef<'a>, ElementRef<'a>)>, + // Maps symbols to a list of indexes where values may be found in `by_index` above + by_name: HashMap, IndexVec>, +} - number_of_fields += 1; +impl<'data> FieldRefs<'data> { + /// Gets all of the indexes that contain a value associated with the given field name. + fn get_indexes(&self, field_name: A) -> Option<&IndexVec> { + field_name + .as_symbol_ref() + .text() + .map(|text| { + // If the symbol has defined text, look it up by &str + self.by_name.get(text) + }) + .unwrap_or_else(|| { + // Otherwise, construct a SymbolRef with unknown text... + let symbol = SymbolRef::with_unknown_text(); + // ...and use the unknown text symbol to look up matching field values + self.by_name.get(&symbol) + }) + } + + /// Iterates over the values found at the specified indexes. + fn get_values_at_indexes<'iter>( + &'iter self, + indexes: &'iter IndexVec, + ) -> FieldValueRefsIterator<'iter, 'data> { + FieldValueRefsIterator { + current: 0, + indexes: Some(indexes), + fields: &self.by_index, } + } - Self { - fields, - number_of_fields, + /// Gets the last value in the Struct that is associated with the specified field name. + /// + /// Note that the Ion data model views a struct as a bag of (name, value) pairs and does not + /// have a notion of field ordering. In most use cases, field names are distinct and the last + /// appearance of a field in the struct's serialized form will have been the _only_ appearance. + /// If a field name appears more than once, this method makes the arbitrary decision to return + /// the value associated with the last appearance. If your application uses structs that repeat + /// field names, you are encouraged to use [get_all] instead. + fn get_last<'iter, A: AsSymbolRef>( + &'iter self, + field_name: A, + ) -> Option<&'iter ElementRef<'data>> { + self.get_indexes(field_name) + .and_then(|indexes| indexes.last()) + .and_then(|index| self.by_index.get(*index)) + .map(|(_name, value)| value) + } + + /// Iterates over all of the values associated with the given field name. + fn get_all<'iter, A: AsSymbolRef>( + &'iter self, + field_name: A, + ) -> FieldValueRefsIterator<'iter, 'data> { + let indexes = self.get_indexes(field_name); + FieldValueRefsIterator { + current: 0, + indexes, + fields: &self.by_index, } } + + /// Iterates over all of the (field name, field value) pairs in the struct. + fn iter<'iter>( + &'iter self, + ) -> impl Iterator, ElementRef<'data>)> { + self.by_index.iter() + } } impl<'val> IonStruct for StructRef<'val> { type FieldName = SymbolRef<'val>; type Element = ElementRef<'val>; + type FieldsIterator<'a> = FieldRefIterator<'a, 'val> where 'val: 'a; + type ValuesIterator<'a> = FieldValueRefsIterator<'a, 'val> where 'val: 'a; - fn iter<'a>( - &'a self, - ) -> Box + 'a> { + fn iter<'a>(&'a self) -> FieldRefIterator<'a, 'val> { // flattens the fields map - Box::new( - self.fields - .iter() - .flat_map(|(name, values)| values.iter().map(move |value| (name, value))), - ) + FieldRefIterator::new(&self.fields.by_index) } fn get(&self, field_name: T) -> Option<&Self::Element> { - match field_name.as_symbol_ref().text() { - None => { - // Build a cheap, stack-allocated `SymbolRef` that represents unknown text - let symbol = SymbolRef::with_unknown_text(); - // Use the unknown text symbol to look up matching field values - self.fields.get(&symbol)?.last() - } - Some(text) => { - // Otherwise, look it up by text - self.fields.get(text)?.last() - } - } + self.fields.get_last(field_name) } - fn get_all<'a, T: AsSymbolRef>( - &'a self, - field_name: T, - ) -> Box + 'a> { - let values = match field_name.as_symbol_ref().text() { - None => { - // Build a cheap, stack-allocated `SymbolRef` that represents unknown text - let symbol = SymbolRef::with_unknown_text(); - // Use the unknown text symbol to look up matching field values - self.fields.get(&symbol) - } - Some(text) => { - // Otherwise, look it up by text - self.fields.get(text) - } - }; - - match values { - None => Box::new(std::iter::empty()), - Some(values) => Box::new(values.iter()), - } + fn get_all<'a, T: AsSymbolRef>(&'a self, field_name: T) -> FieldValueRefsIterator<'a, 'val> { + self.fields.get_all(field_name) } } impl<'val> PartialEq for StructRef<'val> { fn eq(&self, other: &Self) -> bool { // check if both fields have same length - self.fields.len() == other.fields.len() + self.len() == other.len() // we need to test equality in both directions for both fields // A good example for this is annotated vs not annotated values in struct // { a:4, a:4 } vs. { a:4, a:a::4 } // returns true @@ -493,6 +536,7 @@ impl<'val> IonElement for ElementRef<'val> { type Sequence = SequenceRef<'val>; type Struct = StructRef<'val>; type Builder = ElementRef<'val>; + type AnnotationsIterator<'a> = SymbolRefIterator<'a, 'val> where 'val: 'a; fn ion_type(&self) -> IonType { use ValueRef::*; @@ -513,8 +557,8 @@ impl<'val> IonElement for ElementRef<'val> { } } - fn annotations<'a>(&'a self) -> Box + 'a> { - Box::new(self.annotations.iter()) + fn annotations<'a>(&'a self) -> SymbolRefIterator<'a, 'val> { + SymbolRefIterator::new(&self.annotations) } fn with_annotations>(self, annotations: I) -> Self { diff --git a/src/value/element_stream_reader.rs b/src/value/element_stream_reader.rs index 8d159270..d3eb5070 100644 --- a/src/value/element_stream_reader.rs +++ b/src/value/element_stream_reader.rs @@ -1,5 +1,6 @@ use crate::result::{decoding_error, illegal_operation, illegal_operation_raw}; use crate::text::parent_container::ParentContainer; +use crate::value::iterators::SymbolsIterator; use crate::value::owned::Element; use crate::value::{IonElement, IonSequence, IonStruct}; use crate::{ @@ -78,7 +79,7 @@ impl ElementStreamReader { // If the parent is not empty that means we are inside a container // Get the next value of the container using the iterator let next_item = self.current_iter.next(); - if next_item == None { + if next_item.is_none() { // If there are no next values left within the iterator // then early return self.current_value = None; @@ -176,7 +177,7 @@ impl IonReader for ElementStreamReader { .current_value .as_ref() .map(|value| value.annotations()) - .unwrap_or_else(|| Box::new(std::iter::empty())) + .unwrap_or_else(|| SymbolsIterator::empty()) .cloned() // The annotations are already in memory and are already resolved to text, so // this step cannot fail. Map each token to Ok(token). diff --git a/src/value/iterators.rs b/src/value/iterators.rs new file mode 100644 index 00000000..088bc227 --- /dev/null +++ b/src/value/iterators.rs @@ -0,0 +1,193 @@ +use crate::value::borrowed::ElementRef; +use crate::value::owned::Element; +use crate::{Symbol, SymbolRef}; +use smallvec::SmallVec; +// This macro defines a new iterator type for a given `Iterator => Item` type name pair. +// +// The implementation produced can be used to iterate over any `Vec` or `&[Item]`. +macro_rules! create_new_slice_iterator_type { + ($($iterator_name:ident => $item_name:ty),+) => ($( + // Define a new type called '$iterator_name' that's a thin wrapper around a slice iterator. + // We wrap the slice iterator in an `Option` so we can provide the functionality of an empty + // iterator without requiring that an empty Vec or slice be provided. This sidesteps some + // hairy lifetime issues. + pub struct $iterator_name<'a> { + values: Option> + } + + impl<'a> $iterator_name<'a> { + // Define a constructor that takes a slice of `Item` + pub(crate) fn new(data: &'a [$item_name]) -> Self { + $iterator_name { + values: Some(data.iter()), + } + } + + // Define a constructor that takes no parameters and returns an empty iterator + pub(crate) fn empty() -> $iterator_name<'static> { + $iterator_name { values: None } + } + } + + // Implement the Iterator trait for our new type + impl<'a> Iterator for $iterator_name<'a> { + type Item = &'a $item_name; + + fn next(&mut self) -> Option { + self.values.as_mut().and_then(|iter| iter.next()) + } + } + )*) +} +create_new_slice_iterator_type!( + // Used for iterating over an Element's annotations + SymbolsIterator => Symbol, + // Used for iterating over a Sequence's Elements + ElementsIterator => Element +); + +// Like the `create_new_slice_iterator_type` macro defined above, but works for Ref types that have +// an additional lifetime. +macro_rules! create_new_ref_slice_iterator_type { + ($($iterator_name:ident => $item_name:ident),+) => ($( + // Define a new type called '$iterator_name' that's a thin wrapper around a slice iterator. + // We wrap the slice iterator in an `Option` so we can provide the functionality of an empty + // iterator without requiring that an empty Vec or slice be provided. This sidesteps some + // hairy lifetime issues. + pub struct $iterator_name<'iter, 'data: 'iter> { + values: Option >> + } + + impl<'iter, 'data: 'iter> $iterator_name<'iter, 'data> { + // Define a constructor that takes a slice of `Item` + pub(crate) fn new(data: &'iter [$item_name<'data>]) -> $iterator_name<'iter, 'data> { + $iterator_name { + values: Some(data.iter()), + } + } + + // Define a constructor that takes no parameters and returns an empty iterator + pub(crate) fn empty() -> $iterator_name<'static, 'static> { + $iterator_name { values: None } + } + } + + // Implement the Iterator trait for our new type + impl<'iter, 'data: 'iter> Iterator for $iterator_name<'iter, 'data> { + type Item = &'iter $item_name<'data>; + + fn next(&mut self) -> Option { + self.values.as_mut().and_then(|iter| iter.next()) + } + } + )*) +} + +create_new_ref_slice_iterator_type!( + // Used for iterating over an ElementRef's annotations + SymbolRefIterator => SymbolRef, + // Used for iterating over a SequenceRef's ElementRefs + ElementRefIterator => ElementRef +); + +// A convenient type alias for a vector capable of storing a single `usize` inline +// without heap allocation. This type should not be used in public interfaces directly. +pub(crate) type IndexVec = SmallVec<[usize; 1]>; + +/// Iterates over the (field name, field value) pairs in a Struct. +pub struct FieldIterator<'a> { + values: Option>, +} + +impl<'a> FieldIterator<'a> { + pub(crate) fn new(data: &'a [(Symbol, Element)]) -> Self { + FieldIterator { + values: Some(data.iter()), + } + } + + pub(crate) fn empty() -> FieldIterator<'static> { + FieldIterator { values: None } + } +} + +impl<'a> Iterator for FieldIterator<'a> { + type Item = (&'a Symbol, &'a Element); + + fn next(&mut self) -> Option { + self.values + .as_mut() + // Get the next &(name, value) and convert it to (&name, &value) + .and_then(|iter| iter.next().map(|field| (&field.0, &field.1))) + } +} + +/// Iterates over the values associated with a given field name in a Struct. +pub struct FieldValuesIterator<'a> { + pub(super) current: usize, + pub(super) indexes: Option<&'a IndexVec>, + pub(super) by_index: &'a Vec<(Symbol, Element)>, +} + +impl<'a> Iterator for FieldValuesIterator<'a> { + type Item = &'a Element; + + fn next(&mut self) -> Option { + self.indexes + .and_then(|i| i.get(self.current)) + .and_then(|i| { + self.current += 1; + self.by_index.get(*i) + }) + .map(|(_name, value)| value) + } +} + +/// Iterates over the (field name, field value) ref pairs in a StructRef. +pub struct FieldRefIterator<'iter, 'data> { + values: Option, ElementRef<'data>)>>, +} + +impl<'iter, 'data> FieldRefIterator<'iter, 'data> { + pub(crate) fn new(data: &'iter [(SymbolRef<'data>, ElementRef<'data>)]) -> Self { + FieldRefIterator { + values: Some(data.iter()), + } + } + + pub(crate) fn empty() -> FieldRefIterator<'static, 'static> { + FieldRefIterator { values: None } + } +} + +impl<'iter, 'data> Iterator for FieldRefIterator<'iter, 'data> { + type Item = (&'iter SymbolRef<'data>, &'iter ElementRef<'data>); + + fn next(&mut self) -> Option { + self.values + .as_mut() + // Get the next &(name, value) and convert it to (&name, &value) + .and_then(|iter| iter.next().map(|field| (&field.0, &field.1))) + } +} + +/// Iterates over the value refs associated with a given field name in a StructRef. +pub struct FieldValueRefsIterator<'iter, 'data> { + pub(super) current: usize, + pub(super) indexes: Option<&'iter IndexVec>, + pub(super) fields: &'iter Vec<(SymbolRef<'data>, ElementRef<'data>)>, +} + +impl<'iter, 'data> Iterator for FieldValueRefsIterator<'iter, 'data> { + type Item = &'iter ElementRef<'data>; + + fn next(&mut self) -> Option { + self.indexes + .and_then(|i| i.get(self.current)) + .and_then(|i| { + self.current += 1; + self.fields.get(*i) + }) + .map(|(_name, value)| value) + } +} diff --git a/src/value/mod.rs b/src/value/mod.rs index a1d0ddef..a075470a 100644 --- a/src/value/mod.rs +++ b/src/value/mod.rs @@ -214,6 +214,7 @@ use std::fmt::Debug; pub mod borrowed; mod element_stream_reader; +mod iterators; pub mod native_reader; pub mod native_writer; pub mod owned; @@ -314,6 +315,9 @@ where + Debug + PartialEq; type Builder: Builder + ?Sized; + type AnnotationsIterator<'a>: Iterator + where + Self: 'a; /// The type of data this element represents. fn ion_type(&self) -> IonType; @@ -351,13 +355,7 @@ where /// assert!(&borrowed_elem.has_annotation("a")); /// assert!(!&borrowed_elem.has_annotation("d")); /// ``` - /// - /// Note that this uses a `Box>` to capture the borrow cleanly without - /// without [generic associated types (GAT)][gat]. In theory, when GAT lands, this could - /// be replaced with static polymorphism. - /// - /// [gat]: https://rust-lang.github.io/rfcs/1598-generic_associated_types.html - fn annotations<'a>(&'a self) -> Box + 'a>; + fn annotations(&self) -> Self::AnnotationsIterator<'_>; /// Return an `Element` with given annotations fn with_annotations>(self, annotations: I) -> Self; @@ -432,15 +430,12 @@ where /// Represents the _value_ of sequences of Ion elements (i.e. `list` and `sexp`). pub trait IonSequence: Debug + PartialEq { type Element: IonElement + ?Sized; + type ElementsIterator<'a>: Iterator + where + Self: 'a; /// The children of the sequence. - /// - /// Note that this uses a `Box>` to capture the borrow cleanly without - /// without [generic associated types (GAT)][gat]. In theory, when GAT lands, this could - /// be replaced with static polymorphism. - /// - /// [gat]: https://rust-lang.github.io/rfcs/1598-generic_associated_types.html - fn iter<'a>(&'a self) -> Box + 'a>; + fn iter(&self) -> Self::ElementsIterator<'_>; /// Returns a reference to the element in the sequence at the given index or /// returns `None` if the index is out of the bounds. @@ -457,17 +452,15 @@ pub trait IonSequence: Debug + PartialEq { pub trait IonStruct: Debug + PartialEq { type FieldName: IonSymbolToken + ?Sized; type Element: IonElement + ?Sized; + type FieldsIterator<'a>: Iterator + where + Self: 'a; + type ValuesIterator<'a>: Iterator + where + Self: 'a; /// The fields of the structure. - /// - /// Note that this uses a `Box>` to capture the borrow cleanly without - /// without [generic associated types (GAT)][gat]. In theory, when GAT lands, this could - /// be replaced with static polymorphism. - /// - /// [gat]: https://rust-lang.github.io/rfcs/1598-generic_associated_types.html - fn iter<'a>( - &'a self, - ) -> Box + 'a>; + fn iter(&self) -> Self::FieldsIterator<'_>; /// Returns the last value corresponding to the field_name in the struct or /// returns `None` if the field_name does not exist in the struct @@ -523,10 +516,7 @@ pub trait IonStruct: Debug + PartialEq { /// owned.get_all("d").flat_map(|e| e.as_str()).collect::>() /// ); /// ``` - fn get_all<'a, T: AsSymbolRef>( - &'a self, - field_name: T, - ) -> Box + 'a>; + fn get_all(&self, field_name: T) -> Self::ValuesIterator<'_>; } pub trait Builder { diff --git a/src/value/owned.rs b/src/value/owned.rs index 1a3bce19..b2cca516 100644 --- a/src/value/owned.rs +++ b/src/value/owned.rs @@ -14,13 +14,16 @@ use crate::types::timestamp::Timestamp; use crate::types::SymbolId; use crate::value::Builder; use crate::{IonType, Symbol}; -use hashlink::LinkedHashMap; use num_bigint::BigInt; +use std::collections::HashMap; use std::fmt::{Display, Formatter}; use std::iter::FromIterator; +use std::rc::Rc; use crate::symbol_ref::AsSymbolRef; -use smallvec::SmallVec; +use crate::value::iterators::{ + ElementsIterator, FieldIterator, FieldValuesIterator, IndexVec, SymbolsIterator, +}; impl IonSymbolToken for Symbol { fn text(&self) -> Option<&str> { @@ -139,12 +142,16 @@ impl Builder for Element { /// An owned implementation of [`Sequence`] #[derive(Debug, Clone)] pub struct Sequence { - children: Vec, + // TODO: Since we've moved the elements Vec to the heap, we could consider replacing it with a + // SmallVec that can store some number of elements (4? 8?) inline. We'd need to benchmark. + children: Rc>, } impl Sequence { pub fn new(children: Vec) -> Self { - Self { children } + Self { + children: Rc::new(children), + } } } @@ -155,15 +162,18 @@ impl FromIterator for Sequence { for elem in iter { children.push(elem); } - Self { children } + Self { + children: Rc::new(children), + } } } impl IonSequence for Sequence { type Element = Element; + type ElementsIterator<'a> = ElementsIterator<'a>; - fn iter<'a>(&'a self) -> Box + 'a> { - Box::new(self.children.iter()) + fn iter(&self) -> Self::ElementsIterator<'_> { + ElementsIterator::new(&self.children) } fn get(&self, index: usize) -> Option<&Self::Element> { @@ -187,19 +197,77 @@ impl PartialEq for Sequence { impl Eq for Sequence {} +// This collection is broken out into its own type to allow instances of it to be shared with Rc. +#[derive(Debug)] +struct Fields { + // Key/value pairs in the order they were inserted + by_index: Vec<(Symbol, Element)>, + // Maps symbols to a list of indexes where values may be found in `by_index` above + by_name: HashMap, +} + +impl Fields { + /// Gets all of the indexes that contain a value associated with the given field name. + fn get_indexes(&self, field_name: A) -> Option<&IndexVec> { + field_name + .as_symbol_ref() + .text() + .map(|text| { + // If the symbol has defined text, look it up by &str + self.by_name.get(text) + }) + .unwrap_or_else(|| { + // Otherwise, construct a (cheap, stack-allocated) Symbol with unknown text... + let symbol = Symbol::unknown_text(); + // ...and use the unknown text symbol to look up matching field values + self.by_name.get(&symbol) + }) + } + + /// Iterates over the values found at the specified indexes. + fn get_values_at_indexes<'a>(&'a self, indexes: &'a IndexVec) -> FieldValuesIterator<'a> { + FieldValuesIterator { + current: 0, + indexes: Some(indexes), + by_index: &self.by_index, + } + } + + /// Gets the last value in the Struct that is associated with the specified field name. + /// + /// Note that the Ion data model views a struct as a bag of (name, value) pairs and does not + /// have a notion of field ordering. In most use cases, field names are distinct and the last + /// appearance of a field in the struct's serialized form will have been the _only_ appearance. + /// If a field name appears more than once, this method makes the arbitrary decision to return + /// the value associated with the last appearance. If your application uses structs that repeat + /// field names, you are encouraged to use [get_all] instead. + fn get_last(&self, field_name: A) -> Option<&Element> { + self.get_indexes(field_name) + .and_then(|indexes| indexes.last()) + .and_then(|index| self.by_index.get(*index)) + .map(|(_name, value)| value) + } + + /// Iterates over all of the values associated with the given field name. + fn get_all(&self, field_name: A) -> FieldValuesIterator { + let indexes = self.get_indexes(field_name); + FieldValuesIterator { + current: 0, + indexes, + by_index: &self.by_index, + } + } + + /// Iterates over all of the (field name, field value) pairs in the struct. + fn iter(&self) -> impl Iterator { + self.by_index.iter() + } +} + /// An owned implementation of [`IonStruct`] #[derive(Debug, Clone)] pub struct Struct { - // A mapping of field name to any values associated with that name. - // If a field name is repeated, each value will be in the associated SmallVec. - // Since repeated field names are not common, we store the values in a SmallVec; - // the first value will be stored directly in the map while additional values will - // be stored elsewhere on the heap. - fields: LinkedHashMap>, - // `fields.len()` will only tell us the number of *distinct* field names. If the struct - // contains any repeated field names, it will be an under-count. Therefore, we track the number - // of fields separately. - number_of_fields: usize, + fields: Rc, } impl Struct { @@ -207,32 +275,35 @@ impl Struct { pub fn fields(&self) -> impl Iterator { self.fields .iter() - .flat_map(|(name, values)| values.iter().map(move |value| (name, value))) + // Here we convert from &(name, value) to (&name, &value). + // The former makes a stronger assertion about how the data is being stored. We don't + // want that to be a mandatory part of the public API. + .map(|(name, element)| (name, element)) } fn fields_eq(&self, other: &Self) -> bool { - // For each (field name, field value list) in `self`... - for (field_name, field_values) in &self.fields { - // ...get the corresponding field value list from `other`. - let other_values = match other.fields.get(field_name) { - // If there's no such list, they're not equal. + // For each field name in `self`, get the list of indexes that contain a value with that name. + for (field_name, field_value_indexes) in &self.fields.by_name { + let other_value_indexes = match other.fields.get_indexes(field_name) { + Some(indexes) => indexes, + // The other struct doesn't have a field with this name so they're not equal. None => return false, - Some(values) => values, }; - // If `other` has a corresponding list but it's a different length, they're not equal. - if field_values.len() != other_values.len() { + if field_value_indexes.len() != other_value_indexes.len() { + // The other struct has fields with the same name, but a different number of them. return false; } - // If any of the values in `self`'s value list are not also in `other`'s value list, - // they're not equal. - if field_values.iter().any(|value| { - other_values - .iter() - .all(|other_value| !value.ion_eq(other_value)) - }) { - return false; + for field_value in self.fields.get_values_at_indexes(field_value_indexes) { + if other + .fields + .get_values_at_indexes(other_value_indexes) + .all(|other_value| !field_value.ion_eq(other_value)) + { + // Couldn't find an equivalent field in the other struct + return false; + } } } @@ -242,7 +313,7 @@ impl Struct { /// Returns the number of fields in this Struct. pub fn len(&self) -> usize { - self.number_of_fields + self.fields.by_index.len() } /// Returns `true` if this struct has zero fields. @@ -258,76 +329,40 @@ where { /// Returns an owned struct from the given iterator of field names/values. fn from_iter>(iter: I) -> Self { - let mut fields: LinkedHashMap> = LinkedHashMap::new(); - let mut number_of_fields: usize = 0; - + let mut by_index: Vec<(Symbol, Element)> = Vec::new(); + let mut by_name: HashMap = HashMap::new(); for (field_name, field_value) in iter { let field_name = field_name.into(); let field_value = field_value.into(); - fields - .entry(field_name) - .or_insert_with(SmallVec::new) - .push(field_value); - - number_of_fields += 1; + by_name + .entry(field_name.clone()) + .or_insert_with(IndexVec::new) + .push(by_index.len()); + by_index.push((field_name, field_value)); } - Self { - fields, - number_of_fields, - } + let fields = Rc::new(Fields { by_index, by_name }); + Self { fields } } } impl IonStruct for Struct { type FieldName = Symbol; type Element = Element; + type FieldsIterator<'a> = FieldIterator<'a>; + type ValuesIterator<'a> = FieldValuesIterator<'a>; - fn iter<'a>( - &'a self, - ) -> Box + 'a> { - Box::new(self.fields()) + fn iter(&self) -> FieldIterator<'_> { + FieldIterator::new(&self.fields.by_index) } fn get(&self, field_name: A) -> Option<&Self::Element> { - match field_name.as_symbol_ref().text() { - None => { - // Build a cheap, stack-allocated `Symbol` that represents unknown text - let symbol = Symbol::unknown_text(); - // Use the unknown text symbol to look up matching field values - self.fields.get(&symbol)?.last() - } - Some(text) => { - // Otherwise, look it up by text - self.fields.get(text)?.last() - } - } + self.fields.get_last(field_name) } - fn get_all<'a, A: AsSymbolRef>( - &'a self, - field_name: A, - ) -> Box + 'a> { - // TODO: This method has to box its return value, which is why we're not re-using it - // for the implementation of the `get` method. Once we remove the `Box` - // (via GATs or RPITIT?) we can have `get` call `get_all`. - let values = match field_name.as_symbol_ref().text() { - None => { - // Build a cheap, stack-allocated `Symbol` that represents unknown text - let symbol = Symbol::unknown_text(); - self.fields.get(&symbol) - } - Some(text) => { - // Otherwise, look it up by text - self.fields.get(text) - } - }; - - match values { - Some(values) => Box::new(values.iter()), - None => Box::new(None.iter()), // An empty iterator - } + fn get_all(&self, field_name: A) -> FieldValuesIterator<'_> { + self.fields.get_all(field_name) } } @@ -521,11 +556,39 @@ impl From for Element { } } +// TODO: explain motivation (static Rc no good) +// TODO: better name? +// Wraps a standard library slice iterator in an `Option`. +struct ElementDataIterator<'a, T: 'a> { + values: Option>, +} + +impl<'a, T: 'a> ElementDataIterator<'a, T> { + fn new<'f, V: 'f>(data: &'f [V]) -> ElementDataIterator<'f, V> { + ElementDataIterator { + values: Some(data.iter()), + } + } + + fn empty() -> Self { + ElementDataIterator { values: None } + } +} + +impl<'a, T: 'a> Iterator for ElementDataIterator<'a, T> { + type Item = &'a T; + + fn next(&mut self) -> Option { + self.values.as_mut().and_then(|iter| iter.next()) + } +} + impl IonElement for Element { type SymbolToken = Symbol; type Sequence = Sequence; type Struct = Struct; type Builder = Element; + type AnnotationsIterator<'a> = SymbolsIterator<'a>; fn ion_type(&self) -> IonType { use Value::*; @@ -547,8 +610,8 @@ impl IonElement for Element { } } - fn annotations<'a>(&'a self) -> Box + 'a> { - Box::new(self.annotations.iter()) + fn annotations(&self) -> Self::AnnotationsIterator<'_> { + SymbolsIterator::new(&self.annotations) } fn with_annotations>(self, annotations: I) -> Self { diff --git a/tests/element_test_vectors.rs b/tests/element_test_vectors.rs index c82a38b2..ba2e7e02 100644 --- a/tests/element_test_vectors.rs +++ b/tests/element_test_vectors.rs @@ -26,7 +26,7 @@ fn contains_path(paths: &[&str], file_name: &str) -> bool { paths .iter() // TODO construct the paths in a not so hacky way - .map(|p| p.replace("/", &PATH_SEPARATOR.to_string())) + .map(|p| p.replace('/', &PATH_SEPARATOR.to_string())) .any(|p| p == file_name) } @@ -110,7 +110,7 @@ trait ElementApi { source_elements.ion_eq(&new_elements), "Roundtrip via {:?} failed: {}", format, - Self::not_eq_error_message(&source_elements, &new_elements) + Self::not_eq_error_message(source_elements, &new_elements) ); Ok(new_elements) }