From 52ca23424c28506b01d9b47ad6a42f95d553cedc Mon Sep 17 00:00:00 2001 From: Jim Turner Date: Sat, 15 Feb 2020 13:04:30 -0500 Subject: [PATCH 1/2] Add benches for op with scalar and strided array --- benches/bench1.rs | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/benches/bench1.rs b/benches/bench1.rs index 35a1d6e7e..291a25e97 100644 --- a/benches/bench1.rs +++ b/benches/bench1.rs @@ -431,6 +431,22 @@ fn scalar_add_2(bench: &mut test::Bencher) { bench.iter(|| n + &a); } +#[bench] +fn scalar_add_strided_1(bench: &mut test::Bencher) { + let a = + Array::from_shape_fn((64, 64 * 2), |(i, j)| (i * 64 + j) as f32).slice_move(s![.., ..;2]); + let n = 1.; + bench.iter(|| &a + n); +} + +#[bench] +fn scalar_add_strided_2(bench: &mut test::Bencher) { + let a = + Array::from_shape_fn((64, 64 * 2), |(i, j)| (i * 64 + j) as f32).slice_move(s![.., ..;2]); + let n = 1.; + bench.iter(|| n + &a); +} + #[bench] fn scalar_sub_1(bench: &mut test::Bencher) { let a = Array::::zeros((64, 64)); From dcf38e8bb56141abe7b021a05f307ada952612b7 Mon Sep 17 00:00:00 2001 From: Jim Turner Date: Sat, 15 Feb 2020 13:21:23 -0500 Subject: [PATCH 2/2] FEAT: Performance improvement for &Array + scalar and scalar + &Array * The new implementation avoids cloning the elements twice, and it avoids iterating over the elements twice. (The old implementation called `.to_owned()` followed by the arithmetic operation, while the new implementation clones the elements and performs the arithmetic operation in the same iteration.) On my machine, this change improves the performance for both contiguous and discontiguous arrays. (`scalar_add_1/2` go from ~530 ns/iter to ~380 ns/iter, and `scalar_add_strided_1/2` go from ~1540 ns/iter to ~1420 ns/iter.) (Other changes to impl applicability removed from this commit.) 
Co-authored-by: bluss --- src/impl_ops.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/impl_ops.rs b/src/impl_ops.rs index 4804356e8..51d432ee6 100644 --- a/src/impl_ops.rs +++ b/src/impl_ops.rs @@ -159,8 +159,8 @@ impl<'a, A, S, D, B> $trt<B> for &'a ArrayBase<S, D> B: ScalarOperand, { type Output = Array<A, D>; - fn $mth(self, x: B) -> Array<A, D> { - self.to_owned().$mth(x) + fn $mth(self, x: B) -> Self::Output { + self.map(move |elt| elt.clone() $operator x.clone()) } } ); @@ -210,11 +210,11 @@ impl<'a, S, D> $trt<&'a ArrayBase<S, D>> for $scalar D: Dimension, { type Output = Array<$scalar, D>; - fn $mth(self, rhs: &ArrayBase<S, D>) -> Array<$scalar, D> { + fn $mth(self, rhs: &ArrayBase<S, D>) -> Self::Output { if_commutative!($commutative { rhs.$mth(self) } or { - self.$mth(rhs.to_owned()) + rhs.map(move |elt| self.clone() $operator elt.clone()) }) } }