Skip to content

Commit 8f86e52

Browse files
committed
Optimize extract() for float32<4> on SSE2
1 parent 97283e5 commit 8f86e52

File tree

1 file changed

+10 -1 lines changed

1 file changed

+10 -1 lines changed

simdpp/detail/insn/extract.h

+10 -1
Original file line number | Diff line number | Diff line change
@@ -350,7 +350,16 @@ float i_extract(const float32<4>& a)
350 350
#if SIMDPP_USE_NULL || SIMDPP_USE_NEON_NO_FLT_SP
351 351
return a.el(id);
352 352
#elif SIMDPP_USE_SSE2
353-
return _mm_cvtss_f32(_mm_shuffle_ps(a.native(), a.native(), _MM_SHUFFLE(id, id, id, id)));
353+
switch (id) {
354+
case 0: return _mm_cvtss_f32(a.native());
355+
#if SIMDPP_USE_SSE3
356+
case 1: return _mm_cvtss_f32(_mm_movehdup_ps(a.native()));
357+
#else
358+
case 1: return _mm_cvtss_f32(_mm_shuffle_ps(a.native(), a.native(), _MM_SHUFFLE(id, id, id, id)));
359+
#endif
360+
case 2: return _mm_cvtss_f32(_mm_unpackhi_ps(a.native(), a.native()));
361+
case 3: return _mm_cvtss_f32(_mm_shuffle_ps(a.native(), a.native(), _MM_SHUFFLE(id, id, id, id)));
362+
}
354 363
#elif SIMDPP_USE_NEON
355 364
return vgetq_lane_f32(a.native(), id);
356 365
#elif SIMDPP_USE_ALTIVEC || SIMDPP_USE_MSA

0 commit comments

Comments
 (0)