Skip to content

Commit f3cb489

Browse files
committed
Implement extract_bits_any() for 64-bit types
1 parent 8f86e52 commit f3cb489

File tree

3 files changed

+99
-0
lines changed

3 files changed

+99
-0
lines changed

simdpp/core/extract_bits.h

+11
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,17 @@ SIMDPP_INL uint8_t extract_bits_any(const any_vec32<8, V>& a)
6262
{
6363
return detail::insn::i_extract_bits_any(uint32<8>(a));
6464
}
65+
template<class V>
66+
SIMDPP_INL uint8_t extract_bits_any(const any_vec64<2, V>& a)
67+
{
68+
return detail::insn::i_extract_bits_any(uint64<2>(a));
69+
}
70+
template<class V>
71+
SIMDPP_INL uint8_t extract_bits_any(const any_vec64<4, V>& a)
72+
{
73+
return detail::insn::i_extract_bits_any(uint64<4>(a));
74+
}
75+
6576

6677
/** Extracts specific bit from each byte of each element of a int8x16 vector.
6778

simdpp/detail/insn/extract_bits.h

+53
Original file line numberDiff line numberDiff line change
@@ -206,6 +206,59 @@ SIMDPP_INL uint8_t i_extract_bits_any(const uint32<8>& ca)
206206
#endif
207207
}
208208

209+
/** Collects one bit from each 64-bit element of @a ca into the two lowest
    bits of the result: element 0 lands in bit 0 and element 1 in bit 1.

    The branches below do not all sample the same bit position of an element
    (the NULL path tests bit 0, the SSE2 path uses _mm_movemask_pd, the NEON
    path masks bit 0 of element 0 and bit 1 of element 1), so the result is
    only consistent across targets when every element is either all zeros or
    all ones.
*/
SIMDPP_INL uint8_t i_extract_bits_any(const uint64<2>& ca)
{
    uint64<2> a = ca;
#if SIMDPP_USE_NULL
    uint8_t r = 0;
    for (unsigned i = 0; i < a.length; i++) {
        uint8_t x = ca.el(i);
        x = x & 1;
        // Shift the bits gathered so far down by one and insert the new bit
        // at position 1; after both iterations element 0 sits in bit 0.
        r = (r >> 1) | (x << 1);
    }
    return r;
#elif SIMDPP_USE_SSE2
    return _mm_movemask_pd(_mm_castsi128_pd(a.native()));
#elif SIMDPP_USE_NEON
    // Keep a distinct bit per element, then add the two halves so both bits
    // end up in the lowest byte of a single 64-bit lane.
    uint64<2> mask = make_uint(0x1, 0x2);
    a = bit_and(a, mask);
    // Fixed: the original referenced an undeclared `r2` here; the halves to
    // be summed are those of the masked vector `a`.
    uint64x1_t r = vadd_u64(vget_low_u64(a.native()), vget_high_u64(a.native()));
    return vget_lane_u8(vreinterpret_u8_u64(r), 0);
#elif SIMDPP_USE_ALTIVEC
    uint32<4> mask = make_uint(0x1, 0x0, 0x2, 0x0);
    a = bit_and(a, mask);
    uint32<4> zero = make_zero();
    // vec_sums accumulates all 32-bit lanes into the last lane of the result;
    // the low 16 bits of that lane are read back below.
    uint32<4> s = (int32x4)vec_sums((__vector int32_t)a.native(),
                                    (__vector int32_t)zero.native());
#if SIMDPP_BIG_ENDIAN
    return extract<7>(uint16x8(s));
#else
    return extract<6>(uint16x8(s));
#endif
#elif SIMDPP_USE_MSA
    // NOTE(review): `a` is a uint64<2> while this branch casts to v4u32 and
    // uses move4_l -- confirm this builds and behaves as intended on MSA.
    uint32<4> mask = make_uint(0x1, 0x0, 0x2, 0x0);
    a = bit_and(a, mask);
    a = (v4u32) __msa_hadd_u_d(a.native(), a.native());
    a = bit_or(a, move4_l<2>(a));
    return extract<0>(a);
#endif
}
246+
247+
/** Collects one bit from each of the four 64-bit elements of @a ca into the
    low four bits of the result.

    With AVX512DQ or AVX2 a single mask-extraction intrinsic is used.
    Otherwise the vector is processed as two uint64<2> halves: the first half
    supplies bits 0-1 and the second half supplies bits 2-3.
*/
SIMDPP_INL uint8_t i_extract_bits_any(const uint64<4>& ca)
{
#if SIMDPP_USE_AVX512DQ
    return _mm256_movepi64_mask(ca.native());
#elif SIMDPP_USE_AVX2
    return _mm256_movemask_pd(_mm256_castsi256_pd(ca.native()));
#else
    // FIXME: can be improved
    const uint64<2> lower_half = ca.vec<0>();
    const uint64<2> upper_half = ca.vec<1>();
    const int low_bits = i_extract_bits_any(lower_half);
    const int high_bits = i_extract_bits_any(upper_half) << 2;
    return low_bits | high_bits;
#endif
}
261+
209262
template<unsigned id> SIMDPP_INL
210263
uint16_t i_extract_bits(const uint8<16>& ca)
211264
{

test/insn/bitwise.cc

+35
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,39 @@ void test_extract_bits32_n(TestResultsSet& tc)
106106
TEST_PUSH_ALL_COMB_OP1_T(tc, std::uint32_t, uint32<B/4>, extract_bits_any, s);
107107
}
108108

109+
template<unsigned B>
110+
void test_extract_bits64_n(TestResultsSet& tc)
111+
{
112+
using namespace simdpp;
113+
114+
TestData<uint64<B/8>> s;
115+
116+
s.add(make_uint(0x0000000000000000, 0x0000000000000000));
117+
s.add(make_uint(0xffffffffffffffff, 0x0000000000000000));
118+
s.add(make_uint(0x0000000000000000, 0xffffffffffffffff));
119+
s.add(make_uint(0xffffffffffffffff, 0xffffffffffffffff));
120+
121+
TEST_PUSH_ALL_COMB_OP1_T(tc, std::uint32_t, uint64<B/8>, extract_bits_any, s);
122+
}
123+
124+
template<unsigned B>
125+
void test_extract_bits64_n2(TestResultsSet& tc)
126+
{
127+
using namespace simdpp;
128+
129+
TestData<uint64<B/8>> s;
130+
131+
s.add(make_uint(0xffffffffffffffff, 0x0000000000000000, 0xffffffffffffffff, 0x0000000000000000));
132+
s.add(make_uint(0x0000000000000000, 0xffffffffffffffff, 0x0000000000000000, 0xffffffffffffffff));
133+
s.add(make_uint(0x0000000000000000, 0x0000000000000000, 0xffffffffffffffff, 0x0000000000000000));
134+
s.add(make_uint(0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0xffffffffffffffff));
135+
s.add(make_uint(0x0000000000000000, 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff));
136+
s.add(make_uint(0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff));
137+
s.add(make_uint(0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000));
138+
139+
TEST_PUSH_ALL_COMB_OP1_T(tc, std::uint32_t, uint64<B/8>, extract_bits_any, s);
140+
}
141+
109142
template<class V, class V32I>
110143
void test_popcnt_type(TestResultsSet& tc)
111144
{
@@ -231,6 +264,8 @@ void test_bitwise(TestResults& res, TestReporter& tr)
231264
test_extract_bits16_n<32>(tc);
232265
test_extract_bits32_n<16>(tc);
233266
test_extract_bits32_n<32>(tc);
267+
test_extract_bits64_n<16>(tc);
268+
test_extract_bits64_n2<32>(tc);
234269
}
235270

236271
} // namespace SIMDPP_ARCH_NAMESPACE

0 commit comments

Comments
 (0)