Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ipc4: mixin: Improve HiFi3 mix with gain impl #9810

Merged
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
146 changes: 44 additions & 102 deletions src/audio/mixin_mixout/mixin_mixout_hifi3.c
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@ static void mix_s16(struct cir_buf_ptr *sink, int32_t start_sample, int32_t mixe
ae_int16x4 out_sample;
ae_int16x4 *in;
ae_int16x4 *out;
ae_valign inu = AE_ZALIGN64();
ae_valign outu1 = AE_ZALIGN64();
ae_valign inu;
ae_valign outu1;
ae_valign outu2 = AE_ZALIGN64();
/* cir_buf_wrap() is required and is done below in a loop */
ae_int16 *dst = (ae_int16 *)sink->ptr + start_sample;
Expand Down Expand Up @@ -108,16 +108,19 @@ static void mix_s16_gain(struct cir_buf_ptr *sink, int32_t start_sample, int32_t
ae_int16x4 out_sample;
ae_int16x4 *in;
ae_int16x4 *out;
ae_valign inu = AE_ZALIGN64();
ae_valign outu1 = AE_ZALIGN64();
ae_valign inu;
ae_valign outu1;
ae_valign outu2 = AE_ZALIGN64();
/* cir_buf_wrap() is required and is done below in a loop */
ae_int16 *dst = (ae_int16 *)sink->ptr + start_sample;
ae_int16 *src = source->ptr;
ae_int16x4 gain_vec;
ae_int32x2 tmpl, tmph;
ae_f16x4 gain_vec;

/* this func does not support unity gain as 1 cannot be represented as Q1.15 value */
assert(gain < IPC4_MIXIN_UNITY_GAIN);

gain_vec = AE_L16_I((ae_int16 *)&gain, 0);
gain_vec = AE_SLAI16S(gain_vec, 5); /* convert to Q1.15 */

assert(mixed_samples >= start_sample);
samples_to_mix = AE_MIN_32_signed(mixed_samples - start_sample, sample_count);
Expand All @@ -141,13 +144,8 @@ static void mix_s16_gain(struct cir_buf_ptr *sink, int32_t start_sample, int32_t
/* process 4 frames per loop */
for (i = 0; i < m; i++) {
AE_LA16X4_IP(in_sample, inu, in);

/* apply gain to in_sample */
AE_MUL16X4(tmph, tmpl, in_sample, gain_vec);
tmpl = AE_SRAI32(tmpl, IPC4_MIXIN_GAIN_SHIFT);
tmph = AE_SRAI32(tmph, IPC4_MIXIN_GAIN_SHIFT);
in_sample = AE_CVT16X4(tmph, tmpl);

in_sample = AE_MULFP16X4S(in_sample, gain_vec);
AE_LA16X4_IP(out_sample, outu1, out);
out--;
out_sample = AE_ADD16S(in_sample, out_sample);
Expand All @@ -160,11 +158,7 @@ static void mix_s16_gain(struct cir_buf_ptr *sink, int32_t start_sample, int32_t
*/
for (i = 0; i < left ; i++) {
AE_L16_IP(in_sample, (ae_int16 *)in, sizeof(ae_int16));

AE_MUL16X4(tmph, tmpl, in_sample, gain_vec);
tmpl = AE_SRAI32(tmpl, IPC4_MIXIN_GAIN_SHIFT);
in_sample = AE_CVT16X4(tmpl, tmpl);

in_sample = AE_MULFP16X4S(in_sample, gain_vec);
AE_L16_IP(out_sample, (ae_int16 *)out, 0);
out_sample = AE_ADD16S(in_sample, out_sample);
AE_S16_0_IP(out_sample, (ae_int16 *)out, sizeof(ae_int16));
Expand All @@ -187,12 +181,7 @@ static void mix_s16_gain(struct cir_buf_ptr *sink, int32_t start_sample, int32_t
/* process 4 frames per loop */
for (i = 0; i < m; i++) {
AE_LA16X4_IP(in_sample, inu, in);

AE_MUL16X4(tmph, tmpl, in_sample, gain_vec);
tmpl = AE_SRAI32(tmpl, IPC4_MIXIN_GAIN_SHIFT);
tmph = AE_SRAI32(tmph, IPC4_MIXIN_GAIN_SHIFT);
in_sample = AE_CVT16X4(tmph, tmpl);

in_sample = AE_MULFP16X4S(in_sample, gain_vec);
AE_SA16X4_IP(in_sample, outu2, out);
}
AE_SA64POS_FP(outu2, out);
Expand All @@ -202,11 +191,7 @@ static void mix_s16_gain(struct cir_buf_ptr *sink, int32_t start_sample, int32_t
*/
for (i = 0; i < left ; i++) {
AE_L16_IP(in_sample, (ae_int16 *)in, sizeof(ae_int16));

AE_MUL16X4(tmph, tmpl, in_sample, gain_vec);
tmpl = AE_SRAI32(tmpl, IPC4_MIXIN_GAIN_SHIFT);
in_sample = AE_CVT16X4(tmpl, tmpl);

in_sample = AE_MULFP16X4S(in_sample, gain_vec);
AE_S16_0_IP(in_sample, (ae_int16 *)out, sizeof(ae_int16));
}
}
Expand All @@ -224,8 +209,8 @@ static void mix_s24(struct cir_buf_ptr *sink, int32_t start_sample, int32_t mixe
ae_int32x2 out_sample;
ae_int32x2 *in;
ae_int32x2 *out;
ae_valign inu = AE_ZALIGN64();
ae_valign outu1 = AE_ZALIGN64();
ae_valign inu;
ae_valign outu1;
ae_valign outu2 = AE_ZALIGN64();
/* cir_buf_wrap() is required and is done below in a loop */
int32_t *dst = (int32_t *)sink->ptr + start_sample;
Expand Down Expand Up @@ -309,20 +294,24 @@ static void mix_s24_gain(struct cir_buf_ptr *sink, int32_t start_sample, int32_t
{
int samples_to_mix, samples_to_copy, left_samples;
int n, nmax, i, m, left;
ae_int32x2 in_sample, in_sample32;
ae_int32x2 in_sample;
ae_int32x2 out_sample;
ae_int32x2 *in;
ae_int32x2 *out;
ae_valign inu = AE_ZALIGN64();
ae_valign outu1 = AE_ZALIGN64();
ae_valign inu;
ae_valign outu1;
ae_valign outu2 = AE_ZALIGN64();
/* cir_buf_wrap() is required and is done below in a loop */
int32_t *dst = (int32_t *)sink->ptr + start_sample;
int32_t *src = source->ptr;
ae_int16x4 gain_vec;
ae_int64 tmph, tmpl;
ae_f24x2 gain_vec;
ae_int32 gain32 = (ae_int32)gain;

gain_vec = AE_L16_I((ae_int16 *)&gain, 0);
/* this func does not support unity gain as 1 cannot be represented as Q1.23 value */
assert(gain < IPC4_MIXIN_UNITY_GAIN);

gain_vec = AE_MOVF24X2_FROMINT32X2(AE_L32_I(&gain32, 0));
gain_vec = AE_SLAI24S(gain_vec, 13); /* convert to Q1.23 */

assert(mixed_samples >= start_sample);
samples_to_mix = AE_MIN_32_signed(mixed_samples - start_sample, sample_count);
Expand All @@ -346,18 +335,10 @@ static void mix_s24_gain(struct cir_buf_ptr *sink, int32_t start_sample, int32_t
/* process 2 samples per time */
for (i = 0; i < m; i++) {
AE_LA32X2_IP(in_sample, inu, in);

/* apply gain to in_sample */
in_sample32 = AE_SLAI32(in_sample, 8); /* sign extension */
tmpl = AE_MUL32X16_L0(in_sample32, gain_vec);
tmph = AE_MUL32X16_H0(in_sample32, gain_vec);
tmpl = AE_SRAI64(tmpl, 8 + IPC4_MIXIN_GAIN_SHIFT);
tmph = AE_SRAI64(tmph, 8 + IPC4_MIXIN_GAIN_SHIFT);
in_sample = AE_SEL32_LL(AE_MOVINT32X2_FROMINT64(tmph),
AE_MOVINT32X2_FROMINT64(tmpl));

in_sample = AE_MULFP24X2R(AE_MOVF24X2_FROMINT32X2(in_sample), gain_vec);
AE_LA32X2_IP(out_sample, outu1, out);
out--;
/* out samples are already sign extended by other mixin in a loop below */
out_sample = AE_ADD24S(in_sample, out_sample);
AE_SA32X2_IP(out_sample, outu2, out);
}
Expand All @@ -366,13 +347,9 @@ static void mix_s24_gain(struct cir_buf_ptr *sink, int32_t start_sample, int32_t
/* process the left sample to avoid memory access overrun */
if (left) {
AE_L32_IP(in_sample, (ae_int32 *)in, sizeof(ae_int32));

in_sample32 = AE_SLAI32(in_sample, 8); /* sign extension */
tmpl = AE_MUL32X16_L0(in_sample32, gain_vec);
tmpl = AE_SRAI64(tmpl, 8 + IPC4_MIXIN_GAIN_SHIFT);
in_sample = AE_MOVINT32X2_FROMINT64(tmpl);

in_sample = AE_MULFP24X2R(AE_MOVF24X2_FROMINT32X2(in_sample), gain_vec);
AE_L32_IP(out_sample, (ae_int32 *)out, 0);
/* out samples are already sign extended by other mixin in a loop below */
out_sample = AE_ADD24S(in_sample, out_sample);
AE_S32_L_IP(out_sample, (ae_int32 *)out, sizeof(ae_int32));
}
Expand All @@ -392,27 +369,14 @@ static void mix_s24_gain(struct cir_buf_ptr *sink, int32_t start_sample, int32_t
left = n & 1;
for (i = 0; i < m; i++) {
AE_LA32X2_IP(in_sample, inu, in);

in_sample32 = AE_SLAI32(in_sample, 8); /* sign extension */
tmpl = AE_MUL32X16_L0(in_sample32, gain_vec);
tmph = AE_MUL32X16_H0(in_sample32, gain_vec);
tmpl = AE_SRAI64(tmpl, 8 + IPC4_MIXIN_GAIN_SHIFT);
tmph = AE_SRAI64(tmph, 8 + IPC4_MIXIN_GAIN_SHIFT);
in_sample = AE_SEL32_LL(AE_MOVINT32X2_FROMINT64(tmph),
AE_MOVINT32X2_FROMINT64(tmpl));

in_sample = AE_MULFP24X2R(AE_MOVF24X2_FROMINT32X2(in_sample), gain_vec);
AE_SA32X2_IP(in_sample, outu2, out);
}
AE_SA64POS_FP(outu2, out);
/* process the left sample to avoid memory access overrun */
if (left) {
AE_L32_IP(in_sample, (ae_int32 *)in, sizeof(ae_int32));

in_sample32 = AE_SLAI32(in_sample, 8); /* sign extension */
tmpl = AE_MUL32X16_L0(in_sample32, gain_vec);
tmpl = AE_SRAI64(tmpl, 8 + IPC4_MIXIN_GAIN_SHIFT);
in_sample = AE_MOVINT32X2_FROMINT64(tmpl);

in_sample = AE_MULFP24X2R(AE_MOVF24X2_FROMINT32X2(in_sample), gain_vec);
AE_S32_L_IP(in_sample, (ae_int32 *)out, sizeof(ae_int32));
}
}
Expand All @@ -431,8 +395,8 @@ static void mix_s32(struct cir_buf_ptr *sink, int32_t start_sample, int32_t mixe
ae_int32x2 out_sample;
ae_int32x2 *in;
ae_int32x2 *out;
ae_valign inu = AE_ZALIGN64();
ae_valign outu1 = AE_ZALIGN64();
ae_valign inu;
ae_valign outu1;
ae_valign outu2 = AE_ZALIGN64();
/* cir_buf_wrap() is required and is done below in a loop */
int32_t *dst = (int32_t *)sink->ptr + start_sample;
Expand Down Expand Up @@ -512,16 +476,19 @@ static void mix_s32_gain(struct cir_buf_ptr *sink, int32_t start_sample, int32_t
ae_int32x2 out_sample;
ae_int32x2 *in;
ae_int32x2 *out;
ae_valign inu = AE_ZALIGN64();
ae_valign outu1 = AE_ZALIGN64();
ae_valign inu;
ae_valign outu1;
ae_valign outu2 = AE_ZALIGN64();
/* cir_buf_wrap() is required and is done below in a loop */
int32_t *dst = (int32_t *)sink->ptr + start_sample;
int32_t *src = source->ptr;
ae_int16x4 gain_vec;
ae_int64 tmpl, tmph;
ae_f16x4 gain_vec;

/* this func does not support unity gain as 1 cannot be represented as Q1.15 value */
assert(gain < IPC4_MIXIN_UNITY_GAIN);

gain_vec = AE_L16_I((ae_int16 *)&gain, 0);
gain_vec = AE_SLAI16S(gain_vec, 5); /* convert to Q1.15 */

assert(mixed_samples >= start_sample);
samples_to_mix = AE_MIN_32_signed(mixed_samples - start_sample, sample_count);
Expand All @@ -544,32 +511,18 @@ static void mix_s32_gain(struct cir_buf_ptr *sink, int32_t start_sample, int32_t
left = n & 1;
for (i = 0; i < m; i++) {
AE_LA32X2_IP(in_sample, inu, in);

/* apply gain to in_sample */
tmpl = AE_MUL32X16_L0(in_sample, gain_vec);
tmph = AE_MUL32X16_H0(in_sample, gain_vec);
tmpl = AE_SRAI64(tmpl, IPC4_MIXIN_GAIN_SHIFT);
tmph = AE_SRAI64(tmph, IPC4_MIXIN_GAIN_SHIFT);
in_sample = AE_SEL32_LL(AE_MOVINT32X2_FROMINT64(tmph),
AE_MOVINT32X2_FROMINT64(tmpl));

AE_LA32X2_IP(out_sample, outu1, out);
out--;
out_sample = AE_ADD32S(in_sample, out_sample);
AE_MULAFP32X16X2RS_L(out_sample, in_sample, gain_vec);
AE_SA32X2_IP(out_sample, outu2, out);
}
AE_SA64POS_FP(outu2, out);

/* process the left sample to avoid memory access overrun */
if (left) {
AE_L32_IP(in_sample, (ae_int32 *)in, sizeof(ae_int32));

tmpl = AE_MUL32X16_L0(in_sample, gain_vec);
tmpl = AE_SRAI64(tmpl, IPC4_MIXIN_GAIN_SHIFT);
in_sample = AE_MOVINT32X2_FROMINT64(tmpl);

AE_L32_IP(out_sample, (ae_int32 *)out, 0);
out_sample = AE_ADD32S(in_sample, out_sample);
AE_MULAFP32X16X2RS_L(out_sample, in_sample, gain_vec);
AE_S32_L_IP(out_sample, (ae_int32 *)out, sizeof(ae_int32));
}
}
Expand All @@ -589,26 +542,15 @@ static void mix_s32_gain(struct cir_buf_ptr *sink, int32_t start_sample, int32_t
left = n & 1;
for (i = 0; i < m; i++) {
AE_LA32X2_IP(in_sample, inu, in);

tmpl = AE_MUL32X16_L0(in_sample, gain_vec);
tmph = AE_MUL32X16_H0(in_sample, gain_vec);
tmpl = AE_SRAI64(tmpl, IPC4_MIXIN_GAIN_SHIFT);
tmph = AE_SRAI64(tmph, IPC4_MIXIN_GAIN_SHIFT);
in_sample = AE_SEL32_LL(AE_MOVINT32X2_FROMINT64(tmph),
AE_MOVINT32X2_FROMINT64(tmpl));

in_sample = AE_MULFP32X16X2RS_L(in_sample, gain_vec);
AE_SA32X2_IP(in_sample, outu2, out);
}
AE_SA64POS_FP(outu2, out);

/* process the left sample to avoid memory access overrun */
if (left) {
AE_L32_IP(in_sample, (ae_int32 *)in, sizeof(ae_int32));

tmpl = AE_MUL32X16_L0(in_sample, gain_vec);
tmpl = AE_SRAI64(tmpl, IPC4_MIXIN_GAIN_SHIFT);
in_sample = AE_MOVINT32X2_FROMINT64(tmpl);

in_sample = AE_MULFP32X16X2RS_L(in_sample, gain_vec);
AE_S32_L_IP(in_sample, (ae_int32 *)out, sizeof(ae_int32));
}
}
Expand Down
Loading