Skip to content

Commit

Permalink
Math: FIR: Copy HiFi5 filter functions from HiFi3 version
Browse files Browse the repository at this point in the history
This patch contains only minimal changes to build this with
HiFi5 toolchain. Changed SOF_USE_HIFI() / SOF_USE_MIN_HIFI()
macros usage and included hifi5 header.

Signed-off-by: Seppo Ingalsuo <seppo.ingalsuo@linux.intel.com>
  • Loading branch information
singalsu committed Mar 3, 2025
1 parent 87b9fc6 commit 5273054
Show file tree
Hide file tree
Showing 5 changed files with 260 additions and 2 deletions.
2 changes: 1 addition & 1 deletion src/math/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ add_local_sources_ifdef(CONFIG_POWER_FIXED sof power.c)

add_local_sources_ifdef(CONFIG_BINARY_LOGARITHM_FIXED sof base2log.c)

add_local_sources_ifdef(CONFIG_MATH_FIR sof fir_generic.c fir_hifi2ep.c fir_hifi3.c)
add_local_sources_ifdef(CONFIG_MATH_FIR sof fir_generic.c fir_hifi2ep.c fir_hifi3.c fir_hifi5.c)

if(CONFIG_MATH_FFT)
add_subdirectory(fft)
Expand Down
2 changes: 1 addition & 1 deletion src/math/fir_hifi3.c
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
#include <sof/math/fir_config.h>
#include <sof/common.h>

#if SOF_USE_MIN_HIFI(3, FILTER)
#if SOF_USE_HIFI(3, FILTER) || SOF_USE_HIFI(4, FILTER)

#include <sof/audio/buffer.h>
#include <sof/math/fir_hifi3.h>
Expand Down
256 changes: 256 additions & 0 deletions src/math/fir_hifi5.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,256 @@
// SPDX-License-Identifier: BSD-3-Clause
//
// Copyright(c) 2017-2025 Intel Corporation.
//
// Author: Seppo Ingalsuo <seppo.ingalsuo@linux.intel.com>

#include <sof/math/fir_config.h>
#include <sof/common.h>

#if SOF_USE_MIN_HIFI(5, FILTER)

#include <sof/audio/buffer.h>
#include <sof/math/fir_hifi3.h>
#include <user/fir.h>
#include <xtensa/config/defs.h>
#include <xtensa/tie/xt_hifi5.h>
#include <rtos/symbol.h>
#include <errno.h>
#include <stddef.h>
#include <stdint.h>

/*
* EQ FIR algorithm code
*/

void fir_reset(struct fir_state_32x16 *fir)
{
fir->taps = 0;
fir->length = 0;
fir->out_shift = 0;
fir->coef = NULL;
/* There may need to know the beginning of dynamic allocation after
* reset so omitting setting also fir->delay to NULL.
*/
}
EXPORT_SYMBOL(fir_reset);

int fir_delay_size(struct sof_fir_coef_data *config)
{
/* Check FIR tap count for implementation specific constraints */
if (config->length > SOF_FIR_MAX_LENGTH || config->length < 4)
return -EINVAL;

/* The optimization requires the tap count to be multiple of four */
if (config->length & 0x3)
return -EINVAL;

/* The dual sample version needs one more delay entry. To preserve
* align for 64 bits need to add two.
*/
return (config->length + 2) * sizeof(int32_t);
}
EXPORT_SYMBOL(fir_delay_size);

int fir_init_coef(struct fir_state_32x16 *fir,
struct sof_fir_coef_data *config)
{
/* The length is taps plus two since the filter computes two
* samples per call. Length plus one would be minimum but the add
* must be even. The even length is needed for 64 bit loads from delay
* lines with 32 bit samples.
*/
fir->taps = (int)config->length;
fir->length = fir->taps + 2;
fir->out_shift = (int)config->out_shift;
fir->coef = (ae_f16x4 *)&config->coef[0];
return 0;
}
EXPORT_SYMBOL(fir_init_coef);

void fir_init_delay(struct fir_state_32x16 *fir, int32_t **data)
{
fir->delay = (ae_int32 *)*data;
fir->delay_end = fir->delay + fir->length;
fir->rwp = (ae_int32 *)(fir->delay + fir->length - 1);
*data += fir->length; /* Point to next delay line start */
}
EXPORT_SYMBOL(fir_init_delay);

void fir_get_lrshifts(struct fir_state_32x16 *fir, int *lshift,
int *rshift)
{
*lshift = (fir->out_shift < 0) ? -fir->out_shift : 0;
*rshift = (fir->out_shift > 0) ? fir->out_shift : 0;
}
EXPORT_SYMBOL(fir_get_lrshifts);

/* HiFi EP has the follow number of reqisters that should not be exceeded
* 4x 56 bit registers in register file Q
* 8x 48 bit registers in register file P
*/

void fir_32x16(struct fir_state_32x16 *fir, ae_int32 x, ae_int32 *y, int shift)
{
/* This function uses
* 1x 56 bit registers Q,
* 4x 48 bit registers P
* 3x integers
* 2x address pointers,
*/
ae_f64 a;
ae_valign u;
ae_f32x2 data2;
ae_f16x4 coefs;
ae_f32x2 d0;
ae_f32x2 d1;
int i;
ae_int32 *dp = fir->rwp;
ae_int16x4 *coefp = (ae_int16x4 *)fir->coef;
const int taps_div_4 = fir->taps >> 2;
const int inc = sizeof(int32_t);

/* Bypass samples if taps count is zero. */
if (!taps_div_4) {
*y = x;
return;
}

/* Write sample to delay */
AE_S32_L_XC(x, fir->rwp, -sizeof(int32_t));

/* Prime the coefficients stream */
u = AE_LA64_PP(coefp);

/* Note: If the next function is converted to handle two samples
* per call the data load can be done with single instruction
* AE_LP24X2F_C(data2, dp, sizeof(ae_p24x2f));
*/
a = AE_ZEROQ56();
for (i = 0; i < taps_div_4; i++) {
/* Load four coefficients. Coef_3 contains tap h[n],
* coef_2 contains h[n+1], coef_1 contains h[n+2], and
* coef_0 contains h[n+3];
*/
AE_LA16X4_IP(coefs, u, coefp);

/* Load two data samples and pack to d0 to data2_h and
* d1 to data2_l.
*/
AE_L32_XC(d0, dp, inc);
AE_L32_XC(d1, dp, inc);
data2 = AE_SEL32_LL(d0, d1);

/* Accumulate
* a += data2_h * coefs_3 + data2_l * coefs_2. The Q1.31
* data and Q1.15 coefficients are used as 24 bits as
* Q1.23 values.
*/
AE_MULAAFD32X16_H3_L2(a, data2, coefs);

/* Repeat the same for next two taps and increase coefp.
* a += data2_h * coefs_1 + data2_l * coefs_0.
*/
AE_L32_XC(d0, dp, inc);
AE_L32_XC(d1, dp, inc);
data2 = AE_SEL32_LL(d0, d1);
AE_MULAAFD32X16_H1_L0(a, data2, coefs);
}

/* Do scaling shifts and store sample. */
a = AE_SLAA64S(a, shift);
AE_S32_L_I(AE_ROUND32F48SSYM(a), (ae_int32 *)y, 0);
}
EXPORT_SYMBOL(fir_32x16);

/* HiFi EP has the follow number of reqisters that should not be exceeded
* 4x 56 bit registers in register file Q
* 8x 48 bit registers in register file P
*/

void fir_32x16_2x(struct fir_state_32x16 *fir, ae_int32 x0, ae_int32 x1,
ae_int32 *y0, ae_int32 *y1, int shift)
{
/* This function uses
* 2x 56 bit registers Q,
* 4x 48 bit registers P
* 3x integers
* 2x address pointers,
*/
ae_f64 a;
ae_f64 b;
ae_valign u;
ae_f32x2 d0;
ae_f32x2 d1;
ae_f16x4 coefs;
int i;
ae_f32x2 *dp;
ae_f16x4 *coefp = fir->coef;
const int taps_div_4 = fir->taps >> 2;
const int inc = 2 * sizeof(int32_t);

/* Bypass samples if taps count is zero. */
if (!taps_div_4) {
*y0 = x0;
*y1 = x1;
return;
}

/* Write samples to delay */
AE_S32_L_XC(x0, fir->rwp, -sizeof(int32_t));
dp = (ae_f32x2 *)fir->rwp;
AE_S32_L_XC(x1, fir->rwp, -sizeof(int32_t));

/* Note: If the next function is converted to handle two samples
* per call the data load can be done with single instruction
* AE_LP24X2F_C(data2, dp, sizeof(ae_p24x2f));
*/
a = AE_ZERO64();
b = AE_ZERO64();

/* Prime the coefficients stream */
u = AE_LA64_PP(coefp);

/* Load two data samples and pack to d0 to data2_h and
* d1 to data2_l.
*/
AE_L32X2_XC(d0, dp, inc);
for (i = 0; i < taps_div_4; i++) {
/* Load four coefficients. Coef_3 contains tap h[n],
* coef_2 contains h[n+1], coef_1 contains h[n+2], and
* coef_0 contains h[n+3];
*/
AE_LA16X4_IP(coefs, u, coefp);

/* Load two data samples. Upper part d1_h is x[n+1] and
* lower part d1_l is x[n].
*/
AE_L32X2_XC(d1, dp, inc);

/* Quad MAC (HH)
* b += d0_h * coefs_3 + d0_l * coefs_2
* a += d0_l * coefs_3 + d1_h * coefs_2
*/
AE_MULAFD32X16X2_FIR_HH(b, a, d0, d1, coefs);
d0 = d1;

/* Repeat the same for next two taps and increase coefp. */
AE_L32X2_XC(d1, dp, inc);

/* Quad MAC (HL)
* b += d0_h * coefs_1 + d0_l * coefs_0
* a += d0_l * coefs_1 + d1_h * coefs_0
*/
AE_MULAFD32X16X2_FIR_HL(b, a, d0, d1, coefs);
d0 = d1;
}

/* Do scaling shifts and store sample. */
b = AE_SLAA64S(b, shift);
a = AE_SLAA64S(a, shift);
AE_S32_L_I(AE_ROUND32F48SSYM(b), (ae_int32 *)y1, 0);
AE_S32_L_I(AE_ROUND32F48SSYM(a), (ae_int32 *)y0, 0);
}
EXPORT_SYMBOL(fir_32x16_2x);

#endif
1 change: 1 addition & 0 deletions src/math/fir_llext/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,5 +6,6 @@ sof_llext_build("fir"
../fir_generic.c
../fir_hifi2ep.c
../fir_hifi3.c
../fir_hifi5.c
LIB openmodules
)
1 change: 1 addition & 0 deletions zephyr/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -687,6 +687,7 @@ elseif(CONFIG_MATH_FIR)
${SOF_MATH_PATH}/fir_generic.c
${SOF_MATH_PATH}/fir_hifi2ep.c
${SOF_MATH_PATH}/fir_hifi3.c
${SOF_MATH_PATH}/fir_hifi5.c
)
endif()

Expand Down

0 comments on commit 5273054

Please sign in to comment.