Math: Inline function sofm_lut_sin_fixed_16b() for performance

singalsu · singalsu · commit 5412a28fd8dc · 2025-01-27T15:01:52.000+02:00
This patch inlines the function sofm_lut_sin_fixed_16b() and
moves it to header file lut_trig.h. The lookup table is kept
in lut_trig.c and made global.

The DRC component use a lot the sine function (the fast lookup
tables version). The function seems to not get improvement from
HiFi intrinsics rewrite but making it inline improves DRC
performance in MTL platform by 0.54 MCPS, from 12.62 MCPS to
12.08 MCPS.

In Multiband-DRC the saving multiplies by number of bands, e.g.
1.58 MCPS saving with three bands.

Signed-off-by: Seppo Ingalsuo &lt;seppo.ingalsuo@linux.intel.com&gt;
diff --git a/src/include/sof/math/lut_trig.h b/src/include/sof/math/lut_trig.h
@@ -10,6 +10,53 @@
 
 #include <stdint.h>
 
-int16_t sofm_lut_sin_fixed_16b(int32_t w); /* Input is Q4.28, output is Q1.15 */
+#define SOFM_LUT_SINE_C_Q20	341782638	/* 2 * SINE_NQUART / pi in Q12.20 */
+#define SOFM_LUT_SINE_NQUART	512		/* Must be 2^N */
+#define SOFM_LUT_SINE_SIZE	(SOFM_LUT_SINE_NQUART + 1)
+
+extern const uint16_t sofm_lut_sine_table_s16[];
+
+/* Sine lookup table read */
+static inline int32_t sofm_sine_lookup_16b(int idx)
+{
+	uint16_t s;
+	int i1;
+
+	i1 = idx & (2 * SOFM_LUT_SINE_NQUART - 1);
+	if (i1 > SOFM_LUT_SINE_NQUART)
+		i1 = 2 * SOFM_LUT_SINE_NQUART - i1;
+
+	s = sofm_lut_sine_table_s16[i1];
+	if (idx > 2 * SOFM_LUT_SINE_NQUART)
+		return -((int32_t)s);
+
+	return (int32_t)s;
+}
+
+/**
+ * Compute fixed point sine with table lookup and interpolation
+ * @param w	Input angle in radians Q4.28
+ * @return	Sine value Q1.15
+ */
+static inline int16_t sofm_lut_sin_fixed_16b(int32_t w)
+{
+	int64_t idx;
+	int32_t sine;
+	int32_t frac;
+	int32_t delta;
+	int32_t s0;
+	int32_t s1;
+	int64_t idx_tmp;
+
+	/* Q4.28 x Q12.20 -> Q16.48 --> Q16.31*/
+	idx_tmp = ((int64_t)w * SOFM_LUT_SINE_C_Q20) >> 17;
+	idx = (idx_tmp >> 31); /* Shift to Q0 */
+	frac = (int32_t)(idx_tmp - (idx << 31)); /* Get fraction Q1.31*/
+	s0 = sofm_sine_lookup_16b(idx); /* Q1.16 */
+	s1 = sofm_sine_lookup_16b(idx + 1); /* Q1.16 */
+	delta = s1 - s0; /* Q1.16 */
+	sine = s0 + q_mults_32x32(frac, delta, Q_SHIFT_BITS_64(31, 16, 16)); /* Q1.16 */
+	return sat_int16((sine + 1) >> 1); /* Round to Q1.15 */
+}
 
 #endif /* __SOF_MATH_LUT_TRIG_H__ */
diff --git a/src/math/lut_trig.c b/src/math/lut_trig.c
@@ -5,21 +5,16 @@
 // Author: Seppo Ingalsuo <seppo.ingalsuo@linux.intel.com>
 
 #include <sof/audio/format.h>
-#include <sof/math/lut_trig.h>
 #include <rtos/symbol.h>
 #include <stdint.h>
 
-#define SOFM_LUT_SINE_C_Q20	341782638	/* 2 * SINE_NQUART / pi in Q12.20 */
-#define SOFM_LUT_SINE_NQUART	512		/* Must be 2^N */
-#define SOFM_LUT_SINE_SIZE	(SOFM_LUT_SINE_NQUART + 1)
-
 /* Sine values 0 to pi/2, calculated with Octave
  *	w = linspace(0, pi/2, 513);
  *	s = 2^16;
  *	x = min(round(s * sin(w)), s - 1);
  */
 
-static const uint16_t sofm_lut_sine_table_s16[SOFM_LUT_SINE_SIZE] = {
+const uint16_t sofm_lut_sine_table_s16[] = {
 	     0,    201,    402,    603,    804,   1005,   1206,   1407,   1608,   1809,   2010,
 	  2211,   2412,   2613,   2814,   3015,   3216,   3417,   3617,   3818,   4019,   4219,
 	  4420,   4621,   4821,   5022,   5222,   5422,   5623,   5823,   6023,   6224,   6424,
@@ -68,43 +63,3 @@ static const uint16_t sofm_lut_sine_table_s16[SOFM_LUT_SINE_SIZE] = {
 	 65447,  65457,  65467,  65476,  65484,  65492,  65499,  65505,  65511,  65516,  65521,
 	 65525,  65528,  65531,  65533,  65535,  65535,  65535
 };
-
-/* Sine lookup table read */
-static inline int32_t sofm_sine_lookup_16b(int idx)
-{
-	uint16_t s;
-	int i1;
-
-	i1 = idx & (2 * SOFM_LUT_SINE_NQUART - 1);
-	if (i1 > SOFM_LUT_SINE_NQUART)
-		i1 = 2 * SOFM_LUT_SINE_NQUART - i1;
-
-	s = sofm_lut_sine_table_s16[i1];
-	if (idx > 2 * SOFM_LUT_SINE_NQUART)
-		return -((int32_t)s);
-
-	return (int32_t)s;
-}
-
-/* Compute fixed point sine with table lookup and interpolation */
-int16_t sofm_lut_sin_fixed_16b(int32_t w)
-{
-	int64_t idx;
-	int32_t sine;
-	int32_t frac;
-	int32_t delta;
-	int32_t s0;
-	int32_t s1;
-	int64_t idx_tmp;
-
-	/* Q4.28 x Q12.20 -> Q16.48 --> Q16.31*/
-	idx_tmp = ((int64_t)w * SOFM_LUT_SINE_C_Q20) >> 17;
-	idx = (idx_tmp >> 31); /* Shift to Q0 */
-	frac = (int32_t)(idx_tmp - (idx << 31)); /* Get fraction Q1.31*/
-	s0 = sofm_sine_lookup_16b(idx); /* Q1.16 */
-	s1 = sofm_sine_lookup_16b(idx + 1); /* Q1.16 */
-	delta = s1 - s0; /* Q1.16 */
-	sine = s0 + q_mults_32x32(frac, delta, Q_SHIFT_BITS_64(31, 16, 16)); /* Q1.16 */
-	return sat_int16((sine + 1) >> 1); /* Round to Q1.15 */
-}
-EXPORT_SYMBOL(sofm_lut_sin_fixed_16b);