Skip to content
Merged
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
406e722
Use four samples at a time for estimating corr.
tmiw Dec 31, 2022
12b526b
Handle case where Npsam isn't divisible by 4.
tmiw Dec 31, 2022
49a2398
ofdm_destroy() should be done after we're finished with the ofdm object.
tmiw Dec 31, 2022
fa7f28c
We're actually only dealing with two samples, not four.
tmiw Dec 31, 2022
89569c1
Use double vectors instead of float for vector operations.
tmiw Dec 31, 2022
9b5643e
Test: go back to only using floats.
tmiw Dec 31, 2022
5c86481
Revert "Test: go back to only using floats."
tmiw Dec 31, 2022
9353b24
Accumulate positive as well as negative real values.
tmiw Dec 31, 2022
611bfe2
macOS ctest fixes.
tmiw Jan 1, 2023
572c688
Additional macOS test tweaks.
tmiw Jan 1, 2023
64c1470
M1 Mac is fast enough to require USE_MUTEX for test_fifo to consisten…
tmiw Jan 1, 2023
666406b
Tweak test_freedv_data_raw_fsk_ldpc_100 expected results to allow it …
tmiw Jan 1, 2023
88b95ad
Use codec2_math for complex dot product on embedded platforms.
tmiw Jan 1, 2023
4157765
Correct old comment.
tmiw Jan 1, 2023
17c7a76
Refactor code and allow the vectorized dot product implementation to …
tmiw Jan 1, 2023
c5e3dae
Revert "M1 Mac is fast enough to require USE_MUTEX for test_fifo to c…
tmiw Jan 2, 2023
f1bba6d
Merge branch 'ms-ofdm-timing-vec' of github.com:drowe67/codec2 into m…
tmiw Jan 2, 2023
4cc0bba
Revert "macOS ctest fixes."
tmiw Jan 2, 2023
6c7902b
Revert "Tweak test_freedv_data_raw_fsk_ldpc_100 expected results to a…
tmiw Jan 2, 2023
ceb0d73
Undo remaining ctest fix.
tmiw Jan 2, 2023
7ba1452
Revert "ofdm_destroy() should be done after we're finished with the o…
tmiw Jan 2, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
98 changes: 86 additions & 12 deletions src/ofdm.c
Original file line number Diff line number Diff line change
Expand Up @@ -671,6 +671,86 @@ static complex float vector_sum(complex float *a, int num_elements) {
return sum;
}

/* Determine if we can use vector ops below. Only for non-embedded platforms
   as double can be significantly slower on those.

   USE_VECTOR_OPS is defined to 1 when the compiler is GCC newer than 4.6.0
   or clang newer than 3.7.0. Embedded builds never define it and include
   codec2_math.h instead. */
#ifndef __EMBEDDED__
#if __GNUC__ > 4 || \
    (__GNUC__ == 4 && (__GNUC_MINOR__ > 6 || \
                       (__GNUC_MINOR__ == 6 && \
                        __GNUC_PATCHLEVEL__ > 0)))
#define USE_VECTOR_OPS 1
/* The 3.x case must compare the *major* version against 3; comparing the
   minor version against both 3 and 7 can never be true. */
#elif __clang_major__ > 3 || \
    (__clang_major__ == 3 && (__clang_minor__ > 7 || \
                              (__clang_minor__ == 7 && \
                               __clang_patchlevel__ > 0)))
#define USE_VECTOR_OPS 1
#endif
#else
#include "codec2_math.h"
#endif /* __EMBEDDED__ */

#if USE_VECTOR_OPS
/* Four packed floats (16 bytes), i.e. two interleaved complex float samples. */
typedef float float4 __attribute__ ((vector_size (16)));
#endif /* USE_VECTOR_OPS */

/*
 * Complex dot product without conjugation:
 *
 *     result = sum over i in [0, numSamples) of left[i] * right[i]
 *
 * left, right - arrays holding at least numSamples complex floats each
 * numSamples  - number of complex samples to multiply and accumulate
 *
 * Returns the accumulated complex sum (0 when numSamples is 0).
 *
 * Three implementations are selected at compile time: a vectorised one
 * handling two complex samples per iteration when USE_VECTOR_OPS is set,
 * codec2_complex_dot_product_f32() on embedded builds, and a plain scalar
 * loop otherwise.
 */
static complex float ofdm_complex_dot_product(complex float *left, complex float *right, int numSamples)
{
    /* Must start at zero: the scalar fallback accumulates directly into it. */
    complex float result = 0;

#if USE_VECTOR_OPS
    /* View the complex arrays as interleaved (real, imag) floats. */
    float *leftPtr = (float*)left;
    float *rightPtr = (float*)right;
    float4 accumPos = { 0, 0, 0, 0 };
    float4 accumNeg = { 0, 0, 0, 0 };
    float4 accumImag = { 0, 0, 0, 0 };
    float resultReal = 0;
    float resultImag = 0;
    int numBlocks = numSamples >> 1;
    for (int i = 0; i < numBlocks; i++)
    {
        /* Lay out vectors as follows:
           vec1 = left[0].a,  left[0].b,  left[1].a,  left[1].b
           vec2 = right[0].c, right[0].d, right[1].c, right[1].d */
        float4 vec1 = { leftPtr[0], leftPtr[1], leftPtr[2], leftPtr[3] };
        float4 vec2 = { rightPtr[0], rightPtr[1], rightPtr[2], rightPtr[3] };

        /* accumPos collects +ac / +bd, accumNeg the same products negated,
           so the real part below can be formed using additions only. */
        accumPos += vec1 * vec2;
        accumNeg -= vec1 * vec2;

        /* Lay out vec3 as { left[0].b, left[0].a, left[1].b, left[1].a }.
           Multiply vec3 by vec2 to get us bc, ad, bc, ad
           and add to the imaginary accumulator. */
        float4 vec3 = { leftPtr[1], leftPtr[0], leftPtr[3], leftPtr[2] };
        accumImag += vec3 * vec2;

        /* Shift pointers forward by 4 (2 complex floats). */
        leftPtr += 4; rightPtr += 4;
    }

    /* dot product: (a + bi)(c + di) = (ac - bd) + i(bc + ad) */
    resultReal = accumPos[0] + accumNeg[1] + accumPos[2] + accumNeg[3];
    resultImag = accumImag[0] + accumImag[1] + accumImag[2] + accumImag[3];
    result = resultReal + I * resultImag;

    /* Add the trailing sample (odd numSamples) that couldn't be vectorized above. */
    for (int i = numBlocks << 1; i < numSamples; i++)
    {
        result += left[i] * right[i];
    }
#elif defined(__EMBEDDED__)
    float resultReal = 0, resultImag = 0;
    codec2_complex_dot_product_f32((COMP*)left, (COMP*)right, numSamples, &resultReal, &resultImag);
    result = resultReal + I * resultImag;
#else
    for (int i = 0; i < numSamples; i++)
    {
        result += left[i] * right[i];
    }
#endif /* USE_VECTOR_OPS */

    return result;
}


/*
* Correlates the OFDM pilot symbol samples with a window of received
Expand Down Expand Up @@ -748,10 +828,9 @@ static int est_timing(struct OFDM *ofdm, complex float *rx, int length,

#ifdef __EMBEDDED__
#ifdef __REAL__
// Note: this code untested
float re,im;

codec2_dot_product_f32(&rx_real[i], wvec_pilot_real, ofdm->samplespersymbol, &re);
codec2_dot_product_f32(&rx_real[i], wvec_pilot_real, ofdm->samplespersymbol, &re);
codec2_dot_product_f32(&rx_real[i], wvec_pilot_imag, ofdm->samplespersymbol, &im);
corr_st = re + im * I;

Expand All @@ -769,12 +848,8 @@ static int est_timing(struct OFDM *ofdm, complex float *rx, int length,
corr_en = re + im * I;
#endif
#else
for (j = 0; j < ofdm->samplespersymbol; j++) {
int ind = i + j;

corr_st = corr_st + (rx[ind ] * wvec_pilot[j]);
corr_en = corr_en + (rx[ind + ofdm->samplesperframe] * wvec_pilot[j]);
}
corr_st = ofdm_complex_dot_product(&rx[i], wvec_pilot, ofdm->samplespersymbol);
corr_en = ofdm_complex_dot_product(&rx[i + ofdm->samplesperframe], wvec_pilot, ofdm->samplespersymbol);
#endif // __EMBEDDED__
corr[i] = (cabsf(corr_st) + cabsf(corr_en)) * av_level;
}
Expand Down Expand Up @@ -1140,12 +1215,11 @@ static float est_timing_and_freq(struct OFDM *ofdm,
complex float mvec[Npsam];
for(int i=0; i<Npsam; i++) {
complex float ph = cmplx(w*i);
mvec[i] = known_samples[i]*ph;
mvec[i] = conjf(known_samples[i]*ph);
}
for(int t=0; t<Ncorr; t+=tstep) {
complex float corr = 0;
for(int i=0; i<Npsam; i++)
corr += rx[i+t]*conjf(mvec[i]);
complex float corr = ofdm_complex_dot_product(&rx[t], mvec, Npsam);

if (cabsf(corr) > max_corr) {
max_corr = cabsf(corr);
*t_est = t;
Expand Down