Fix bugs in convolutional decoder #736

Merged · 2 commits · Jan 7, 2024
16 changes: 8 additions & 8 deletions kernels/volk/volk_8u_conv_k7_r2puppet_8u.h
@@ -97,8 +97,8 @@ static inline int chainback_viterbi(unsigned char* data,
#include <stdio.h>
#include <xmmintrin.h>

static inline void volk_8u_conv_k7_r2puppet_8u_spiral(unsigned char* syms,
unsigned char* dec,
static inline void volk_8u_conv_k7_r2puppet_8u_spiral(unsigned char* dec,
unsigned char* syms,
unsigned int framebits)
{
if (framebits < 12) {
@@ -181,8 +181,8 @@ static inline void volk_8u_conv_k7_r2puppet_8u_spiral(unsigned char* syms,

#include "volk/sse2neon.h"

static inline void volk_8u_conv_k7_r2puppet_8u_neonspiral(unsigned char* syms,
unsigned char* dec,
static inline void volk_8u_conv_k7_r2puppet_8u_neonspiral(unsigned char* dec,
unsigned char* syms,
unsigned int framebits)
{
if (framebits < 12) {
@@ -266,8 +266,8 @@ static inline void volk_8u_conv_k7_r2puppet_8u_neonspiral(unsigned char* syms,
//#include <immintrin.h>
//#include <stdio.h>
//
// static inline void volk_8u_conv_k7_r2puppet_8u_avx2(unsigned char* syms,
// unsigned char* dec,
// static inline void volk_8u_conv_k7_r2puppet_8u_avx2(unsigned char* dec,
// unsigned char* syms,
// unsigned int framebits)
//{
// if (framebits < 12) {
@@ -349,8 +349,8 @@ static inline void volk_8u_conv_k7_r2puppet_8u_neonspiral(unsigned char* syms,
#if LV_HAVE_GENERIC


static inline void volk_8u_conv_k7_r2puppet_8u_generic(unsigned char* syms,
unsigned char* dec,
static inline void volk_8u_conv_k7_r2puppet_8u_generic(unsigned char* dec,
unsigned char* syms,
unsigned int framebits)
{
if (framebits < 12) {
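
The four hunks above only swap the first two parameters: every puppet entry point now takes the decoded-bit buffer first and the encoded symbols second. A minimal caller sketch against the corrected generic signature; the header path, frame length, and buffer sizes below are illustrative assumptions, not part of the patch:

#include <stdlib.h>
#include <string.h>
#include <volk/volk_8u_conv_k7_r2puppet_8u.h>  /* assumed installed header path */

/* Hypothetical caller of the corrected generic puppet kernel. */
static void example_decode(void)
{
    const unsigned int framebits = 2048;                /* arbitrary; kernel requires >= 12 */
    unsigned char* syms = malloc(2 * (framebits + 6));  /* rate-1/2 symbols plus flush, assumed sizing */
    unsigned char* dec = malloc(framebits / 8 + 2);     /* packed decoded bits, assumed sizing */

    memset(syms, 0, 2 * (framebits + 6));               /* ... fill with soft symbols in [0, 255] ... */

    /* After this change: decoded output first, symbols second. */
    volk_8u_conv_k7_r2puppet_8u_generic(dec, syms, framebits);

    free(syms);
    free(dec);
}
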
192 changes: 89 additions & 103 deletions kernels/volk/volk_8u_x4_conv_k7_r2_8u.h
@@ -57,19 +57,17 @@ typedef union {
#endif


static inline void renormalize(unsigned char* X, unsigned char threshold)
static inline void renormalize(unsigned char* X)
{
int NUMSTATES = 64;
int i;

unsigned char min = X[0];
// if(min > threshold) {
for (i = 0; i < NUMSTATES; i++)
if (min > X[i])
min = X[i];
for (i = 0; i < NUMSTATES; i++)
X[i] -= min;
//}
}
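
Read without the diff markup, the helper after this change is an unconditional rescale; the comments below are added for explanation, and NUMSTATES stays fixed at 64 as in the file:

static inline void renormalize(unsigned char* X)
{
    int NUMSTATES = 64;
    int i;

    /* Find the smallest of the 64 path metrics... */
    unsigned char min = X[0];
    for (i = 0; i < NUMSTATES; i++)
        if (min > X[i])
            min = X[i];
    /* ...and subtract it from every state, so metrics stay small without
       ever being skipped by the old 210 threshold test. */
    for (i = 0; i < NUMSTATES; i++)
        X[i] -= min;
}
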


@@ -85,16 +83,17 @@ static inline void BFLY(int i,
int j;
unsigned int decision0, decision1;
unsigned char metric, m0, m1, m2, m3;
unsigned short metricsum;

int NUMSTATES = 64;
int RATE = 2;
int METRICSHIFT = 2;
int METRICSHIFT = 1;
int PRECISIONSHIFT = 2;

metric = 0;
metricsum = 1;
for (j = 0; j < RATE; j++)
metric += (Branchtab[i + j * NUMSTATES / 2] ^ syms[s * RATE + j]) >> METRICSHIFT;
metric = metric >> PRECISIONSHIFT;
metricsum += (Branchtab[i + j * NUMSTATES / 2] ^ syms[s * RATE + j]);
metric = (metricsum >> METRICSHIFT) >> PRECISIONSHIFT;

unsigned char max = ((RATE * ((256 - 1) >> METRICSHIFT)) >> PRECISIONSHIFT);

@@ -103,8 +102,8 @@ static inline void BFLY(int i,
m2 = X[i] + (max - metric);
m3 = X[i + NUMSTATES / 2] + metric;

decision0 = (signed int)(m0 - m1) > 0;
decision1 = (signed int)(m2 - m3) > 0;
decision0 = (signed int)(m0 - m1) >= 0;
decision1 = (signed int)(m2 - m3) >= 0;

Y[2 * i] = decision0 ? m1 : m0;
Y[2 * i + 1] = decision1 ? m3 : m2;
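
Pieced together from the two hunks above, a condensed sketch of the butterfly after the change. The traceback bookkeeping and the real function's decision_t parameter are omitted, and the comments are my reading of the fix, not text from the PR: the two XORed symbol distances now accumulate in a 16-bit variable so their sum cannot wrap in an unsigned char, METRICSHIFT drops from 2 to 1, and ties in the add-compare-select resolve as a 1 decision.

static inline void BFLY_sketch(int i,
                               int s,
                               unsigned char* syms,
                               unsigned char* Y,
                               unsigned char* X,
                               unsigned char* Branchtab)
{
    int j;
    unsigned int decision0, decision1;
    unsigned char metric, m0, m1, m2, m3;
    unsigned short metricsum;   /* 16-bit accumulator: two 8-bit distances can exceed 255 */

    int NUMSTATES = 64;
    int RATE = 2;
    int METRICSHIFT = 1;        /* was 2 before this change */
    int PRECISIONSHIFT = 2;

    metricsum = 1;              /* starts at 1, as in the patch */
    for (j = 0; j < RATE; j++)
        metricsum += (Branchtab[i + j * NUMSTATES / 2] ^ syms[s * RATE + j]);
    metric = (metricsum >> METRICSHIFT) >> PRECISIONSHIFT;

    unsigned char max = ((RATE * ((256 - 1) >> METRICSHIFT)) >> PRECISIONSHIFT);

    m0 = X[i] + metric;
    m1 = X[i + NUMSTATES / 2] + (max - metric);
    m2 = X[i] + (max - metric);
    m3 = X[i + NUMSTATES / 2] + metric;

    decision0 = (signed int)(m0 - m1) >= 0;  /* ties now count as 1 (was strict >) */
    decision1 = (signed int)(m2 - m3) >= 0;

    Y[2 * i] = decision0 ? m1 : m0;
    Y[2 * i + 1] = decision1 ? m3 : m2;

    /* The real kernel also packs decision0/decision1 into the decision_t
       traceback array here; omitted in this sketch. */
}
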
@@ -297,7 +296,7 @@ static inline void BFLY(int i,
// }
// }
//
// renormalize(X, 210);
// renormalize(X);
//
// unsigned int j;
// for (j = 0; j < (framebits + excess) % 2; ++j) {
@@ -312,7 +311,7 @@ static inline void BFLY(int i,
// Branchtab);
// }
//
// renormalize(Y, 210);
// renormalize(Y);
// }
// /*skip*/
//}
@@ -438,27 +437,25 @@ static inline void volk_8u_x4_conv_k7_r2_8u_spiral(unsigned char* Y,
*(a112) = s28;
a113 = (a95 + 3);
*(a113) = s29;
if ((((unsigned char*)Y)[0] > 210)) {
__m128i m5, m6;
m5 = ((__m128i*)Y)[0];
m5 = _mm_min_epu8(m5, ((__m128i*)Y)[1]);
m5 = _mm_min_epu8(m5, ((__m128i*)Y)[2]);
m5 = _mm_min_epu8(m5, ((__m128i*)Y)[3]);
__m128i m7;
m7 = _mm_min_epu8(_mm_srli_si128(m5, 8), m5);
m7 =
((__m128i)_mm_min_epu8(((__m128i)_mm_srli_epi64(m7, 32)), ((__m128i)m7)));
m7 =
((__m128i)_mm_min_epu8(((__m128i)_mm_srli_epi64(m7, 16)), ((__m128i)m7)));
m7 = ((__m128i)_mm_min_epu8(((__m128i)_mm_srli_epi64(m7, 8)), ((__m128i)m7)));
m7 = _mm_unpacklo_epi8(m7, m7);
m7 = _mm_shufflelo_epi16(m7, _MM_SHUFFLE(0, 0, 0, 0));
m6 = _mm_unpacklo_epi64(m7, m7);
((__m128i*)Y)[0] = _mm_subs_epu8(((__m128i*)Y)[0], m6);
((__m128i*)Y)[1] = _mm_subs_epu8(((__m128i*)Y)[1], m6);
((__m128i*)Y)[2] = _mm_subs_epu8(((__m128i*)Y)[2], m6);
((__m128i*)Y)[3] = _mm_subs_epu8(((__m128i*)Y)[3], m6);
}

__m128i m5, m6;
m5 = ((__m128i*)Y)[0];
m5 = _mm_min_epu8(m5, ((__m128i*)Y)[1]);
m5 = _mm_min_epu8(m5, ((__m128i*)Y)[2]);
m5 = _mm_min_epu8(m5, ((__m128i*)Y)[3]);
__m128i m7;
m7 = _mm_min_epu8(_mm_srli_si128(m5, 8), m5);
m7 = ((__m128i)_mm_min_epu8(((__m128i)_mm_srli_epi64(m7, 32)), ((__m128i)m7)));
m7 = ((__m128i)_mm_min_epu8(((__m128i)_mm_srli_epi64(m7, 16)), ((__m128i)m7)));
m7 = ((__m128i)_mm_min_epu8(((__m128i)_mm_srli_epi64(m7, 8)), ((__m128i)m7)));
m7 = _mm_unpacklo_epi8(m7, m7);
m7 = _mm_shufflelo_epi16(m7, _MM_SHUFFLE(0, 0, 0, 0));
m6 = _mm_unpacklo_epi64(m7, m7);
((__m128i*)Y)[0] = _mm_subs_epu8(((__m128i*)Y)[0], m6);
((__m128i*)Y)[1] = _mm_subs_epu8(((__m128i*)Y)[1], m6);
((__m128i*)Y)[2] = _mm_subs_epu8(((__m128i*)Y)[2], m6);
((__m128i*)Y)[3] = _mm_subs_epu8(((__m128i*)Y)[3], m6);

unsigned char a188, a194;
int a186, a205;
short int s48, s49, s54, s55;
@@ -561,31 +558,27 @@ static inline void volk_8u_x4_conv_k7_r2_8u_spiral(unsigned char* Y,
*(a225) = s56;
a226 = (a208 + 3);
*(a226) = s57;
if ((((unsigned char*)X)[0] > 210)) {
__m128i m12, m13;
m12 = ((__m128i*)X)[0];
m12 = _mm_min_epu8(m12, ((__m128i*)X)[1]);
m12 = _mm_min_epu8(m12, ((__m128i*)X)[2]);
m12 = _mm_min_epu8(m12, ((__m128i*)X)[3]);
__m128i m14;
m14 = _mm_min_epu8(_mm_srli_si128(m12, 8), m12);
m14 = ((__m128i)_mm_min_epu8(((__m128i)_mm_srli_epi64(m14, 32)),
((__m128i)m14)));
m14 = ((__m128i)_mm_min_epu8(((__m128i)_mm_srli_epi64(m14, 16)),
((__m128i)m14)));
m14 = ((__m128i)_mm_min_epu8(((__m128i)_mm_srli_epi64(m14, 8)),
((__m128i)m14)));
m14 = _mm_unpacklo_epi8(m14, m14);
m14 = _mm_shufflelo_epi16(m14, _MM_SHUFFLE(0, 0, 0, 0));
m13 = _mm_unpacklo_epi64(m14, m14);
((__m128i*)X)[0] = _mm_subs_epu8(((__m128i*)X)[0], m13);
((__m128i*)X)[1] = _mm_subs_epu8(((__m128i*)X)[1], m13);
((__m128i*)X)[2] = _mm_subs_epu8(((__m128i*)X)[2], m13);
((__m128i*)X)[3] = _mm_subs_epu8(((__m128i*)X)[3], m13);
}

__m128i m12, m13;
m12 = ((__m128i*)X)[0];
m12 = _mm_min_epu8(m12, ((__m128i*)X)[1]);
m12 = _mm_min_epu8(m12, ((__m128i*)X)[2]);
m12 = _mm_min_epu8(m12, ((__m128i*)X)[3]);
__m128i m14;
m14 = _mm_min_epu8(_mm_srli_si128(m12, 8), m12);
m14 = ((__m128i)_mm_min_epu8(((__m128i)_mm_srli_epi64(m14, 32)), ((__m128i)m14)));
m14 = ((__m128i)_mm_min_epu8(((__m128i)_mm_srli_epi64(m14, 16)), ((__m128i)m14)));
m14 = ((__m128i)_mm_min_epu8(((__m128i)_mm_srli_epi64(m14, 8)), ((__m128i)m14)));
m14 = _mm_unpacklo_epi8(m14, m14);
m14 = _mm_shufflelo_epi16(m14, _MM_SHUFFLE(0, 0, 0, 0));
m13 = _mm_unpacklo_epi64(m14, m14);
((__m128i*)X)[0] = _mm_subs_epu8(((__m128i*)X)[0], m13);
((__m128i*)X)[1] = _mm_subs_epu8(((__m128i*)X)[1], m13);
((__m128i*)X)[2] = _mm_subs_epu8(((__m128i*)X)[2], m13);
((__m128i*)X)[3] = _mm_subs_epu8(((__m128i*)X)[3], m13);
}

renormalize(X, 210);
renormalize(X);

/*int ch;
for(ch = 0; ch < 64; ch++) {
@@ -607,7 +600,7 @@ static inline void volk_8u_x4_conv_k7_r2_8u_spiral(unsigned char* Y,
}


renormalize(Y, 210);
renormalize(Y);

/*printf("\n");
for(ch = 0; ch < 64; ch++) {
@@ -734,27 +727,25 @@ static inline void volk_8u_x4_conv_k7_r2_8u_neonspiral(unsigned char* Y,
*(a112) = s28;
a113 = (a95 + 3);
*(a113) = s29;
if ((((unsigned char*)Y)[0] > 210)) {
__m128i m5, m6;
m5 = ((__m128i*)Y)[0];
m5 = _mm_min_epu8(m5, ((__m128i*)Y)[1]);
m5 = _mm_min_epu8(m5, ((__m128i*)Y)[2]);
m5 = _mm_min_epu8(m5, ((__m128i*)Y)[3]);
__m128i m7;
m7 = _mm_min_epu8(_mm_srli_si128(m5, 8), m5);
m7 =
((__m128i)_mm_min_epu8(((__m128i)_mm_srli_epi64(m7, 32)), ((__m128i)m7)));
m7 =
((__m128i)_mm_min_epu8(((__m128i)_mm_srli_epi64(m7, 16)), ((__m128i)m7)));
m7 = ((__m128i)_mm_min_epu8(((__m128i)_mm_srli_epi64(m7, 8)), ((__m128i)m7)));
m7 = _mm_unpacklo_epi8(m7, m7);
m7 = _mm_shufflelo_epi16(m7, _MM_SHUFFLE(0, 0, 0, 0));
m6 = _mm_unpacklo_epi64(m7, m7);
((__m128i*)Y)[0] = _mm_subs_epu8(((__m128i*)Y)[0], m6);
((__m128i*)Y)[1] = _mm_subs_epu8(((__m128i*)Y)[1], m6);
((__m128i*)Y)[2] = _mm_subs_epu8(((__m128i*)Y)[2], m6);
((__m128i*)Y)[3] = _mm_subs_epu8(((__m128i*)Y)[3], m6);
}

__m128i m5, m6;
m5 = ((__m128i*)Y)[0];
m5 = _mm_min_epu8(m5, ((__m128i*)Y)[1]);
m5 = _mm_min_epu8(m5, ((__m128i*)Y)[2]);
m5 = _mm_min_epu8(m5, ((__m128i*)Y)[3]);
__m128i m7;
m7 = _mm_min_epu8(_mm_srli_si128(m5, 8), m5);
m7 = ((__m128i)_mm_min_epu8(((__m128i)_mm_srli_epi64(m7, 32)), ((__m128i)m7)));
m7 = ((__m128i)_mm_min_epu8(((__m128i)_mm_srli_epi64(m7, 16)), ((__m128i)m7)));
m7 = ((__m128i)_mm_min_epu8(((__m128i)_mm_srli_epi64(m7, 8)), ((__m128i)m7)));
m7 = _mm_unpacklo_epi8(m7, m7);
m7 = _mm_shufflelo_epi16(m7, _MM_SHUFFLE(0, 0, 0, 0));
m6 = _mm_unpacklo_epi64(m7, m7);
((__m128i*)Y)[0] = _mm_subs_epu8(((__m128i*)Y)[0], m6);
((__m128i*)Y)[1] = _mm_subs_epu8(((__m128i*)Y)[1], m6);
((__m128i*)Y)[2] = _mm_subs_epu8(((__m128i*)Y)[2], m6);
((__m128i*)Y)[3] = _mm_subs_epu8(((__m128i*)Y)[3], m6);

unsigned char a188, a194;
int a186, a205;
short int s48, s49, s54, s55;
@@ -857,31 +848,27 @@ static inline void volk_8u_x4_conv_k7_r2_8u_neonspiral(unsigned char* Y,
*(a225) = s56;
a226 = (a208 + 3);
*(a226) = s57;
if ((((unsigned char*)X)[0] > 210)) {
__m128i m12, m13;
m12 = ((__m128i*)X)[0];
m12 = _mm_min_epu8(m12, ((__m128i*)X)[1]);
m12 = _mm_min_epu8(m12, ((__m128i*)X)[2]);
m12 = _mm_min_epu8(m12, ((__m128i*)X)[3]);
__m128i m14;
m14 = _mm_min_epu8(_mm_srli_si128(m12, 8), m12);
m14 = ((__m128i)_mm_min_epu8(((__m128i)_mm_srli_epi64(m14, 32)),
((__m128i)m14)));
m14 = ((__m128i)_mm_min_epu8(((__m128i)_mm_srli_epi64(m14, 16)),
((__m128i)m14)));
m14 = ((__m128i)_mm_min_epu8(((__m128i)_mm_srli_epi64(m14, 8)),
((__m128i)m14)));
m14 = _mm_unpacklo_epi8(m14, m14);
m14 = _mm_shufflelo_epi16(m14, _MM_SHUFFLE(0, 0, 0, 0));
m13 = _mm_unpacklo_epi64(m14, m14);
((__m128i*)X)[0] = _mm_subs_epu8(((__m128i*)X)[0], m13);
((__m128i*)X)[1] = _mm_subs_epu8(((__m128i*)X)[1], m13);
((__m128i*)X)[2] = _mm_subs_epu8(((__m128i*)X)[2], m13);
((__m128i*)X)[3] = _mm_subs_epu8(((__m128i*)X)[3], m13);
}

__m128i m12, m13;
m12 = ((__m128i*)X)[0];
m12 = _mm_min_epu8(m12, ((__m128i*)X)[1]);
m12 = _mm_min_epu8(m12, ((__m128i*)X)[2]);
m12 = _mm_min_epu8(m12, ((__m128i*)X)[3]);
__m128i m14;
m14 = _mm_min_epu8(_mm_srli_si128(m12, 8), m12);
m14 = ((__m128i)_mm_min_epu8(((__m128i)_mm_srli_epi64(m14, 32)), ((__m128i)m14)));
m14 = ((__m128i)_mm_min_epu8(((__m128i)_mm_srli_epi64(m14, 16)), ((__m128i)m14)));
m14 = ((__m128i)_mm_min_epu8(((__m128i)_mm_srli_epi64(m14, 8)), ((__m128i)m14)));
m14 = _mm_unpacklo_epi8(m14, m14);
m14 = _mm_shufflelo_epi16(m14, _MM_SHUFFLE(0, 0, 0, 0));
m13 = _mm_unpacklo_epi64(m14, m14);
((__m128i*)X)[0] = _mm_subs_epu8(((__m128i*)X)[0], m13);
((__m128i*)X)[1] = _mm_subs_epu8(((__m128i*)X)[1], m13);
((__m128i*)X)[2] = _mm_subs_epu8(((__m128i*)X)[2], m13);
((__m128i*)X)[3] = _mm_subs_epu8(((__m128i*)X)[3], m13);
}

renormalize(X, 210);
renormalize(X);

/*int ch;
for(ch = 0; ch < 64; ch++) {
@@ -903,7 +890,7 @@ static inline void volk_8u_x4_conv_k7_r2_8u_neonspiral(unsigned char* Y,
}


renormalize(Y, 210);
renormalize(Y);

/*printf("\n");
for(ch = 0; ch < 64; ch++) {
@@ -928,7 +915,6 @@ static inline void volk_8u_x4_conv_k7_r2_8u_generic(unsigned char* Y,
{
int nbits = framebits + excess;
int NUMSTATES = 64;
int RENORMALIZE_THRESHOLD = 210;

int s, i;
for (s = 0; s < nbits; s++) {
@@ -937,7 +923,7 @@ static inline void volk_8u_x4_conv_k7_r2_8u_generic(unsigned char* Y,
BFLY(i, s, syms, Y, X, (decision_t*)dec, Branchtab);
}

renormalize(Y, RENORMALIZE_THRESHOLD);
renormalize(Y);

/// Swap pointers to old and new metrics
tmp = (void*)X;
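
For orientation, a sketch of how the generic kernel reads after the change. The parameter list, the inner loop bound, and the closing pointer-swap assignments are not visible in the hunks above and are reconstructed for illustration only:

static inline void conv_k7_r2_generic_sketch(unsigned char* Y,
                                             unsigned char* X,
                                             unsigned char* syms,
                                             unsigned char* dec,
                                             unsigned char* Branchtab,
                                             unsigned int framebits,
                                             unsigned int excess)
{
    int nbits = framebits + excess;
    int NUMSTATES = 64;
    int s, i;
    void* tmp;

    for (s = 0; s < nbits; s++) {
        for (i = 0; i < NUMSTATES / 2; i++) {   /* assumed bound: one BFLY per butterfly pair */
            BFLY(i, s, syms, Y, X, (decision_t*)dec, Branchtab);
        }

        renormalize(Y);   /* now unconditional; the RENORMALIZE_THRESHOLD constant is gone */

        /* Swap pointers so the next symbol reads the metrics just written
           (closing assignments of the swap are assumed). */
        tmp = (void*)X;
        X = Y;
        Y = (unsigned char*)tmp;
    }
}
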