|
56 | 56 | #include <inttypes.h> |
57 | 57 | #include <stdio.h> |
58 | 58 |
|
| 59 | +#ifdef LV_HAVE_GENERIC |
| 60 | +/* Adapted from https://graphics.stanford.edu/~seander/bithacks.html#ReverseParallel |
| 61 | + * Where they reverse the bits in an N-bit word. But who's stopping me from doing the same |
| 62 | + * on byte level? |
| 63 | + * Idea is simple: swap the elementary units with half of them "selected" each step, in a |
| 64 | + * Hadamard kind of selection. |
| 65 | + */ |
| 66 | + |
/*
 * Reverse the byte order of each 64-bit word in place.
 *
 * intsToSwap: buffer of words; each of the first num_points entries is read
 *             and overwritten with its byte-reversed value.
 * num_points: number of 64-bit words to process; 0 leaves the buffer
 *             untouched.
 *
 * The reversal is done in three parallel stages (bytes, 16-bit pairs, 32-bit
 * halves). The masks are written with UINT64_C so they are 64-bit unsigned
 * constants regardless of how wide `long` is on the target.
 */
static inline void volk_64u_byteswap_generic(uint64_t* intsToSwap,
                                             unsigned int num_points)
{
    for (unsigned int point = 0; point < num_points; point++, intsToSwap++) {
        uint64_t in = *intsToSwap;
        /* swap the two bytes inside every 16-bit group */
        in = ((in & UINT64_C(0x00FF00FF00FF00FF)) << 8) |
             ((in & UINT64_C(0xFF00FF00FF00FF00)) >> 8);
        /* swap the two 16-bit groups inside every 32-bit word */
        in = ((in & UINT64_C(0x0000FFFF0000FFFF)) << 16) |
             ((in & UINT64_C(0xFFFF0000FFFF0000)) >> 16);
        /* swap the two 32-bit words; the shifts themselves drop the bits
         * that leave the 64-bit value, so no masks are needed here */
        in = (in << 32) | (in >> 32);
        *intsToSwap = in;
    }
}
| 81 | +#endif |
| 82 | + |
59 | 83 | #ifdef LV_HAVE_SSE2 |
60 | 84 | #include <emmintrin.h> |
61 | 85 |
|
@@ -109,30 +133,6 @@ static inline void volk_64u_byteswap_u_sse2(uint64_t* intsToSwap, unsigned int n |
109 | 133 | } |
110 | 134 | #endif /* LV_HAVE_SSE2 */ |
111 | 135 |
|
112 | | - |
113 | | -#ifdef LV_HAVE_GENERIC |
114 | | - |
/*
 * Reverse the byte order of each 64-bit word in place by decomposing it into
 * two 32-bit halves, byte-reversing each half, and exchanging the halves.
 *
 * intsToSwap: buffer of words; each of the first num_points entries is read
 *             and overwritten with its byte-reversed value.
 * num_points: number of 64-bit words to process; 0 leaves the buffer
 *             untouched.
 *
 * The halves are extracted with shifts on the uint64_t value rather than by
 * reading the buffer through a uint32_t pointer, so the storage is only ever
 * accessed as uint64_t (no aliasing through an incompatible pointer type).
 */
static inline void volk_64u_byteswap_generic(uint64_t* intsToSwap,
                                             unsigned int num_points)
{
    unsigned int point;
    for (point = 0; point < num_points; point++) {
        const uint64_t value = intsToSwap[point];
        uint32_t low = (uint32_t)value;
        uint32_t high = (uint32_t)(value >> 32);

        /* reverse the four bytes of each 32-bit half */
        low = (((low >> 24) & 0xff) | ((low >> 8) & 0x0000ff00) |
               ((low << 8) & 0x00ff0000) | ((low << 24) & 0xff000000));

        high = (((high >> 24) & 0xff) | ((high >> 8) & 0x0000ff00) |
                ((high << 8) & 0x00ff0000) | ((high << 24) & 0xff000000));

        /* the reversed low half becomes the new high half and vice versa */
        intsToSwap[point] = ((uint64_t)low << 32) | (uint64_t)high;
    }
}
134 | | -#endif /* LV_HAVE_GENERIC */ |
135 | | - |
136 | 136 | #if LV_HAVE_AVX2 |
137 | 137 | #include <immintrin.h> |
138 | 138 | static inline void volk_64u_byteswap_a_avx2(uint64_t* intsToSwap, unsigned int num_points) |
@@ -476,8 +476,8 @@ static inline void volk_64u_byteswap_u_ssse3(uint64_t* intsToSwap, |
476 | 476 |
|
477 | 477 | #ifdef LV_HAVE_GENERIC |
478 | 478 |
|
479 | | -static inline void volk_64u_byteswap_a_generic(uint64_t* intsToSwap, |
480 | | - unsigned int num_points) |
| 479 | +static inline void volk_64u_byteswap_generic_decompose(uint64_t* intsToSwap, |
| 480 | + unsigned int num_points) |
481 | 481 | { |
482 | 482 | uint32_t* inputPtr = (uint32_t*)intsToSwap; |
483 | 483 | unsigned int point; |
|
0 commit comments