Skip to content

Commit

Permalink
Merge pull request #735 from argilo/remove-lib-simdmath
Browse files Browse the repository at this point in the history
Remove references to simdmath library
  • Loading branch information
jdemel authored Jan 7, 2024
2 parents c5b539e + 4be6128 commit 4266ee8
Show file tree
Hide file tree
Showing 5 changed files with 0 additions and 487 deletions.
117 changes: 0 additions & 117 deletions kernels/volk/volk_32f_s32f_power_32f.h
Original file line number Diff line number Diff line change
Expand Up @@ -62,123 +62,6 @@
#include <math.h>
#include <stdio.h>

#ifdef LV_HAVE_SSE4_1
#include <tmmintrin.h>

#ifdef LV_HAVE_LIB_SIMDMATH
#include <simdmath.h>
#endif /* LV_HAVE_LIB_SIMDMATH */

static inline void volk_32f_s32f_power_32f_a_sse4_1(float* cVector,
const float* aVector,
const float power,
unsigned int num_points)
{
unsigned int number = 0;

float* cPtr = cVector;
const float* aPtr = aVector;

#ifdef LV_HAVE_LIB_SIMDMATH
const unsigned int quarterPoints = num_points / 4;
__m128 vPower = _mm_set_ps1(power);
__m128 zeroValue = _mm_setzero_ps();
__m128 signMask;
__m128 negatedValues;
__m128 negativeOneToPower = _mm_set_ps1(powf(-1, power));
__m128 onesMask = _mm_set_ps1(1);

__m128 aVal, cVal;
for (; number < quarterPoints; number++) {

aVal = _mm_load_ps(aPtr);
signMask = _mm_cmplt_ps(aVal, zeroValue);
negatedValues = _mm_sub_ps(zeroValue, aVal);
aVal = _mm_blendv_ps(aVal, negatedValues, signMask);

// powf4 doesn't support negative values in the base, so we mask them off and then
// apply the negative after
cVal = powf4(aVal, vPower); // Takes each input value to the specified power

cVal = _mm_mul_ps(_mm_blendv_ps(onesMask, negativeOneToPower, signMask), cVal);

_mm_store_ps(cPtr, cVal); // Store the results back into the C container

aPtr += 4;
cPtr += 4;
}

number = quarterPoints * 4;
#endif /* LV_HAVE_LIB_SIMDMATH */

for (; number < num_points; number++) {
*cPtr++ = powf((*aPtr++), power);
}
}

#endif /* LV_HAVE_SSE4_1 */


#ifdef LV_HAVE_SSE
#include <xmmintrin.h>

#ifdef LV_HAVE_LIB_SIMDMATH
#include <simdmath.h>
#endif /* LV_HAVE_LIB_SIMDMATH */

static inline void volk_32f_s32f_power_32f_a_sse(float* cVector,
const float* aVector,
const float power,
unsigned int num_points)
{
unsigned int number = 0;

float* cPtr = cVector;
const float* aPtr = aVector;

#ifdef LV_HAVE_LIB_SIMDMATH
const unsigned int quarterPoints = num_points / 4;
__m128 vPower = _mm_set_ps1(power);
__m128 zeroValue = _mm_setzero_ps();
__m128 signMask;
__m128 negatedValues;
__m128 negativeOneToPower = _mm_set_ps1(powf(-1, power));
__m128 onesMask = _mm_set_ps1(1);

__m128 aVal, cVal;
for (; number < quarterPoints; number++) {

aVal = _mm_load_ps(aPtr);
signMask = _mm_cmplt_ps(aVal, zeroValue);
negatedValues = _mm_sub_ps(zeroValue, aVal);
aVal =
_mm_or_ps(_mm_andnot_ps(signMask, aVal), _mm_and_ps(signMask, negatedValues));

// powf4 doesn't support negative values in the base, so we mask them off and then
// apply the negative after
cVal = powf4(aVal, vPower); // Takes each input value to the specified power

cVal = _mm_mul_ps(_mm_or_ps(_mm_andnot_ps(signMask, onesMask),
_mm_and_ps(signMask, negativeOneToPower)),
cVal);

_mm_store_ps(cPtr, cVal); // Store the results back into the C container

aPtr += 4;
cPtr += 4;
}

number = quarterPoints * 4;
#endif /* LV_HAVE_LIB_SIMDMATH */

for (; number < num_points; number++) {
*cPtr++ = powf((*aPtr++), power);
}
}

#endif /* LV_HAVE_SSE */


#ifdef LV_HAVE_GENERIC

static inline void volk_32f_s32f_power_32f_generic(float* cVector,
Expand Down
81 changes: 0 additions & 81 deletions kernels/volk/volk_32fc_s32f_power_32fc.h
Original file line number Diff line number Diff line change
Expand Up @@ -56,87 +56,6 @@ static inline lv_32fc_t __volk_s32fc_s32f_power_s32fc_a(const lv_32fc_t exp,
return mag * lv_cmake(-cosf(arg), sinf(arg));
}

#ifdef LV_HAVE_SSE
#include <xmmintrin.h>

#ifdef LV_HAVE_LIB_SIMDMATH
#include <simdmath.h>
#endif /* LV_HAVE_LIB_SIMDMATH */

/*
 * Raises each of the num_points complex samples in aVector to the real
 * exponent `power` and writes the results to cVector.
 *
 * When LV_HAVE_LIB_SIMDMATH is defined, four samples at a time are converted
 * to polar form, the magnitude is raised to `power`, the phase is multiplied
 * by `power`, and the result is converted back to interleaved I/Q. That path
 * uses aligned loads/stores (_mm_load_ps / _mm_store_ps). Remaining samples --
 * or all of them without simdmath -- are delegated one by one to
 * __volk_s32fc_s32f_power_s32fc_a().
 *
 * NOTE(review): the scalar helper's visible return statement negates the
 * cosine term, whereas the vector path below does not -- confirm that both
 * paths are meant to produce the same result.
 */
static inline void volk_32fc_s32f_power_32fc_a_sse(lv_32fc_t* cVector,
                                                   const lv_32fc_t* aVector,
                                                   const float power,
                                                   unsigned int num_points)
{
    unsigned int idx = 0;

#ifdef LV_HAVE_LIB_SIMDMATH
    const unsigned int num_blocks = num_points / 4;
    const __m128 exponent = _mm_set_ps1(power);
    unsigned int blk;

    for (blk = 0; blk < num_blocks; blk++) {
        const lv_32fc_t* src = aVector + 4 * blk;
        lv_32fc_t* dst = cVector + 4 * blk;

        // Each 128-bit load picks up two interleaved complex samples.
        const __m128 pair_lo = _mm_load_ps((float*)src);
        const __m128 pair_hi = _mm_load_ps((float*)(src + 2));

        // De-interleave: even float slots into `re`, odd float slots into `im`.
        const __m128 re = _mm_shuffle_ps(pair_lo, pair_hi, _MM_SHUFFLE(2, 0, 2, 0));
        const __m128 im = _mm_shuffle_ps(pair_lo, pair_hi, _MM_SHUFFLE(3, 1, 3, 1));

        // Polar form, with the exponent applied to both components:
        // phase * power and sqrt(re^2 + im^2) ^ power.
        const __m128 angle = _mm_mul_ps(atan2f4(im, re), exponent);
        const __m128 radius = powf4(
            _mm_sqrt_ps(_mm_add_ps(_mm_mul_ps(re, re), _mm_mul_ps(im, im))),
            exponent);

        // Back to cartesian coordinates.
        const __m128 out_re = _mm_mul_ps(cosf4(angle), radius);
        const __m128 out_im = _mm_mul_ps(sinf4(angle), radius);

        // Re-interleave and write out two samples per store.
        _mm_store_ps((float*)dst, _mm_unpacklo_ps(out_re, out_im));
        _mm_store_ps((float*)(dst + 2), _mm_unpackhi_ps(out_re, out_im));
    }

    // Continue the scalar loop right after the last full group of four.
    idx = num_blocks * 4;
#endif /* LV_HAVE_LIB_SIMDMATH */

    for (; idx < num_points; idx++) {
        cVector[idx] = __volk_s32fc_s32f_power_s32fc_a(aVector[idx], power);
    }
}
#endif /* LV_HAVE_SSE */


#ifdef LV_HAVE_GENERIC

static inline void volk_32fc_s32f_power_32fc_generic(lv_32fc_t* cVector,
Expand Down
30 changes: 0 additions & 30 deletions kernels/volk/volk_32fc_s32f_power_spectral_densitypuppet_32f.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,36 +15,6 @@
#include <volk/volk_32fc_s32f_x2_power_spectral_density_32f.h>


#ifdef LV_HAVE_AVX

/*
 * Puppet wrapper: forwards its arguments unchanged to
 * volk_32fc_s32f_x2_power_spectral_density_32f_a_avx(), supplying a fixed 2.5
 * for that kernel's additional scalar argument.
 */
static inline void
volk_32fc_s32f_power_spectral_densitypuppet_32f_a_avx(float* logPowerOutput,
                                                      const lv_32fc_t* complexFFTInput,
                                                      const float normalizationFactor,
                                                      unsigned int num_points)
{
    // Value pinned for the wrapped kernel's extra scalar parameter.
    const float fixed_extra_scalar = 2.5f;

    volk_32fc_s32f_x2_power_spectral_density_32f_a_avx(logPowerOutput,
                                                       complexFFTInput,
                                                       normalizationFactor,
                                                       fixed_extra_scalar,
                                                       num_points);
}

#endif /* LV_HAVE_AVX */


#ifdef LV_HAVE_SSE3

/*
 * Puppet wrapper: forwards its arguments unchanged to
 * volk_32fc_s32f_x2_power_spectral_density_32f_a_sse3(), supplying a fixed 2.5
 * for that kernel's additional scalar argument.
 */
static inline void
volk_32fc_s32f_power_spectral_densitypuppet_32f_a_sse3(float* logPowerOutput,
                                                       const lv_32fc_t* complexFFTInput,
                                                       const float normalizationFactor,
                                                       unsigned int num_points)
{
    // Value pinned for the wrapped kernel's extra scalar parameter.
    const float fixed_extra_scalar = 2.5f;

    volk_32fc_s32f_x2_power_spectral_density_32f_a_sse3(logPowerOutput,
                                                        complexFFTInput,
                                                        normalizationFactor,
                                                        fixed_extra_scalar,
                                                        num_points);
}

#endif /* LV_HAVE_SSE3 */


#ifdef LV_HAVE_GENERIC

static inline void
Expand Down
80 changes: 0 additions & 80 deletions kernels/volk/volk_32fc_s32f_power_spectrum_32f.h
Original file line number Diff line number Diff line change
Expand Up @@ -90,86 +90,6 @@ volk_32fc_s32f_power_spectrum_32f_generic(float* logPowerOutput,
}
#endif /* LV_HAVE_GENERIC */

#ifdef LV_HAVE_SSE3
#include <pmmintrin.h>

#ifdef LV_HAVE_LIB_SIMDMATH
#include <simdmath.h>
#endif /* LV_HAVE_LIB_SIMDMATH */

/*
 * For each of the num_points complex input samples, scales the sample by
 * 1 / normalizationFactor, takes re^2 + im^2, and writes 10 * log10 of that
 * value to logPowerOutput.
 *
 * When LV_HAVE_LIB_SIMDMATH is defined, four samples at a time are processed
 * with SSE3 intrinsics and simdmath's logf4(); that path uses aligned
 * loads/stores (_mm_load_ps / _mm_store_ps). Remaining samples -- or all of
 * them without simdmath -- are computed one at a time as
 * volk_log2to10factor * log2f_non_ieee(re^2 + im^2).
 */
static inline void
volk_32fc_s32f_power_spectrum_32f_a_sse3(float* logPowerOutput,
                                         const lv_32fc_t* complexFFTInput,
                                         const float normalizationFactor,
                                         unsigned int num_points)
{
    // View the complex input as interleaved floats: re0, im0, re1, im1, ...
    const float* samples = (const float*)complexFFTInput;
    const float inv_norm = 1.0 / normalizationFactor;
    uint64_t idx = 0;

#ifdef LV_HAVE_LIB_SIMDMATH
    const uint64_t num_blocks = num_points / 4;
    const __m128 ten = _mm_set_ps1(10.0);
    // 10 / ln(10): turns a natural logarithm into 10 * log10.
    const __m128 ln_to_db = _mm_div_ps(ten, logf4(ten));
    const __m128 inv_norm_vec = _mm_set_ps1(inv_norm);
    uint64_t blk;

    for (blk = 0; blk < num_blocks; blk++) {
        // Two loads cover four complex samples; normalize them right away.
        __m128 lo = _mm_mul_ps(_mm_load_ps(samples + 8 * blk), inv_norm_vec);
        __m128 hi = _mm_mul_ps(_mm_load_ps(samples + 8 * blk + 4), inv_norm_vec);
        __m128 mag_sq;

        // Square every component: (r*r), (i*i), (r*r), (i*i) per register.
        lo = _mm_mul_ps(lo, lo);
        hi = _mm_mul_ps(hi, hi);

        // Horizontal add pairs each r*r with its i*i, giving four |x|^2 values.
        mag_sq = _mm_hadd_ps(lo, hi);

        _mm_store_ps(logPowerOutput + 4 * blk, _mm_mul_ps(logf4(mag_sq), ln_to_db));
    }

    // Continue the scalar loop right after the last full group of four.
    idx = num_blocks * 4;
#endif /* LV_HAVE_LIB_SIMDMATH */

    // Scalar path for whatever the vector loop did not cover.
    for (; idx < num_points; idx++) {
        const float re = samples[2 * idx] * inv_norm;
        const float im = samples[2 * idx + 1] * inv_norm;

        logPowerOutput[idx] =
            volk_log2to10factor * log2f_non_ieee(((re * re) + (im * im)));
    }
}
#endif /* LV_HAVE_SSE3 */

#ifdef LV_HAVE_NEON
#include <arm_neon.h>
#include <volk/volk_neon_intrinsics.h>
Expand Down
Loading

0 comments on commit 4266ee8

Please sign in to comment.