Skip to content

Commit

Permalink
A faster NEON vmask any() and all() (#483)
Browse files Browse the repository at this point in the history
  • Loading branch information
solidpixel authored Aug 2, 2024
1 parent 69bc17b commit 8bc51bc
Show file tree
Hide file tree
Showing 4 changed files with 49 additions and 21 deletions.
20 changes: 0 additions & 20 deletions Source/astcenc_vecmathlib_common_4.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,26 +32,6 @@

#include <cstdio>

// ============================================================================
// vmask4 operators and functions
// ============================================================================

/**
 * @brief Test whether at least one lane of a mask is enabled.
 *
 * @param a   The mask to test.
 *
 * @return @c true if the value returned by mask() is non-zero, @c false
 *         otherwise.
 */
ASTCENC_SIMD_INLINE bool any(vmask4 a)
{
	const unsigned int lane_bits = mask(a);
	return lane_bits != 0;
}

/**
 * @brief Test whether every lane of a mask is enabled.
 *
 * @param a   The mask to test.
 *
 * @return @c true if the value returned by mask() equals 0xF, @c false
 *         otherwise.
 */
ASTCENC_SIMD_INLINE bool all(vmask4 a)
{
	const unsigned int lane_bits = mask(a);
	return lane_bits == 0xF;
}

// ============================================================================
// vint4 operators and functions
// ============================================================================
Expand Down
18 changes: 17 additions & 1 deletion Source/astcenc_vecmathlib_neon_4.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: Apache-2.0
// ----------------------------------------------------------------------------
// Copyright 2019-2023 Arm Limited
// Copyright 2019-2024 Arm Limited
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
// use this file except in compliance with the License. You may obtain a copy
Expand Down Expand Up @@ -411,6 +411,22 @@ ASTCENC_SIMD_INLINE unsigned int mask(vmask4 a)
return vaddvq_u32(vshlq_u32(tmp, shift));
}

/**
 * @brief Test whether at least one lane of a mask is enabled.
 *
 * Uses an across-vector unsigned maximum, so the result is non-zero exactly
 * when some lane holds a non-zero value.
 *
 * @param a   The mask to test.
 *
 * @return @c true if any lane is non-zero, @c false otherwise.
 */
ASTCENC_SIMD_INLINE bool any(vmask4 a)
{
	const unsigned int largest_lane = vmaxvq_u32(a.m);
	return largest_lane != 0;
}

/**
 * @brief Test whether every lane of a mask is enabled.
 *
 * Uses an across-vector unsigned minimum, so the result is non-zero exactly
 * when every lane holds a non-zero value.
 *
 * @param a   The mask to test.
 *
 * @return @c true if all lanes are non-zero, @c false otherwise.
 */
ASTCENC_SIMD_INLINE bool all(vmask4 a)
{
	const unsigned int smallest_lane = vminvq_u32(a.m);
	return smallest_lane != 0;
}

// ============================================================================
// vint4 operators and functions
// ============================================================================
Expand Down
16 changes: 16 additions & 0 deletions Source/astcenc_vecmathlib_none_4.h
Original file line number Diff line number Diff line change
Expand Up @@ -418,6 +418,22 @@ ASTCENC_SIMD_INLINE unsigned int mask(vmask4 a)
((a.m[3] >> 28) & 0x8);
}

/**
 * @brief Report whether one or more lanes of a mask are enabled.
 *
 * @param a   The mask to inspect.
 *
 * @return @c true when mask() yields a non-zero value, @c false when it
 *         yields zero.
 */
ASTCENC_SIMD_INLINE bool any(vmask4 a)
{
	const unsigned int packed = mask(a);
	return packed != 0;
}

/**
 * @brief Report whether all lanes of a mask are enabled.
 *
 * @param a   The mask to inspect.
 *
 * @return @c true when mask() yields 0xF, @c false for any other value.
 */
ASTCENC_SIMD_INLINE bool all(vmask4 a)
{
	const unsigned int packed = mask(a);
	return packed == 0xF;
}

// ============================================================================
// vint4 operators and functions
// ============================================================================
Expand Down
16 changes: 16 additions & 0 deletions Source/astcenc_vecmathlib_sse_4.h
Original file line number Diff line number Diff line change
Expand Up @@ -428,6 +428,22 @@ ASTCENC_SIMD_INLINE unsigned int mask(vmask4 a)
return static_cast<unsigned int>(_mm_movemask_ps(a.m));
}

/**
 * @brief Check a mask for at least one enabled lane.
 *
 * @param a   The mask to check.
 *
 * @return @c true if the bit pattern returned by mask() is non-zero,
 *         @c false otherwise.
 */
ASTCENC_SIMD_INLINE bool any(vmask4 a)
{
	const unsigned int bits = mask(a);
	return bits != 0;
}

/**
 * @brief Check that a mask has every lane enabled.
 *
 * @param a   The mask to check.
 *
 * @return @c true if the bit pattern returned by mask() is exactly 0xF,
 *         @c false otherwise.
 */
ASTCENC_SIMD_INLINE bool all(vmask4 a)
{
	const unsigned int bits = mask(a);
	return bits == 0xF;
}

// ============================================================================
// vint4 operators and functions
// ============================================================================
Expand Down

0 comments on commit 8bc51bc

Please sign in to comment.