forked from parallella/pal
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathp_popcount.c
59 lines (53 loc) · 1.32 KB
/
p_popcount.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
#include <pal.h>
#include <stdint.h>
/**
 *
 * Computes the population count (number of bits set to 1) of every element
 * of 'a' and stores it in the element of 'c' at the same index.
 *
 * @param a     Pointer to input vector
 *
 * @param c     Pointer to result vector; c[i] receives the bit count of a[i]
 *
 * @param n     Number of elements to process in 'a' and 'c'
 *
 * @param p     Number of processors to use (task parallelism); not referenced
 *              by this implementation
 *
 * @param team  Team to work with; not referenced by this implementation
 *
 * @return      None
 *
 */
void p_popcount_u32(uint32_t *a, uint32_t *c, int n, int p, p_team_t team)
{
    /* Masks selecting every other bit, bit pair and nibble of the word. */
    const uint32_t pair_mask = 0x55555555;
    const uint32_t nibble_mask = 0x33333333;
    const uint32_t byte_mask = 0x0f0f0f0f;
    /* Multiplier with a 1 in each byte: sums all bytes into the top byte. */
    const uint32_t byte_ones = 0x01010101;
    int idx;

    for (idx = 0; idx < n; idx++) {
        uint32_t word = a[idx];
        uint32_t acc;

        /* Each 2-bit field now holds the number of set bits in that pair. */
        acc = word - ((word >> 1) & pair_mask);
        /* Add neighbouring pairs: each 4-bit field holds a count 0..4. */
        acc = (acc & nibble_mask) + ((acc >> 2) & nibble_mask);
        /* Add neighbouring nibbles: each byte holds a count 0..8. */
        acc = (acc + (acc >> 4)) & byte_mask;
        /* Accumulate the four byte counts in the top byte and extract it. */
        c[idx] = (acc * byte_ones) >> 24;
    }
}
/**
 *
 * Computes the population count (number of bits set to 1) of every 64-bit
 * element of 'a' and stores it in the element of 'c' at the same index.
 *
 * @param a     Pointer to input vector
 *
 * @param c     Pointer to result vector; c[i] receives the bit count of a[i]
 *
 * @param n     Number of elements to process in 'a' and 'c'
 *
 * @param p     Number of processors to use (task parallelism); not referenced
 *              by this implementation
 *
 * @param team  Team to work with; not referenced by this implementation
 *
 * @return      None
 *
 */
void p_popcount_u64(uint64_t *a, uint64_t *c, int n, int p, p_team_t team)
{
    /* Masks selecting every other bit, bit pair and nibble of the word. */
    const uint64_t pair_mask = 0x5555555555555555;
    const uint64_t nibble_mask = 0x3333333333333333;
    const uint64_t byte_mask = 0x0f0f0f0f0f0f0f0f;
    /* Multiplier with a 1 in each byte: sums all bytes into the top byte. */
    const uint64_t byte_ones = 0x0101010101010101;
    int idx;

    for (idx = 0; idx < n; idx++) {
        uint64_t word = a[idx];
        uint64_t acc;

        /* Each 2-bit field now holds the number of set bits in that pair. */
        acc = word - ((word >> 1) & pair_mask);
        /* Add neighbouring pairs: each 4-bit field holds a count 0..4. */
        acc = (acc & nibble_mask) + ((acc >> 2) & nibble_mask);
        /* Add neighbouring nibbles: each byte holds a count 0..8. */
        acc = (acc + (acc >> 4)) & byte_mask;
        /* Accumulate the eight byte counts in the top byte and extract it. */
        c[idx] = (acc * byte_ones) >> 56;
    }
}