-
Notifications
You must be signed in to change notification settings - Fork 11
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
c261dfc
commit 586f3e8
Showing
4 changed files
with
291 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
# Build description for the GUPS example: a host executable (main.x) and a
# device kernel binary (shmem_tfunc.e32).
#
# DEFS is appended to the host CFLAGS; EDEFS is passed to coprcc when the
# device kernel is compiled.
DEFS = | ||
EDEFS = -fno-unroll-loops | ||
|
||
CFLAGS += -O3 -std=c11 -fopenmp $(DEFS) | ||
|
||
# COPRTHR supplies the include and library directories used below; SHMEM is
# used both as an include directory and as the -L directory for shmem_coprthr.
COPRTHR = /usr/local/browndeer/coprthr2 | ||
SHMEM = ../../src | ||
|
||
# INCS is shared by the host and device compiles; LIBS is linked into main.x.
INCS = -I. -I$(COPRTHR)/include -I$(SHMEM) -I../../common | ||
LIBS = -L$(COPRTHR)/lib -lcoprthr -lcoprthrcc -lm | ||
|
||
# ELIBS is passed to coprcc for the device kernel only.
ELIBS = -lcoprthr2_dev -L$(SHMEM) -lshmem_coprthr | ||
|
||
KERNEL = shmem_tfunc.e32 | ||
|
||
TARGETS = main.x $(KERNEL) | ||
|
||
# Default goal: build both the host program and the device kernel.
all: $(TARGETS) | ||
|
||
.PHONY: clean distclean | ||
|
||
# Clear the built-in suffix list, then register only the suffixes used here.
.SUFFIXES: | ||
.SUFFIXES: .c .o .x .e32 .S | ||
|
||
# Host program: compiled and linked in one step with $(CC).
main.x: main.c | ||
$(CC) $(CFLAGS) $(INCS) main.c -o main.x $(LIBS) | ||
|
||
# Device kernel: compiled with coprcc using the device flags and libraries.
shmem_tfunc.e32: shmem_tfunc.c | ||
coprcc $(INCS) $(EDEFS) $(ELIBS) shmem_tfunc.c -o shmem_tfunc.e32 | ||
|
||
# Generic suffix rule for compiling a C source into an object file.
.c.o: | ||
$(CC) $(CFLAGS) $(INCS) -c $< | ||
|
||
# Runs coprcc-info on the built kernel binary.
info: $(KERNEL) | ||
coprcc-info -j -l 100 shmem_tfunc.e32 | ||
|
||
# Removes object files and both build targets.
clean: | ||
rm -f *.o | ||
rm -f $(TARGETS) | ||
|
||
# distclean has no recipe of its own; it only depends on clean.
distclean: clean |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
#include <stdint.h> | ||
#include "ctimer.h" | ||
|
||
#define NDEFAULT 16 // T[] is 2^N | ||
#define NCORESLOG2DEFAULT 4 // cores = 2^C | ||
#define A(i) (1 + (i) + (i) * (i)) | ||
|
||
/*
 * Argument block handed from the host launcher to the device kernel.
 * The field order is part of the host/device contract and must not change.
 */
struct my_args
{
	uint32_t* T;    /* table of 2^N 32-bit words that the benchmark updates */
	uint32_t N;     /* log2 of the number of table entries */
	uint32_t time;  /* elapsed-time value written back by the kernel (PE 0) */
};

typedef struct my_args my_args_t;
|
||
/*
 * Maps a sequence number to a pseudo-random 32-bit value: one wrapping
 * multiply followed by a 13/17/5 xorshift scramble. Being a pure function
 * of i, the n'th value can be computed directly by any caller.
 */
inline __attribute__((__always_inline__))
uint32_t INDEX(uint32_t i) // pseudo-random index access
{
	uint32_t h = i * 1234567891u; // wraps modulo 2^32
	h ^= h << 13;
	h ^= h >> 17;
	h ^= h << 5;
	return h;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,136 @@ | ||
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "gups.h"
#include "coprthr_cc.h"
#include "coprthr_thread.h"
|
||
/*
 * Host-side reference run of the random-access update loop.
 *
 * Performs 2^(N+2) updates on the table T of 2^N 32-bit words: each update
 * takes the pseudo-random value a = INDEX(i), selects the entry given by the
 * top N bits of a, and XORs a into it.
 *
 * T: table of 2^N words, updated in place.
 * N: log2 of the table size; N + 2 must be less than 32.
 * Returns the elapsed time as converted by ctimer_nsec().
 */
uint32_t RandomAccessHost(uint32_t* T, uint32_t N)
{
	ctimer_start();
	uint64_t t0 = ctimer();

	// Unsigned constant: shifting a signed 1 into the sign bit is undefined.
	const uint32_t I = 1u << (N + 2);
	const uint32_t kshift = 32 - N;
	for (uint32_t i = 0; i < I; i++)
	{
		uint32_t a = INDEX(i);
		// With N == 0 the table has a single entry and kshift would be 32,
		// which is an undefined shift count for a 32-bit value.
		uint32_t k = (N == 0) ? 0 : (a >> kshift);
		T[k] ^= a;
	}

	// Start reading minus end reading; presumably ctimer() counts down, so
	// this difference is the elapsed tick count -- confirm against ctimer.h.
	t0 -= ctimer();
	return ctimer_nsec(t0);
}
|
||
uint32_t RandomAccessEpiphany(uint32_t* T, uint32_t N, uint32_t C) | ||
{ | ||
uint32_t K = 1 << N; | ||
size_t Ksz = K * sizeof(uint32_t); | ||
|
||
// open device for threads | ||
int dd = coprthr_dopen(COPRTHR_DEVICE_E32, COPRTHR_O_THREAD); | ||
if (dd < 0) { | ||
fprintf(stderr, "device open failed\n"); | ||
return 0; | ||
} | ||
|
||
// Allocate shared DRAM | ||
coprthr_mem_t T_mem = coprthr_dmalloc(dd, Ksz, 0); | ||
coprthr_mem_t argmem = coprthr_dmalloc(dd, sizeof(my_args_t), 0); | ||
|
||
// Write to shared DRAM | ||
coprthr_dwrite(dd, T_mem, 0, T, Ksz, COPRTHR_E_WAIT); | ||
|
||
my_args_t* args = (my_args_t*)coprthr_memptr(argmem, 0); | ||
*args = (my_args_t){.N = N, .T = coprthr_memptr(T_mem,0)}; | ||
|
||
// Select program and thread function | ||
coprthr_program_t prg = coprthr_cc_read_bin("./shmem_tfunc.e32", 0); | ||
coprthr_sym_t thr = coprthr_getsym(prg, "SHMEMRandomAccessPowerOf2"); | ||
|
||
// launch threads | ||
coprthr_attr_t attr; | ||
coprthr_attr_init(&attr); | ||
coprthr_attr_setdetachstate(&attr, COPRTHR_CREATE_JOINABLE); | ||
coprthr_attr_setdevice(&attr, dd); | ||
coprthr_td_t td; | ||
coprthr_ncreate(C, &td, &attr, thr, (void*)&argmem); | ||
void* status; | ||
coprthr_join(td, &status); | ||
|
||
// read back data from memory on device | ||
coprthr_dread(dd, T_mem, 0, T, Ksz, COPRTHR_E_WAIT); | ||
|
||
// cleanup | ||
coprthr_attr_destroy(&attr); | ||
coprthr_dfree(dd, T_mem); | ||
coprthr_dfree(dd, argmem); | ||
|
||
return args->time; | ||
} | ||
|
||
int main(int argc, char* argv[]) | ||
{ | ||
uint32_t time_cpu, time_e32; | ||
uint32_t N = NDEFAULT; | ||
uint32_t NC = NCORESLOG2DEFAULT; | ||
|
||
int i = 1; | ||
while (i < argc) { | ||
if (!strncmp(argv[i],"-n",2)) N = atoi(argv[++i]); | ||
else if (!strncmp(argv[i],"-c",2)) NC = atoi(argv[++i]); | ||
else if (!strncmp(argv[i],"-h",2) || !strncmp(argv[i],"--help",6)) goto help; | ||
else { | ||
fprintf(stderr, "unrecognized option: %s\n",argv[i]); | ||
help: | ||
printf("Usage: %s [OPTION]...\n" | ||
"Parallel 32-bit on-chip implementation of Random Access measuring Giga Updates Per Second (GUPS)\n" | ||
" -n log2 of size. Default: %u (size = %u)\n" | ||
" -c log2 of number of Epiphany cores. Default: %u (cores = %u)\n" | ||
" -h, --help print this\n", | ||
argv[0], NDEFAULT, 1 << NDEFAULT, | ||
NCORESLOG2DEFAULT, 1 << NCORESLOG2DEFAULT); | ||
return 0; | ||
} | ||
++i; | ||
} | ||
if (NC > NCORESLOG2DEFAULT) { | ||
fprintf(stderr, "Try a smaller number of cores, C (%u)\n", NCORESLOG2DEFAULT); | ||
return 1; | ||
} | ||
uint32_t Nlimit = NDEFAULT - (NCORESLOG2DEFAULT - NC); | ||
if (N > Nlimit) { | ||
fprintf(stderr, "Try a smaller size, N (%u)\n", Nlimit); | ||
return 1; | ||
} | ||
|
||
uint32_t K = 1 << N; | ||
uint32_t I = 1 << (N + 2); | ||
uint32_t C = 1 << NC; | ||
|
||
uint32_t* T = calloc(K, sizeof(*T)); | ||
uint32_t* Thost = calloc(K, sizeof(*Thost)); | ||
if (!T || !Thost) { | ||
fprintf(stderr, "Could not allocate memory\n"); | ||
return 1; | ||
} | ||
|
||
// initializing T | ||
srand(12345); | ||
for (int k = 0; k < K; k++) T[k] = rand(); | ||
memcpy(Thost, T, K * sizeof(*Thost)); | ||
|
||
time_e32 = RandomAccessEpiphany(T, N, C); | ||
time_cpu = RandomAccessHost(Thost, N); | ||
|
||
int err = 0; | ||
for (int k = 0; k < K; k++) if (T[k] != Thost[k]) err++; | ||
|
||
printf("N = %u (2^N = %u), cores = %u, err = %d (\x1b[3%dm%0.2f%%\x1b[0m)\n", | ||
N, K, C, err, (err*100 >= K) ? 1 : 2, (double)err*100.0/(double)K); | ||
printf("GUPS = %0.4f (host = %0.4f)\n", (double)I/(double)time_e32, (double)I/(double)time_cpu); | ||
|
||
return 0; | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,90 @@ | ||
/* | ||
* This routine performs a distributed RandomAccess Benchmark. Instead of | ||
* 64-bit values, 32-bit values are used. Also, a different PRNG is used | ||
* because GF(2) at small indices isn't very random. It is also non-trivial to | ||
* calculate the n'th value. | ||
* | ||
* -JAR | ||
*/ | ||
|
||
#include <host_stdio.h> | ||
#include "gups.h" | ||
|
||
#define SHMEM_USE_HEADER_ONLY | ||
|
||
#include "shmem.h" | ||
#include "shmemx.h" | ||
|
||
/*
 * Counts trailing zero bits of x without branching.
 * The lowest set bit is isolated and decremented, giving a mask of exactly
 * the bits below it; the population count of that mask is the answer.
 * For x == 0 the mask is all ones, so the result is 32.
 */
inline __attribute__((__always_inline__))
uint32_t ctz(register uint32_t x)
{
	// Mask of every bit strictly below the lowest set bit of x
	uint32_t below = (x & -x) - 1;

	// Parallel bit count: 2-bit sums, then 4-bit sums, then per-byte sums
	uint32_t pairs = below - ((below >> 1) & 0x55555555);
	uint32_t nibbles = ((pairs >> 2) & 0x33333333) + (pairs & 0x33333333);
	uint32_t bytes = ((nibbles >> 4) + nibbles) & 0x0f0f0f0f;

	// Fold the four byte sums into the low byte
	uint32_t halves = bytes + (bytes >> 8);
	uint32_t total = halves + (halves >> 16);
	return (total & 0x0000003f);
}
|
||
void __entry | ||
SHMEMRandomAccessPowerOf2( my_args_t* args ) | ||
{ | ||
uint32_t* T = args->T; | ||
uint32_t N = args->N; | ||
uint32_t Np2 = N + 2; | ||
uint32_t K = 1 << N; | ||
uint32_t I = 1 << Np2; | ||
uint32_t N32 = 32 - N; | ||
|
||
shmem_init(); | ||
int me = shmem_my_pe(); | ||
int npes = shmem_n_pes(); | ||
|
||
uint32_t log2_npes = ctz(npes); | ||
uint32_t peshift = N - log2_npes; | ||
uint32_t shift = N - 32; | ||
uint32_t kmask0 = (0xffffffff << shift) >> shift; | ||
shift = 32 - peshift; | ||
uint32_t kmask1 = (0xffffffff << shift) >> shift; | ||
|
||
uint32_t npart = K >> log2_npes; | ||
int ipart = I >> log2_npes; | ||
int istart = ipart * me; | ||
int iend = ipart * (me + 1); | ||
size_t nsz = npart * sizeof(uint32_t); | ||
uint32_t* t = shmem_align(0x2000, nsz); | ||
shmemx_memcpy(t, T + npart * me, nsz); | ||
|
||
shmem_barrier_all(); | ||
|
||
ctimer_start(); | ||
uint32_t time = ctimer(); | ||
|
||
for (int i = istart; i < iend; i++) { | ||
// pseudo random number 'a', can trivially calculate n'th value | ||
uint32_t a = INDEX(i); | ||
uint32_t k_global = a >> N32; | ||
int k = k_global & kmask1; | ||
int q = k_global >> peshift; // the processor number | ||
if (N > 11) { // The percentage of errors is high for small N... | ||
uint64_t* ptk = shmem_ptr(t + k, q); | ||
*ptk ^= a; | ||
} else { // ...so use atomic xor to be error-free | ||
shmem_uint32_atomic_xor(t + k, a, q); | ||
} | ||
} | ||
|
||
shmem_barrier_all(); | ||
time -= ctimer(); | ||
time = ctimer_nsec(time); | ||
|
||
// Copying local results to DRAM | ||
shmemx_memcpy(T + npart * me, t, nsz); | ||
if (!me) args->time = time; | ||
|
||
shmem_free(t); | ||
|
||
shmem_finalize(); | ||
} |