-
Notifications
You must be signed in to change notification settings - Fork 11
/
Copy pathfcollect32.c
115 lines (98 loc) · 3.4 KB
/
fcollect32.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
/*
* Copyright (c) 2016 U.S. Army Research laboratory. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* This software was developed by Brown Deer Technology, LLC. with Copyright
* assigned to the US Army Research laboratory as required by contract.
*/
/*
* Performance test for shmem_fcollect32
*/
#include <host_stdio.h>
#include <shmem.h>
#include "ctimer.h"
/* Largest per-PE element count: sizes the source/target buffers and bounds the message-size sweep in main() */
#define NELEMENT 256
/* Number of shmem_fcollect32 calls timed per message size; can be overridden at compile time (e.g. -DNLOOP=...) */
#ifndef NLOOP
#define NLOOP 1024
#endif
/*
 * Performance and correctness driver for shmem_fcollect32.
 *
 * For each message size from 1 to NELEMENT 32-bit elements (doubling each
 * step), every PE times a batch of fcollect calls over the full PE set,
 * the per-PE tick counts are summed with a reduction, and PE 0 prints the
 * averaged latency. After each batch every PE validates the collected data
 * and reports any mismatches.
 *
 * Returns 0 on normal completion, 1 if a symmetric buffer could not be
 * allocated.
 */
int main (void)
{
	int i, j, nelement;
	/* Static storage so these can serve as reduction source/destination. */
	static int ti, tsum;
	/* Two pSync arrays, alternated between consecutive fcollect calls. */
	static long pSyncA[SHMEM_COLLECT_SYNC_SIZE];
	static long pSyncB[SHMEM_COLLECT_SYNC_SIZE];
	/* Work and sync arrays for the timing reduction. */
	static int pWrk[SHMEM_REDUCE_MIN_WRKDATA_SIZE];
	static long pSync[SHMEM_REDUCE_SYNC_SIZE];
	/*
	 * The timed loop issues calls in pairs, so an odd NLOOP results in
	 * NLOOP + 1 calls. Average over the number of calls actually made.
	 */
	const int ncalls = 2 * ((NLOOP + 1) / 2);

	for (i = 0; i < SHMEM_COLLECT_SYNC_SIZE; i++) {
		pSyncA[i] = SHMEM_SYNC_VALUE;
		pSyncB[i] = SHMEM_SYNC_VALUE;
	}
	for (i = 0; i < SHMEM_REDUCE_SYNC_SIZE; i++) {
		pSync[i] = SHMEM_SYNC_VALUE;
	}

	shmem_init();
	int me = shmem_my_pe();
	int npes = shmem_n_pes();

	/* source holds NELEMENT ints; target holds NELEMENT ints per PE. */
	int* source = (int*)shmem_malloc(NELEMENT * sizeof (*source));
	int* target = (int*)shmem_malloc(NELEMENT * sizeof (*target) * npes);
	if (!source || !target) {
		/*
		 * Do not dereference a failed allocation. The buffers are not
		 * freed individually here because one of them may be null;
		 * NOTE(review): relies on shmem_finalize releasing symmetric
		 * memory, as described by the OpenSHMEM specification.
		 */
		host_printf("%d: symmetric memory allocation failed\n", me);
		shmem_finalize();
		return 1;
	}

	/* Encode the element index (upper bits) and the owning PE (low bits). */
	for (i = 0; i < NELEMENT; i++) {
		source[i] = ((i + 1) << 10) + me;
	}

	if (me == 0) {
		host_printf("# SHMEM Fcollect32 times for NPES = %d\n"
			"# Bytes\tLatency (nanoseconds)\n", npes);
	}

	for (nelement = 1; nelement <= NELEMENT; nelement <<= 1)
	{
		/* Poison the destination so stale data cannot pass validation. */
		for (i = 0; i < nelement * npes; i++) {
			target[i] = -90;
		}
		shmem_barrier_all();

		ctimer_start();
		unsigned int t = ctimer();
		for (i = 0; i < ncalls; i += 2) {
			shmem_fcollect32(target, source, nelement, 0, 0, npes, pSyncA);
			shmem_fcollect32(target, source, nelement, 0, 0, npes, pSyncB);
		}
		/*
		 * Elapsed ticks are computed as start minus end; presumably
		 * ctimer() counts down -- confirm against ctimer.h.
		 */
		t -= ctimer();
		ti = (int)t;

		/* Sum the per-PE tick counts so PE 0 can report the average. */
		shmem_int_sum_to_all(&tsum, &ti, 1, 0, 0, npes, pWrk, pSync);
		if (me == 0) {
			int bytes = nelement * (int)sizeof(*source);
			unsigned int nsec = ctimer_nsec(tsum / (npes * ncalls));
			host_printf("%5d %7u\n", bytes, nsec);
		}

		/* Block j of target must hold the pattern written by PE j. */
		int err = 0;
		for (j = 0; j < npes; j++) {
			for (i = 0; i < nelement; i++) {
				if (target[j*nelement + i] != (((i + 1) << 10) + j)) err++;
			}
		}
		if (err) host_printf("%d: %d ERRORS\n", me, err);
	}

	shmem_free(target);
	shmem_free(source);
	shmem_finalize();
	return 0;
}