-
Notifications
You must be signed in to change notification settings - Fork 11
/
Copy pathput_nb_dual.c
115 lines (95 loc) · 3.48 KB
/
put_nb_dual.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
/*
* Copyright (c) 2016 U.S. Army Research laboratory. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* This software was developed by Brown Deer Technology, LLC. with Copyright
* assigned to the US Army Research laboratory as required by contract.
*/
/*
* Performance test for shmem_putmem_nbi (latency and bandwidth)
*/
#include <host_stdio.h>
#include <shmem.h>
#include "ctimer.h"
/* Size in bytes of the symmetric source and target buffers; also the largest
 * total transfer size measured by the benchmark sweep. */
#define NELEMENT 8192
/* Number of timed loop iterations per transfer size. May be overridden at
 * compile time (e.g. with -DNLOOP=...). */
#ifndef NLOOP
#define NLOOP 1024
#endif
int main (void)
{
int i, nelement;
static int ti, tsum;
static int pWrk[SHMEM_REDUCE_MIN_WRKDATA_SIZE];
static long pSync[SHMEM_REDUCE_SYNC_SIZE];
for (i = 0; i < SHMEM_REDUCE_SYNC_SIZE; i++) {
pSync[i] = SHMEM_SYNC_VALUE;
}
shmem_init();
int me = shmem_my_pe();
int npes = shmem_n_pes();
int nxtpe = me + 1;
if (nxtpe >= npes) nxtpe -= npes;
char* source = (char*)shmem_align(NELEMENT * sizeof(char), 0x2000);
char* target = (char*)shmem_align(NELEMENT * sizeof(char), 0x2000);
for (i = 0; i < NELEMENT; i++) {
source[i] = (char)(i + 1);
}
if (me == 0) {
host_printf("# SHMEM Non-Blocking PutMem, Dual-Issue Performance\n" \
"# Bytes\tLatency (nanoseconds)\n");
}
/* For int put we take average of all the times realized by a pair of PEs,
thus reducing effects of physical location of PEs */
for (nelement = 2; nelement <= NELEMENT; nelement <<= 1)
{
unsigned int n2 = nelement >> 1;
for (i = 0; i < NELEMENT; i++) { // reset values for each iteration
target[i] = 0xff;
}
shmem_barrier_all();
ctimer_start();
unsigned int t = ctimer();
for (i = 0; i < NLOOP; i++) {
shmem_putmem_nbi(target, source, n2, nxtpe);
shmem_putmem_nbi(target + n2, source + n2, n2, nxtpe);
}
shmem_quiet();
t -= ctimer();
ti = (int)t;
shmem_int_sum_to_all(&tsum, &ti, 1, 0, 0, npes, pWrk, pSync);
if (me == 0) {
int bytes = nelement * sizeof(*source);
unsigned int nsec = ctimer_nsec(tsum / (npes * NLOOP));
host_printf("%6d %7u\n", bytes, nsec);
}
int err = 0;
for (i = 0; i < nelement; i++) if (target[i] != source[i]) err++;
for (i = nelement; i < NELEMENT; i++) if (target[i] != 0xff) err++;
if (err) host_printf("# %d: ERROR: %d incorrect value(s) copied\n", me, err);
}
shmem_free(target);
shmem_free(source);
shmem_finalize();
return 0;
}