Skip to content

Commit 5f74be7

Browse files
committed
add shmem_sync/shmem_sync_all support and bug fix
1 parent 5a3fd81 commit 5f74be7

File tree

8 files changed

+199
-73
lines changed

8 files changed

+199
-73
lines changed

src/shmem.h

Lines changed: 43 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -171,9 +171,9 @@ F(uint,unsigned int) \
171171
F(ulong,unsigned long) \
172172
F(ulonglong,unsigned long long) \
173173
F(int32,int32_t) \
174-
F(int64,int64_t) /*\
174+
F(int64,int64_t) \
175175
F(uint32,uint32_t) \
176-
F(uint64,uint64_t)*/
176+
F(uint64,uint64_t)
177177

178178
#define DECL_P2P(F) \
179179
F(short,short) \
@@ -317,8 +317,8 @@ DECL_P2P(DECL_SHMEM_X_TEST)
317317
SHMEM_SCOPE void shmem_barrier(int PE_start, int logPE_stride, int PE_size, long *pSync);
318318
SHMEM_SCOPE void shmem_barrier_all(void);
319319

320-
#define shmem_sync(...) shmem_barrier(__VA_ARGS__)
321-
#define shmem_sync_all(...) shmem_barrier_all(__VA_ARGS__)
320+
SHMEM_SCOPE void shmem_sync(int PE_start, int logPE_stride, int PE_size, long *pSync);
321+
SHMEM_SCOPE void shmem_sync_all(void);
322322

323323
#define DECL_SHMEM_X_TO_ALL(N,T) \
324324
SHMEM_SCOPE void shmem_##N##_to_all(T *dest, const T *source, int nreduce, int PE_start, int logPE_stride, int PE_size, T *pWrk, long *pSync);
@@ -593,14 +593,45 @@ DECL_SHMEM_TYPE_RMA(ptrdiff_t, ptrdiff, 32)
593593
#define shmem_wait_until(ivar,cmp,cmp_value) DECL_GENERIC_P2P(ivar,SHMEM_WAIT_UNTIL_GENERIC)(ivar,cmp,cmp_value)
594594
#define shmem_test(ivar,cmp,value) DECL_GENERIC_P2P(ivar,SHMEM_TEST_GENERIC)(ivar,cmp,value)
595595

596-
#define shmem_finc(...) shmem_atomic_fetch_inc(__VA_ARGS__)
597-
#define shmem_inc(...) shmem_atomic_inc(__VA_ARGS__)
598-
#define shmem_fadd(...) shmem_atomic_fetch_add(__VA_ARGS__)
599-
#define shmem_add(...) shmem_atomic_add(__VA_ARGS__)
600-
#define shmem_cswap(...) shmem_atomic_compare_swap(__VA_ARGS__)
601-
#define shmem_swap(...) shmem_atomic_swap(__VA_ARGS__)
602-
#define shmem_fetch(...) shmem_atomic_fetch(__VA_ARGS__)
603-
#define shmem_set(...) shmem_atomic_set(__VA_ARGS__)
596+
#define shmem_add shmem_atomic_add
597+
#define shmem_cswap shmem_atomic_compare_swap
598+
#define shmem_fadd shmem_atomic_fetch_add
599+
#define shmem_fetch shmem_atomic_fetch
600+
#define shmem_finc shmem_atomic_fetch_inc
601+
#define shmem_inc shmem_atomic_inc
602+
#define shmem_set shmem_atomic_set
603+
#define shmem_swap shmem_atomic_swap
604+
605+
#define shmem_int_add shmem_int_atomic_add
606+
#define shmem_long_add shmem_long_atomic_add
607+
#define shmem_longlong_add shmem_longlong_atomic_add
608+
#define shmem_int_cswap shmem_int_atomic_compare_swap
609+
#define shmem_long_cswap shmem_long_atomic_compare_swap
610+
#define shmem_longlong_cswap shmem_longlong_atomic_compare_swap
611+
#define shmem_int_fadd shmem_int_atomic_fetch_add
612+
#define shmem_long_fadd shmem_long_atomic_fetch_add
613+
#define shmem_longlong_fadd shmem_longlong_atomic_fetch_add
614+
#define shmem_int_fetch shmem_int_atomic_fetch
615+
#define shmem_long_fetch shmem_long_atomic_fetch
616+
#define shmem_longlong_fetch shmem_longlong_atomic_fetch
617+
#define shmem_float_fetch shmem_float_atomic_fetch
618+
#define shmem_double_fetch shmem_double_atomic_fetch
619+
#define shmem_int_finc shmem_int_atomic_fetch_inc
620+
#define shmem_long_finc shmem_long_atomic_fetch_inc
621+
#define shmem_longlong_finc shmem_longlong_atomic_fetch_inc
622+
#define shmem_int_inc shmem_int_atomic_inc
623+
#define shmem_long_inc shmem_long_atomic_inc
624+
#define shmem_longlong_inc shmem_longlong_atomic_inc
625+
#define shmem_int_set shmem_int_atomic_set
626+
#define shmem_long_set shmem_long_atomic_set
627+
#define shmem_longlong_set shmem_longlong_atomic_set
628+
#define shmem_float_set shmem_float_atomic_set
629+
#define shmem_double_set shmem_double_atomic_set
630+
#define shmem_int_swap shmem_int_atomic_swap
631+
#define shmem_long_swap shmem_long_atomic_swap
632+
#define shmem_longlong_swap shmem_longlong_atomic_swap
633+
#define shmem_float_swap shmem_float_atomic_swap
634+
#define shmem_double_swap shmem_double_atomic_swap
604635

605636
#define __put_nbi(dest,source,nelems,pe) DECL_GENERIC_STANDARD_RMA(dest,SHMEM_PUT_NBI_GENERIC)(dest,source,nelems,pe)
606637
#define __get_nbi(dest,source,nelems,pe) DECL_GENERIC_STANDARD_RMA(dest,SHMEM_GET_NBI_GENERIC)(dest,source,nelems,pe)

src/shmem_barrier.c

Lines changed: 3 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -34,39 +34,12 @@
3434
extern "C" {
3535
#endif
3636

37-
SHMEM_SCOPE void SHMEM_INLINE
38-
__shmem_barrier_lte2(int PE_start, int logPE_stride, int PE_size, long *pSync)
39-
{ /* Routine for PE_size <= 2. Looping over shmem_barrier() for npes = 2 may
40-
* not work correctly. Solution requires using testset because only
41-
* synchronization stage may not be reset before subsequent call */
42-
if (PE_size == 1) return;
43-
int PE_step = 0x1 << logPE_stride;
44-
if (__shmem.my_pe != PE_start) PE_step *= -1;
45-
int to = __shmem.my_pe + PE_step;
46-
volatile long* lock = (volatile long*)pSync;
47-
__shmem_set_lock((long*)shmem_ptr((void*)lock, to));
48-
while (*lock == SHMEM_SYNC_VALUE);
49-
*lock = 0;
50-
}
51-
5237
SHMEM_SCOPE void
5338
shmem_barrier(int PE_start, int logPE_stride, int PE_size, long *pSync)
5439
{
55-
if (PE_size < 3) return __shmem_barrier_lte2(PE_start, logPE_stride, PE_size, pSync);
56-
int PE_size_stride = PE_size << logPE_stride;
57-
int PE_end = PE_size_stride + PE_start;
58-
59-
int c, r;
60-
for (c = 0, r = (1 << logPE_stride); r < PE_size_stride; c++, r <<= 1)
61-
{
62-
int to = __shmem.my_pe + r;
63-
if (to >= PE_end) to -= PE_size_stride;
64-
volatile long* lock = (volatile long*)(pSync + c);
65-
long * remote_lock = (long*)shmem_ptr((void*)lock, to);
66-
*remote_lock = 1;
67-
while (*lock == SHMEM_SYNC_VALUE);
68-
*lock = SHMEM_SYNC_VALUE;
69-
}
40+
shmem_quiet();
41+
shmem_sync(PE_start, logPE_stride, PE_size, pSync);
42+
__shmem.dma_used = 0; // reset
7043
}
7144

7245
#ifdef __cplusplus

src/shmem_barrier_all.c

Lines changed: 1 addition & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -34,41 +34,14 @@
3434
extern "C" {
3535
#endif
3636

37-
#ifdef SHMEM_USE_WAND_BARRIER
38-
3937
SHMEM_SCOPE void
4038
shmem_barrier_all(void)
4139
{
4240
shmem_quiet();
43-
__asm__ __volatile__ (
44-
"gid \n" // disable interrupts
45-
"wand \n" // wait on AND
46-
".balignw 8,0x01a2 \n" // nop align gie/idle pair to block
47-
"gie \n" // enable interrupts
48-
"idle \n" // to go sleep
49-
);
41+
shmem_sync_all();
5042
__shmem.dma_used = 0; // reset
5143
}
5244

53-
#else
54-
55-
SHMEM_SCOPE void
56-
shmem_barrier_all(void)
57-
{
58-
shmem_quiet();
59-
int c;
60-
for (c = 0; c < __shmem.n_pes_log2; c++)
61-
{
62-
volatile long* lock = (volatile long*)(__shmem.barrier_sync + c);
63-
*(__shmem.barrier_psync[c]) = 1;
64-
while (*lock == SHMEM_SYNC_VALUE);
65-
*lock = SHMEM_SYNC_VALUE;
66-
}
67-
__shmem.dma_used = 0; // reset
68-
}
69-
70-
#endif
71-
7245
#ifdef __cplusplus
7346
}
7447
#endif

src/shmem_header_only.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -202,6 +202,8 @@
202202
#include "shmem_size_atomic_inc.c"
203203
#include "shmem_size_atomic_set.c"
204204
#include "shmem_size_atomic_swap.c"
205+
#include "shmem_sync.c"
206+
#include "shmem_sync_all.c"
205207
#include "shmem_test_lock.c"
206208
#include "shmem_uint32_atomic_add.c"
207209
#include "shmem_uint32_atomic_and.c"

src/shmem_quiet.c

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -49,12 +49,15 @@ shmem_quiet(void)
4949
" bne .Loop%= \n" // spin until both complete
5050
: : : "r0", "r1", "r2", "cc"
5151
);
52+
// XXX This isn't a great way to guarantee the data has finished
5253
if (__shmem.cdst0) {
53-
while(*__shmem.cdst0 == __shmem.csrc0);
54+
if(*__shmem.cdst0 == __shmem.csrc0);
55+
*__shmem.cdst0 = ~__shmem.csrc0;
5456
__shmem.cdst0 = 0;
5557
}
5658
if (__shmem.cdst1) {
57-
while(*__shmem.cdst1 == __shmem.csrc1);
59+
if(*__shmem.cdst1 == __shmem.csrc1);
60+
*__shmem.cdst1 = ~__shmem.csrc1;
5861
__shmem.cdst1 = 0;
5962
}
6063
}

src/shmem_sync.c

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
/*
2+
* Copyright (c) 2016-2017 U.S. Army Research laboratory. All rights reserved.
3+
*
4+
* Redistribution and use in source and binary forms, with or without
5+
* modification, are permitted provided that the following conditions are met:
6+
*
7+
* 1. Redistributions of source code must retain the above copyright notice,
8+
* this list of conditions and the following disclaimer.
9+
*
10+
* 2. Redistributions in binary form must reproduce the above copyright notice,
11+
* this list of conditions and the following disclaimer in the documentation
12+
* and/or other materials provided with the distribution.
13+
*
14+
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
15+
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16+
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17+
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
18+
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
19+
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
20+
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
21+
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
22+
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
23+
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
24+
* POSSIBILITY OF SUCH DAMAGE.
25+
*
26+
* This software was developed by Brown Deer Technology, LLC. with Copyright
27+
* assigned to the US Army Research laboratory as required by contract.
28+
*/
29+
30+
#include "internals.h"
31+
#include "shmem.h"
32+
33+
#ifdef __cplusplus
34+
extern "C" {
35+
#endif
36+
37+
SHMEM_SCOPE void SHMEM_INLINE
38+
__shmem_sync_lte2(int PE_start, int logPE_stride, int PE_size, long *pSync)
39+
{ /* Routine for PE_size <= 2. Looping over shmem_barrier() for npes = 2 may
40+
* not work correctly. Solution requires using testset because only
41+
* synchronization stage may not be reset before subsequent call */
42+
if (PE_size == 1) return;
43+
int PE_step = 0x1 << logPE_stride;
44+
if (__shmem.my_pe != PE_start) PE_step *= -1;
45+
int to = __shmem.my_pe + PE_step;
46+
volatile long* lock = (volatile long*)pSync;
47+
__shmem_set_lock((long*)shmem_ptr((void*)lock, to));
48+
while (*lock == SHMEM_SYNC_VALUE);
49+
*lock = 0;
50+
}
51+
52+
SHMEM_SCOPE void
53+
shmem_sync(int PE_start, int logPE_stride, int PE_size, long *pSync)
54+
{
55+
if (PE_size < 3) return __shmem_sync_lte2(PE_start, logPE_stride, PE_size, pSync);
56+
int PE_size_stride = PE_size << logPE_stride;
57+
int PE_end = PE_size_stride + PE_start;
58+
59+
int c, r;
60+
for (c = 0, r = (1 << logPE_stride); r < PE_size_stride; c++, r <<= 1)
61+
{
62+
int to = __shmem.my_pe + r;
63+
if (to >= PE_end) to -= PE_size_stride;
64+
volatile long* lock = (volatile long*)(pSync + c);
65+
long * remote_lock = (long*)shmem_ptr((void*)lock, to);
66+
*remote_lock = 1;
67+
while (*lock == SHMEM_SYNC_VALUE);
68+
*lock = SHMEM_SYNC_VALUE;
69+
}
70+
}
71+
72+
#ifdef __cplusplus
73+
}
74+
#endif

src/shmem_sync_all.c

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
/*
2+
* Copyright (c) 2016-2017 U.S. Army Research laboratory. All rights reserved.
3+
*
4+
* Redistribution and use in source and binary forms, with or without
5+
* modification, are permitted provided that the following conditions are met:
6+
*
7+
* 1. Redistributions of source code must retain the above copyright notice,
8+
* this list of conditions and the following disclaimer.
9+
*
10+
* 2. Redistributions in binary form must reproduce the above copyright notice,
11+
* this list of conditions and the following disclaimer in the documentation
12+
* and/or other materials provided with the distribution.
13+
*
14+
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
15+
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16+
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17+
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
18+
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
19+
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
20+
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
21+
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
22+
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
23+
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
24+
* POSSIBILITY OF SUCH DAMAGE.
25+
*
26+
* This software was developed by Brown Deer Technology, LLC. with Copyright
27+
* assigned to the US Army Research laboratory as required by contract.
28+
*/
29+
30+
#include "internals.h"
31+
#include "shmem.h"
32+
33+
#ifdef __cplusplus
34+
extern "C" {
35+
#endif
36+
37+
#ifdef SHMEM_USE_WAND_BARRIER
38+
39+
SHMEM_SCOPE void
40+
shmem_sync_all(void)
41+
{
42+
__asm__ __volatile__ (
43+
"gid \n" // disable interrupts
44+
"wand \n" // wait on AND
45+
".balignw 8,0x01a2 \n" // nop align gie/idle pair to block
46+
"gie \n" // enable interrupts
47+
"idle \n" // to go sleep
48+
);
49+
}
50+
51+
#else
52+
53+
SHMEM_SCOPE void
54+
shmem_sync_all(void)
55+
{
56+
int c;
57+
for (c = 0; c < __shmem.n_pes_log2; c++)
58+
{
59+
volatile long* lock = (volatile long*)(__shmem.barrier_sync + c);
60+
*(__shmem.barrier_psync[c]) = 1;
61+
while (*lock == SHMEM_SYNC_VALUE);
62+
*lock = SHMEM_SYNC_VALUE;
63+
}
64+
}
65+
66+
#endif
67+
68+
#ifdef __cplusplus
69+
}
70+
#endif

src/shmemx_memcpy_nbi.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ shmemx_memcpy_nbi(void *dst, const void *src, size_t nbytes)
5555
__shmem.dma_desc.outer_stride = stride,
5656
__shmem.dma_desc.config = config;
5757
__shmem.dma_used = 1;
58-
#if 0 // XXX dual channel DMA may be unstable
58+
#if 1 // XXX dual channel DMA may be unstable
5959
unsigned int dmachannel;
6060
__asm__ __volatile__ (
6161
"mov r0, #15 \n"

0 commit comments

Comments
 (0)