Skip to content

Commit

Permalink
add shmem_sync/shmem_sync_all support and bug fix
Browse files Browse the repository at this point in the history
  • Loading branch information
jamesaross committed Dec 4, 2017
1 parent 5a3fd81 commit 5f74be7
Show file tree
Hide file tree
Showing 8 changed files with 199 additions and 73 deletions.
55 changes: 43 additions & 12 deletions src/shmem.h
Original file line number Diff line number Diff line change
Expand Up @@ -171,9 +171,9 @@ F(uint,unsigned int) \
F(ulong,unsigned long) \
F(ulonglong,unsigned long long) \
F(int32,int32_t) \
F(int64,int64_t) /*\
F(int64,int64_t) \
F(uint32,uint32_t) \
F(uint64,uint64_t)*/
F(uint64,uint64_t)

#define DECL_P2P(F) \
F(short,short) \
Expand Down Expand Up @@ -317,8 +317,8 @@ DECL_P2P(DECL_SHMEM_X_TEST)
SHMEM_SCOPE void shmem_barrier(int PE_start, int logPE_stride, int PE_size, long *pSync);
SHMEM_SCOPE void shmem_barrier_all(void);

#define shmem_sync(...) shmem_barrier(__VA_ARGS__)
#define shmem_sync_all(...) shmem_barrier_all(__VA_ARGS__)
SHMEM_SCOPE void shmem_sync(int PE_start, int logPE_stride, int PE_size, long *pSync);
SHMEM_SCOPE void shmem_sync_all(void);

#define DECL_SHMEM_X_TO_ALL(N,T) \
SHMEM_SCOPE void shmem_##N##_to_all(T *dest, const T *source, int nreduce, int PE_start, int logPE_stride, int PE_size, T *pWrk, long *pSync);
Expand Down Expand Up @@ -593,14 +593,45 @@ DECL_SHMEM_TYPE_RMA(ptrdiff_t, ptrdiff, 32)
#define shmem_wait_until(ivar,cmp,cmp_value) DECL_GENERIC_P2P(ivar,SHMEM_WAIT_UNTIL_GENERIC)(ivar,cmp,cmp_value)
#define shmem_test(ivar,cmp,value) DECL_GENERIC_P2P(ivar,SHMEM_TEST_GENERIC)(ivar,cmp,value)

#define shmem_finc(...) shmem_atomic_fetch_inc(__VA_ARGS__)
#define shmem_inc(...) shmem_atomic_inc(__VA_ARGS__)
#define shmem_fadd(...) shmem_atomic_fetch_add(__VA_ARGS__)
#define shmem_add(...) shmem_atomic_add(__VA_ARGS__)
#define shmem_cswap(...) shmem_atomic_compare_swap(__VA_ARGS__)
#define shmem_swap(...) shmem_atomic_swap(__VA_ARGS__)
#define shmem_fetch(...) shmem_atomic_fetch(__VA_ARGS__)
#define shmem_set(...) shmem_atomic_set(__VA_ARGS__)
#define shmem_add shmem_atomic_add
#define shmem_cswap shmem_atomic_compare_swap
#define shmem_fadd shmem_atomic_fetch_add
#define shmem_fetch shmem_atomic_fetch
#define shmem_finc shmem_atomic_fetch_inc
#define shmem_inc shmem_atomic_inc
#define shmem_set shmem_atomic_set
#define shmem_swap shmem_atomic_swap

#define shmem_int_add shmem_int_atomic_add
#define shmem_long_add shmem_long_atomic_add
#define shmem_longlong_add shmem_longlong_atomic_add
#define shmem_int_cswap shmem_int_atomic_compare_swap
#define shmem_long_cswap shmem_long_atomic_compare_swap
#define shmem_longlong_cswap shmem_longlong_atomic_compare_swap
#define shmem_int_fadd shmem_int_atomic_fetch_add
#define shmem_long_fadd shmem_long_atomic_fetch_add
#define shmem_longlong_fadd shmem_longlong_atomic_fetch_add
#define shmem_int_fetch shmem_int_atomic_fetch
#define shmem_long_fetch shmem_long_atomic_fetch
#define shmem_longlong_fetch shmem_longlong_atomic_fetch
#define shmem_float_fetch shmem_float_atomic_fetch
#define shmem_double_fetch shmem_double_atomic_fetch
#define shmem_int_finc shmem_int_atomic_fetch_inc
#define shmem_long_finc shmem_long_atomic_fetch_inc
#define shmem_longlong_finc shmem_longlong_atomic_fetch_inc
#define shmem_int_inc shmem_int_atomic_inc
#define shmem_long_inc shmem_long_atomic_inc
#define shmem_longlong_inc shmem_longlong_atomic_inc
#define shmem_int_set shmem_int_atomic_set
#define shmem_long_set shmem_long_atomic_set
#define shmem_longlong_set shmem_longlong_atomic_set
#define shmem_float_set shmem_float_atomic_set
#define shmem_double_set shmem_double_atomic_set
#define shmem_int_swap shmem_int_atomic_swap
#define shmem_long_swap shmem_long_atomic_swap
#define shmem_longlong_swap shmem_longlong_atomic_swap
#define shmem_float_swap shmem_float_atomic_swap
#define shmem_double_swap shmem_double_atomic_swap

#define __put_nbi(dest,source,nelems,pe) DECL_GENERIC_STANDARD_RMA(dest,SHMEM_PUT_NBI_GENERIC)(dest,source,nelems,pe)
#define __get_nbi(dest,source,nelems,pe) DECL_GENERIC_STANDARD_RMA(dest,SHMEM_GET_NBI_GENERIC)(dest,source,nelems,pe)
Expand Down
33 changes: 3 additions & 30 deletions src/shmem_barrier.c
Original file line number Diff line number Diff line change
Expand Up @@ -34,39 +34,12 @@
extern "C" {
#endif

SHMEM_SCOPE void SHMEM_INLINE
__shmem_barrier_lte2(int PE_start, int logPE_stride, int PE_size, long *pSync)
{ /* Routine for PE_size <= 2. Looping over shmem_barrier() for npes = 2 may
* not work correctly. Solution requires using testset because only
* synchronization stage may not be reset before subsequent call */
if (PE_size == 1) return;
int PE_step = 0x1 << logPE_stride;
if (__shmem.my_pe != PE_start) PE_step *= -1;
int to = __shmem.my_pe + PE_step;
volatile long* lock = (volatile long*)pSync;
__shmem_set_lock((long*)shmem_ptr((void*)lock, to));
while (*lock == SHMEM_SYNC_VALUE);
*lock = 0;
}

SHMEM_SCOPE void
shmem_barrier(int PE_start, int logPE_stride, int PE_size, long *pSync)
{
if (PE_size < 3) return __shmem_barrier_lte2(PE_start, logPE_stride, PE_size, pSync);
int PE_size_stride = PE_size << logPE_stride;
int PE_end = PE_size_stride + PE_start;

int c, r;
for (c = 0, r = (1 << logPE_stride); r < PE_size_stride; c++, r <<= 1)
{
int to = __shmem.my_pe + r;
if (to >= PE_end) to -= PE_size_stride;
volatile long* lock = (volatile long*)(pSync + c);
long * remote_lock = (long*)shmem_ptr((void*)lock, to);
*remote_lock = 1;
while (*lock == SHMEM_SYNC_VALUE);
*lock = SHMEM_SYNC_VALUE;
}
shmem_quiet();
shmem_sync(PE_start, logPE_stride, PE_size, pSync);
__shmem.dma_used = 0; // reset
}

#ifdef __cplusplus
Expand Down
29 changes: 1 addition & 28 deletions src/shmem_barrier_all.c
Original file line number Diff line number Diff line change
Expand Up @@ -34,41 +34,14 @@
extern "C" {
#endif

#ifdef SHMEM_USE_WAND_BARRIER

SHMEM_SCOPE void
shmem_barrier_all(void)
{
shmem_quiet();
__asm__ __volatile__ (
"gid \n" // disable interrupts
"wand \n" // wait on AND
".balignw 8,0x01a2 \n" // nop align gie/idle pair to block
"gie \n" // enable interrupts
"idle \n" // to go sleep
);
shmem_sync_all();
__shmem.dma_used = 0; // reset
}

#else

SHMEM_SCOPE void
shmem_barrier_all(void)
{
shmem_quiet();
int c;
for (c = 0; c < __shmem.n_pes_log2; c++)
{
volatile long* lock = (volatile long*)(__shmem.barrier_sync + c);
*(__shmem.barrier_psync[c]) = 1;
while (*lock == SHMEM_SYNC_VALUE);
*lock = SHMEM_SYNC_VALUE;
}
__shmem.dma_used = 0; // reset
}

#endif

#ifdef __cplusplus
}
#endif
2 changes: 2 additions & 0 deletions src/shmem_header_only.h
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,8 @@
#include "shmem_size_atomic_inc.c"
#include "shmem_size_atomic_set.c"
#include "shmem_size_atomic_swap.c"
#include "shmem_sync.c"
#include "shmem_sync_all.c"
#include "shmem_test_lock.c"
#include "shmem_uint32_atomic_add.c"
#include "shmem_uint32_atomic_and.c"
Expand Down
7 changes: 5 additions & 2 deletions src/shmem_quiet.c
Original file line number Diff line number Diff line change
Expand Up @@ -49,12 +49,15 @@ shmem_quiet(void)
" bne .Loop%= \n" // spin until both complete
: : : "r0", "r1", "r2", "cc"
);
// XXX This isn't a great way to guarantee the data has finished
if (__shmem.cdst0) {
while(*__shmem.cdst0 == __shmem.csrc0);
if(*__shmem.cdst0 == __shmem.csrc0);
*__shmem.cdst0 = ~__shmem.csrc0;
__shmem.cdst0 = 0;
}
if (__shmem.cdst1) {
while(*__shmem.cdst1 == __shmem.csrc1);
if(*__shmem.cdst1 == __shmem.csrc1);
*__shmem.cdst1 = ~__shmem.csrc1;
__shmem.cdst1 = 0;
}
}
Expand Down
74 changes: 74 additions & 0 deletions src/shmem_sync.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
/*
* Copyright (c) 2016-2017 U.S. Army Research laboratory. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* This software was developed by Brown Deer Technology, LLC. with Copyright
* assigned to the US Army Research laboratory as required by contract.
*/

#include "internals.h"
#include "shmem.h"

#ifdef __cplusplus
extern "C" {
#endif

/*
 * Synchronize an active set of at most two PEs using only pSync[0].
 *
 * PE_start      first PE of the active set
 * logPE_stride  log2 of the distance between the PEs of the set
 * PE_size       number of PEs in the set (this routine is used for < 3)
 * pSync         symmetric work array; only its first element is touched
 *
 * A set of exactly one PE returns immediately. Otherwise the caller calls
 * __shmem_set_lock() on its partner's copy of pSync[0], spins until its own
 * copy no longer holds SHMEM_SYNC_VALUE, and finally writes 0 to its own copy.
 *
 * Why a dedicated routine: with two PEs there is a single synchronization
 * stage, and that stage may not have been reset yet when the routine is
 * called again, so a testset-based lock is used instead of a plain store.
 */
SHMEM_SCOPE void SHMEM_INLINE
__shmem_sync_lte2(int PE_start, int logPE_stride, int PE_size, long *pSync)
{
    if (PE_size == 1) {
        return; /* nothing to synchronize with */
    }

    /* The first PE of the set pairs with the PE one stride above it;
     * any other PE pairs with the PE one stride below it. */
    const int stride = 0x1 << logPE_stride;
    const int partner = (__shmem.my_pe != PE_start)
        ? __shmem.my_pe - stride
        : __shmem.my_pe + stride;

    volatile long *flag = (volatile long *)pSync;
    long *partner_flag = (long *)shmem_ptr((void *)flag, partner);

    /* Signal the partner through its copy of the flag ... */
    __shmem_set_lock(partner_flag);

    /* ... wait for the partner to signal us ... */
    while (*flag == SHMEM_SYNC_VALUE) {
        /* spin */
    }

    /* ... and release our own flag for the next call. */
    *flag = 0;
}

/*
 * shmem_sync - synchronize the PEs of an active set.
 *
 * PE_start      first PE of the active set
 * logPE_stride  log2 of the distance between the PEs of the set
 * PE_size       number of PEs in the set
 * pSync         symmetric work array; round c of the loop below uses
 *               pSync[c], so it must hold at least one long per round
 *
 * Sets of fewer than three PEs are delegated to __shmem_sync_lte2().
 * Larger sets run one round per doubling of the signalling distance r
 * (r = 1, 2, 4, ... strides while r < PE_size strides). In each round the
 * caller stores 1 into pSync[c] on the PE r positions ahead of it (wrapping
 * around past the end of the set), spins until its own pSync[c] leaves
 * SHMEM_SYNC_VALUE, then restores pSync[c] to SHMEM_SYNC_VALUE.
 *
 * Only the pSync flags are touched: this routine does not call
 * shmem_quiet().
 */
SHMEM_SCOPE void
shmem_sync(int PE_start, int logPE_stride, int PE_size, long *pSync)
{
    if (PE_size < 3) {
        /* ISO C (C11 6.8.6.4) forbids `return expr;` in a void function,
         * even when expr itself has type void, so call and then return. */
        __shmem_sync_lte2(PE_start, logPE_stride, PE_size, pSync);
        return;
    }

    /* Extent of the active set in PE numbers, and one past its last PE. */
    const int PE_size_stride = PE_size << logPE_stride;
    const int PE_end = PE_size_stride + PE_start;

    int c, r;
    for (c = 0, r = (1 << logPE_stride); r < PE_size_stride; c++, r <<= 1) {
        /* Partner for this round, wrapped back into the active set. */
        int to = __shmem.my_pe + r;
        if (to >= PE_end) {
            to -= PE_size_stride;
        }

        volatile long *lock = (volatile long *)(pSync + c);
        long *remote_lock = (long *)shmem_ptr((void *)lock, to);

        *remote_lock = 1;                   /* signal the partner */
        while (*lock == SHMEM_SYNC_VALUE) { /* wait to be signalled */
            /* spin */
        }
        *lock = SHMEM_SYNC_VALUE;           /* re-arm this round's flag */
    }
}

#ifdef __cplusplus
}
#endif
70 changes: 70 additions & 0 deletions src/shmem_sync_all.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
/*
* Copyright (c) 2016-2017 U.S. Army Research laboratory. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* This software was developed by Brown Deer Technology, LLC. with Copyright
* assigned to the US Army Research laboratory as required by contract.
*/

#include "internals.h"
#include "shmem.h"

#ifdef __cplusplus
extern "C" {
#endif

#ifdef SHMEM_USE_WAND_BARRIER

/*
 * shmem_sync_all - synchronization variant compiled when
 * SHMEM_USE_WAND_BARRIER is defined.
 *
 * The whole routine is one inline-assembly sequence: interrupts are
 * disabled, the "wait on AND" instruction is issued, and the core then
 * re-enables interrupts and goes idle. It takes no arguments, performs no
 * shmem_quiet() and touches no __shmem state.
 *
 * NOTE(review): presumably the core is woken from `idle` once every core
 * has executed `wand` -- confirm against the Epiphany architecture
 * reference.
 * NOTE(review): the asm statement declares no clobbers (in particular no
 * "memory" clobber); verify that callers do not rely on it as a compiler
 * memory barrier when this file is inlined via the header-only build.
 */
SHMEM_SCOPE void
shmem_sync_all(void)
{
__asm__ __volatile__ (
"gid \n" // disable interrupts so the wake-up cannot be taken before idle
"wand \n" // wait on AND
".balignw 8,0x01a2 \n" // pad with nops so the gie/idle pair is aligned to an 8-byte block
"gie \n" // enable interrupts
"idle \n" // go to sleep
);
}

#else

/*
 * shmem_sync_all - synchronization variant compiled when
 * SHMEM_USE_WAND_BARRIER is not defined.
 *
 * Runs __shmem.n_pes_log2 stages. In stage s the caller stores 1 through
 * __shmem.barrier_psync[s], spins until its own __shmem.barrier_sync[s]
 * no longer holds SHMEM_SYNC_VALUE, then restores that flag to
 * SHMEM_SYNC_VALUE so it can be reused by the next call.
 *
 * NOTE(review): barrier_psync[s] is presumably a precomputed pointer to
 * the stage-s partner's barrier_sync[s]; it is set up outside this file --
 * confirm against the initialization code.
 *
 * Only the synchronization flags are touched: no shmem_quiet() is issued.
 */
SHMEM_SCOPE void
shmem_sync_all(void)
{
    int stage = 0;

    while (stage < __shmem.n_pes_log2) {
        volatile long *flag = (volatile long *)(__shmem.barrier_sync + stage);

        /* Signal this stage's partner ... */
        *(__shmem.barrier_psync[stage]) = 1;

        /* ... wait until we have been signalled ourselves ... */
        while (*flag == SHMEM_SYNC_VALUE) {
            /* spin */
        }

        /* ... and re-arm the flag. */
        *flag = SHMEM_SYNC_VALUE;

        stage++;
    }
}

#endif

#ifdef __cplusplus
}
#endif
2 changes: 1 addition & 1 deletion src/shmemx_memcpy_nbi.c
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ shmemx_memcpy_nbi(void *dst, const void *src, size_t nbytes)
__shmem.dma_desc.outer_stride = stride,
__shmem.dma_desc.config = config;
__shmem.dma_used = 1;
#if 0 // XXX dual channel DMA may be unstable
#if 1 // XXX dual channel DMA may be unstable
unsigned int dmachannel;
__asm__ __volatile__ (
"mov r0, #15 \n"
Expand Down

0 comments on commit 5f74be7

Please sign in to comment.