-
Notifications
You must be signed in to change notification settings - Fork 11
/
Copy pathshmem_init.c
190 lines (174 loc) · 6.24 KB
/
shmem_init.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
/*
* Copyright (c) 2016-2018 U.S. Army Research laboratory. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* This software was developed by Brown Deer Technology, LLC. with Copyright
* assigned to the US Army Research laboratory as required by contract.
*/
#include "shmem.h"
#include "internals.h"
#ifdef __cplusplus
extern "C" {
#endif
// Library-wide internal state for this PE. It starts zero-initialized and is
// filled in by shmem_init() (coreid, n_pes, my_pe, n_pes_log2, dma_start, ...).
shmem_internals_t __shmem = { 0 };
// Default context handle exported to users; its value comes from __CTX_DEFAULT,
// which is defined elsewhere.
const shmem_ctx_t SHMEM_CTX_DEFAULT = __CTX_DEFAULT;
#ifdef SHMEM_USE_WAND_BARRIER
/*
 * Interrupt service routine used when SHMEM_USE_WAND_BARRIER is defined.
 *
 * The body is a single inline-assembly sequence: it disables interrupts,
 * saves r0 and r1 below the stack pointer, clears bit 3 of the STATUS
 * register (mask 0xFFFFFFF7, the WAND bit per the comments below), restores
 * r0 and r1, re-enables interrupts and returns with `rti`.
 * r0/r1 are saved and restored by hand, so only "cc" is listed as clobbered.
 * __shmem_wand_barrier_init() writes a branch to this routine at address 0x20.
 */
SHMEM_SCOPE void __attribute__((aligned(8)))
__shmem_wand_isr (void)
{
__asm__ __volatile__ (
"gid \n" // disable further interrupts
"str r0, [sp, -0x1] \n" // push r0 on the stack
"str r1, [sp, -0x2] \n" // push r1 on the stack
"movfs r0, STATUS \n" // read STATUS register
"mov r1, 0xFFF7 \n" // low bits of NOT WAND bit
"movt r1, 0xFFFF \n" // and high bits
"and r0, r0, r1 \n" // clearing WAND bit
"movts STATUS, r0 \n" // setting STATUS register
"ldr r0, [sp, -0x1] \n" // pop r0 from the stack
"ldr r1, [sp, -0x2] \n" // pop r1 from the stack
"gie \n" // enable interrupts
"rti \n" // PC = IRET, which returns to after WAND instr
: : : "cc"
);
// Control leaves through `rti` above, so the end of the function is never reached.
__builtin_unreachable();
}
/*
 * Install __shmem_wand_isr as the handler reached through the interrupt
 * vector slot at address 0x20 and unmask the corresponding interrupt.
 *
 * The slot receives a branch instruction: the byte distance from the slot to
 * the handler is halved and placed above the low 8 bits, which hold the 0xe8
 * branch opcode. Afterwards bit 8 of IMASK is cleared (mask 0xFFFFFEFF).
 *
 * The vector slot is written through a volatile pointer and the asm carries a
 * "memory" clobber so the compiler cannot sink the vector store below the
 * instructions that enable the interrupt.
 */
SHMEM_SCOPE void SHMEM_INLINE
__shmem_wand_barrier_init(void)
{
	volatile unsigned int *ivt = (volatile unsigned int*)0x20;
	*ivt = ((((unsigned int)__shmem_wand_isr - (unsigned int)ivt) >> 1) << 8) | 0xe8; // e8 = B<*> Branch Opcode
	__asm__ __volatile__ (
		"gie \n" // enable interrupts globally
		"mov r1, 0xFEFF \n" // low bits of NOT IRQ mask
		"movt r1, 0xFFFF \n" // and top bits
		"movfs r0, IMASK \n" // read IMASK register
		"and r0, r0, r1 \n" // clearing WAND interrupt mask bit
		"movts IMASK, r0 \n" // setting IMASK register
		: : : "r0", "r1", "cc", "memory"
	);
}
#else
/*
 * Precompute the remote flag addresses used by the dissemination barrier.
 *
 * For each round (distance 1, 2, 4, ... while the distance is below n_pes)
 * the partner is PE (my_pe + distance), wrapped around n_pes. The address of
 * that partner's barrier_sync[round], as returned by shmem_ptr(), is stored
 * in barrier_psync[round].
 */
SHMEM_SCOPE void SHMEM_INLINE
__shmem_dissemination_barrier_init(void)
{
	int round = 0;
	int dist = 1;
	while (dist < __shmem.n_pes) {
		int partner = __shmem.my_pe + dist;
		if (partner >= __shmem.n_pes) {
			partner -= __shmem.n_pes; // wrap around the ring of PEs
		}
		long *local_flag = (long*)(__shmem.barrier_sync + round);
		__shmem.barrier_psync[round] = (long*)shmem_ptr((void*)local_flag, partner);
		round++;
		dist <<= 1;
	}
}
#endif
#ifdef SHMEM_USE_IPI_GET
// Argument block consumed by __shmem_user_isr() when SHMEM_USE_IPI_GET is
// defined. Every field starts at zero except pmemcpy, which is preset to
// shmemx_memcpy8. The ISR calls pmemcpy(dest, source, nelems), stores 1
// through pcomplete and then clears lock.
// NOTE(review): who fills in these fields before raising the interrupt is not
// visible here - presumably the PE issuing the get; confirm against the caller.
shmem_ipi_args_t shmem_ipi_args = {
.lock = 0, // cleared by the ISR once the copy is done
.pmemcpy = (volatile void (*)(void*,const void*,size_t))shmemx_memcpy8, // copy routine invoked by the ISR
.source = 0, // passed to pmemcpy as the source pointer
.dest = 0, // passed to pmemcpy as the destination pointer
.nelems = 0, // passed to pmemcpy as its size_t argument
.pcomplete = 0, // location the ISR sets to 1 after the copy
.complete = 0 // not referenced by the ISR in this file
};
/*
 * Software-interrupt handler for the IPI-based get path.
 *
 * Runs the copy described by shmem_ipi_args (pmemcpy, dest, source, nelems),
 * then stores 1 through pcomplete and finally clears the lock field. The
 * completion store happens before the lock is released.
 */
SHMEM_SCOPE void __attribute__((interrupt ("swi")))
__shmem_user_isr(void)
{
	typedef void (*copy_fn_t)(void*, const void*, size_t);

	copy_fn_t copy = (copy_fn_t)shmem_ipi_args.pmemcpy;
	copy((void*)shmem_ipi_args.dest,
	     (const void*)shmem_ipi_args.source,
	     (size_t)shmem_ipi_args.nelems);

	*(shmem_ipi_args.pcomplete) = 1; // signal completion to the requester
	shmem_ipi_args.lock = 0;         // release the argument block
}
/*
 * Install __shmem_user_isr as the handler reached through the interrupt
 * vector slot at address 0x24 and unmask the corresponding interrupt.
 *
 * The slot receives a branch instruction: the byte distance from the slot to
 * the handler is halved and placed above the low 8 bits, which hold the 0xe8
 * branch opcode. Afterwards bit 9 of IMASK is cleared (mask 0xFFFFFDFF).
 *
 * The vector slot is written through a volatile pointer and the asm carries a
 * "memory" clobber so the compiler cannot sink the vector store below the
 * instructions that enable the interrupt.
 */
SHMEM_SCOPE void SHMEM_INLINE
__shmem_ipi_get_init (void)
{
	volatile unsigned int *ivt = (volatile unsigned int*)0x24;
	*ivt = ((((unsigned int)__shmem_user_isr - (unsigned int)ivt) >> 1) << 8) | 0xe8; // e8 = B<*> Branch Opcode
	__asm__ __volatile__ (
		"gie \n" // enable interrupts
		"mov r1, 0xFDFF \n" // low bits of NOT USER_INTERRUPT mask
		"movt r1, 0xFFFF \n" // and top bits
		"movfs r0, IMASK \n" // read IMASK register
		"and r0, r0, r1 \n" // clearing user interrupt mask bit
		"movts IMASK, r0 \n" // setting IMASK register
		: : : "r0", "r1", "cc", "memory"
	);
}
#endif
/*
 * Initialize the library state in __shmem for the calling PE.
 *
 * Steps, in order:
 *   1. Read the COREID register into __shmem.coreid.
 *   2. Determine n_pes and my_pe (from the COPRTHR runtime when
 *      __coprthr_device__ is defined, otherwise from e_group_config).
 *   3. Precompute ceil(log2(n_pes)) into n_pes_log2.
 *   4. Precompute dma_start and clear cdst0/cdst1.
 *   5. Set up the barrier implementation and, if enabled, the IPI get path.
 *   6. Record the high address bits of PE 0 for global locks.
 *   7. Set up the local heap and advance it to SHMEM_HEAP_START if it
 *      currently sits below that address.
 *   8. Synchronize all PEs before returning.
 */
SHMEM_SCOPE void
shmem_init(void)
{
	__asm__ __volatile__ (
		"movfs %[id], COREID \n" // storing COREID register value
		: [id] "=r" (__shmem.coreid)
	);
#if defined(__coprthr_device__)
	__shmem.n_pes = coprthr_get_num_threads();
	__shmem.my_pe = coprthr_get_thread_id();
#else
	__shmem.n_pes = e_group_config.group_rows * e_group_config.group_cols;
	// Core id relative to the workgroup origin: row in bits 6..11, column in bits 0..5.
	unsigned int coreid = __shmem.coreid - e_group_config.group_id;
	unsigned int row = (coreid >> 6) & 0x3f;
	unsigned int col = (coreid) & 0x3f;
	// Row-major numbering: consecutive rows are group_cols PEs apart. Using
	// group_rows here yields duplicate or out-of-range PE numbers whenever the
	// workgroup is not square.
	__shmem.my_pe = row*e_group_config.group_cols + col;
#endif
	// log2_ceil of n_pes precalculated once; reset first so that a repeated
	// call does not keep accumulating into the previous value
	__shmem.n_pes_log2 = 0;
	unsigned int x = __shmem.n_pes - 1;
	while (x > 0) {
		__shmem.n_pes_log2++;
		x >>= 1;
	}
	// Address of the DMA descriptor in the upper 16 bits, 0x8 in the low bits.
	__shmem.dma_start = ((int)(&__shmem.dma_desc) << 16) | 0x8;
	__shmem.cdst0 = __shmem.cdst1 = 0;
#ifdef SHMEM_USE_WAND_BARRIER
	__shmem_wand_barrier_init();
#else
	__shmem_dissemination_barrier_init();
#endif
#ifdef SHMEM_USE_IPI_GET
	__shmem_ipi_get_init();
#endif
	__shmem.lock_high_bits = (unsigned int)shmem_ptr(NULL, 0); // using PE 0 for all global locks
#if !defined(__coprthr_device__)
	extern char end;
	__shmem.free_mem = (intptr_t)&end; // This should already be double-word aligned
#endif
	__shmem.local_mem_base = (intptr_t)shmemx_sbrk(0);
	int stride = SHMEM_HEAP_START - (int)__shmem.local_mem_base;
	if (stride > 0) shmemx_sbrk(stride); // advance to SHMEM_HEAP_START address
#if defined(__coprthr_device__)
	shmem_sync_all();
#else // linear barrier
	// PE 0 writes 1 through barrier_psync[0]; every PE then spins until its own
	// barrier_sync[0] leaves SHMEM_SYNC_VALUE, the remaining PEs forward the
	// signal through their barrier_psync[0], and each PE resets its flag.
	// NOTE(review): in this file barrier_psync[] is only filled in by
	// __shmem_dissemination_barrier_init(); confirm it is valid on this path
	// when SHMEM_USE_WAND_BARRIER is defined.
	if (__shmem.n_pes == 1) return;
	if (!__shmem.my_pe) *__shmem.barrier_psync[0] = 1;
	while(*__shmem.barrier_sync == SHMEM_SYNC_VALUE);
	if (__shmem.my_pe) *__shmem.barrier_psync[0] = 1;
	*__shmem.barrier_sync = SHMEM_SYNC_VALUE;
#endif
}
#ifdef __cplusplus
}
#endif