Skip to content

Commit 1d6f40e

Browse files
authored
Merge pull request #488 from nspark/feature/scan
Add inclusive and exclusive scan (prefix sum) operations
2 parents a41b519 + 8d65b65 commit 1d6f40e

File tree

3 files changed

+136
-0
lines changed

3 files changed

+136
-0
lines changed

content/shmem_scan.tex

Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,121 @@
1+
\apisummary {
2+
Performs inclusive or exclusive prefix sum operations.
3+
}
4+
5+
\begin{apidefinition}
6+
7+
%% C11
8+
\begin{C11synopsis}
9+
int @\FuncDecl{shmem\_sum\_inscan}@(shmem_team_t team, TYPE *dest, const TYPE *source, size_t nreduce);
10+
int @\FuncDecl{shmem\_sum\_exscan}@(shmem_team_t team, TYPE *dest, const TYPE *source, size_t nreduce);
11+
\end{C11synopsis}
12+
where \TYPE{} is one of the integer, real, or complex types supported
13+
for the SUM operation as specified by Table~\ref{teamreducetypes}.
14+
15+
%% C/C++
16+
\begin{Csynopsis}
17+
int @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_sum\_inscan}@(shmem_team_t team, TYPE *dest, const TYPE *source, size_t nreduce);
18+
int @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_sum\_exscan}@(shmem_team_t team, TYPE *dest, const TYPE *source, size_t nreduce);
19+
\end{Csynopsis}
20+
where \TYPE{} is one of the integer, real, or complex types supported
21+
for the SUM operation and has a corresponding \TYPENAME{} as specified
22+
by Table~\ref{teamreducetypes}.
23+
24+
\begin{apiarguments}
25+
\apiargument{IN}{team}{
26+
The team over which to perform the operation.
27+
}
28+
\apiargument{OUT}{dest}{
29+
Symmetric address of an array, of length \VAR{nreduce} elements,
30+
to receive the results of the scan operations. The type of
31+
\dest{} should match that implied in the SYNOPSIS section.
32+
}
33+
\apiargument{IN}{source}{
34+
Symmetric address of an array, of length \VAR{nreduce} elements,
35+
that contains one element for each separate scan operation.
36+
The type of \source{} should match that implied in the SYNOPSIS
37+
section.
38+
}
39+
\apiargument{IN}{nreduce}{
40+
The number of elements in the \dest{} and \source{} arrays.
41+
}
42+
\end{apiarguments}
43+
44+
\apidescription{
45+
46+
The \FUNC{shmem\_sum\_inscan} and \FUNC{shmem\_sum\_exscan} routines
47+
are collective routines over an \openshmem team that compute one or
48+
more scan (or prefix sum) operations across symmetric arrays on
49+
multiple \acp{PE}. The scan operations are performed with the SUM
50+
operator.
51+
52+
The \VAR{nreduce} argument determines the number of separate scan
53+
operations to perform. The \source{} array on all \acp{PE}
54+
participating in the operation provides one element for each scan.
55+
The results of the scan operations are placed in the \dest{} array
56+
on all \acp{PE} participating in the scan.
57+
58+
The \FUNC{shmem\_sum\_inscan} routine performs an inclusive scan
59+
operation, while the \FUNC{shmem\_sum\_exscan} routine performs an
60+
exclusive scan operation.
61+
62+
For \FUNC{shmem\_sum\_inscan}, the value of the $j$-th element in
63+
the \VAR{dest} array on \ac{PE}~$i$ is defined as:
64+
\begin{equation*}
65+
\textrm{dest}_{i,j} = \displaystyle\sum_{k=0}^{i} \textrm{source}_{k,j}
66+
\end{equation*}
67+
68+
For \FUNC{shmem\_sum\_exscan}, the value of the $j$-th element in
69+
the \VAR{dest} array on \ac{PE}~$i$ is defined as:
70+
\begin{equation*}
71+
\textrm{dest}_{i,j} =
72+
\begin{cases}
73+
\displaystyle\sum_{k=0}^{i-1} \textrm{source}_{k,j}, & \text{if} \; i \neq 0 \\
74+
0, & \text{if} \; i = 0
75+
\end{cases}
76+
\end{equation*}
77+
78+
The \source{} and \dest{} arguments must either be the same
79+
symmetric address, or two different symmetric addresses
80+
corresponding to buffers that do not overlap in memory. That is,
81+
they must be completely overlapping or completely disjoint.
82+
83+
Team-based scan routines operate over all \acp{PE} in the provided
84+
team argument. All \acp{PE} in the provided team must participate in
85+
the scan operation. If \VAR{team} compares equal to
86+
\LibConstRef{SHMEM\_TEAM\_INVALID} or is otherwise invalid, the
87+
behavior is undefined.
88+
89+
Before any \ac{PE} calls a scan routine, the \dest{} array on all
90+
\acp{PE} participating in the operation must be ready to accept the
91+
results of the operation. Otherwise, the behavior is undefined.
92+
93+
Upon return from a scan routine, the following are true for the
94+
local \ac{PE}: the \dest{} array is updated, and the \source{} array
95+
may be safely reused.
96+
97+
When the \Cstd translation environment does not support complex
98+
types, an \openshmem implementation is not required to provide
99+
support for these complex-typed interfaces.
100+
}
101+
102+
\apireturnvalues{
103+
Zero on successful local completion. Nonzero otherwise.
104+
}
105+
106+
\begin{apiexamples}
107+
108+
\apicexample{
109+
In the following \Cstd[11] example, the \FUNC{collect\_at}
110+
function gathers a variable amount of data from each \ac{PE} and
111+
concatenates it, in order, at the target \ac{PE} \VAR{who}. Note
112+
that this routine is behaviorally similar to
113+
\FUNC{shmem\_collect}, except that this routine only gathers the
114+
data to a single \ac{PE}.
115+
}
116+
{./example_code/shmem_scan_example.c}
117+
{}
118+
119+
\end{apiexamples}
120+
121+
\end{apidefinition}

example_code/shmem_scan_example.c

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
#include <shmem.h>
2+
3+
/*
 * Gathers a variable number of bytes from each PE in `team` and
 * concatenates them, in order, into `dest` on PE `who`.
 *
 *   team   - team over which the exclusive scan and syncs are performed
 *   dest   - base address of the receive buffer on the target PE
 *            (NOTE(review): must be remotely accessible for shmem_putmem)
 *   source - local data contributed by the calling PE
 *   nbytes - number of bytes contributed by the calling PE
 *   who    - PE that receives the data; passed unchanged to shmem_putmem
 *
 * Returns the value returned by shmem_sum_exscan (zero on success).
 */
int collect_at(shmem_team_t team, void *dest, const void *source, size_t nbytes, int who) {
  /* Scratch variable used as both input and output of the in-place scan. */
  static size_t sym_nbytes;
  sym_nbytes = nbytes;

  /* Make sure sym_nbytes is ready on every PE before any PE enters the scan. */
  shmem_team_sync(team);

  /* The exclusive prefix sum of the per-PE sizes is this PE's byte offset in dest. */
  int rc = shmem_sum_exscan(team, &sym_nbytes, &sym_nbytes, 1);

  /* Only write when the scan succeeded; otherwise the offset is not meaningful. */
  if (rc == 0) {
    /* Byte arithmetic via char * avoids depending on uintptr_t from <stdint.h>. */
    shmem_putmem((char *)dest + sym_nbytes, source, nbytes, who);
    shmem_quiet();
  }

  /* Every PE reaches this sync, regardless of rc, so the collective calls match. */
  shmem_team_sync(team);
  return rc;
}

main_spec.tex

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -424,6 +424,9 @@ \subsubsection{\textbf{SHMEM\_COLLECT, SHMEM\_FCOLLECT}}\label{subsec:shmem_coll
424424
\subsubsection{\textbf{SHMEM\_REDUCTIONS}}\label{subsec:shmem_reductions}
425425
\input{content/shmem_reductions.tex}
426426

427+
\subsubsection{\textbf{SHMEM\_SCAN}}\label{subsec:shmem_scan}
428+
\input{content/shmem_scan.tex}
429+
427430

428431

429432

0 commit comments

Comments
 (0)