Commit 0242152

gufi_query sbatch wrapper
changed gufi_query_distributed to print sbatch job ids at the end for easy querying
removed gufi_distributed main
test updates
1 parent 0ffc1d4 commit 0242152

11 files changed: +228 -91 lines changed

scripts/CMakeLists.txt

Lines changed: 5 additions & 4 deletions

@@ -115,11 +115,12 @@ install(PROGRAMS "${CMAKE_CURRENT_BINARY_DIR}/querydbs" DESTINATION "${BIN}" COM
 # python files for doing distributed operations (server only)
 set(DISTRIBUTED
     gufi_dir2index.sbatch      # sbatch wrapper for gufi_dir2index_distributed
-    gufi_dir2index_distributed # executable only
+    gufi_dir2index_distributed # executable
     gufi_dir2trace.sbatch      # sbatch wrapper for gufi_dir2trace_distributed
-    gufi_dir2trace_distributed # executable only
-    gufi_distributed.py        # also executable
-    gufi_query_distributed     # executable only
+    gufi_dir2trace_distributed # executable
+    gufi_distributed.py        # library (installed into BIN)
+    gufi_query.sbatch          # sbatch wrapper for gufi_query_distributed
+    gufi_query_distributed     # executable
 )

 foreach(DIST ${DISTRIBUTED})

scripts/gufi_dir2trace_distributed

Lines changed: 3 additions & 3 deletions

@@ -76,13 +76,13 @@ def schedule_subtree(args, idx, start, end):
         '-y', str(args.level),
         '-D', start, end,
         args.input_dir,
-        '{}.{}'.format(args.output_prefix, idx),
+        '{0}.{1}'.format(args.output_prefix, idx),
     ]

     return gufi_distributed.run_slurm(cmd, args.dry_run)

 def schedule_top(args, jobids):
-    print(" Index upper directories up to and including level {}".format(args.level - 1))
+    print(" Index upper directories up to and including level {0}".format(args.level - 1))

     cmd = [args.sbatch, '--nodes=1']
     cmd += gufi_distributed.depend_on_slurm_jobids(jobids)
@@ -126,7 +126,7 @@ def main():
     gufi_distributed.distribute_work(args.input_dir, args.level, args.nodes,
                                      lambda idx, start, end: schedule_subtree(args, idx, start, end),
                                      lambda jobids: schedule_top(args, jobids))
-    print('Index can now be created from "{}.*"'.format(args.output_prefix))
+    print('Index can now be created from "{0}.*"'.format(args.output_prefix))

 if __name__ == '__main__':
     main()

scripts/gufi_distributed.py

Lines changed: 8 additions & 21 deletions

@@ -112,10 +112,10 @@ def dir_plural(count):
 # print debug messages
 # run function to schedule jobs if it exists
 def schedule_subtrees(dir_count, splits, group_size, groups, schedule_subtree):
-    print('Splitting {} {} into {} chunks of max size {}'.format(dir_count,
-                                                                 dir_plural(dir_count),
-                                                                 splits,
-                                                                 group_size))
+    print('Splitting {0} {1} into {2} chunks of max size {3}'.format(dir_count,
+                                                                     dir_plural(dir_count),
+                                                                     splits,
+                                                                     group_size))

     jobids = []
     for i, group in enumerate(groups):
@@ -124,8 +124,8 @@ def schedule_subtrees(dir_count, splits, group_size, groups, schedule_subtree):
         if count == 0:
             break

-        print(' Range {}: {} {}'.format(i, count, dir_plural(count)))
-        print(' {} {}'.format(group[0], group[-1]))
+        print(' Range {0}: {1} {2}'.format(i, count, dir_plural(count)))
+        print(' {0} {1}'.format(group[0], group[-1]))

         if schedule_subtree is not None:
             jobid = schedule_subtree(i, os.path.basename(group[0]), os.path.basename(group[-1]))
@@ -151,7 +151,8 @@ def distribute_work(root, level, nodes, schedule_subtree_func, schedule_top_func
     dirs = dirs_at_level(root, level)
     group_size, groups = group_dirs(dirs, nodes)
     jobids = schedule_subtrees(len(dirs), nodes, group_size, groups, schedule_subtree_func)
-    schedule_top(schedule_top_func, jobids)
+    jobids += [schedule_top(schedule_top_func, jobids).decode()]
+    return jobids

 def dir_arg(path):
     if not os.path.isdir(path):
@@ -177,17 +178,3 @@ def parse_args(name, desc):
                         help='Number nodes to split work across')

     return parser # allow for others to use this parser
-
-def main():
-    parser = parse_args('gufi_distributed.py',
-                        'Library for distributing GUFI work across nodes')
-    parser.add_argument('tree',
-                        type=dir_arg,
-                        help='Tree to walk')
-    args = parser.parse_args()
-
-    distribute_work(args.tree, args.level, args.nodes, None,
-                    lambda _jobids: print(" Processing upper directories up to and including level {}".format(args.level - 1)))
-
-if __name__ == '__main__':
-    main()

scripts/gufi_query.sbatch

Lines changed: 66 additions & 0 deletions

@@ -0,0 +1,66 @@
+#!/usr/bin/env bash
+# This file is part of GUFI, which is part of MarFS, which is released
+# under the BSD license.
+#
+#
+# Copyright (c) 2017, Los Alamos National Security (LANS), LLC
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without modification,
+# are permitted provided that the following conditions are met:
+#
+# 1. Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation and/or
+# other materials provided with the distribution.
+#
+# 3. Neither the name of the copyright holder nor the names of its contributors
+# may be used to endorse or promote products derived from this software without
+# specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+# IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+# INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+# OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+# ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+#
+# From Los Alamos National Security, LLC:
+# LA-CC-15-039
+#
+# Copyright (c) 2017, Los Alamos National Security, LLC All rights reserved.
+# Copyright 2017. Los Alamos National Security, LLC. This software was produced
+# under U.S. Government contract DE-AC52-06NA25396 for Los Alamos National
+# Laboratory (LANL), which is operated by Los Alamos National Security, LLC for
+# the U.S. Department of Energy. The U.S. Government has rights to use,
+# reproduce, and distribute this software. NEITHER THE GOVERNMENT NOR LOS
+# ALAMOS NATIONAL SECURITY, LLC MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR
+# ASSUMES ANY LIABILITY FOR THE USE OF THIS SOFTWARE. If software is
+# modified to produce derivative works, such modified software should be
+# clearly marked, so as not to confuse it with the version available from
+# LANL.
+#
+# THIS SOFTWARE IS PROVIDED BY LOS ALAMOS NATIONAL SECURITY, LLC AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL LOS ALAMOS NATIONAL SECURITY, LLC OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
+# OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+# IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
+# OF SUCH DAMAGE.
+
+
+
+set -e
+
+gufi_query "$@"
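
The wrapper only forwards its arguments, but running gufi_query through a script submitted to sbatch means the query's stdout is captured in the job's slurm output file rather than requiring a per-job -o prefix. A minimal sketch of one per-subtree submission, mirroring the cmd list that schedule_work() builds (the thread count, level, range names, and index path are placeholders, not values from this commit):

    # sketch only: submit one subtree query through the wrapper; values are hypothetical
    sbatch --nodes=1 gufi_query.sbatch \
        -n 2 -y 1 -D dir_a dir_z \
        -S "SELECT rpath(sname, sroll) FROM vrsummary;" \
        /path/to/index
    # gufi_query's stdout ends up in this job's slurm output file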

scripts/gufi_query_distributed

Lines changed: 10 additions & 9 deletions

@@ -98,28 +98,26 @@ def add_args(args):
     return cmd

 # run gufi_query via sbatch
-def schedule_work(args, idx, start, end):
+def schedule_work(args, _idx, start, end):
     cmd = [
         args.sbatch, '--nodes=1',
         args.gufi_query,
         '-n', str(args.threads),
         '-y', str(args.level),
         '-D', start, end,
-        '-o', '{}.{}'.format(args.output_prefix, idx),
     ] + add_args(args)

     return gufi_distributed.run_slurm(cmd, args.dry_run)

 def schedule_top(args, jobids):
-    print(" Query upper directories up to and including level {}".format(args.level - 1))
+    print(" Query upper directories up to and including level {0}".format(args.level - 1))

     cmd = [args.sbatch, '--nodes=1']
     cmd += gufi_distributed.depend_on_slurm_jobids(jobids)
     cmd += [
         args.gufi_query,
         '-n', str(args.threads),
         '-z', str(args.level - 1),
-        '-o', args.output_prefix,
     ] + add_args(args)

     return gufi_distributed.run_slurm(cmd, args.dry_run)
@@ -131,7 +129,7 @@ def parse_args():
     parser.add_argument('--dry-run', action='store_true')
     parser.add_argument('--gufi_query', metavar='path',
                         type=str,
-                        default='gufi_query')
+                        default='gufi_query.sbatch')
     parser.add_argument('--threads', '-n', metavar='n',
                         type=gufi_common.get_positive,
                         default=1)
@@ -157,10 +155,13 @@ def parse_args():
 def main():
     args = parse_args()

-    gufi_distributed.distribute_work(args.GUFI_index, args.level, args.nodes,
-                                     lambda idx, start, end: schedule_work(args, idx, start, end),
-                                     lambda jobids: schedule_top(args, jobids))
-    print('Run "cat {}.*" to get complete output'.format(args.output_prefix))
+    jobids = gufi_distributed.distribute_work(args.GUFI_index, args.level, args.nodes,
+                                              lambda idx, start, end: schedule_work(args, idx, start, end),
+                                              lambda jobids: schedule_top(args, jobids))
+
+    print('cat the following slurm job output files to get complete results:')
+    for jobid in jobids:
+        print(' {0}'.format(jobid))

 if __name__ == '__main__':
     main()
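
With the per-job -o prefixes removed, each sbatch job's gufi_query output lands in that job's slurm output file, and main() now prints the job ids so results can be collected afterwards. A rough sketch of gathering them, assuming slurm's default slurm-<jobid>.out naming and made-up job ids:

    # collect distributed query results from the job ids printed by gufi_query_distributed
    jobids=(1234 1235 1236)     # placeholder ids; use the ones the script prints
    for jobid in "${jobids[@]}"; do
        cat "slurm-${jobid}.out"
    done | sort > combined_results.txt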

test/regression/CMakeLists.txt

Lines changed: 2 additions & 3 deletions

@@ -65,6 +65,8 @@ set(CORE
     generatetree.sh
     hexlify.py
     os_specific.sh
+    sbatch.no-output
+    sbatch.output
     setup.sh
 )

@@ -192,9 +194,6 @@ else()
     message(STATUS "SQLAlchemy not found.")
 endif()

-# for testing work distribution code
-configure_file(sbatch.fake sbatch.fake COPYONLY)
-
 # .sh and .expected
 set(SH_AND_EXPECTED
     ${BINARIES}

test/regression/gufi_distributed.expected

Lines changed: 3 additions & 10 deletions

@@ -27,10 +27,9 @@ Splitting 4 directories into 5 chunks of max size 1
 Range 3: 1 directory
 prefix2/unusual#? directory , prefix2/unusual#? directory ,
 Query upper directories up to and including level 0
-Run "cat results.*" to get complete output
-
+cat the following slurm job output files to get complete results:
 # Gather Results
-$ cat results.0.0 results.1.0 results.2.0 results.3.0 results.0 results.0.1 results.1.1 results.2.1 results.3.1 results.1
+$ cat output files
 prefix2
 prefix2/.hidden
 prefix2/1KB
@@ -53,8 +52,7 @@ prefix2/unusual#? directory ,
 prefix2/unusual#? directory ,/unusual, name?#

 # Diff original index results against the combined results
-$ diff <(gufi_query -S "SELECT rpath(sname, sroll) FROM vrsummary;" -E "SELECT rpath(sname, sroll) || '/' || name FROM vrpentries;" "prefix" | sort) <(cat results.0.0 results.1.0 results.2.0 results.3.0 results.0 results.0.1 results.1.1 results.2.1 results.3.1 results.1 | sort | sed 's/search2\/prefix/search\/prefix/g')
-
+$ diff <(gufi_query -S "SELECT rpath(sname, sroll) FROM vrsummary;" -E "SELECT rpath(sname, sroll) || '/' || name FROM vrpentries;" "prefix" | sort) <(echo results | sed 's/search2\/prefix/search\/prefix/g')
 # Diff original index results against querying the new index from a single node
 $ diff <(gufi_query -S "SELECT rpath(sname, sroll) FROM vrsummary;" -E "SELECT rpath(sname, sroll) || '/' || name FROM vrpentries;" "prefix" | sort) <(gufi_query -S "SELECT rpath(sname, sroll) FROM vrsummary;" -E "SELECT rpath(sname, sroll) || '/' || name FROM vrpentries;" "prefix2" | sort | sed 's/search2\/prefix/search\/prefix/g')

@@ -98,8 +96,3 @@ prefix/unusual#? directory ,/unusual, name?#
 # Diff original index results against the trace files
 $ diff <(gufi_query -d " " -S "SELECT rpath(sname, sroll) FROM vrsummary;" -E "SELECT rpath(sname, sroll) || '/' || name FROM vrpentries;" "prefix" | sort | sed 's/search\///g') <(cat traces.0.0 traces.1.0 traces.2.0 traces.3.0 traces.0 | awk -F'|' '{ print $1 }' | sort)

-# Non-directory input
-$ gufi_distributed.py 1 1 gufi_distributed.py
-usage: gufi_distributed.py [-h] [--version] [--sbatch exec] level nodes tree
-gufi_distributed.py: error: argument tree: Bad directory: gufi_distributed.py
-
test/regression/gufi_distributed.sh.in

Lines changed: 24 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -74,8 +74,6 @@ TRACES="traces"
7474
LEVEL=1
7575
RANKS=5
7676

77-
QUERIES="-S \"SELECT rpath(sname, sroll) FROM vrsummary;\" -E \"SELECT rpath(sname, sroll) || '/' || name FROM vrpentries;\""
78-
7977
OUTPUT="gufi_distributed.out"
8078

8179
files() {
@@ -102,20 +100,14 @@ files() {
102100
echo "${list[@]}"
103101
}
104102

105-
result_files() {
106-
files RESULTS
107-
}
108-
109103
trace_files() {
110104
files TRACES
111105
}
112106

113107
cleanup() {
114-
# shellcheck disable=SC2207
115-
results=($(result_files))
116108
# shellcheck disable=SC2207
117109
traces=($(trace_files))
118-
rm -rf "${SEARCH2}" "${results[@]}" "${traces[@]}"
110+
rm -rf "${SEARCH2}" "${traces[@]}"
119111
}
120112

121113
cleanup_exit() {
@@ -127,34 +119,46 @@ trap cleanup_exit EXIT
127119

128120
cleanup
129121

130-
run_no_sort "${GUFI_DISTRIBUTED} --help"
131122
run_no_sort "${GUFI_DIR2INDEX_DISTRIBUTED} --help"
132123
run_no_sort "${GUFI_DIR2TRACE_DISTRIBUTED} --help"
133124
run_no_sort "${GUFI_QUERY_DISTRIBUTED} --help"
134125

135126
(
136127
echo "# Index Source Tree"
137-
run_no_sort "${GUFI_DIR2INDEX_DISTRIBUTED} --sbatch \"${SBATCH_FAKE}\" --gufi_dir2index \"${GUFI_DIR2INDEX}\" ${LEVEL} ${RANKS} \"${SRCDIR}\" \"${SEARCH2}\""
128+
run_no_sort "${GUFI_DIR2INDEX_DISTRIBUTED} --sbatch \"${SBATCH_NO_OUTPUT}\" --gufi_dir2index \"${GUFI_DIR2INDEX}\" ${LEVEL} ${RANKS} \"${SRCDIR}\" \"${SEARCH2}\""
138129

139130
echo "# Query Index"
140-
run_no_sort "${GUFI_QUERY_DISTRIBUTED} --sbatch \"${SBATCH_FAKE}\" --gufi_query \"${GUFI_QUERY}\" --threads ${THREADS} --output-prefix \"${RESULTS}\" ${LEVEL} ${RANKS} \"${INDEXROOT2}\" ${QUERIES}"
131+
gqd_output=$(run_no_sort "${GUFI_QUERY_DISTRIBUTED} --sbatch \"${SBATCH_OUTPUT}\" --gufi_query \"${GUFI_QUERY}\" --threads ${THREADS} --output-prefix \"${RESULTS}\" ${LEVEL} ${RANKS} \"${INDEXROOT2}\" -S \"SELECT rpath(sname, sroll) FROM vrsummary;\" -E \"SELECT rpath(sname, sroll) || '/' || name FROM vrpentries;\"")
132+
echo "${gqd_output}" | head -n -5
133+
134+
mapfile -t jobids < <(echo "${gqd_output}" | @SED@ -n '/cat the following/,$p' | tail -n +2 | @SED@ 's/ //g')
141135

142136
echo "# Gather Results"
143-
results=("$(result_files)")
144-
# shellcheck disable=SC2145
145-
run_sort "cat ${results[@]}"
137+
filenames=()
138+
for jobid in "${jobids[@]}"
139+
do
140+
filenames+=("slurm.fake-${jobid}.out")
141+
done
142+
results=$(cat "${filenames[@]}" | sort)
143+
144+
echo "$ cat output files"
145+
echo "${results}"
146+
echo
146147

147148
echo "# Diff original index results against the combined results"
148149
# shellcheck disable=SC2153
149-
run_sort "@DIFF@ <(${GUFI_QUERY} ${QUERIES} \"${INDEXROOT}\" | sort) <(cat $(result_files) | sort | sed 's/${INDEXROOT2//\//\\/}/${INDEXROOT//\//\\/}/g')"
150+
echo "$ @DIFF@ <(${GUFI_QUERY} -S \"SELECT rpath(sname, sroll) FROM vrsummary;\" -E \"SELECT rpath(sname, sroll) || '/' || name FROM vrpentries;\" \"${INDEXROOT}\" | sort) <(echo results | sed 's/${INDEXROOT2//\//\\/}/${INDEXROOT//\//\\/}/g')" | replace
151+
# shellcheck disable=SC2001
152+
@DIFF@ <("${GUFI_QUERY}" -S "SELECT rpath(sname, sroll) FROM vrsummary;" -E "SELECT rpath(sname, sroll) || '/' || name FROM vrpentries;" "${INDEXROOT}" | sort) <(echo "${results}" | sed "s/${INDEXROOT2//\//\\/}/${INDEXROOT//\//\\/}/g")
150153

151154
echo "# Diff original index results against querying the new index from a single node"
152-
run_sort "@DIFF@ <(${GUFI_QUERY} ${QUERIES} \"${INDEXROOT}\" | sort) <(${GUFI_QUERY} ${QUERIES} \"${INDEXROOT2}\" | sort | sed 's/${INDEXROOT2//\//\\/}/${INDEXROOT//\//\\/}/g')"
155+
run_no_sort "@DIFF@ <(${GUFI_QUERY} -S \"SELECT rpath(sname, sroll) FROM vrsummary;\" -E \"SELECT rpath(sname, sroll) || '/' || name FROM vrpentries;\" \"${INDEXROOT}\" | sort) <(${GUFI_QUERY} -S \"SELECT rpath(sname, sroll) FROM vrsummary;\" -E \"SELECT rpath(sname, sroll) || '/' || name FROM vrpentries;\" \"${INDEXROOT2}\" | sort | sed 's/${INDEXROOT2//\//\\/}/${INDEXROOT//\//\\/}/g')"
153156

154-
cleanup
157+
# cleanup
158+
rm -rf "${jobids[@]}"
155159

156160
echo "# Convert source tree to trace files"
157-
run_no_sort "${GUFI_DIR2TRACE_DISTRIBUTED} --sbatch \"${SBATCH_FAKE}\" --gufi_dir2trace \"${GUFI_DIR2TRACE}\" -d \"${DELIM}\" ${LEVEL} ${RANKS} \"${SRCDIR}\" \"${TRACES}\""
161+
run_no_sort "${GUFI_DIR2TRACE_DISTRIBUTED} --sbatch \"${SBATCH_NO_OUTPUT}\" --gufi_dir2trace \"${GUFI_DIR2TRACE}\" -d \"${DELIM}\" ${LEVEL} ${RANKS} \"${SRCDIR}\" \"${TRACES}\""
158162

159163
traces=("$(trace_files)")
160164

@@ -164,11 +168,7 @@ run_sort "cat ${traces[@]} | @AWK@ -F'${DELIM}' '{ print \$1 }'"
164168

165169
echo "# Diff original index results against the trace files"
166170
# shellcheck disable=SC2145,SC2153
167-
run_no_sort "@DIFF@ <(${GUFI_QUERY} -d \" \" ${QUERIES} \"${INDEXROOT}\" | sort | @SED@ 's/${SEARCH//\//\\/}\\///g') <(cat ${traces[@]} | @AWK@ -F'${DELIM}' '{ print \$1 }' | sort)"
168-
169-
echo "# Non-directory input"
170-
run_no_sort "${GUFI_DISTRIBUTED} 1 1 ${GUFI_DISTRIBUTED}"
171-
171+
run_no_sort "@DIFF@ <(${GUFI_QUERY} -d \" \" -S \"SELECT rpath(sname, sroll) FROM vrsummary;\" -E \"SELECT rpath(sname, sroll) || '/' || name FROM vrpentries;\" \"${INDEXROOT}\" | sort | @SED@ 's/${SEARCH//\//\\/}\\///g') <(cat ${traces[@]} | @AWK@ -F'${DELIM}' '{ print \$1 }' | sort)"
172172
) 2>&1 | @SED@ "s/${INDEXROOT2//\//\\/}/prefix2/g" | tee "${OUTPUT}"
173173

174174
@DIFF@ @CMAKE_CURRENT_BINARY_DIR@/gufi_distributed.expected "${OUTPUT}"
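
sbatch.output and sbatch.no-output themselves are not shown in this commit. From the way the test consumes them, sbatch.output presumably prints a fake job id and redirects the wrapped command's output to slurm.fake-<jobid>.out; the sketch below is only a guess at that shape, not the file's actual contents:

    #!/usr/bin/env bash
    # hypothetical stand-in for test/regression/sbatch.output
    set -e
    while [[ "$1" == --* ]]; do shift; done    # drop sbatch flags such as --nodes=1
    jobid="${RANDOM}"                          # stand-in for a real slurm job id
    "$@" > "slurm.fake-${jobid}.out" 2>&1      # run the wrapped command, capture its output
    echo "${jobid}"                            # report the id (the exact format run_slurm expects is an assumption)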
