Skip to content

Commit 7ef7f01

Browse files
authored
Merge branch 'dev' into sim-name
2 parents 953b8a8 + 4a90ea7 commit 7ef7f01

File tree

14 files changed

+264
-183
lines changed

14 files changed

+264
-183
lines changed

.github/workflows/long-tests.yml

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,8 @@ name: Long Tests
66
on:
77
# Triggers the workflow on merge-group events and on pushes, except pushes to merge-queue (gh-readonly-queue) branches
88
push:
9-
# pull_request:
9+
branches-ignore:
10+
- "gh-readonly-queue**"
1011
merge_group:
1112

1213
# Allows you to run this workflow manually from the Actions tab
@@ -74,7 +75,14 @@ jobs:
7475
./util/plotting/plot-correlation.py -c ./statistics-archive/ubench/ampere-ubench-sass.csv -H ./hw_run/AMPERE-RTX3070/11.2/ | tee ampere-ubench-correl.txt
7576
ssh ghci@tgrogers-pc01 mkdir -p /home/ghci/accel-sim/correl/git_${GITHUB_REF}"_"$GITHUB_RUN_NUMBER"_"$GITHUB_RUN_ATTEMPT/
7677
rsync --delete -r ./util/plotting/correl-html/ ghci@tgrogers-pc01:/home/ghci/accel-sim/correl/git_${GITHUB_REF}"_"$GITHUB_RUN_NUMBER"_"$GITHUB_RUN_ATTEMPT/
77-
echo "Correlation Report at: https://tgrogers-pc01.ecn.purdue.edu/github-ci/accel-sim/correl/git_${GITHUB_REF}"_"$GITHUB_RUN_NUMBER"_"$GITHUB_RUN_ATTEMPT/"
78+
BODY="Github CI - Build $GITHUB_REF SUCCESS.
79+
Action link: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
80+
Branch/PR Name: $GITHUB_REF_NAME
81+
Correlation Report at: https://tgrogers-pc01.ecn.purdue.edu/github-ci/accel-sim/correl/git_${GITHUB_REF}"_"$GITHUB_RUN_NUMBER"_"$GITHUB_RUN_ATTEMPT/."
82+
SUBJECT="Github CI - Build $GITHUB_REF SUCCESS"
83+
TO="${{ secrets.GROUP_EMAIL }}"
84+
echo "$BODY" | mail -s "$SUBJECT" "$TO"
85+
echo "Correlation Report at: https://tgrogers-pc01.ecn.purdue.edu/github-ci/accel-sim/correl/git_${GITHUB_REF}"_"$GITHUB_RUN_NUMBER"_"$GITHUB_RUN_ATTEMPT/."
7886
PTX-Simulation:
7987
if: github.repository == 'accel-sim/accel-sim-framework'
8088
runs-on: tgrogers-raid
@@ -102,7 +110,7 @@ jobs:
102110
source ./gpu-simulator/setup_environment.sh
103111
104112
rm -rf ./gpu-app-collection
105-
git clone git@github.com:accel-sim/gpu-app-collection.git
113+
git clone -b dev git@github.com:accel-sim/gpu-app-collection.git
106114
source ./gpu-app-collection/src/setup_environment
107115
srun -c20 make rodinia_2.0-ft GPU_Microbenchmark -j20 -C ./gpu-app-collection/src
108116
./gpu-app-collection/get_regression_data.sh

.github/workflows/short-tests.yml

Lines changed: 32 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -6,13 +6,15 @@ name: Short Tests
66
on:
77
# Triggers the workflow on pull requests and on pushes, except pushes to merge-queue (gh-readonly-queue) branches
88
push:
9+
branches-ignore:
10+
- "gh-readonly-queue**"
911
pull_request:
1012

1113
# Allows you to run this workflow manually from the Actions tab
1214
workflow_dispatch:
1315

1416
# A workflow run is made up of one or more jobs that can run sequentially or in parallel
15-
jobs:
17+
jobs:
1618
SASS-Simulation:
1719
runs-on: ubuntu-latest
1820
container:
@@ -27,25 +29,48 @@ jobs:
2729
run: /bin/bash $GITHUB_WORKSPACE/short-tests.sh
2830
PTX-Simulation:
2931
runs-on: ubuntu-latest
30-
container:
31-
image: tgrogers/accel-sim_regress:Ubuntu-22.04-cuda-11.7
3232
# env:
3333

3434
# Steps represent a sequence of tasks that will be executed as part of the job
3535
steps:
3636
# Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it
3737
- uses: actions/checkout@v4
3838
- name: Run Simulation
39-
run: echo "skipped SASS-Simulation. Will perform in merge queue"
39+
run: echo "skipped PTX-Simulation. Will perform in merge queue"
4040
Tracer-Tool:
4141
runs-on: ubuntu-latest
42-
container:
43-
image: tgrogers/accel-sim_regress:Ubuntu-22.04-cuda-11.7
4442
# env:
4543

4644
# Steps represent a sequence of tasks that will be executed as part of the job
4745
steps:
4846
# Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it
4947
- uses: actions/checkout@v4
5048
- name: Run Simulation
51-
run: echo "skipped SASS-Simulation. Will perform in merge queue"
49+
run: echo "skipped Tracer-Simulation. Will perform in merge queue"
50+
format-code:
51+
runs-on: ubuntu-latest
52+
needs: [SASS-Simulation, PTX-Simulation, Tracer-Tool]
53+
54+
permissions:
55+
# Give the default GITHUB_TOKEN write permission to commit and push the
56+
# added or changed files to the repository.
57+
contents: write
58+
59+
steps:
60+
- uses: actions/checkout@v4
61+
# Other steps that change files in the repository go here
62+
#
63+
- name: Run clang-format
64+
run: |
65+
sudo apt-get install -y clang-format
66+
./gpu-simulator/format-code.sh
67+
./util/tracer_nvbit/tracer_tool/format-code.sh
68+
69+
- uses: stefanzweifel/git-auto-commit-action@v5
70+
with:
71+
# Optional. Commit message for the created commit.
72+
# Defaults to "Apply automatic changes"
73+
commit_message: Automated clang-format
74+
# Optional. Option used by `git-status` to determine if the repository is
75+
# dirty. See https://git-scm.com/docs/git-status#_options
76+
status_options: '--untracked-files=no'

Jenkinsfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ pipeline {
4040
source ./gpu-simulator/setup_environment.sh
4141
4242
rm -rf ./gpu-app-collection
43-
git clone git@github.com:accel-sim/gpu-app-collection.git
43+
git clone -b dev git@github.com:accel-sim/gpu-app-collection.git
4444
source ./gpu-app-collection/src/setup_environment
4545
srun -c20 make rodinia_2.0-ft GPU_Microbenchmark -j20 -C ./gpu-app-collection/src
4646
./gpu-app-collection/get_regression_data.sh

gpu-simulator/ISA_Def/ampere_opcode.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -136,7 +136,7 @@ static const std::unordered_map<std::string, OpcodeChar> Ampere_OpcodeMap = {
136136
{"CCTLT", OpcodeChar(OP_CCTLT, ALU_OP)},
137137

138138
{"LDGDEPBAR", OpcodeChar(OP_LDGDEPBAR, ALU_OP)},
139-
{"LDGSTS", OpcodeChar(OP_LDGSTS, LOAD_OP)},
139+
{"LDGSTS", OpcodeChar(OP_LDGSTS, LOAD_OP)},
140140

141141
// Uniform Datapath Instruction
142142
// UDP unit

gpu-simulator/trace-driven/trace_driven.cc

Lines changed: 14 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -259,10 +259,10 @@ bool trace_warp_inst_t::parse_from_trace_struct(
259259
cache_op = CACHE_ALL;
260260
break;
261261
case OP_LDG:
262-
// LDGSTS is loading the values needed directly from the global memory to shared memory.
263-
// Before this feature, the values need to be loaded to registers first, then store to
264-
// the shared memory.
265-
case OP_LDGSTS: // Add for memcpy_async
262+
// LDGSTS loads the values needed directly from the global memory to
263+
// shared memory. Before this feature, the values need to be loaded to
264+
// registers first, then stored to the shared memory.
265+
case OP_LDGSTS: // Add for memcpy_async
266266
case OP_LDL:
267267
assert(data_size > 0);
268268
memory_op = memory_load;
@@ -272,8 +272,7 @@ bool trace_warp_inst_t::parse_from_trace_struct(
272272
else
273273
space.set_type(global_space);
274274
// Add for LDGSTS instruction
275-
if (m_opcode == OP_LDGSTS)
276-
m_is_ldgsts = true;
275+
if (m_opcode == OP_LDGSTS) m_is_ldgsts = true;
277276
// check the cache scope, if its strong GPU, then bypass L1
278277
if (trace.check_opcode_contain(opcode_tokens, "STRONG") &&
279278
trace.check_opcode_contain(opcode_tokens, "GPU")) {
@@ -369,19 +368,18 @@ bool trace_warp_inst_t::parse_from_trace_struct(
369368
// barrier_type bar_type;
370369
// reduction_type red_type;
371370
break;
372-
// LDGDEPBAR is to form a group containing the previous LDGSTS instructions that
373-
// have not been grouped yet.
374-
// In the implementation, a group number will be assigned once the instruction is
375-
// met.
371+
// LDGDEPBAR forms a group containing the previous LDGSTS instructions
372+
// that have not been grouped yet. In the implementation, a group number
373+
// will be assigned once the instruction is met.
376374
case OP_LDGDEPBAR:
377375
m_is_ldgdepbar = true;
378376
break;
379-
// DEPBAR is served as a warp-wise barrier that is only effective for LDGSTS
380-
// instructions. It is associated with a immediate value. The immediate value
381-
// indicates the last N LDGDEPBAR groups to not wait once the instruction is met.
382-
// For example, if the immediate value is 1, then the last group is able to proceed
383-
// even with DEPBAR present; if the immediate value is 0, then all of the groups
384-
// need to finish before proceed.
377+
// DEPBAR serves as a warp-wise barrier that is only effective for LDGSTS
378+
// instructions. It is associated with an immediate value. The immediate
379+
// value indicates the last N LDGDEPBAR groups to not wait once the
380+
// instruction is met. For example, if the immediate value is 1, then the
381+
// last group is able to proceed even with DEPBAR present; if the immediate
382+
// value is 0, then all of the groups need to finish before proceeding.
385383
case OP_DEPBAR:
386384
m_is_depbar = true;
387385
m_depbar_group_no = trace.imm;

gpu-simulator/trace-parser/trace_parser.cc

Lines changed: 22 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,8 @@
1212
#include <vector>
1313

1414
#include <errno.h>
15-
#include <unistd.h>
1615
#include <signal.h>
16+
#include <unistd.h>
1717

1818
#include "trace_parser.h"
1919

@@ -32,7 +32,10 @@ void split(const std::string &str, std::vector<std::string> &cont,
3232
}
3333
}
3434

35-
inst_trace_t::inst_trace_t() { memadd_info = NULL; imm = 0;}
35+
inst_trace_t::inst_trace_t() {
36+
memadd_info = NULL;
37+
imm = 0;
38+
}
3639

3740
inst_trace_t::~inst_trace_t() {
3841
if (memadd_info != NULL) delete memadd_info;
@@ -169,7 +172,7 @@ bool inst_trace_t::parse_from_string(std::string trace, unsigned trace_version,
169172
ss >> temp;
170173
sscanf(temp.c_str(), "R%d", &reg_src[i]);
171174
}
172-
175+
173176
// parse mem info
174177
unsigned address_mode = 0;
175178
unsigned mem_width = 0;
@@ -287,60 +290,60 @@ kernel_trace_t *trace_parser::parse_kernel_info(
287290

288291
std::string read_trace_cmd;
289292
int _l = kerneltraces_filepath.length();
290-
if(_l > 3 && kerneltraces_filepath.substr(_l-3, 3) == ".xz"){
293+
if (_l > 3 && kerneltraces_filepath.substr(_l - 3, 3) == ".xz") {
291294
// this is xz-compressed trace
292295
read_trace_cmd = "xz -dc " + kerneltraces_filepath;
293-
} else if(_l > 7 && kerneltraces_filepath.substr(_l-7, 7) == ".traceg"){
296+
} else if (_l > 7 && kerneltraces_filepath.substr(_l - 7, 7) == ".traceg") {
294297
// this is plain text trace
295-
read_trace_cmd ="cat " + kerneltraces_filepath;
298+
read_trace_cmd = "cat " + kerneltraces_filepath;
296299
} else {
297-
std::cerr << "Can't read trace. Only .xz and plain text are supported: "
298-
<< kerneltraces_filepath <<"\n";
300+
std::cerr << "Can't read trace. Only .xz and plain text are supported: "
301+
<< kerneltraces_filepath << "\n";
299302
exit(1);
300303
}
301304

302305
// Create an interprocess channel, and fork out a data source process. The
303306
// data source process reads trace from disk, writes to the channel, and the
304-
// simulator process read from the channel.
307+
// simulator process reads from the channel.
305308
int *pipefd = kernel_info->pipefd;
306-
if(pipe(pipefd) != 0){
309+
if (pipe(pipefd) != 0) {
307310
std::cerr << "Failed to create interprocess channel\n";
308311
perror("pipe");
309312
exit(1);
310313
}
311314

312315
pid_t pid = fork();
313-
if(pid == 0){
316+
if (pid == 0) {
314317
// The child process is the data source. Redirect its
315318
// stdout to the write end of the pipe.
316319
close(pipefd[0]);
317320
dup2(pipefd[1], STDOUT_FILENO);
318321

319322
// When using GDB, sending Ctrl+C to the simulator will send a SIGINT signal
320323
// to the child process as well, subsequently causing it to terminate. To
321-
// avoid this, we let the child process ignore (SIG_IGN) the SIGINT signal.
324+
// avoid this, we let the child process ignore (SIG_IGN) the SIGINT signal.
322325
// Reference:
323-
// https://stackoverflow.com/questions/38404925/gdb-interrupt-running-process-without-killing-child-processes
326+
// https://stackoverflow.com/questions/38404925/gdb-interrupt-running-process-without-killing-child-processes
324327
signal(SIGINT, SIG_IGN);
325328

326329
execle("/bin/sh", "sh", "-c", read_trace_cmd.c_str(), NULL, environ);
327-
perror("execle"); // the child process shouldn't reach here if all is well.
330+
perror("execle"); // the child process shouldn't reach here if all is well.
328331
exit(1);
329332
} else {
330333
// parent (simulator)
331334
close(pipefd[1]);
332335
dup2(pipefd[0], STDIN_FILENO);
333336
}
334-
335-
// Parent continues from here.
336-
kernel_info->ifs = &std::cin;
337+
338+
// Parent continues from here.
339+
kernel_info->ifs = &std::cin;
337340
std::istream *ifs = kernel_info->ifs;
338341

339342
std::cout << "Processing kernel " << kerneltraces_filepath << std::endl;
340343

341344
std::string line;
342345

343-
// Important to clear the istream. Otherwise, the eofbit from the last
346+
// Important to clear the istream. Otherwise, the eofbit from the last
344347
// kernel may be carried over to this kernel
345348
ifs->clear();
346349
clearerr(stdin);
@@ -417,7 +420,7 @@ void trace_parser::kernel_finalizer(kernel_trace_t *trace_info) {
417420
// The pipe read/write end file descriptors held by the child process would
418421
// have been automatically closed when it terminated. But the parent
419422
// process may read an arbitrary number of trace files, so it has to close
420-
// all file descriptors.
423+
// all file descriptors.
421424
close(trace_info->pipefd[0]);
422425
close(trace_info->pipefd[1]);
423426
delete trace_info;

gpu-simulator/trace-parser/trace_parser.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -99,12 +99,12 @@ struct kernel_trace_t {
9999
std::istream *ifs;
100100
// Anonymous pipe through which the trace is transmitted from a trace reader
101101
// process to the simulator process
102-
int pipefd[2]={};
102+
int pipefd[2] = {};
103103
};
104104

105105
class trace_parser {
106106
public:
107-
trace_parser(){}
107+
trace_parser() {}
108108
trace_parser(const char *kernellist_filepath);
109109

110110
std::vector<trace_command> parse_commandlist_file();

short-tests.sh

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
#!/bin/bash
22

3+
set -e
4+
35
if [ ! -n "$CUDA_INSTALL_PATH" ]; then
46
echo "ERROR ** Install CUDA Toolkit and set CUDA_INSTALL_PATH.";
57
exit;

util/job_launching/apps/define-all-apps.yml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,13 @@ GPU_Microbenchmark:
9292
- shared_lat:
9393
- args:
9494
accel-sim-mem: 1G
95+
- shared_bank_conflicts:
96+
## argument 1 kernel has conflicts
97+
- args: 1
98+
accel-sim-mem: 1G
99+
## argument 2 kernel doesn't have conflicts
100+
- args: 2
101+
accel-sim-mem: 1G
95102
- MaxFlops:
96103
- args:
97104
accel-sim-mem: 1G

util/job_launching/stats/example_stats.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,8 @@ collect_aggregate:
2424
- 'total dram reads\s*=\s*(.*)'
2525
- 'total dram writes\s*=\s*(.*)'
2626
- 'kernel_launch_uid\s*=\s*(.*)'
27+
- 'gpgpu_n_shmem_bkconflict\s*=\s*(.*)'
28+
- 'gpgpu_n_l1cache_bkconflict\s*=\s*(.*)'
2729

2830

2931
# These stats are reset each kernel and should not be diff'd

0 commit comments

Comments
 (0)