Skip to content

Commit

Permalink
Auto clang format (accel-sim#322)
Browse files Browse the repository at this point in the history
* add clang auto-commit

* Automated clang-format

* use default checkout ref

* Format after SASS tests

---------

Co-authored-by: barnes88 <[email protected]>
Co-authored-by: JRPAN <[email protected]>
  • Loading branch information
3 people committed Sep 3, 2024
1 parent 4e073be commit 778953b
Show file tree
Hide file tree
Showing 8 changed files with 225 additions and 171 deletions.
31 changes: 29 additions & 2 deletions .github/workflows/short-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ on:
workflow_dispatch:

# A workflow run is made up of one or more jobs that can run sequentially or in parallel
jobs:
jobs:
SASS-Simulation:
runs-on: ubuntu-latest
container:
Expand Down Expand Up @@ -46,4 +46,31 @@ jobs:
# Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it
- uses: actions/checkout@v4
- name: Run Simulation
run: echo "skipped Tracer-Simulation. Will perform in merge queue"
run: echo "skipped Tracer-Simulation. Will perform in merge queue"
format-code:
runs-on: ubuntu-latest
needs: [SASS-Simulation, PTX-Simulation, Tracer-Tool]

permissions:
# Give the default GITHUB_TOKEN write permission to commit and push the
# added or changed files to the repository.
contents: write

steps:
- uses: actions/checkout@v4
# Other steps that change files in the repository go here
#
- name: Run clang-format
run: |
sudo apt-get install -y clang-format
./gpu-simulator/format-code.sh
./util/tracer_nvbit/tracer_tool/format-code.sh
- uses: stefanzweifel/git-auto-commit-action@v5
with:
# Optional. Commit message for the created commit.
# Defaults to "Apply automatic changes"
commit_message: Automated clang-format
# Optional. Option used by `git-status` to determine if the repository is
# dirty. See https://git-scm.com/docs/git-status#_options
status_options: '--untracked-files=no'
2 changes: 1 addition & 1 deletion gpu-simulator/ISA_Def/ampere_opcode.h
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ static const std::unordered_map<std::string, OpcodeChar> Ampere_OpcodeMap = {
{"CCTLT", OpcodeChar(OP_CCTLT, ALU_OP)},

{"LDGDEPBAR", OpcodeChar(OP_LDGDEPBAR, ALU_OP)},
{"LDGSTS", OpcodeChar(OP_LDGSTS, LOAD_OP)},
{"LDGSTS", OpcodeChar(OP_LDGSTS, LOAD_OP)},

// Uniform Datapath Instruction
// UDP unit
Expand Down
30 changes: 14 additions & 16 deletions gpu-simulator/trace-driven/trace_driven.cc
Original file line number Diff line number Diff line change
Expand Up @@ -259,10 +259,10 @@ bool trace_warp_inst_t::parse_from_trace_struct(
cache_op = CACHE_ALL;
break;
case OP_LDG:
// LDGSTS is loading the values needed directly from the global memory to shared memory.
// Before this feature, the values need to be loaded to registers first, then store to
// the shared memory.
case OP_LDGSTS: // Add for memcpy_async
// LDGSTS is loading the values needed directly from the global memory to
// shared memory. Before this feature, the values need to be loaded to
// registers first, then store to the shared memory.
case OP_LDGSTS: // Add for memcpy_async
case OP_LDL:
assert(data_size > 0);
memory_op = memory_load;
Expand All @@ -272,8 +272,7 @@ bool trace_warp_inst_t::parse_from_trace_struct(
else
space.set_type(global_space);
// Add for LDGSTS instruction
if (m_opcode == OP_LDGSTS)
m_is_ldgsts = true;
if (m_opcode == OP_LDGSTS) m_is_ldgsts = true;
// check the cache scope, if its strong GPU, then bypass L1
if (trace.check_opcode_contain(opcode_tokens, "STRONG") &&
trace.check_opcode_contain(opcode_tokens, "GPU")) {
Expand Down Expand Up @@ -369,19 +368,18 @@ bool trace_warp_inst_t::parse_from_trace_struct(
// barrier_type bar_type;
// reduction_type red_type;
break;
// LDGDEPBAR is to form a group containing the previous LDGSTS instructions that
// have not been grouped yet.
// In the implementation, a group number will be assigned once the instruction is
// met.
// LDGDEPBAR is to form a group containing the previous LDGSTS instructions
// that have not been grouped yet. In the implementation, a group number
// will be assigned once the instruction is met.
case OP_LDGDEPBAR:
m_is_ldgdepbar = true;
break;
// DEPBAR is served as a warp-wise barrier that is only effective for LDGSTS
// instructions. It is associated with a immediate value. The immediate value
// indicates the last N LDGDEPBAR groups to not wait once the instruction is met.
// For example, if the immediate value is 1, then the last group is able to proceed
// even with DEPBAR present; if the immediate value is 0, then all of the groups
// need to finish before proceed.
// DEPBAR is served as a warp-wise barrier that is only effective for LDGSTS
// instructions. It is associated with a immediate value. The immediate
// value indicates the last N LDGDEPBAR groups to not wait once the
// instruction is met. For example, if the immediate value is 1, then the
// last group is able to proceed even with DEPBAR present; if the immediate
// value is 0, then all of the groups need to finish before proceed.
case OP_DEPBAR:
m_is_depbar = true;
m_depbar_group_no = trace.imm;
Expand Down
41 changes: 22 additions & 19 deletions gpu-simulator/trace-parser/trace_parser.cc
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@
#include <vector>

#include <errno.h>
#include <unistd.h>
#include <signal.h>
#include <unistd.h>

#include "trace_parser.h"

Expand All @@ -32,7 +32,10 @@ void split(const std::string &str, std::vector<std::string> &cont,
}
}

inst_trace_t::inst_trace_t() { memadd_info = NULL; imm = 0;}
inst_trace_t::inst_trace_t() {
memadd_info = NULL;
imm = 0;
}

inst_trace_t::~inst_trace_t() {
if (memadd_info != NULL) delete memadd_info;
Expand Down Expand Up @@ -169,7 +172,7 @@ bool inst_trace_t::parse_from_string(std::string trace, unsigned trace_version,
ss >> temp;
sscanf(temp.c_str(), "R%d", &reg_src[i]);
}

// parse mem info
unsigned address_mode = 0;
unsigned mem_width = 0;
Expand Down Expand Up @@ -287,60 +290,60 @@ kernel_trace_t *trace_parser::parse_kernel_info(

std::string read_trace_cmd;
int _l = kerneltraces_filepath.length();
if(_l > 3 && kerneltraces_filepath.substr(_l-3, 3) == ".xz"){
if (_l > 3 && kerneltraces_filepath.substr(_l - 3, 3) == ".xz") {
// this is xz-compressed trace
read_trace_cmd = "xz -dc " + kerneltraces_filepath;
} else if(_l > 7 && kerneltraces_filepath.substr(_l-7, 7) == ".traceg"){
} else if (_l > 7 && kerneltraces_filepath.substr(_l - 7, 7) == ".traceg") {
// this is plain text trace
read_trace_cmd ="cat " + kerneltraces_filepath;
read_trace_cmd = "cat " + kerneltraces_filepath;
} else {
std::cerr << "Can't read trace. Only .xz and plain text are supported: "
<< kerneltraces_filepath <<"\n";
std::cerr << "Can't read trace. Only .xz and plain text are supported: "
<< kerneltraces_filepath << "\n";
exit(1);
}

// Create an interprocess channel, and fork out a data source process. The
// data source process reads trace from disk, write to the channel, and the
// simulator process read from the channel.
// simulator process read from the channel.
int *pipefd = kernel_info->pipefd;
if(pipe(pipefd) != 0){
if (pipe(pipefd) != 0) {
std::cerr << "Failed to create interprocess channel\n";
perror("pipe");
exit(1);
}

pid_t pid = fork();
if(pid == 0){
if (pid == 0) {
// The child process is the data source. Redirect its
// stdout to the write end of the pipe.
close(pipefd[0]);
dup2(pipefd[1], STDOUT_FILENO);

// When using GDB, sending Ctrl+C to the simulator will send a SIGINT signal
// to the child process as well, subsequently causing it to terminate. To
// avoid this, we let the child process ignore (SIG_IGN) the SIGINT signal.
// avoid this, we let the child process ignore (SIG_IGN) the SIGINT signal.
// Reference:
// https://stackoverflow.com/questions/38404925/gdb-interrupt-running-process-without-killing-child-processes
// https://stackoverflow.com/questions/38404925/gdb-interrupt-running-process-without-killing-child-processes
signal(SIGINT, SIG_IGN);

execle("/bin/sh", "sh", "-c", read_trace_cmd.c_str(), NULL, environ);
perror("execle"); // the child process shouldn't reach here if all is well.
perror("execle"); // the child process shouldn't reach here if all is well.
exit(1);
} else {
// parent (simulator)
close(pipefd[1]);
dup2(pipefd[0], STDIN_FILENO);
}
// Parent continues from here.
kernel_info->ifs = &std::cin;

// Parent continues from here.
kernel_info->ifs = &std::cin;
std::istream *ifs = kernel_info->ifs;

std::cout << "Processing kernel " << kerneltraces_filepath << std::endl;

std::string line;

// Important to clear the istream. Otherwise, the eofbit from the last
// Important to clear the istream. Otherwise, the eofbit from the last
// kernel may be carried over to this kernel
ifs->clear();
clearerr(stdin);
Expand Down Expand Up @@ -417,7 +420,7 @@ void trace_parser::kernel_finalizer(kernel_trace_t *trace_info) {
// The pipe read/write end file descriptors held by the child process would
// have been automatically closed when it terminated. But the parent
// process may read an arbitrary amount of trace files, so it has to close
// all file descriptors.
// all file descriptors.
close(trace_info->pipefd[0]);
close(trace_info->pipefd[1]);
delete trace_info;
Expand Down
2 changes: 1 addition & 1 deletion gpu-simulator/trace-parser/trace_parser.h
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ struct kernel_trace_t {
std::istream *ifs;
// Anonymous pipe through which the trace is transmitted from a trace reader
// process to the simulator process
int pipefd[2]={};
int pipefd[2] = {};
};

class trace_parser {
Expand Down
15 changes: 8 additions & 7 deletions util/tracer_nvbit/tracer_tool/inject_funcs.cu
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,14 @@
* extern "C" __device__ __noinline__
* To prevent "dead"-code elimination by the compiler.
*/
extern "C" __device__ __noinline__ void instrument_inst(
int pred, int opcode_id, int32_t vpc, bool is_mem, uint64_t addr,
int32_t width, int32_t desReg, int32_t srcReg1, int32_t srcReg2,
int32_t srcReg3, int32_t srcReg4, int32_t srcReg5, int32_t srcNum,
uint64_t immediate,
uint64_t pchannel_dev, uint64_t ptotal_dynamic_instr_counter,
uint64_t preported_dynamic_instr_counter, uint64_t pstop_report, uint32_t line_num) {
extern "C" __device__ __noinline__ void
instrument_inst(int pred, int opcode_id, int32_t vpc, bool is_mem,
uint64_t addr, int32_t width, int32_t desReg, int32_t srcReg1,
int32_t srcReg2, int32_t srcReg3, int32_t srcReg4,
int32_t srcReg5, int32_t srcNum, uint64_t immediate,
uint64_t pchannel_dev, uint64_t ptotal_dynamic_instr_counter,
uint64_t preported_dynamic_instr_counter, uint64_t pstop_report,
uint32_t line_num) {

const int active_mask = __ballot_sync(__activemask(), 1);
const int predicate_mask = __ballot_sync(__activemask(), pred);
Expand Down
Loading

0 comments on commit 778953b

Please sign in to comment.