diff --git a/src/sst/elements/vanadis/decoder/vdecoder.h b/src/sst/elements/vanadis/decoder/vdecoder.h
index e88228b3f3..eef5e3a0c6 100644
--- a/src/sst/elements/vanadis/decoder/vdecoder.h
+++ b/src/sst/elements/vanadis/decoder/vdecoder.h
@@ -215,7 +215,6 @@ class VanadisDecoder : public SST::SubComponent
     void setThreadLocalStoragePointer(uint64_t new_tls) { tls_ptr = new_tls; }
     uint64_t getThreadLocalStoragePointer() const { return tls_ptr; }
-    uint64_t getCycleCount() const { return cycle_count; }
     // VanadisCircularQueue* getDecodedQueue() { return
     // decoded_q; }
@@ -242,7 +241,6 @@ class VanadisDecoder : public SST::SubComponent
     uint32_t core;
     uint64_t tls_ptr;
-    uint64_t cycle_count;
     bool wantDelegatedLoad;
     VanadisCircularQueue* thread_rob;
diff --git a/src/sst/elements/vanadis/decoder/vmipsdecoder.h b/src/sst/elements/vanadis/decoder/vmipsdecoder.h
index 749e6608e5..c092844aa2 100644
--- a/src/sst/elements/vanadis/decoder/vmipsdecoder.h
+++ b/src/sst/elements/vanadis/decoder/vmipsdecoder.h
@@ -430,13 +430,11 @@ class VanadisMIPSDecoder : public VanadisDecoder
         stat_decode_cop1_eq = registerStatistic("ins_decode_cop1_eq", "1");
     }
-    virtual void tick(SST::Output* output, uint64_t cycle)
+    virtual void tick(SST::Output* output, uint64_t)
     {
         output->verbose(CALL_INFO, 16, VANADIS_DBG_DECODER_FLG, "-> Decode step for thr: %" PRIu32 "\n", hw_thr);
         output->verbose(CALL_INFO, 16, VANADIS_DBG_DECODER_FLG, "---> Max decodes per cycle: %" PRIu16 "\n", max_decodes_per_cycle);
-        cycle_count = cycle;
-
         ins_loader->printStatus(output);
         uint16_t decodes_performed = 0;
diff --git a/src/sst/elements/vanadis/decoder/vriscv64decoder.h b/src/sst/elements/vanadis/decoder/vriscv64decoder.h
index 670bb2a5ef..458391c56a 100644
--- a/src/sst/elements/vanadis/decoder/vriscv64decoder.h
+++ b/src/sst/elements/vanadis/decoder/vriscv64decoder.h
@@ -128,15 +128,13 @@ class VanadisRISCV64Decoder : public VanadisDecoder
     }
-    void tick(SST::Output* output, uint64_t cycle) override
+    void tick(SST::Output* output, uint64_t) override
     {
         if(output->getVerboseLevel() >= 16) {
             output->verbose(CALL_INFO, 16, 0, "-> Decode step for thr: %" PRIu32 "\n", hw_thr);
             output->verbose(CALL_INFO, 16, 0, "---> Max decodes per cycle: %" PRIu16 "\n", max_decodes_per_cycle);
         }
-        cycle_count = cycle;
-
         for ( uint16_t i = 0; i < max_decodes_per_cycle; ++i ) {
             if ( ! thread_rob->full() ) {
                 if ( ins_loader->hasBundleAt(ip) ) {
@@ -1107,16 +1105,29 @@ class VanadisRISCV64Decoder : public VanadisDecoder
             } break;
             default: {
+                using namespace Zicntr; // brings the CYCLE / TIME / INSTRET tags into scope for the cases below
                 uint64_t csrNum = uimm64 & 0xfff;
                 switch ( csrNum ) {
-                case 0xc00:
-                {
-                    if ( 0 == rs1 ) {
-                        auto thread_call = std::bind(&VanadisRISCV64Decoder::getCycleCount, this);
-                        bundle->addInstruction( new VanadisSetRegisterByCallInstruction( ins_address, hw_thr, options, rd, thread_call));
-                        decode_fault = false;
-                    }
-                } break;
+                case 0xc00: // RDCYCLE: only emitted when rs1 is 0, otherwise left as a decode fault
+                    if ( 0 == rs1 ) bundle->addInstruction( new VanadisReadCounterInstruction( CYCLE, ins_address, hw_thr, options, rd ) );
+                    decode_fault = 0 != rs1;
+                    break;
+
+                case 0xc01: // RDTIME: only emitted when rs1 is 0, otherwise left as a decode fault
+                    if ( 0 == rs1 ) bundle->addInstruction( new VanadisReadCounterInstruction( TIME, ins_address, hw_thr, options, rd ) );
+                    decode_fault = 0 != rs1;
+                    break;
+
+                case 0xc02: // RDINSTRET: only emitted when rs1 is 0, otherwise left as a decode fault
+                    if ( 0 == rs1 ) bundle->addInstruction( new VanadisReadCounterInstruction( INSTRET, ins_address, hw_thr, options, rd ) );
+                    decode_fault = 0 != rs1;
+                    break;
+
+                case 0xc80: // RDCYCLEH
+                case 0xc81: // RDTIMEH
+                case 0xc82: // RDINSTRETH
+                    output->verbose( CALL_INFO, 16, 0, "riscv64 does not support Zicntr [H] suffix\n" ); // decode_fault is not cleared here
+                    break;
                 }
             } break;
diff --git a/src/sst/elements/vanadis/inst/regfile.h b/src/sst/elements/vanadis/inst/regfile.h
index dc69dced9c..9a945dee02 100644
--- a/src/sst/elements/vanadis/inst/regfile.h
+++ b/src/sst/elements/vanadis/inst/regfile.h
@@ -52,6 +52,7 @@ class VanadisRegisterFile
     void init( ) {
         std::memset(int_reg_storage, 0, (int_reg_width * count_int_regs));
         std::memset(fp_reg_storage, 0, (fp_reg_width * count_fp_regs));
+        std::fill_n(counters, sizeof(counters) / sizeof(counters[0]), 0); // fill_n takes an element count, not a byte count
     }
     ~VanadisRegisterFile()
@@ -212,6 +213,18 @@ class VanadisRegisterFile
         }
     }
+    void incrementCounter(int i, uint64_t n = 1) // adds n to counters[i]; i must be a valid index into counters
+    {
+        assert(0 <= i and i < static_cast<int>(sizeof(counters) / sizeof(counters[0])));
+        counters[i] += n;
+    }
+
+    uint64_t getCounter(int i) const // returns counters[i]; i must be a valid index into counters
+    {
+        assert(0 <= i and i < static_cast<int>(sizeof(counters) / sizeof(counters[0])));
+        return counters[i];
+    }
+
 private:
     char* getIntReg(const uint16_t reg)
     {
@@ -263,6 +276,9 @@ class VanadisRegisterFile
     VanadisFPRegisterMode fp_reg_mode;
     const uint32_t fp_reg_width;
     const uint32_t int_reg_width;
+
+    // Counters from 8 https://github.com/riscv/riscv-isa-manual/releases/download/riscv-isa-release-f797123-2024-06-27/riscv-unprivileged.pdf
+    uint64_t counters[32];
 };
 } // namespace Vanadis
diff --git a/src/sst/elements/vanadis/inst/vinstall.h b/src/sst/elements/vanadis/inst/vinstall.h
index 736f5385c4..122816233f 100644
--- a/src/sst/elements/vanadis/inst/vinstall.h
+++ b/src/sst/elements/vanadis/inst/vinstall.h
@@ -120,4 +120,8 @@
 #include "inst/vfpclass.h"
 #include "inst/vmin.h"
+// Zicntr
+#include "inst/vzicntr.h"
+#include "inst/vzicntr_readcounter.h"
+
 #endif
diff --git a/src/sst/elements/vanadis/inst/vzicntr.h b/src/sst/elements/vanadis/inst/vzicntr.h
new file mode 100644
index 0000000000..1f76a3a756
--- /dev/null
+++ b/src/sst/elements/vanadis/inst/vzicntr.h
@@ -0,0 +1,17 @@
+
+#ifndef _H_VANADIS_ZICNTR
+#define _H_VANADIS_ZICNTR
+
+#include
+
+namespace SST::Vanadis
+{
+    // Tags to be used with the regfile and VanadisReadCounterInstruction
+    namespace Zicntr {
+        inline constexpr std::integral_constant CYCLE;
+        inline constexpr std::integral_constant TIME;
+        inline constexpr std::integral_constant INSTRET;
+    }
+} // namespace SST::Vanadis
+
+#endif
diff --git a/src/sst/elements/vanadis/inst/vzicntr_readcounter.h b/src/sst/elements/vanadis/inst/vzicntr_readcounter.h
new file mode 100644
index 0000000000..d20b1ab66d
--- /dev/null
+++ b/src/sst/elements/vanadis/inst/vzicntr_readcounter.h
@@ -0,0 +1,91 @@
+
+#ifndef _H_VANADIS_ZICNTR_READ_COUNTER
+#define _H_VANADIS_ZICNTR_READ_COUNTER
+
+#include "inst/vinst.h"
+#include "inst/vzicntr.h"
+
+namespace SST::Vanadis
+{
+    namespace Zicntr
+    {
+        template
+        class VanadisReadCounterInstruction : public VanadisInstruction // copies one register-file counter into an integer destination register
+        {
+            static_assert( id < 3 ); // rejects counter ids of 3 or more
+            static_assert( XLEN == 64 or XLEN == 32 ); // XLEN selects which part of the 64-bit count execute() writes
+            static_assert( XLEN != 64 or H == false ); // the high-half (H) variant is only allowed when XLEN is 32
+
+        public:
+            VanadisReadCounterInstruction(
+                const std::integral_constant, // unnamed counter tag; presumably only selects the instantiation - TODO confirm
+                const uint64_t addr,
+                const uint32_t hw_thr,
+                const VanadisDecoderOptions* isa_opts,
+                const uint16_t dest)
+                : VanadisInstruction(addr, hw_thr, isa_opts, 0, 1, 0, 1, 0, 0, 0, 0) // NOTE(review): the two 1s presumably declare one physical and one ISA integer output - confirm against VanadisInstruction
+            {
+                isa_int_regs_out[0] = dest; // the only ISA register this instruction names is its destination
+            }
+
+            VanadisReadCounterInstruction* clone() override
+            {
+                return new VanadisReadCounterInstruction(*this);
+            }
+
+            VanadisFunctionalUnitType getInstFuncType() const override
+            {
+                return INST_INT_ARITH; // Is this appropriate?
+            }
+
+            const char* getInstCode() const override
+            {
+                switch ( id ) {
+                case Zicntr::CYCLE: return H ? "RDCYCLEH" : "RDCYCLE";
+                case Zicntr::TIME: return H ? "RDTIMEH" : "RDTIME";
+                case Zicntr::INSTRET: return H ? "RDINSTRETH" : "RDINSTRET";
+                }
+                __builtin_unreachable(); // assumes every id accepted by the static_assert matches one of the three tags above
+            }
+
+            void printToBuffer(char* const buffer, const size_t buffer_size) override
+            {
+                snprintf(
+                    buffer, buffer_size,
+                    "%s %5" PRIu16 " (phys: %5" PRIu16 ")",
+                    getInstCode(), isa_int_regs_out[0], phys_int_regs_out[0]);
+            }
+
+            void execute(SST::Output* const output, VanadisRegisterFile* const regFile) override
+            {
+#ifdef VANADIS_BUILD_DEBUG
+                if(output->getVerboseLevel() >= 16) {
+                    output->verbose(
+                        CALL_INFO, 16, 0,
+                        "Execute: 0x%" PRI_ADDR " %s phys: out=%" PRIu16 ", isa: out=%" PRIu16 "\n",
+                        getInstructionAddress(), getInstCode(), phys_int_regs_out[0], isa_int_regs_out[0]);
+                }
+#endif
+
+                static constexpr uint64_t mask = 0x00000000'FFFFFFFF;
+                const uint64_t count64 = regFile->getCounter(id);
+                const uint32_t count32 = count64 & mask; // low 32 bits of the counter
+                const uint32_t count32H = count64 >> 32; // high 32 bits of the counter
+
+                if constexpr ( XLEN == 64 ) {
+                    regFile->setIntReg(phys_int_regs_out[0], count64);
+                }
+                else if constexpr ( XLEN == 32 and H ) {
+                    regFile->setIntReg(phys_int_regs_out[0], count32H);
+                }
+                else {
+                    regFile->setIntReg(phys_int_regs_out[0], count32);
+                }
+
+                markExecuted();
+            }
+        };
+    } // namespace Zicntr
+} // namespace SST::Vanadis
+
+#endif
diff --git a/src/sst/elements/vanadis/sst-vanadis-tracediff b/src/sst/elements/vanadis/sst-vanadis-tracediff
new file mode 100755
index 0000000000..911b2b5744
Binary files /dev/null and b/src/sst/elements/vanadis/sst-vanadis-tracediff differ
diff --git a/src/sst/elements/vanadis/tests/basic_vanadis.py b/src/sst/elements/vanadis/tests/basic_vanadis.py
index 492fe1ecca..fa101c95f2 100644
--- a/src/sst/elements/vanadis/tests/basic_vanadis.py
+++ b/src/sst/elements/vanadis/tests/basic_vanadis.py
@@ -55,6 +55,7 @@
 #exe = "uname"
 #exe = "mem-test"
 #exe = "checkpoint"
+#exe = "zicntr"
 physMemSize = "4GiB"
diff --git a/src/sst/elements/vanadis/tests/small/misc/zicntr/riscv64/zicntr b/src/sst/elements/vanadis/tests/small/misc/zicntr/riscv64/zicntr
new file mode 100755
index 0000000000..48b26d1f1f
Binary files /dev/null and b/src/sst/elements/vanadis/tests/small/misc/zicntr/riscv64/zicntr differ
diff --git a/src/sst/elements/vanadis/tests/small/misc/zicntr/zicntr.c b/src/sst/elements/vanadis/tests/small/misc/zicntr/zicntr.c
new file mode 100644
index 0000000000..699d821d50
--- /dev/null
+++ b/src/sst/elements/vanadis/tests/small/misc/zicntr/zicntr.c
@@ -0,0 +1,29 @@
+#include
+#include
+#include
+
+uint64_t read_cycles() { /* returns the value written by a single rdcycle instruction */
+    uint64_t cycles;
+    asm volatile ("rdcycle %0" : "=r" (cycles));
+    return cycles;
+}
+
+uint64_t read_time() { /* returns the value written by a single rdtime instruction */
+    uint64_t time;
+    asm volatile ("rdtime %0" : "=r" (time));
+    return time;
+}
+
+uint64_t read_instructions() { /* returns the value written by a single rdinstret instruction */
+    uint64_t instructions;
+    asm volatile ("rdinstret %0" : "=r" (instructions));
+    return instructions;
+}
+
+int main() /* reads each of the three counters once and prints them; the values are not checked */
+{
+    uint64_t cycles = read_cycles();
+    uint64_t time = read_time();
+    uint64_t instructions = read_instructions();
+    printf("cycles: %" PRIu64 " time: %" PRIu64 " instructions: %" PRIu64 "\n", cycles, time, instructions);
+}
diff --git a/src/sst/elements/vanadis/vanadis.cc b/src/sst/elements/vanadis/vanadis.cc
index 7e970d33e3..8c0ea02cab 100644
--- a/src/sst/elements/vanadis/vanadis.cc
+++ b/src/sst/elements/vanadis/vanadis.cc
@@ -959,6 +959,11 @@ VANADIS_COMPONENT::performRetire(int rob_num, VanadisCircularQueueincrementCounter(Zicntr::INSTRET);
+
     if ( perform_delay_cleanup ) {
         VanadisInstruction* delay_ins = rob->pop();
@@ -1299,7 +1304,7 @@ VANADIS_COMPONENT::tick(SST::Cycle_t cycle)
             if ( cnt ) {
                 auto thr = m_curRetireHwThread;
                 rc[thr] = performRetire(thr, rob[thr], cycle);
-
+                
                 ++m_curRetireHwThread;
                 m_curRetireHwThread %= hw_threads;
                 cnt = hw_threads;
@@ -1417,6 +1422,11 @@ VANADIS_COMPONENT::tick(SST::Cycle_t cycle)
 #endif
     current_cycle++;
+    for (VanadisRegisterFile* reg : register_files) { // one pass over every register file per tick
+        assert(reg);
+        reg->incrementCounter(Zicntr::CYCLE);
+        reg->incrementCounter(Zicntr::TIME); // NOTE(review): TIME advances by one per tick, in lockstep with CYCLE - confirm this is the intended time base
+    }
     uint64_t used_phys_int = 0;
     uint64_t used_phys_fp = 0;