From 4314ea5c2bd5b477661b0ce35fdaca3f0250e0a1 Mon Sep 17 00:00:00 2001 From: Spencer-Comin Date: Tue, 5 Sep 2023 10:37:36 -0400 Subject: [PATCH 1/6] zNext facility checks Signed-off-by: Spencer Comin --- runtime/compiler/z/env/J9CPU.cpp | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/runtime/compiler/z/env/J9CPU.cpp b/runtime/compiler/z/env/J9CPU.cpp index 95cee671bdb..5f1df25c99e 100644 --- a/runtime/compiler/z/env/J9CPU.cpp +++ b/runtime/compiler/z/env/J9CPU.cpp @@ -113,6 +113,14 @@ J9::Z::CPU::customize(OMRProcessorDesc processorDescription) omrsysinfo_processor_set_feature(&processorDescription, OMR_FEATURE_S390_VECTOR_PACKED_DECIMAL_ENHANCEMENT_FACILITY_2, FALSE); } + if (processorDescription.processor < OMR_PROCESSOR_S390_ZNEXT) + { + omrsysinfo_processor_set_feature(&processorDescription, OMR_FEATURE_S390_MISCELLANEOUS_INSTRUCTION_EXTENSION_4, FALSE); /* MIE4 is the facility added for zNext; MIE3 predates it and must not be cleared here */ + omrsysinfo_processor_set_feature(&processorDescription, OMR_FEATURE_S390_VECTOR_FACILITY_ENHANCEMENT_3, FALSE); + omrsysinfo_processor_set_feature(&processorDescription, OMR_FEATURE_S390_PLO_EXTENSION, FALSE); + omrsysinfo_processor_set_feature(&processorDescription, OMR_FEATURE_S390_VECTOR_PACKED_DECIMAL_ENHANCEMENT_FACILITY_3, FALSE); + } + // This variable is used internally by the j9sysinfo macros below and cannot be folded away J9PortLibrary* privatePortLibrary = TR::Compiler->portLib; @@ -160,7 +168,11 @@ J9::Z::CPU::enableFeatureMasks() OMR_FEATURE_S390_VECTOR_FACILITY_ENHANCEMENT_2, OMR_FEATURE_S390_MISCELLANEOUS_INSTRUCTION_EXTENSION_3, OMR_FEATURE_S390_VECTOR_PACKED_DECIMAL_ENHANCEMENT_FACILITY, - OMR_FEATURE_S390_VECTOR_PACKED_DECIMAL_ENHANCEMENT_FACILITY_2}; + OMR_FEATURE_S390_VECTOR_PACKED_DECIMAL_ENHANCEMENT_FACILITY_2, + OMR_FEATURE_S390_MISCELLANEOUS_INSTRUCTION_EXTENSION_4, + OMR_FEATURE_S390_VECTOR_FACILITY_ENHANCEMENT_3, + OMR_FEATURE_S390_PLO_EXTENSION, + OMR_FEATURE_S390_VECTOR_PACKED_DECIMAL_ENHANCEMENT_FACILITY_3}; memset(_supportedFeatureMasks.features, 
0, OMRPORT_SYSINFO_FEATURES_SIZE*sizeof(uint32_t)); OMRPORT_ACCESS_FROM_OMRPORT(TR::Compiler->omrPortLib); From 17f5dda7377cdc660acf91e662011097dda274c9 Mon Sep 17 00:00:00 2001 From: Spencer-Comin Date: Tue, 31 Oct 2023 12:25:49 -0400 Subject: [PATCH 2/6] Add emulation support for zNext instructions When running on non zNext hardware, emulating zNext instructions allows testing the exploitation of zNext instructions. Following are the summary of changes being made with this commit. - Add infrastructure to emulate instructions - Emulate following zNext instructions 1. Load indexed address 2. Bit deposit and extract 3. Count leading and trailing zeros Co-authored-by: Dhruv Chopra Co-authored-by: Shubham Verma Co-authored-by: Spencer Comin Signed-off-by: Spencer Comin --- runtime/cmake/caches/common.cmake | 1 + runtime/compiler/runtime/SignalHandler.c | 19 +- runtime/compiler/z/runtime/CMakeLists.txt | 6 + runtime/compiler/z/runtime/Emulation.cpp | 251 ++++++++++++++++++++++ 4 files changed, 273 insertions(+), 4 deletions(-) create mode 100644 runtime/compiler/z/runtime/Emulation.cpp diff --git a/runtime/cmake/caches/common.cmake b/runtime/cmake/caches/common.cmake index 92b30f2b579..fda57f1e106 100644 --- a/runtime/cmake/caches/common.cmake +++ b/runtime/cmake/caches/common.cmake @@ -216,3 +216,4 @@ set(J9VM_THR_LOCK_RESERVATION ON CACHE BOOL "") set(J9VM_THR_PREEMPTIVE ON CACHE BOOL "") set(J9VM_THR_SMART_DEFLATION ON CACHE BOOL "") set(J9VM_OPT_OPENJDK_FFI ON CACHE BOOL "") +set(J9VM_JIT_EMULATE_ZNEXT OFF CACHE BOOL "Enable ZNext emulation") diff --git a/runtime/compiler/runtime/SignalHandler.c b/runtime/compiler/runtime/SignalHandler.c index cdcdab5fe7f..4507febccf1 100644 --- a/runtime/compiler/runtime/SignalHandler.c +++ b/runtime/compiler/runtime/SignalHandler.c @@ -564,11 +564,11 @@ UDATA jitPPCHandler(J9VMThread* vmThread, U_32 sigType, void* sigInfo) *iarPtr = (UDATA) ((void *) &jitHandleInternalErrorTrap); #endif return 
J9PORT_SIG_EXCEPTION_CONTINUE_EXECUTION; - + } else if (J9PORT_SIG_FLAG_SIGTRAP == sigType) { IDATA trapType = jitPPCIdentifyCodeCacheTrapType((U_8 *) *iarPtr); - + switch (trapType) { case TRAP_TYPE_NULL_CHECK: @@ -937,8 +937,19 @@ UDATA restoreSystemStackPointerState(J9VMThread* vmThread, U_32 sigType, void* s return J9PORT_SIG_EXCEPTION_CONTINUE_EXECUTION; } +#ifdef EMULATE_ZNEXT +extern int jitS390Emulation(J9VMThread* vmThread, void* sigInfo); +#endif + UDATA jit390Handler(J9VMThread* vmThread, U_32 sigType, void* sigInfo) { +#ifdef EMULATE_ZNEXT + if (J9PORT_SIG_FLAG_SIGILL == sigType && jitS390Emulation(vmThread, sigInfo) == 0) + { + return J9PORT_SIG_EXCEPTION_CONTINUE_EXECUTION; + } +#endif + PORT_ACCESS_FROM_VMC(vmThread); J9JITConfig *jitConfig = vmThread->javaVM->jitConfig; @@ -1251,13 +1262,13 @@ UDATA jit390Handler(J9VMThread* vmThread, U_32 sigType, void* sigInfo) /* add one to *controlPC for symmetry with IA32, handler check subs one */ jit390SetTrapHandler(controlPC, entryPointRegister, (void *) &jitHandleNullPointerExceptionTrap); return restoreSystemStackPointerState(vmThread, sigType, sigInfo); - + case TRAP_TYPE_INTERNAL_ERROR: vmThread->jitException = (J9Object *) (controlPCValue + 1); /* add one to *controlPC for symmetry with IA32, handler check subs one */ jit390SetTrapHandler(controlPC, entryPointRegister, (void *) &jitHandleInternalErrorTrap); return restoreSystemStackPointerState(vmThread, sigType, sigInfo); - + case TRAP_TYPE_ARRAY_BOUNDS: vmThread->jitException = (J9Object *) (controlPCValue + 1); /* add one to *controlPC for symmetry with IA32, handler check subs one */ diff --git a/runtime/compiler/z/runtime/CMakeLists.txt b/runtime/compiler/z/runtime/CMakeLists.txt index a9eb8b60cea..46ad5102094 100644 --- a/runtime/compiler/z/runtime/CMakeLists.txt +++ b/runtime/compiler/z/runtime/CMakeLists.txt @@ -65,3 +65,9 @@ j9jit_files( ${CMAKE_CURRENT_BINARY_DIR}/Recompilation.s ${CMAKE_CURRENT_BINARY_DIR}/ValueProf.s ) + 
+if(J9VM_JIT_EMULATE_ZNEXT) + j9jit_files( + z/runtime/Emulation.cpp + ) +endif() diff --git a/runtime/compiler/z/runtime/Emulation.cpp b/runtime/compiler/z/runtime/Emulation.cpp new file mode 100644 index 00000000000..1a673fba02f --- /dev/null +++ b/runtime/compiler/z/runtime/Emulation.cpp @@ -0,0 +1,251 @@ +/******************************************************************************* + * Copyright IBM Corp. and others 2023 + * + * This program and the accompanying materials are made available under + * the terms of the Eclipse Public License 2.0 which accompanies this + * distribution and is available at https://www.eclipse.org/legal/epl-2.0/ + * or the Apache License, Version 2.0 which accompanies this distribution and + * is available at https://www.apache.org/licenses/LICENSE-2.0. + * + * This Source Code may also be made available under the following + * Secondary Licenses when the conditions for such availability set + * forth in the Eclipse Public License, v. 2.0 are satisfied: GNU + * General Public License, version 2 with the GNU Classpath + * Exception [1] and GNU General Public License, version 2 with the + * OpenJDK Assembly Exception [2]. 
+ * + * [1] https://www.gnu.org/software/classpath/license.html + * [2] https://openjdk.org/legal/assembly-exception.html + * + * SPDX-License-Identifier: EPL-2.0 OR Apache-2.0 OR GPL-2.0-only WITH Classpath-exception-2.0 OR GPL-2.0-only WITH OpenJDK-assembly-exception-1.0 + *******************************************************************************/ + +#include +#include + +#include "j9protos.h" +#include "j9cfg.h" + +#ifdef LINUX + +#include "../omr/port/linuxs390/omrsignal_context.h" + +#include "infra/Bit.hpp" + +class InstEmulator + { + public: + static InstEmulator *decode(uint8_t *pc); + virtual void emulate(mcontext_t *cpu) {} + }; + +class LXAEmulator : public InstEmulator + { + private: + uint8_t r1, x2, b2; + uint32_t dx2; + uint8_t shift; + bool isLogical; + public: + LXAEmulator(uint8_t *start); + virtual void emulate(mcontext_t *cpu); + }; + +LXAEmulator::LXAEmulator(uint8_t *start) + { + r1 = (start[1] & 0xF0) >> 4; + x2 = start[1] & 0x0F; + b2 = (start[2] & 0xF0) >> 4; + uint32_t dxl2 = (uint32_t) ((*(uint16_t *)(start+2)) & 0x0FFF); + uint32_t dxh2 = start[4]; + dx2 = dxl2 | (dxh2 << 12); + shift = (start[5] & 0xE) >> 1; + isLogical = (start[5] & 1) == 1; + } + +void LXAEmulator::emulate(mcontext_t *cpu) + { + int64_t addr; + int32_t tmp = (int32_t) dx2; + + // sign extend immediate + if (tmp & 0x80000) + { + tmp |= 0xFFF00000; + } + + if (x2 != 0) + { + tmp += cpu->gregs[x2]; + } + + if (isLogical) + { + addr = (int64_t)(uint32_t) tmp; + } + else + { + addr = (int64_t) tmp; + } + + addr <<= shift; + addr += (b2 != 0) ? cpu->gregs[b2] : 0; /* a B2 field of 0 selects no base register, mirroring the x2 != 0 guard above; GPR0 must not be added */ + cpu->gregs[r1] = addr; + } + +class BDEPGEmulator : public InstEmulator + { + private: + uint8_t r1, r2, r3; + public: + BDEPGEmulator(uint8_t *start); + virtual void emulate(mcontext_t *cpu); + }; + +BDEPGEmulator::BDEPGEmulator(uint8_t *start) + { + r1 = (start[3]&0xF0) >> 4; + r2 = start[3]&0x0F; + r3 = (start[2]&0xF0) >> 4; + } + +void BDEPGEmulator::emulate(mcontext_t *cpu) + { + uint64_t val = cpu->gregs[r2]; + 
uint64_t mask = cpu->gregs[r3]; + uint64_t res = 0; + + for (int n = 0; mask; n++) + { + if (mask & (1ULL << 63)) + { + res |= (val & (1ULL << 63)) >> n; + val <<= 1; + } + mask <<= 1; + } + + cpu->gregs[r1] = res; + } + +class BEXTGEmulator : public InstEmulator + { + private: + uint8_t r1, r2, r3; + public: + BEXTGEmulator(uint8_t *start); + virtual void emulate(mcontext_t *cpu); + }; + +BEXTGEmulator::BEXTGEmulator(uint8_t *start) + { + r1 = (start[3]&0xF0) >> 4; + r2 = start[3]&0x0F; + r3 = (start[2]&0xF0) >> 4; + } + +void BEXTGEmulator::emulate(mcontext_t *cpu) + { + uint64_t val = cpu->gregs[r2]; + uint64_t mask = cpu->gregs[r3]; + uint64_t res = 0; + + for (int k = 0; mask; mask <<= 1, val <<= 1) + { + if (mask & (1ULL<<63)) + { + res |= (val & (1ULL<<63)) >> k; + k++; + } + } + + cpu->gregs[r1] = res; + } + +class CLZGEmulator : public InstEmulator + { + private: + uint8_t r1, r2; + public: + CLZGEmulator(uint8_t *start); + virtual void emulate(mcontext_t *cpu); + }; + +CLZGEmulator::CLZGEmulator(uint8_t *start) + { + r1 = (start[3]&0xF0) >> 4; + r2 = start[3]&0x0F; + } + +void CLZGEmulator::emulate(mcontext_t *cpu) + { + cpu->gregs[r1] = (uint64_t)leadingZeroes(cpu->gregs[r2]); + } + +class CTZGEmulator : public InstEmulator + { + private: + uint8_t r1, r2; + public: + CTZGEmulator(uint8_t *start); + virtual void emulate(mcontext_t *cpu); + }; + +CTZGEmulator::CTZGEmulator(uint8_t *start) + { + r1 = (start[3]&0xF0) >> 4; + r2 = start[3]&0x0F; + } + +void CTZGEmulator::emulate(mcontext_t *cpu) + { + cpu->gregs[r1] = (uint64_t)trailingZeroes(cpu->gregs[r2]); + } + +InstEmulator *InstEmulator::decode(uint8_t *pc) + { + if (pc[-6] == 0xE3 && (pc[-1]&0xF0) == 0x60 && (pc[-1]&0x0F) < 10) + { + return new LXAEmulator(pc-6); + } + else if (*(uint16_t*)(pc-4) == 0xB96D) + { + return new BDEPGEmulator(pc-4); + } + else if (*(uint16_t*)(pc-4) == 0xB96C) + { + return new BEXTGEmulator(pc-4); + } + else if (*(uint16_t*)(pc-4) == 0xB968) + { + return new 
CLZGEmulator(pc-4); + } + else if (*(uint16_t*)(pc-4) == 0xB969) + { + return new CTZGEmulator(pc-4); + } + + return NULL; + } + +#endif /* LINUX */ + +extern "C" +int jitS390Emulation(J9VMThread* vmThread, void* sigInfo) + { +#ifdef LINUX + OMRUnixSignalInfo *unixSigInfo = (OMRUnixSignalInfo*) sigInfo; + + uint8_t *pc = (uint8_t*) unixSigInfo->platformSignalInfo.context->uc_mcontext.psw.addr; + + InstEmulator *inst = InstEmulator::decode(pc); + if (inst != NULL) + { + inst->emulate(&unixSigInfo->platformSignalInfo.context->uc_mcontext); + delete inst; + return 0; + } +#endif /* LINUX */ + + return -1; + } From aef9df0ff278d2083bc42ef9797620d94d375386 Mon Sep 17 00:00:00 2001 From: Spencer-Comin Date: Mon, 6 Nov 2023 12:55:07 -0500 Subject: [PATCH 3/6] Accelerate Integer and Long expand and compress methods Accelerate Integer.expand, Integer.compress, Long.expand, and Long.compress. These methods map directly to the BDEPG and BEXTG instructions in zNext. - Check if emulation for zNext instruction is enabled and if it is, enable the acceleration of compress and expand with emulation. 
Signed-off-by: Spencer Comin --- .../codegen/J9RecognizedMethodsEnum.hpp | 4 ++ runtime/compiler/env/j9method.cpp | 4 ++ .../compiler/z/codegen/J9CodeGenerator.cpp | 38 +++++++++++++++++++ 3 files changed, 46 insertions(+) diff --git a/runtime/compiler/codegen/J9RecognizedMethodsEnum.hpp b/runtime/compiler/codegen/J9RecognizedMethodsEnum.hpp index 9762669ad9e..04cfe7e22c8 100644 --- a/runtime/compiler/codegen/J9RecognizedMethodsEnum.hpp +++ b/runtime/compiler/codegen/J9RecognizedMethodsEnum.hpp @@ -527,6 +527,8 @@ java_lang_Integer_reverseBytes, java_lang_Integer_rotateLeft, java_lang_Integer_rotateRight, + java_lang_Integer_compress, + java_lang_Integer_expand, java_lang_Integer_valueOf, java_lang_Integer_toUnsignedLong, java_lang_Integer_stringSize, @@ -544,6 +546,8 @@ java_lang_Long_reverseBytes, java_lang_Long_rotateLeft, java_lang_Long_rotateRight, + java_lang_Long_compress, + java_lang_Long_expand, java_lang_Short_reverseBytes, java_lang_Long_stringSize, java_lang_Long_toString, diff --git a/runtime/compiler/env/j9method.cpp b/runtime/compiler/env/j9method.cpp index df8bed9db17..092a7b32a62 100644 --- a/runtime/compiler/env/j9method.cpp +++ b/runtime/compiler/env/j9method.cpp @@ -3314,6 +3314,8 @@ void TR_ResolvedJ9Method::construct() {x(TR::java_lang_Integer_reverseBytes, "reverseBytes", "(I)I")}, {x(TR::java_lang_Integer_rotateLeft, "rotateLeft", "(II)I")}, {x(TR::java_lang_Integer_rotateRight, "rotateRight", "(II)I")}, + {x(TR::java_lang_Integer_compress, "compress", "(II)I")}, + {x(TR::java_lang_Integer_expand, "expand", "(II)I")}, {x(TR::java_lang_Integer_valueOf, "valueOf", "(I)Ljava/lang/Integer;")}, { TR::java_lang_Integer_init, 6, "", (int16_t)-1, "*"}, {x(TR::java_lang_Integer_toUnsignedLong, "toUnsignedLong", "(I)J")}, @@ -3335,6 +3337,8 @@ void TR_ResolvedJ9Method::construct() {x(TR::java_lang_Long_reverseBytes, "reverseBytes", "(J)J")}, {x(TR::java_lang_Long_rotateLeft, "rotateLeft", "(JI)J")}, {x(TR::java_lang_Long_rotateRight, "rotateRight", 
"(JI)J")}, + {x(TR::java_lang_Long_compress, "compress", "(JJ)J")}, + {x(TR::java_lang_Long_expand, "expand", "(JJ)J")}, { TR::java_lang_Long_init, 6, "", (int16_t)-1, "*"}, {x(TR::java_lang_Long_stringSize, "stringSize", "(J)I") }, {x(TR::java_lang_Long_toString, "toString", "(J)Ljava/lang/String;") }, diff --git a/runtime/compiler/z/codegen/J9CodeGenerator.cpp b/runtime/compiler/z/codegen/J9CodeGenerator.cpp index 02b36d78393..5113d162ccf 100644 --- a/runtime/compiler/z/codegen/J9CodeGenerator.cpp +++ b/runtime/compiler/z/codegen/J9CodeGenerator.cpp @@ -3761,6 +3761,20 @@ J9::Z::CodeGenerator::suppressInliningOfRecognizedMethod(TR::RecognizedMethod me return true; } + static bool disableZNextCompressExpand = feGetEnv("TR_DisableZNextCompressExpand") != NULL; + if (!disableZNextCompressExpand && + (self()->comp()->target().cpu.supportsFeature(OMR_FEATURE_S390_MISCELLANEOUS_INSTRUCTION_EXTENSION_4) || + TR::InstOpCode(TR::InstOpCode::BEXTG).canEmulate() && TR::InstOpCode(TR::InstOpCode::BDEPG).canEmulate())) + { + if (method == TR::java_lang_Integer_compress || + method == TR::java_lang_Integer_expand || + method == TR::java_lang_Long_compress || + method == TR::java_lang_Long_expand) + { + return true; + } + } + if (method == TR::java_util_concurrent_atomic_AtomicBoolean_getAndSet || method == TR::java_util_concurrent_atomic_AtomicInteger_getAndAdd || method == TR::java_util_concurrent_atomic_AtomicInteger_getAndIncrement || @@ -4143,6 +4157,30 @@ J9::Z::CodeGenerator::inlineDirectCall( break; } + static bool disableZNextCompressExpand = feGetEnv("TR_DisableZNextCompressExpand") != NULL; + if (!disableZNextCompressExpand && + (self()->comp()->target().cpu.supportsFeature(OMR_FEATURE_S390_MISCELLANEOUS_INSTRUCTION_EXTENSION_4) || + TR::InstOpCode(TR::InstOpCode::BEXTG).canEmulate() && TR::InstOpCode(TR::InstOpCode::BDEPG).canEmulate())) + { + switch (methodSymbol->getRecognizedMethod()) + { + case TR::java_lang_Integer_compress: + resultReg = 
TR::TreeEvaluator::inlineBitCompress(node, cg, false); + return true; + case TR::java_lang_Integer_expand: + resultReg = TR::TreeEvaluator::inlineBitExpand(node, cg, false); + return true; + case TR::java_lang_Long_compress: + resultReg = TR::TreeEvaluator::inlineBitCompress(node, cg, true); + return true; + case TR::java_lang_Long_expand: + resultReg = TR::TreeEvaluator::inlineBitExpand(node, cg, true); + return true; + default: + break; + } + } + #ifdef J9VM_OPT_JAVA_CRYPTO_ACCELERATION if (self()->inlineCryptoMethod(node, resultReg)) { From e3c72cf533d89f7d966546bb9c82220763915df1 Mon Sep 17 00:00:00 2001 From: Shubham Verma Date: Fri, 1 Dec 2023 13:02:34 -0500 Subject: [PATCH 4/6] Accelerate DAA ExternalDecimal.checkExternalDecimal api Accelerate DAA ExternalDecimal.checkExternalDecimal api for verifying Zoned/External decimals using zNext instruction, Vector Test Zoned. This change: - Adds IL opCode for the ExternalDecimal.checkExternalDecimal - Updates IL generator to recognize and inline ExternalDecimal.checkExternalDecimal - Adds evaluator to accelerate the api call via Vector Test Zoned Signed-off-by: Shubham Verma --- .../codegen/J9RecognizedMethodsEnum.hpp | 6 + runtime/compiler/codegen/J9TreeEvaluator.cpp | 6 + runtime/compiler/codegen/J9TreeEvaluator.hpp | 1 + runtime/compiler/env/j9method.cpp | 27 ++++- runtime/compiler/env/j9method.h | 2 + runtime/compiler/il/ILOpCodesEnum.hpp | 2 +- runtime/compiler/il/Opcodes.enum | 16 +++ runtime/compiler/ilgen/Walker.cpp | 2 + .../optimizer/DataAccessAccelerator.cpp | 101 ++++++++++++++++ .../optimizer/DataAccessAccelerator.hpp | 1 + .../compiler/optimizer/InlinerTempForJ9.cpp | 3 + runtime/compiler/optimizer/J9LocalCSE.cpp | 1 + .../compiler/optimizer/J9SimplifierTable.enum | 1 + .../compiler/z/codegen/J9BCDTreeEvaluator.cpp | 109 ++++++++++++++++++ .../compiler/z/codegen/J9TreeEvaluator.hpp | 1 + 15 files changed, 276 insertions(+), 3 deletions(-) diff --git 
a/runtime/compiler/codegen/J9RecognizedMethodsEnum.hpp b/runtime/compiler/codegen/J9RecognizedMethodsEnum.hpp index 04cfe7e22c8..108ea09cfc6 100644 --- a/runtime/compiler/codegen/J9RecognizedMethodsEnum.hpp +++ b/runtime/compiler/codegen/J9RecognizedMethodsEnum.hpp @@ -724,6 +724,12 @@ com_ibm_dataaccess_PackedDecimal_movePackedDecimal_, com_ibm_dataaccess_PackedDecimal_checkPackedDecimal_, + // wrapper methods + com_ibm_dataaccess_ExternalDecimal_checkExternalDecimal, + + //inline methods + com_ibm_dataaccess_ExternalDecimal_checkExternalDecimal_, + com_ibm_Compiler_Internal__TR_Prefetch, com_ibm_Compiler_Internal_Quad_enableQuadOptimization, diff --git a/runtime/compiler/codegen/J9TreeEvaluator.cpp b/runtime/compiler/codegen/J9TreeEvaluator.cpp index 259b4f2a672..0b97a22fe23 100644 --- a/runtime/compiler/codegen/J9TreeEvaluator.cpp +++ b/runtime/compiler/codegen/J9TreeEvaluator.cpp @@ -36,6 +36,12 @@ #include "runtime/J9ValueProfiler.hpp" #include "util_api.h" +TR::Register* +J9::TreeEvaluator::zdchkEvaluator(TR::Node *node, TR::CodeGenerator *cg) + { + return TR::TreeEvaluator::unImpOpEvaluator(node, cg); + } + TR::Register* J9::TreeEvaluator::zdloadEvaluator(TR::Node *node, TR::CodeGenerator *cg) { diff --git a/runtime/compiler/codegen/J9TreeEvaluator.hpp b/runtime/compiler/codegen/J9TreeEvaluator.hpp index 8d30eef2890..3de452c6612 100644 --- a/runtime/compiler/codegen/J9TreeEvaluator.hpp +++ b/runtime/compiler/codegen/J9TreeEvaluator.hpp @@ -75,6 +75,7 @@ class OMR_EXTENSIBLE TreeEvaluator: public OMR::TreeEvaluatorConnector float frequency; }; + static TR::Register *zdchkEvaluator(TR::Node *node, TR::CodeGenerator *cg); static TR::Register *zdloadEvaluator(TR::Node *node, TR::CodeGenerator *cg); static TR::Register *zdloadiEvaluator(TR::Node *node, TR::CodeGenerator *cg); static TR::Register *zdstoreEvaluator(TR::Node *node, TR::CodeGenerator *cg); diff --git a/runtime/compiler/env/j9method.cpp b/runtime/compiler/env/j9method.cpp index 
092a7b32a62..bd756df73f6 100644 --- a/runtime/compiler/env/j9method.cpp +++ b/runtime/compiler/env/j9method.cpp @@ -2617,6 +2617,14 @@ void TR_ResolvedJ9Method::construct() {TR::unknownMethod} }; + static X DataAccessExternalDecimalMethods[] = + { + {x(TR::com_ibm_dataaccess_ExternalDecimal_checkExternalDecimal , "checkExternalDecimal" , "([BIIII)I")}, + {x(TR::com_ibm_dataaccess_ExternalDecimal_checkExternalDecimal_, "checkExternalDecimal_", "([BIIII)I")}, + + {TR::unknownMethod} + }; + static X BigDecimalMethods[] = { @@ -4291,6 +4299,7 @@ void TR_ResolvedJ9Method::construct() { "java/util/Hashtable$HashEnumerator", HashtableHashEnumeratorMethods }, { "com/ibm/Compiler/Internal/Prefetch", PrefetchMethods }, { "java/lang/invoke/VarHandleInternal", VarHandleMethods }, + { "com/ibm/dataaccess/ExternalDecimal", DataAccessExternalDecimalMethods }, { 0 } }; @@ -9826,10 +9835,17 @@ TR_ResolvedJ9Method::isFieldFlattened(TR::Compilation *comp, int32_t cpIndex, bo return vmThread->javaVM->internalVMFunctions->isFlattenableFieldFlattened(reinterpret_cast(containingClass), fieldShape); } +bool +TR_ResolvedJ9Method::isDAAExternalDecimalWrapperMethod() + { + // DAA External Decimal check method + return (this->TR_ResolvedMethod::getRecognizedMethod() == TR::com_ibm_dataaccess_ExternalDecimal_checkExternalDecimal); + } + bool TR_ResolvedJ9Method::isDAAWrapperMethod() { - return isDAAMarshallingWrapperMethod() || isDAAPackedDecimalWrapperMethod(); + return isDAAMarshallingWrapperMethod() || isDAAPackedDecimalWrapperMethod() || isDAAExternalDecimalWrapperMethod(); } bool @@ -9970,10 +9986,17 @@ TR_ResolvedJ9Method::isDAAPackedDecimalWrapperMethod() return false; } +bool +TR_ResolvedJ9Method::isDAAExternalDecimalIntrinsicMethod() + { + // DAA External Decimal check method + return (this->TR_ResolvedMethod::getRecognizedMethod() == TR::com_ibm_dataaccess_ExternalDecimal_checkExternalDecimal_); + } + bool TR_ResolvedJ9Method::isDAAIntrinsicMethod() { - return 
isDAAMarshallingIntrinsicMethod() || isDAAPackedDecimalIntrinsicMethod(); + return isDAAMarshallingIntrinsicMethod() || isDAAPackedDecimalIntrinsicMethod() || isDAAExternalDecimalIntrinsicMethod(); } bool diff --git a/runtime/compiler/env/j9method.h b/runtime/compiler/env/j9method.h index 9df040a4d95..2b3583f8bba 100644 --- a/runtime/compiler/env/j9method.h +++ b/runtime/compiler/env/j9method.h @@ -471,9 +471,11 @@ class TR_ResolvedJ9Method : public TR_J9Method, public TR_ResolvedJ9MethodBase bool isDAAWrapperMethod(); bool isDAAMarshallingWrapperMethod(); bool isDAAPackedDecimalWrapperMethod(); + bool isDAAExternalDecimalWrapperMethod(); bool isDAAIntrinsicMethod(); bool isDAAMarshallingIntrinsicMethod(); bool isDAAPackedDecimalIntrinsicMethod(); + bool isDAAExternalDecimalIntrinsicMethod(); protected: TR_ResolvedMethod * aotMaskResolvedPossiblyPrivateVirtualMethod(TR::Compilation *comp, TR_ResolvedMethod *method); diff --git a/runtime/compiler/il/ILOpCodesEnum.hpp b/runtime/compiler/il/ILOpCodesEnum.hpp index 077351ee990..d745bdcc59e 100644 --- a/runtime/compiler/il/ILOpCodesEnum.hpp +++ b/runtime/compiler/il/ILOpCodesEnum.hpp @@ -26,7 +26,7 @@ #include "compiler/il/OMRILOpCodesEnum.hpp" FirstJ9Op = LastScalarOMROp + 1, - LastJ9Op = BCDCHK, + LastJ9Op = zdchk, FirstTROp = FirstOMROp, LastTROp = LastJ9Op, diff --git a/runtime/compiler/il/Opcodes.enum b/runtime/compiler/il/Opcodes.enum index d7d9a3c8e3e..e0c20c83502 100644 --- a/runtime/compiler/il/Opcodes.enum +++ b/runtime/compiler/il/Opcodes.enum @@ -1542,3 +1542,19 @@ OPCODE_MACRO(\ /* .ifCompareOpCode = */ TR::BadILOp, \ /* .description = */ \ ) +OPCODE_MACRO(\ + /* .opcode = */ zdchk, \ + /* .name = */ "zdchk", \ + /* .properties1 = */ 0, \ + /* .properties2 = */ ILProp2::ValueNumberShare | ILProp2::SupportedForPRE, \ + /* .properties3 = */ 0, \ + /* .properties4 = */ ILProp4::BinaryCodedDecimalOp, \ + /* .dataType = */ TR::Int32, \ + /* .typeProperties = */ ILTypeProp::Size_4 | ILTypeProp::Integer, \ + /* 
.childProperties = */ TWO_CHILD(TR::ZonedDecimal, TR::Int8), \ + /* .swapChildrenOpCode = */ TR::BadILOp, \ + /* .reverseBranchOpCode = */ TR::BadILOp, \ + /* .booleanCompareOpCode = */ TR::BadILOp, \ + /* .ifCompareOpCode = */ TR::BadILOp, \ + /* .description = zoned decimal validity checking */ \ +) diff --git a/runtime/compiler/ilgen/Walker.cpp b/runtime/compiler/ilgen/Walker.cpp index 92bd22718e0..146a5670fe4 100644 --- a/runtime/compiler/ilgen/Walker.cpp +++ b/runtime/compiler/ilgen/Walker.cpp @@ -4190,6 +4190,8 @@ break DAA_PRINT(TR::com_ibm_dataaccess_PackedDecimal_shiftRightPackedDecimal); DAA_PRINT(TR::com_ibm_dataaccess_PackedDecimal_movePackedDecimal); + DAA_PRINT(TR::com_ibm_dataaccess_ExternalDecimal_checkExternalDecimal); + default: break; } diff --git a/runtime/compiler/optimizer/DataAccessAccelerator.cpp b/runtime/compiler/optimizer/DataAccessAccelerator.cpp index e6c897e74f4..345771ac67f 100644 --- a/runtime/compiler/optimizer/DataAccessAccelerator.cpp +++ b/runtime/compiler/optimizer/DataAccessAccelerator.cpp @@ -343,6 +343,14 @@ int32_t TR_DataAccessAccelerator::performOnBlock(TR::Block* block, TreeTopContai ++result; } break; + // DAA External Decimal Check + case TR::com_ibm_dataaccess_ExternalDecimal_checkExternalDecimal_: + if (comp()->target().cpu.supportsFeature(OMR_FEATURE_S390_VECTOR_PACKED_DECIMAL_ENHANCEMENT_FACILITY_3) + && inlineCheckExternalDecimal(treeTop, callNode)) + { + ++result; + } + break; default: matched = false; @@ -846,6 +854,99 @@ bool TR_DataAccessAccelerator::inlineCheckPackedDecimal(TR::TreeTop *callTreeTop return false; } +bool TR_DataAccessAccelerator::inlineCheckExternalDecimal(TR::TreeTop *callTreeTop, TR::Node *callNode) + { + TR::Node *byteArrayNode = callNode->getChild(0); + TR::Node *offsetNode = callNode->getChild(1); + TR::Node *precisionNode = callNode->getChild(2); + TR::Node *typeNode = callNode->getChild(3); + TR::Node *bytesWithSpacesNode = callNode->getChild(4); + + int32_t precision = 
precisionNode->getInt(); + int32_t bytesWithSpaces = bytesWithSpacesNode->getInt(); + int32_t type = typeNode->getInt(); + const char *failMsg = NULL; + + /* Hardware expects both, precision and bytesWithSpaces to be + * 5 bit unsigned binary integer. However, 0 is valid only for + * bytesWithSpaces. This is why precision must be within [1-31] + * range and bytesWithSpaces must be within [0-31] range. + */ + // TODO: Add support for non-constant arguments + if (!isChildConst(callNode, 2)) + failMsg = "Precision is not constant"; + else if (precision < 1 || precision > 31) + failMsg = "Precision value is not in valid range [1-31]"; + else if (!isChildConst(callNode, 3)) + failMsg = "Decimal type node is not constant"; + else if (type < 1 || type > 4) + failMsg = "Invalid decimal type. Supported types are (1|2|3|4)"; + else if (!isChildConst(callNode, 4)) + failMsg = "bytesWithSpaces node is not constant"; + else if (bytesWithSpaces < 0 || bytesWithSpaces > 31) + failMsg = "bytesWithSpaces value not in valid range [0-31]"; + + if (failMsg) + { + TR::DebugCounter::incStaticDebugCounter(comp(), + TR::DebugCounter::debugCounterName(comp(), + "DAA/rejected/chkZonedDecimal")); + + return printInliningStatus (false, callNode, failMsg); + } + + if (performTransformation(comp(), "O^O TR_DataAccessAccelerator: inlineCheckZonedDecimal on callNode %p\n", callNode)) + { + TR::DebugCounter::incStaticDebugCounter(comp(), + TR::DebugCounter::debugCounterName(comp(), + "DAA/inlined/chkZonedDecimal")); + + insertByteArrayNULLCHK(callTreeTop, callNode, byteArrayNode); + + TR::DataType decimalType = TR::DataTypes::NoType; + TR::ILOpCodes loadOpCode = TR::BadILOp; + if (type == 1) + { + decimalType = TR::ZonedDecimal; + loadOpCode = TR::zdloadi; + } + else if (type == 2) + { + decimalType = TR::ZonedDecimalSignLeadingEmbedded; + loadOpCode = TR::zdsleLoadi; + } + else if (type == 3) + { + decimalType = TR::ZonedDecimalSignTrailingSeparate; + loadOpCode = TR::zdstsLoadi; + } + else if 
(type == 4) + { + decimalType = TR::ZonedDecimalSignLeadingSeparate; + loadOpCode = TR::zdslsLoadi; + } + int32_t precisionSizeInNumberOfBytes = TR::DataType::getSizeFromBCDPrecision(decimalType, precision); + + insertByteArrayBNDCHK(callTreeTop, callNode, byteArrayNode, offsetNode, 0); + insertByteArrayBNDCHK(callTreeTop, callNode, byteArrayNode, offsetNode, precisionSizeInNumberOfBytes - 1); + + TR::SymbolReference* zonedDecimalSymbolReference = comp()->getSymRefTab()->findOrCreateArrayShadowSymbolRef(decimalType, NULL, precisionSizeInNumberOfBytes, fe()); + TR::Node* zdchkChild0Node = TR::Node::createWithSymRef(loadOpCode, 1, 1, constructAddressNode(callNode, byteArrayNode, offsetNode), zonedDecimalSymbolReference); + zdchkChild0Node->setDecimalPrecision(precision); + + byteArrayNode->decReferenceCount(); + offsetNode->decReferenceCount(); + precisionNode->decReferenceCount(); + typeNode->decReferenceCount(); + + TR::Node* bytesWithSpacesConstNode = TR::Node::bconst(static_cast(bytesWithSpaces)); + TR::Node::recreateWithoutProperties(callNode, TR::zdchk, 2, zdchkChild0Node, bytesWithSpacesConstNode); + return true; + } + + return false; +} + TR::Node* TR_DataAccessAccelerator::insertIntegerGetIntrinsic(TR::TreeTop* callTreeTop, TR::Node* callNode, int32_t sourceNumBytes, int32_t targetNumBytes) { if (targetNumBytes != 1 && targetNumBytes != 2 && targetNumBytes != 4 && targetNumBytes != 8) diff --git a/runtime/compiler/optimizer/DataAccessAccelerator.hpp b/runtime/compiler/optimizer/DataAccessAccelerator.hpp index 0337e0a18f4..420ae10c19b 100644 --- a/runtime/compiler/optimizer/DataAccessAccelerator.hpp +++ b/runtime/compiler/optimizer/DataAccessAccelerator.hpp @@ -211,6 +211,7 @@ class TR_DataAccessAccelerator : public TR::Optimization TR::Node* insertDecimalSetIntrinsic(TR::TreeTop* callTreeTop, TR::Node* callNode, int32_t sourceNumBytes, int32_t targetNumBytes); bool inlineCheckPackedDecimal(TR::TreeTop* callTreeTop, TR::Node* callNode); + bool 
inlineCheckExternalDecimal(TR::TreeTop* callTreeTop, TR::Node* callNode); private: diff --git a/runtime/compiler/optimizer/InlinerTempForJ9.cpp b/runtime/compiler/optimizer/InlinerTempForJ9.cpp index d5187b6d8e7..ce8957ad70f 100644 --- a/runtime/compiler/optimizer/InlinerTempForJ9.cpp +++ b/runtime/compiler/optimizer/InlinerTempForJ9.cpp @@ -5411,6 +5411,9 @@ TR_J9InlinerPolicy::supressInliningRecognizedInitialCallee(TR_CallSite* callsite // DAA Packed Decimal check method case TR::com_ibm_dataaccess_PackedDecimal_checkPackedDecimal_: + // DAA External Decimal check method + case TR::com_ibm_dataaccess_ExternalDecimal_checkExternalDecimal_: + // DAA Packed Decimal <-> Integer case TR::com_ibm_dataaccess_DecimalData_convertPackedDecimalToInteger_: case TR::com_ibm_dataaccess_DecimalData_convertPackedDecimalToInteger_ByteBuffer_: diff --git a/runtime/compiler/optimizer/J9LocalCSE.cpp b/runtime/compiler/optimizer/J9LocalCSE.cpp index e10528f86cf..0777419cc66 100644 --- a/runtime/compiler/optimizer/J9LocalCSE.cpp +++ b/runtime/compiler/optimizer/J9LocalCSE.cpp @@ -117,6 +117,7 @@ J9::LocalCSE::shouldCommonNode(TR::Node *parent, TR::Node *node) case TR::com_ibm_dataaccess_PackedDecimal_equalsPackedDecimal_: case TR::com_ibm_dataaccess_PackedDecimal_notEqualsPackedDecimal_: case TR::com_ibm_dataaccess_PackedDecimal_checkPackedDecimal_: + case TR::com_ibm_dataaccess_ExternalDecimal_checkExternalDecimal_: case TR::com_ibm_dataaccess_DecimalData_convertExternalDecimalToPackedDecimal_: case TR::com_ibm_dataaccess_DecimalData_convertPackedDecimalToExternalDecimal_: case TR::com_ibm_dataaccess_DecimalData_convertPackedDecimalToUnicodeDecimal_: diff --git a/runtime/compiler/optimizer/J9SimplifierTable.enum b/runtime/compiler/optimizer/J9SimplifierTable.enum index dcebe11957d..cb530ede7d8 100644 --- a/runtime/compiler/optimizer/J9SimplifierTable.enum +++ b/runtime/compiler/optimizer/J9SimplifierTable.enum @@ -118,6 +118,7 @@ #define pdModifyPrecisionSimplifierHandler 
pdshlSimplifier #define countDigitsSimplifierHandler dftSimplifier #define BCDCHKSimplifierHandler dftSimplifier +#define zdchkSimplifierHandler dftSimplifier #include "optimizer/OMRSimplifierTable.enum" diff --git a/runtime/compiler/z/codegen/J9BCDTreeEvaluator.cpp b/runtime/compiler/z/codegen/J9BCDTreeEvaluator.cpp index aef18277f89..5993e173f87 100644 --- a/runtime/compiler/z/codegen/J9BCDTreeEvaluator.cpp +++ b/runtime/compiler/z/codegen/J9BCDTreeEvaluator.cpp @@ -5138,6 +5138,115 @@ J9::Z::TreeEvaluator::pdchkEvaluator(TR::Node *node, TR::CodeGenerator *cg) return chkResultReg; } +TR::Register * +J9::Z::TreeEvaluator::zdchkEvaluator(TR::Node *node, TR::CodeGenerator *cg) + { + cg->traceBCDEntry("zdchk",node); + TR::Compilation *comp = cg->comp(); + TR::Register *chkResultReg = cg->allocateRegister(TR_GPR); + generateRRInstruction(cg, TR::InstOpCode::getXORRegOpCode(), node, chkResultReg, chkResultReg); + + TR::Node *child = node->getFirstChild(); + int32_t precision = child->getDecimalPrecision(); + TR_ASSERT_FATAL_WITH_NODE(node, precision > 0 && precision < 32, + "External decimal precision was expected to be greater than 0 and less than 32, but was %d.\n", + precision); + + int32_t bytesWithSpacesConst = node->getSecondChild()->getInt(); + TR_ASSERT_FATAL_WITH_NODE(node, bytesWithSpacesConst >= 0 && bytesWithSpacesConst < 32, + "Space bytes count(%d) for external decimal was outside expected range [0-31].\n", + bytesWithSpacesConst); + + TR::DataType dataType = child->getDataType(); + // Safe to downcast here because decimalLength can at most be precision + 1 + int8_t decimalLength = static_cast(TR::DataType::getSizeFromBCDPrecision(dataType, precision)); + TR_ASSERT_FATAL_WITH_NODE(node, decimalLength > 0 && decimalLength <= (static_cast(precision) + 1), + "External decimal length was expected to be greater than 0 and less than equal to precision + 1, " + "but decimal length was %d and precision was %d.\n", + decimalLength, precision); + TR_PseudoRegister 
*sourceReg = cg->evaluateBCDNode(child); + sourceReg = cg->privatizeBCDRegisterIfNeeded(node, child, sourceReg); + TR::MemoryReference *sourceMR = generateS390LeftAlignedMemoryReference(child, sourceReg->getStorageReference(), cg, decimalLength); + + TR::Register *vZondedLowReg = cg->allocateRegister(TR_VRF); + TR::Register *vZondedHighReg = cg->allocateRegister(TR_VRF); + + /** + * I3: / SSC LS DSC STC DC + * 0 0 0 00000 000 00000 + * - Separate-Sign Control (SSC) : 1 if sign is separate, otherwise 0 for embedded sign. + * - Leading Sign (LS) : 1 if sign is leading, otherwise 0 for trailing sign. + * - Disallowed-Spaces Count (DSC): DC is total number of digits, DSC represents number of digits + * with zone and digit format, rest can have space. + * Only relevant when LS and SSC are 0. + * - Sign-Test Control (STC) : Specifies which codes are considered as valid sign codes. + * 110 (C,D,F) for embedded sign (will return false for non-preferred sign codes). + * 010 (4e,60) for sign separate (leading/trailing). + * 000 all sign codes are considered valid (hex) (A-F). + * - Digits Count (DC) : Integer specifying number of bytes to be verified. Does not include sign byte. 
+ */ + uint16_t zonedDecimalInfo = 0x0; // I3 operandgit d + uint8_t stc = 0x0; + + int8_t dsc = static_cast(precision - bytesWithSpacesConst); + + if (dataType == TR::ZonedDecimalSignTrailingSeparate) // Sign trailing separate + { + zonedDecimalInfo = 0x2; // set SSC bit + stc = 0x2; + dsc = 0x0; + } + else if (dataType == TR::ZonedDecimalSignLeadingEmbedded) // Sign leading embedded + { + zonedDecimalInfo = 0x1; // set LC bit + } + else if (dataType == TR::ZonedDecimalSignLeadingSeparate) // Sign leading separate + { + zonedDecimalInfo = 0x3; // set SSC and LS bits + stc = 0x2; + dsc = 0x0; + } + zonedDecimalInfo = (zonedDecimalInfo << 5) | dsc; + zonedDecimalInfo = (zonedDecimalInfo << 3) | stc; + zonedDecimalInfo = (zonedDecimalInfo << 5) | precision; // DC + + // Must use decimalLength because precision does not equal length when sign is separate + int8_t firstByteIndexToLoad = decimalLength - 1; + TR::MemoryReference *zonedDecimalLowMR = generateS390MemoryReference(*sourceMR, 0, cg); + TR::MemoryReference *zonedDecimalHighMR = NULL; + if (decimalLength > TR_VECTOR_REGISTER_SIZE) + { + zonedDecimalHighMR = zonedDecimalLowMR; + firstByteIndexToLoad = firstByteIndexToLoad - TR_VECTOR_REGISTER_SIZE; + generateVSIInstruction(cg, TR::InstOpCode::VLRL, node, vZondedHighReg, zonedDecimalHighMR, firstByteIndexToLoad); + + zonedDecimalLowMR = generateS390MemoryReference(*sourceMR, decimalLength - TR_VECTOR_REGISTER_SIZE, cg); + firstByteIndexToLoad = TR_VECTOR_REGISTER_SIZE - 1; + } + + generateVSIInstruction(cg, TR::InstOpCode::VLRL, node, vZondedLowReg, zonedDecimalLowMR, firstByteIndexToLoad); + generateVRIlInstruction(cg, TR::InstOpCode::VTZ, node, vZondedHighReg, vZondedLowReg, zonedDecimalInfo); + generateRRInstruction(cg, TR::InstOpCode::IPM, node, chkResultReg, chkResultReg); + if(cg->comp()->target().is64Bit()) + { + generateRRInstruction(cg, TR::InstOpCode::LLGTR, node, chkResultReg, chkResultReg); + generateRSInstruction(cg, TR::InstOpCode::SRLG, node, 
chkResultReg, chkResultReg, 28); + } + else + { + generateRSInstruction(cg, TR::InstOpCode::SRL, node, chkResultReg, 28); + } + + if (vZondedLowReg) cg->stopUsingRegister(vZondedLowReg); + if (vZondedHighReg) cg->stopUsingRegister(vZondedHighReg); + + node->setRegister(chkResultReg); + cg->decReferenceCount(child); + cg->decReferenceCount(node->getSecondChild()); + cg->traceBCDExit("zdchk",node); + return chkResultReg; + } + /** * pdEvaluator - various binary packed decimal evaluators */ diff --git a/runtime/compiler/z/codegen/J9TreeEvaluator.hpp b/runtime/compiler/z/codegen/J9TreeEvaluator.hpp index 5b26c58a25b..56556facb16 100644 --- a/runtime/compiler/z/codegen/J9TreeEvaluator.hpp +++ b/runtime/compiler/z/codegen/J9TreeEvaluator.hpp @@ -470,6 +470,7 @@ class OMR_EXTENSIBLE TreeEvaluator: public J9::TreeEvaluator bool ignoreDecimalOverflow = false); static TR::Register *pdchkEvaluator(TR::Node *node, TR::CodeGenerator *cg); + static TR::Register *zdchkEvaluator(TR::Node *node, TR::CodeGenerator *cg); static TR::Register *pdcmpeqEvaluator(TR::Node *node, TR::CodeGenerator *cg); static TR::Register *pdcmpneEvaluator(TR::Node *node, TR::CodeGenerator *cg); From de59a99403adc377683e3cd99deccf324e073de6 Mon Sep 17 00:00:00 2001 From: Rahil Shah Date: Wed, 13 Nov 2024 09:59:29 -0500 Subject: [PATCH 5/6] Address PR Comment: Add emulation support for zNext instructions Signed-off-by: Rahil Shah --- runtime/compiler/z/runtime/Emulation.cpp | 33 ++++++++++++++---------- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/runtime/compiler/z/runtime/Emulation.cpp b/runtime/compiler/z/runtime/Emulation.cpp index 1a673fba02f..a25fc14fc0c 100644 --- a/runtime/compiler/z/runtime/Emulation.cpp +++ b/runtime/compiler/z/runtime/Emulation.cpp @@ -89,7 +89,10 @@ void LXAEmulator::emulate(mcontext_t *cpu) } addr <<= shift; - addr += cpu->gregs[b2]; + if (b2 != 0) + { + addr += cpu->gregs[b2]; + } cpu->gregs[r1] = addr; } @@ -104,9 +107,9 @@ class BDEPGEmulator : public 
InstEmulator BDEPGEmulator::BDEPGEmulator(uint8_t *start) { - r1 = (start[3]&0xF0) >> 4; - r2 = start[3]&0x0F; - r3 = (start[2]&0xF0) >> 4; + r1 = (start[3] & 0xF0) >> 4; + r2 = start[3] & 0x0F; + r3 = (start[2] & 0xF0) >> 4; } void BDEPGEmulator::emulate(mcontext_t *cpu) @@ -139,9 +142,9 @@ class BEXTGEmulator : public InstEmulator BEXTGEmulator::BEXTGEmulator(uint8_t *start) { - r1 = (start[3]&0xF0) >> 4; - r2 = start[3]&0x0F; - r3 = (start[2]&0xF0) >> 4; + r1 = (start[3] & 0xF0) >> 4; + r2 = start[3] & 0x0F; + r3 = (start[2] & 0xF0) >> 4; } void BEXTGEmulator::emulate(mcontext_t *cpu) @@ -152,9 +155,9 @@ void BEXTGEmulator::emulate(mcontext_t *cpu) for (int k = 0; mask; mask <<= 1, val <<= 1) { - if (mask & (1ULL<<63)) + if (mask & (1ULL << 63)) { - res |= (val & (1ULL<<63)) >> k; + res |= (val & (1ULL << 63)) >> k; k++; } } @@ -173,8 +176,8 @@ class CLZGEmulator : public InstEmulator CLZGEmulator::CLZGEmulator(uint8_t *start) { - r1 = (start[3]&0xF0) >> 4; - r2 = start[3]&0x0F; + r1 = (start[3] & 0xF0) >> 4; + r2 = start[3] & 0x0F; } void CLZGEmulator::emulate(mcontext_t *cpu) @@ -193,8 +196,8 @@ class CTZGEmulator : public InstEmulator CTZGEmulator::CTZGEmulator(uint8_t *start) { - r1 = (start[3]&0xF0) >> 4; - r2 = start[3]&0x0F; + r1 = (start[3] & 0xF0) >> 4; + r2 = start[3] & 0x0F; } void CTZGEmulator::emulate(mcontext_t *cpu) @@ -204,7 +207,9 @@ void CTZGEmulator::emulate(mcontext_t *cpu) InstEmulator *InstEmulator::decode(uint8_t *pc) { - if (pc[-6] == 0xE3 && (pc[-1]&0xF0) == 0x60 && (pc[-1]&0x0F) < 10) + // Check the opcode in the first and last bytes of the instruction to + // see whether it is one of the LXA/LLXA instructions (opcodes '0xE360' to '0xE369') + if ((pc[-6] == 0xE3) && ((pc[-1] & 0xF0) == 0x60) && ((pc[-1] & 0x0F) < 10)) { return new LXAEmulator(pc-6); } From 7366e6513f94a5ec14d88d906da6bb7ed9cf9936 Mon Sep 17 00:00:00 2001 From: Rahil Shah Date: Wed, 13 Nov 2024 10:33:23 -0500 Subject: [PATCH 6/6] Address PR Comment: Accelerate DAA 
ExternalDecimal.checkExternalDecimal api Signed-off-by: Rahil Shah --- .../compiler/z/codegen/J9BCDTreeEvaluator.cpp | 28 +++++++++---------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/runtime/compiler/z/codegen/J9BCDTreeEvaluator.cpp b/runtime/compiler/z/codegen/J9BCDTreeEvaluator.cpp index 5993e173f87..8c8393cbe7f 100644 --- a/runtime/compiler/z/codegen/J9BCDTreeEvaluator.cpp +++ b/runtime/compiler/z/codegen/J9BCDTreeEvaluator.cpp @@ -5178,37 +5178,37 @@ J9::Z::TreeEvaluator::zdchkEvaluator(TR::Node *node, TR::CodeGenerator *cg) * - Leading Sign (LS) : 1 if sign is leading, otherwise 0 for trailing sign. * - Disallowed-Spaces Count (DSC): DC is total number of digits, DSC represents number of digits * with zone and digit format, rest can have space. - * Only relevant when LS and SSC are 0. + * Only relevant when SSC is 0. * - Sign-Test Control (STC) : Specifies which codes are considered as valid sign codes. * 110 (C,D,F) for embedded sign (will return false for non-preferred sign codes). * 010 (4e,60) for sign separate (leading/trailing). * 000 all sign codes are considered valid (hex) (A-F). * - Digits Count (DC) : Integer specifying number of bytes to be verified. Does not include sign byte. 
*/ - uint16_t zonedDecimalInfo = 0x0; // I3 operandgit d - uint8_t stc = 0x0; + #define I3_SSC_SEPARATE 0x1 + #define I3_STC_SIGN_SEPARATE 0x2 + #define I3_LS_LEADING 0x1 + #define I3_STC_EMBEDDED_SIGN 0x6 + uint16_t zonedDecimalInfo = static_cast(precision); // I3 operand int8_t dsc = static_cast(precision - bytesWithSpacesConst); if (dataType == TR::ZonedDecimalSignTrailingSeparate) // Sign trailing separate { - zonedDecimalInfo = 0x2; // set SSC bit - stc = 0x2; - dsc = 0x0; + zonedDecimalInfo |= ((I3_SSC_SEPARATE << 14) | (I3_STC_SIGN_SEPARATE << 5)); } else if (dataType == TR::ZonedDecimalSignLeadingEmbedded) // Sign leading embedded { - zonedDecimalInfo = 0x1; // set LC bit + zonedDecimalInfo |= ((I3_LS_LEADING << 13) | (dsc << 8)); } else if (dataType == TR::ZonedDecimalSignLeadingSeparate) // Sign leading separate { - zonedDecimalInfo = 0x3; // set SSC and LS bits - stc = 0x2; - dsc = 0x0; + zonedDecimalInfo |= ((I3_SSC_SEPARATE << 14) | (I3_LS_LEADING << 13) | (I3_STC_SIGN_SEPARATE << 5)); + } + else if (dataType == TR::ZonedDecimal) // Sign embedded trailing + { + zonedDecimalInfo |= (dsc << 8); } - zonedDecimalInfo = (zonedDecimalInfo << 5) | dsc; - zonedDecimalInfo = (zonedDecimalInfo << 3) | stc; - zonedDecimalInfo = (zonedDecimalInfo << 5) | precision; // DC // Must use decimalLength because precision does not equal length when sign is separate int8_t firstByteIndexToLoad = decimalLength - 1; @@ -5243,7 +5243,7 @@ J9::Z::TreeEvaluator::zdchkEvaluator(TR::Node *node, TR::CodeGenerator *cg) node->setRegister(chkResultReg); cg->decReferenceCount(child); cg->decReferenceCount(node->getSecondChild()); - cg->traceBCDExit("zdchk",node); + cg->traceBCDExit("zdchk", node); return chkResultReg; }