diff --git a/runtime/cmake/caches/common.cmake b/runtime/cmake/caches/common.cmake index 92b30f2b579..fda57f1e106 100644 --- a/runtime/cmake/caches/common.cmake +++ b/runtime/cmake/caches/common.cmake @@ -216,3 +216,4 @@ set(J9VM_THR_LOCK_RESERVATION ON CACHE BOOL "") set(J9VM_THR_PREEMPTIVE ON CACHE BOOL "") set(J9VM_THR_SMART_DEFLATION ON CACHE BOOL "") set(J9VM_OPT_OPENJDK_FFI ON CACHE BOOL "") +set(J9VM_JIT_EMULATE_ZNEXT OFF CACHE BOOL "Enable ZNext emulation") diff --git a/runtime/compiler/codegen/J9RecognizedMethodsEnum.hpp b/runtime/compiler/codegen/J9RecognizedMethodsEnum.hpp index 9762669ad9e..108ea09cfc6 100644 --- a/runtime/compiler/codegen/J9RecognizedMethodsEnum.hpp +++ b/runtime/compiler/codegen/J9RecognizedMethodsEnum.hpp @@ -527,6 +527,8 @@ java_lang_Integer_reverseBytes, java_lang_Integer_rotateLeft, java_lang_Integer_rotateRight, + java_lang_Integer_compress, + java_lang_Integer_expand, java_lang_Integer_valueOf, java_lang_Integer_toUnsignedLong, java_lang_Integer_stringSize, @@ -544,6 +546,8 @@ java_lang_Long_reverseBytes, java_lang_Long_rotateLeft, java_lang_Long_rotateRight, + java_lang_Long_compress, + java_lang_Long_expand, java_lang_Short_reverseBytes, java_lang_Long_stringSize, java_lang_Long_toString, @@ -720,6 +724,12 @@ com_ibm_dataaccess_PackedDecimal_movePackedDecimal_, com_ibm_dataaccess_PackedDecimal_checkPackedDecimal_, + // wrapper methods + com_ibm_dataaccess_ExternalDecimal_checkExternalDecimal, + + //inline methods + com_ibm_dataaccess_ExternalDecimal_checkExternalDecimal_, + com_ibm_Compiler_Internal__TR_Prefetch, com_ibm_Compiler_Internal_Quad_enableQuadOptimization, diff --git a/runtime/compiler/codegen/J9TreeEvaluator.cpp b/runtime/compiler/codegen/J9TreeEvaluator.cpp index 259b4f2a672..0b97a22fe23 100644 --- a/runtime/compiler/codegen/J9TreeEvaluator.cpp +++ b/runtime/compiler/codegen/J9TreeEvaluator.cpp @@ -36,6 +36,12 @@ #include "runtime/J9ValueProfiler.hpp" #include "util_api.h" +TR::Register* 
+J9::TreeEvaluator::zdchkEvaluator(TR::Node *node, TR::CodeGenerator *cg) + { + return TR::TreeEvaluator::unImpOpEvaluator(node, cg); + } + TR::Register* J9::TreeEvaluator::zdloadEvaluator(TR::Node *node, TR::CodeGenerator *cg) { diff --git a/runtime/compiler/codegen/J9TreeEvaluator.hpp b/runtime/compiler/codegen/J9TreeEvaluator.hpp index 8d30eef2890..3de452c6612 100644 --- a/runtime/compiler/codegen/J9TreeEvaluator.hpp +++ b/runtime/compiler/codegen/J9TreeEvaluator.hpp @@ -75,6 +75,7 @@ class OMR_EXTENSIBLE TreeEvaluator: public OMR::TreeEvaluatorConnector float frequency; }; + static TR::Register *zdchkEvaluator(TR::Node *node, TR::CodeGenerator *cg); static TR::Register *zdloadEvaluator(TR::Node *node, TR::CodeGenerator *cg); static TR::Register *zdloadiEvaluator(TR::Node *node, TR::CodeGenerator *cg); static TR::Register *zdstoreEvaluator(TR::Node *node, TR::CodeGenerator *cg); diff --git a/runtime/compiler/env/j9method.cpp b/runtime/compiler/env/j9method.cpp index df8bed9db17..bd756df73f6 100644 --- a/runtime/compiler/env/j9method.cpp +++ b/runtime/compiler/env/j9method.cpp @@ -2617,6 +2617,14 @@ void TR_ResolvedJ9Method::construct() {TR::unknownMethod} }; + static X DataAccessExternalDecimalMethods[] = + { + {x(TR::com_ibm_dataaccess_ExternalDecimal_checkExternalDecimal , "checkExternalDecimal" , "([BIIII)I")}, + {x(TR::com_ibm_dataaccess_ExternalDecimal_checkExternalDecimal_, "checkExternalDecimal_", "([BIIII)I")}, + + {TR::unknownMethod} + }; + static X BigDecimalMethods[] = { @@ -3314,6 +3322,8 @@ void TR_ResolvedJ9Method::construct() {x(TR::java_lang_Integer_reverseBytes, "reverseBytes", "(I)I")}, {x(TR::java_lang_Integer_rotateLeft, "rotateLeft", "(II)I")}, {x(TR::java_lang_Integer_rotateRight, "rotateRight", "(II)I")}, + {x(TR::java_lang_Integer_compress, "compress", "(II)I")}, + {x(TR::java_lang_Integer_expand, "expand", "(II)I")}, {x(TR::java_lang_Integer_valueOf, "valueOf", "(I)Ljava/lang/Integer;")}, { TR::java_lang_Integer_init, 6, "", (int16_t)-1, 
"*"}, {x(TR::java_lang_Integer_toUnsignedLong, "toUnsignedLong", "(I)J")}, @@ -3335,6 +3345,8 @@ void TR_ResolvedJ9Method::construct() {x(TR::java_lang_Long_reverseBytes, "reverseBytes", "(J)J")}, {x(TR::java_lang_Long_rotateLeft, "rotateLeft", "(JI)J")}, {x(TR::java_lang_Long_rotateRight, "rotateRight", "(JI)J")}, + {x(TR::java_lang_Long_compress, "compress", "(JJ)J")}, + {x(TR::java_lang_Long_expand, "expand", "(JJ)J")}, { TR::java_lang_Long_init, 6, "", (int16_t)-1, "*"}, {x(TR::java_lang_Long_stringSize, "stringSize", "(J)I") }, {x(TR::java_lang_Long_toString, "toString", "(J)Ljava/lang/String;") }, @@ -4287,6 +4299,7 @@ void TR_ResolvedJ9Method::construct() { "java/util/Hashtable$HashEnumerator", HashtableHashEnumeratorMethods }, { "com/ibm/Compiler/Internal/Prefetch", PrefetchMethods }, { "java/lang/invoke/VarHandleInternal", VarHandleMethods }, + { "com/ibm/dataaccess/ExternalDecimal", DataAccessExternalDecimalMethods }, { 0 } }; @@ -9822,10 +9835,17 @@ TR_ResolvedJ9Method::isFieldFlattened(TR::Compilation *comp, int32_t cpIndex, bo return vmThread->javaVM->internalVMFunctions->isFlattenableFieldFlattened(reinterpret_cast(containingClass), fieldShape); } +bool +TR_ResolvedJ9Method::isDAAExternalDecimalWrapperMethod() + { + // DAA External Decimal check method + return (this->TR_ResolvedMethod::getRecognizedMethod() == TR::com_ibm_dataaccess_ExternalDecimal_checkExternalDecimal); + } + bool TR_ResolvedJ9Method::isDAAWrapperMethod() { - return isDAAMarshallingWrapperMethod() || isDAAPackedDecimalWrapperMethod(); + return isDAAMarshallingWrapperMethod() || isDAAPackedDecimalWrapperMethod() || isDAAExternalDecimalWrapperMethod(); } bool @@ -9966,10 +9986,17 @@ TR_ResolvedJ9Method::isDAAPackedDecimalWrapperMethod() return false; } +bool +TR_ResolvedJ9Method::isDAAExternalDecimalIntrinsicMethod() + { + // DAA External Decimal check method + return (this->TR_ResolvedMethod::getRecognizedMethod() == TR::com_ibm_dataaccess_ExternalDecimal_checkExternalDecimal_); + } 
+ bool TR_ResolvedJ9Method::isDAAIntrinsicMethod() { - return isDAAMarshallingIntrinsicMethod() || isDAAPackedDecimalIntrinsicMethod(); + return isDAAMarshallingIntrinsicMethod() || isDAAPackedDecimalIntrinsicMethod() || isDAAExternalDecimalIntrinsicMethod(); } bool diff --git a/runtime/compiler/env/j9method.h b/runtime/compiler/env/j9method.h index 9df040a4d95..2b3583f8bba 100644 --- a/runtime/compiler/env/j9method.h +++ b/runtime/compiler/env/j9method.h @@ -471,9 +471,11 @@ class TR_ResolvedJ9Method : public TR_J9Method, public TR_ResolvedJ9MethodBase bool isDAAWrapperMethod(); bool isDAAMarshallingWrapperMethod(); bool isDAAPackedDecimalWrapperMethod(); + bool isDAAExternalDecimalWrapperMethod(); bool isDAAIntrinsicMethod(); bool isDAAMarshallingIntrinsicMethod(); bool isDAAPackedDecimalIntrinsicMethod(); + bool isDAAExternalDecimalIntrinsicMethod(); protected: TR_ResolvedMethod * aotMaskResolvedPossiblyPrivateVirtualMethod(TR::Compilation *comp, TR_ResolvedMethod *method); diff --git a/runtime/compiler/il/ILOpCodesEnum.hpp b/runtime/compiler/il/ILOpCodesEnum.hpp index 077351ee990..d745bdcc59e 100644 --- a/runtime/compiler/il/ILOpCodesEnum.hpp +++ b/runtime/compiler/il/ILOpCodesEnum.hpp @@ -26,7 +26,7 @@ #include "compiler/il/OMRILOpCodesEnum.hpp" FirstJ9Op = LastScalarOMROp + 1, - LastJ9Op = BCDCHK, + LastJ9Op = zdchk, FirstTROp = FirstOMROp, LastTROp = LastJ9Op, diff --git a/runtime/compiler/il/Opcodes.enum b/runtime/compiler/il/Opcodes.enum index d7d9a3c8e3e..e0c20c83502 100644 --- a/runtime/compiler/il/Opcodes.enum +++ b/runtime/compiler/il/Opcodes.enum @@ -1542,3 +1542,19 @@ OPCODE_MACRO(\ /* .ifCompareOpCode = */ TR::BadILOp, \ /* .description = */ \ ) +OPCODE_MACRO(\ + /* .opcode = */ zdchk, \ + /* .name = */ "zdchk", \ + /* .properties1 = */ 0, \ + /* .properties2 = */ ILProp2::ValueNumberShare | ILProp2::SupportedForPRE, \ + /* .properties3 = */ 0, \ + /* .properties4 = */ ILProp4::BinaryCodedDecimalOp, \ + /* .dataType = */ TR::Int32, \ + /* 
.typeProperties = */ ILTypeProp::Size_4 | ILTypeProp::Integer, \ + /* .childProperties = */ TWO_CHILD(TR::ZonedDecimal, TR::Int8), \ + /* .swapChildrenOpCode = */ TR::BadILOp, \ + /* .reverseBranchOpCode = */ TR::BadILOp, \ + /* .booleanCompareOpCode = */ TR::BadILOp, \ + /* .ifCompareOpCode = */ TR::BadILOp, \ + /* .description = zoned decimal validity checking */ \ +) diff --git a/runtime/compiler/ilgen/Walker.cpp b/runtime/compiler/ilgen/Walker.cpp index 92bd22718e0..146a5670fe4 100644 --- a/runtime/compiler/ilgen/Walker.cpp +++ b/runtime/compiler/ilgen/Walker.cpp @@ -4190,6 +4190,8 @@ break DAA_PRINT(TR::com_ibm_dataaccess_PackedDecimal_shiftRightPackedDecimal); DAA_PRINT(TR::com_ibm_dataaccess_PackedDecimal_movePackedDecimal); + DAA_PRINT(TR::com_ibm_dataaccess_ExternalDecimal_checkExternalDecimal); + default: break; } diff --git a/runtime/compiler/optimizer/DataAccessAccelerator.cpp b/runtime/compiler/optimizer/DataAccessAccelerator.cpp index e6c897e74f4..345771ac67f 100644 --- a/runtime/compiler/optimizer/DataAccessAccelerator.cpp +++ b/runtime/compiler/optimizer/DataAccessAccelerator.cpp @@ -343,6 +343,14 @@ int32_t TR_DataAccessAccelerator::performOnBlock(TR::Block* block, TreeTopContai ++result; } break; + // DAA External Decimal Check + case TR::com_ibm_dataaccess_ExternalDecimal_checkExternalDecimal_: + if (comp()->target().cpu.supportsFeature(OMR_FEATURE_S390_VECTOR_PACKED_DECIMAL_ENHANCEMENT_FACILITY_3) + && inlineCheckExternalDecimal(treeTop, callNode)) + { + ++result; + } + break; default: matched = false; @@ -846,6 +854,99 @@ bool TR_DataAccessAccelerator::inlineCheckPackedDecimal(TR::TreeTop *callTreeTop return false; } +bool TR_DataAccessAccelerator::inlineCheckExternalDecimal(TR::TreeTop *callTreeTop, TR::Node *callNode) + { + TR::Node *byteArrayNode = callNode->getChild(0); + TR::Node *offsetNode = callNode->getChild(1); + TR::Node *precisionNode = callNode->getChild(2); + TR::Node *typeNode = callNode->getChild(3); + TR::Node 
*bytesWithSpacesNode = callNode->getChild(4); + + int32_t precision = precisionNode->getInt(); + int32_t bytesWithSpaces = bytesWithSpacesNode->getInt(); + int32_t type = typeNode->getInt(); + const char *failMsg = NULL; + + /* Hardware expects both, precision and bytesWithSpaces to be + * 5 bit unsigned binary integer. However, 0 is valid only for + * bytesWithSpaces. This is why precision must be within [1-31] + * range and bytesWithSpaces must be within [0-31] range. + */ + // TODO: Add support for non-constant arguments + if (!isChildConst(callNode, 2)) + failMsg = "Precision is not constant"; + else if (precision < 1 || precision > 31) + failMsg = "Precision value is not in valid range [1-31]"; + else if (!isChildConst(callNode, 3)) + failMsg = "Decimal type node is not constant"; + else if (type < 1 || type > 4) + failMsg = "Invalid decimal type. Supported types are (1|2|3|4)"; + else if (!isChildConst(callNode, 4)) + failMsg = "bytesWithSpaces node is not constant"; + else if (bytesWithSpaces < 0 || bytesWithSpaces > 31) + failMsg = "bytesWithSpaces value not in valid range [0-31]"; + + if (failMsg) + { + TR::DebugCounter::incStaticDebugCounter(comp(), + TR::DebugCounter::debugCounterName(comp(), + "DAA/rejected/chkZonedDecimal")); + + return printInliningStatus (false, callNode, failMsg); + } + + if (performTransformation(comp(), "O^O TR_DataAccessAccelerator: inlineCheckZonedDecimal on callNode %p\n", callNode)) + { + TR::DebugCounter::incStaticDebugCounter(comp(), + TR::DebugCounter::debugCounterName(comp(), + "DAA/inlined/chkZonedDecimal")); + + insertByteArrayNULLCHK(callTreeTop, callNode, byteArrayNode); + + TR::DataType decimalType = TR::DataTypes::NoType; + TR::ILOpCodes loadOpCode = TR::BadILOp; + if (type == 1) + { + decimalType = TR::ZonedDecimal; + loadOpCode = TR::zdloadi; + } + else if (type == 2) + { + decimalType = TR::ZonedDecimalSignLeadingEmbedded; + loadOpCode = TR::zdsleLoadi; + } + else if (type == 3) + { + decimalType = 
TR::ZonedDecimalSignTrailingSeparate; + loadOpCode = TR::zdstsLoadi; + } + else if (type == 4) + { + decimalType = TR::ZonedDecimalSignLeadingSeparate; + loadOpCode = TR::zdslsLoadi; + } + int32_t precisionSizeInNumberOfBytes = TR::DataType::getSizeFromBCDPrecision(decimalType, precision); + + insertByteArrayBNDCHK(callTreeTop, callNode, byteArrayNode, offsetNode, 0); + insertByteArrayBNDCHK(callTreeTop, callNode, byteArrayNode, offsetNode, precisionSizeInNumberOfBytes - 1); + + TR::SymbolReference* zonedDecimalSymbolReference = comp()->getSymRefTab()->findOrCreateArrayShadowSymbolRef(decimalType, NULL, precisionSizeInNumberOfBytes, fe()); + TR::Node* zdchkChild0Node = TR::Node::createWithSymRef(loadOpCode, 1, 1, constructAddressNode(callNode, byteArrayNode, offsetNode), zonedDecimalSymbolReference); + zdchkChild0Node->setDecimalPrecision(precision); + + byteArrayNode->decReferenceCount(); + offsetNode->decReferenceCount(); + precisionNode->decReferenceCount(); + typeNode->decReferenceCount(); + + TR::Node* bytesWithSpacesConstNode = TR::Node::bconst(static_cast(bytesWithSpaces)); + TR::Node::recreateWithoutProperties(callNode, TR::zdchk, 2, zdchkChild0Node, bytesWithSpacesConstNode); + return true; + } + + return false; +} + TR::Node* TR_DataAccessAccelerator::insertIntegerGetIntrinsic(TR::TreeTop* callTreeTop, TR::Node* callNode, int32_t sourceNumBytes, int32_t targetNumBytes) { if (targetNumBytes != 1 && targetNumBytes != 2 && targetNumBytes != 4 && targetNumBytes != 8) diff --git a/runtime/compiler/optimizer/DataAccessAccelerator.hpp b/runtime/compiler/optimizer/DataAccessAccelerator.hpp index 0337e0a18f4..420ae10c19b 100644 --- a/runtime/compiler/optimizer/DataAccessAccelerator.hpp +++ b/runtime/compiler/optimizer/DataAccessAccelerator.hpp @@ -211,6 +211,7 @@ class TR_DataAccessAccelerator : public TR::Optimization TR::Node* insertDecimalSetIntrinsic(TR::TreeTop* callTreeTop, TR::Node* callNode, int32_t sourceNumBytes, int32_t targetNumBytes); bool 
inlineCheckPackedDecimal(TR::TreeTop* callTreeTop, TR::Node* callNode); + bool inlineCheckExternalDecimal(TR::TreeTop* callTreeTop, TR::Node* callNode); private: diff --git a/runtime/compiler/optimizer/InlinerTempForJ9.cpp b/runtime/compiler/optimizer/InlinerTempForJ9.cpp index 34f0db35b3e..28d84b43fdf 100644 --- a/runtime/compiler/optimizer/InlinerTempForJ9.cpp +++ b/runtime/compiler/optimizer/InlinerTempForJ9.cpp @@ -5431,6 +5431,9 @@ TR_J9InlinerPolicy::supressInliningRecognizedInitialCallee(TR_CallSite* callsite // DAA Packed Decimal check method case TR::com_ibm_dataaccess_PackedDecimal_checkPackedDecimal_: + // DAA External Decimal check method + case TR::com_ibm_dataaccess_ExternalDecimal_checkExternalDecimal_: + // DAA Packed Decimal <-> Integer case TR::com_ibm_dataaccess_DecimalData_convertPackedDecimalToInteger_: case TR::com_ibm_dataaccess_DecimalData_convertPackedDecimalToInteger_ByteBuffer_: diff --git a/runtime/compiler/optimizer/J9LocalCSE.cpp b/runtime/compiler/optimizer/J9LocalCSE.cpp index e10528f86cf..0777419cc66 100644 --- a/runtime/compiler/optimizer/J9LocalCSE.cpp +++ b/runtime/compiler/optimizer/J9LocalCSE.cpp @@ -117,6 +117,7 @@ J9::LocalCSE::shouldCommonNode(TR::Node *parent, TR::Node *node) case TR::com_ibm_dataaccess_PackedDecimal_equalsPackedDecimal_: case TR::com_ibm_dataaccess_PackedDecimal_notEqualsPackedDecimal_: case TR::com_ibm_dataaccess_PackedDecimal_checkPackedDecimal_: + case TR::com_ibm_dataaccess_ExternalDecimal_checkExternalDecimal_: case TR::com_ibm_dataaccess_DecimalData_convertExternalDecimalToPackedDecimal_: case TR::com_ibm_dataaccess_DecimalData_convertPackedDecimalToExternalDecimal_: case TR::com_ibm_dataaccess_DecimalData_convertPackedDecimalToUnicodeDecimal_: diff --git a/runtime/compiler/optimizer/J9SimplifierTable.enum b/runtime/compiler/optimizer/J9SimplifierTable.enum index dcebe11957d..cb530ede7d8 100644 --- a/runtime/compiler/optimizer/J9SimplifierTable.enum +++ 
b/runtime/compiler/optimizer/J9SimplifierTable.enum @@ -118,6 +118,7 @@ #define pdModifyPrecisionSimplifierHandler pdshlSimplifier #define countDigitsSimplifierHandler dftSimplifier #define BCDCHKSimplifierHandler dftSimplifier +#define zdchkSimplifierHandler dftSimplifier #include "optimizer/OMRSimplifierTable.enum" diff --git a/runtime/compiler/runtime/SignalHandler.c b/runtime/compiler/runtime/SignalHandler.c index cdcdab5fe7f..4507febccf1 100644 --- a/runtime/compiler/runtime/SignalHandler.c +++ b/runtime/compiler/runtime/SignalHandler.c @@ -564,11 +564,11 @@ UDATA jitPPCHandler(J9VMThread* vmThread, U_32 sigType, void* sigInfo) *iarPtr = (UDATA) ((void *) &jitHandleInternalErrorTrap); #endif return J9PORT_SIG_EXCEPTION_CONTINUE_EXECUTION; - + } else if (J9PORT_SIG_FLAG_SIGTRAP == sigType) { IDATA trapType = jitPPCIdentifyCodeCacheTrapType((U_8 *) *iarPtr); - + switch (trapType) { case TRAP_TYPE_NULL_CHECK: @@ -937,8 +937,19 @@ UDATA restoreSystemStackPointerState(J9VMThread* vmThread, U_32 sigType, void* s return J9PORT_SIG_EXCEPTION_CONTINUE_EXECUTION; } +#ifdef EMULATE_ZNEXT +extern int jitS390Emulation(J9VMThread* vmThread, void* sigInfo); +#endif + UDATA jit390Handler(J9VMThread* vmThread, U_32 sigType, void* sigInfo) { +#ifdef EMULATE_ZNEXT + if (J9PORT_SIG_FLAG_SIGILL == sigType && jitS390Emulation(vmThread, sigInfo) == 0) + { + return J9PORT_SIG_EXCEPTION_CONTINUE_EXECUTION; + } +#endif + PORT_ACCESS_FROM_VMC(vmThread); J9JITConfig *jitConfig = vmThread->javaVM->jitConfig; @@ -1251,13 +1262,13 @@ UDATA jit390Handler(J9VMThread* vmThread, U_32 sigType, void* sigInfo) /* add one to *controlPC for symmetry with IA32, handler check subs one */ jit390SetTrapHandler(controlPC, entryPointRegister, (void *) &jitHandleNullPointerExceptionTrap); return restoreSystemStackPointerState(vmThread, sigType, sigInfo); - + case TRAP_TYPE_INTERNAL_ERROR: vmThread->jitException = (J9Object *) (controlPCValue + 1); /* add one to *controlPC for symmetry with IA32, handler 
check subs one */ jit390SetTrapHandler(controlPC, entryPointRegister, (void *) &jitHandleInternalErrorTrap); return restoreSystemStackPointerState(vmThread, sigType, sigInfo); - + case TRAP_TYPE_ARRAY_BOUNDS: vmThread->jitException = (J9Object *) (controlPCValue + 1); /* add one to *controlPC for symmetry with IA32, handler check subs one */ diff --git a/runtime/compiler/z/codegen/J9BCDTreeEvaluator.cpp b/runtime/compiler/z/codegen/J9BCDTreeEvaluator.cpp index aef18277f89..8c8393cbe7f 100644 --- a/runtime/compiler/z/codegen/J9BCDTreeEvaluator.cpp +++ b/runtime/compiler/z/codegen/J9BCDTreeEvaluator.cpp @@ -5138,6 +5138,115 @@ J9::Z::TreeEvaluator::pdchkEvaluator(TR::Node *node, TR::CodeGenerator *cg) return chkResultReg; } +TR::Register * +J9::Z::TreeEvaluator::zdchkEvaluator(TR::Node *node, TR::CodeGenerator *cg) + { + cg->traceBCDEntry("zdchk",node); + TR::Compilation *comp = cg->comp(); + TR::Register *chkResultReg = cg->allocateRegister(TR_GPR); + generateRRInstruction(cg, TR::InstOpCode::getXORRegOpCode(), node, chkResultReg, chkResultReg); + + TR::Node *child = node->getFirstChild(); + int32_t precision = child->getDecimalPrecision(); + TR_ASSERT_FATAL_WITH_NODE(node, precision > 0 && precision < 32, + "External decimal precision was expected to be greater than 0 and less than 32, but was %d.\n", + precision); + + int32_t bytesWithSpacesConst = node->getSecondChild()->getInt(); + TR_ASSERT_FATAL_WITH_NODE(node, bytesWithSpacesConst >= 0 && bytesWithSpacesConst < 32, + "Space bytes count(%d) for external decimal was outside expected range [0-31].\n", + bytesWithSpacesConst); + + TR::DataType dataType = child->getDataType(); + // Safe to downcast here because decimalLength can at most be precision + 1 + int8_t decimalLength = static_cast(TR::DataType::getSizeFromBCDPrecision(dataType, precision)); + TR_ASSERT_FATAL_WITH_NODE(node, decimalLength > 0 && decimalLength <= (static_cast(precision) + 1), + "External decimal length was expected to be greater than 0 and 
less than equal to precision + 1, " + "but decimal length was %d and precision was %d.\n", + decimalLength, precision); + TR_PseudoRegister *sourceReg = cg->evaluateBCDNode(child); + sourceReg = cg->privatizeBCDRegisterIfNeeded(node, child, sourceReg); + TR::MemoryReference *sourceMR = generateS390LeftAlignedMemoryReference(child, sourceReg->getStorageReference(), cg, decimalLength); + + TR::Register *vZondedLowReg = cg->allocateRegister(TR_VRF); + TR::Register *vZondedHighReg = cg->allocateRegister(TR_VRF); + + /** + * I3: / SSC LS DSC STC DC + * 0 0 0 00000 000 00000 + * - Separate-Sign Control (SSC) : 1 if sign is separate, otherwise 0 for embedded sign. + * - Leading Sign (LS) : 1 if sign is leading, otherwise 0 for trailing sign. + * - Disallowed-Spaces Count (DSC): DC is total number of digits, DSC represents number of digits + * with zone and digit format, rest can have space. + * Only relevant when SSC is 0. + * - Sign-Test Control (STC) : Specifies which codes are considered as valid sign codes. + * 110 (C,D,F) for embedded sign (will return false for non-preferred sign codes). + * 010 (4e,60) for sign separate (leading/trailing). + * 000 all sign codes are considered valid (hex) (A-F). + * - Digits Count (DC) : Integer specifying number of bytes to be verified. Does not include sign byte. 
+ */ + #define I3_SSC_SEPARATE 0x1 + #define I3_STC_SIGN_SEPARATE 0x2 + #define I3_LS_LEADING 0x1 + #define I3_STC_EMBEDDED_SIGN 0x6 + + uint16_t zonedDecimalInfo = static_cast(precision); // I3 operand + int8_t dsc = static_cast(precision - bytesWithSpacesConst); + + if (dataType == TR::ZonedDecimalSignTrailingSeparate) // Sign trailing separate + { + zonedDecimalInfo |= ((I3_SSC_SEPARATE << 14) | (I3_STC_SIGN_SEPARATE << 5)); + } + else if (dataType == TR::ZonedDecimalSignLeadingEmbedded) // Sign leading embedded + { + zonedDecimalInfo |= ((I3_LS_LEADING << 13) | (dsc << 8)); + } + else if (dataType == TR::ZonedDecimalSignLeadingSeparate) // Sign leading separate + { + zonedDecimalInfo |= ((I3_SSC_SEPARATE << 14) | (I3_LS_LEADING << 13) | (I3_STC_SIGN_SEPARATE << 5)); + } + else if (dataType == TR::ZonedDecimal) // Sign embedded trailing + { + zonedDecimalInfo |= (dsc << 8); + } + + // Must use decimalLength because precision does not equal length when sign is separate + int8_t firstByteIndexToLoad = decimalLength - 1; + TR::MemoryReference *zonedDecimalLowMR = generateS390MemoryReference(*sourceMR, 0, cg); + TR::MemoryReference *zonedDecimalHighMR = NULL; + if (decimalLength > TR_VECTOR_REGISTER_SIZE) + { + zonedDecimalHighMR = zonedDecimalLowMR; + firstByteIndexToLoad = firstByteIndexToLoad - TR_VECTOR_REGISTER_SIZE; + generateVSIInstruction(cg, TR::InstOpCode::VLRL, node, vZondedHighReg, zonedDecimalHighMR, firstByteIndexToLoad); + + zonedDecimalLowMR = generateS390MemoryReference(*sourceMR, decimalLength - TR_VECTOR_REGISTER_SIZE, cg); + firstByteIndexToLoad = TR_VECTOR_REGISTER_SIZE - 1; + } + + generateVSIInstruction(cg, TR::InstOpCode::VLRL, node, vZondedLowReg, zonedDecimalLowMR, firstByteIndexToLoad); + generateVRIlInstruction(cg, TR::InstOpCode::VTZ, node, vZondedHighReg, vZondedLowReg, zonedDecimalInfo); + generateRRInstruction(cg, TR::InstOpCode::IPM, node, chkResultReg, chkResultReg); + if(cg->comp()->target().is64Bit()) + { + 
generateRRInstruction(cg, TR::InstOpCode::LLGTR, node, chkResultReg, chkResultReg); + generateRSInstruction(cg, TR::InstOpCode::SRLG, node, chkResultReg, chkResultReg, 28); + } + else + { + generateRSInstruction(cg, TR::InstOpCode::SRL, node, chkResultReg, 28); + } + + if (vZondedLowReg) cg->stopUsingRegister(vZondedLowReg); + if (vZondedHighReg) cg->stopUsingRegister(vZondedHighReg); + + node->setRegister(chkResultReg); + cg->decReferenceCount(child); + cg->decReferenceCount(node->getSecondChild()); + cg->traceBCDExit("zdchk", node); + return chkResultReg; + } + /** * pdEvaluator - various binary packed decimal evaluators */ diff --git a/runtime/compiler/z/codegen/J9CodeGenerator.cpp b/runtime/compiler/z/codegen/J9CodeGenerator.cpp index 02b36d78393..5113d162ccf 100644 --- a/runtime/compiler/z/codegen/J9CodeGenerator.cpp +++ b/runtime/compiler/z/codegen/J9CodeGenerator.cpp @@ -3761,6 +3761,20 @@ J9::Z::CodeGenerator::suppressInliningOfRecognizedMethod(TR::RecognizedMethod me return true; } + static bool disableZNextCompressExpand = feGetEnv("TR_DisableZNextCompressExpand") != NULL; + if (!disableZNextCompressExpand && + (self()->comp()->target().cpu.supportsFeature(OMR_FEATURE_S390_MISCELLANEOUS_INSTRUCTION_EXTENSION_4) || + TR::InstOpCode(TR::InstOpCode::BEXTG).canEmulate() && TR::InstOpCode(TR::InstOpCode::BDEPG).canEmulate())) + { + if (method == TR::java_lang_Integer_compress || + method == TR::java_lang_Integer_expand || + method == TR::java_lang_Long_compress || + method == TR::java_lang_Long_expand) + { + return true; + } + } + if (method == TR::java_util_concurrent_atomic_AtomicBoolean_getAndSet || method == TR::java_util_concurrent_atomic_AtomicInteger_getAndAdd || method == TR::java_util_concurrent_atomic_AtomicInteger_getAndIncrement || @@ -4143,6 +4157,30 @@ J9::Z::CodeGenerator::inlineDirectCall( break; } + static bool disableZNextCompressExpand = feGetEnv("TR_DisableZNextCompressExpand") != NULL; + if (!disableZNextCompressExpand && + 
(self()->comp()->target().cpu.supportsFeature(OMR_FEATURE_S390_MISCELLANEOUS_INSTRUCTION_EXTENSION_4) || + TR::InstOpCode(TR::InstOpCode::BEXTG).canEmulate() && TR::InstOpCode(TR::InstOpCode::BDEPG).canEmulate())) + { + switch (methodSymbol->getRecognizedMethod()) + { + case TR::java_lang_Integer_compress: + resultReg = TR::TreeEvaluator::inlineBitCompress(node, cg, false); + return true; + case TR::java_lang_Integer_expand: + resultReg = TR::TreeEvaluator::inlineBitExpand(node, cg, false); + return true; + case TR::java_lang_Long_compress: + resultReg = TR::TreeEvaluator::inlineBitCompress(node, cg, true); + return true; + case TR::java_lang_Long_expand: + resultReg = TR::TreeEvaluator::inlineBitExpand(node, cg, true); + return true; + default: + break; + } + } + #ifdef J9VM_OPT_JAVA_CRYPTO_ACCELERATION if (self()->inlineCryptoMethod(node, resultReg)) { diff --git a/runtime/compiler/z/codegen/J9TreeEvaluator.hpp b/runtime/compiler/z/codegen/J9TreeEvaluator.hpp index 5b26c58a25b..56556facb16 100644 --- a/runtime/compiler/z/codegen/J9TreeEvaluator.hpp +++ b/runtime/compiler/z/codegen/J9TreeEvaluator.hpp @@ -470,6 +470,7 @@ class OMR_EXTENSIBLE TreeEvaluator: public J9::TreeEvaluator bool ignoreDecimalOverflow = false); static TR::Register *pdchkEvaluator(TR::Node *node, TR::CodeGenerator *cg); + static TR::Register *zdchkEvaluator(TR::Node *node, TR::CodeGenerator *cg); static TR::Register *pdcmpeqEvaluator(TR::Node *node, TR::CodeGenerator *cg); static TR::Register *pdcmpneEvaluator(TR::Node *node, TR::CodeGenerator *cg); diff --git a/runtime/compiler/z/env/J9CPU.cpp b/runtime/compiler/z/env/J9CPU.cpp index 95cee671bdb..5f1df25c99e 100644 --- a/runtime/compiler/z/env/J9CPU.cpp +++ b/runtime/compiler/z/env/J9CPU.cpp @@ -113,6 +113,14 @@ J9::Z::CPU::customize(OMRProcessorDesc processorDescription) omrsysinfo_processor_set_feature(&processorDescription, OMR_FEATURE_S390_VECTOR_PACKED_DECIMAL_ENHANCEMENT_FACILITY_2, FALSE); } + if (processorDescription.processor < 
OMR_PROCESSOR_S390_ZNEXT) + { + omrsysinfo_processor_set_feature(&processorDescription, OMR_FEATURE_S390_MISCELLANEOUS_INSTRUCTION_EXTENSION_4, FALSE); + omrsysinfo_processor_set_feature(&processorDescription, OMR_FEATURE_S390_VECTOR_FACILITY_ENHANCEMENT_3, FALSE); + omrsysinfo_processor_set_feature(&processorDescription, OMR_FEATURE_S390_PLO_EXTENSION, FALSE); + omrsysinfo_processor_set_feature(&processorDescription, OMR_FEATURE_S390_VECTOR_PACKED_DECIMAL_ENHANCEMENT_FACILITY_3, FALSE); + } + // This variable is used internally by the j9sysinfo macros below and cannot be folded away J9PortLibrary* privatePortLibrary = TR::Compiler->portLib; @@ -160,7 +168,11 @@ J9::Z::CPU::enableFeatureMasks() OMR_FEATURE_S390_VECTOR_FACILITY_ENHANCEMENT_2, OMR_FEATURE_S390_MISCELLANEOUS_INSTRUCTION_EXTENSION_3, OMR_FEATURE_S390_VECTOR_PACKED_DECIMAL_ENHANCEMENT_FACILITY, - OMR_FEATURE_S390_VECTOR_PACKED_DECIMAL_ENHANCEMENT_FACILITY_2}; + OMR_FEATURE_S390_VECTOR_PACKED_DECIMAL_ENHANCEMENT_FACILITY_2, + OMR_FEATURE_S390_MISCELLANEOUS_INSTRUCTION_EXTENSION_4, + OMR_FEATURE_S390_VECTOR_FACILITY_ENHANCEMENT_3, + OMR_FEATURE_S390_PLO_EXTENSION, + OMR_FEATURE_S390_VECTOR_PACKED_DECIMAL_ENHANCEMENT_FACILITY_3}; memset(_supportedFeatureMasks.features, 0, OMRPORT_SYSINFO_FEATURES_SIZE*sizeof(uint32_t)); OMRPORT_ACCESS_FROM_OMRPORT(TR::Compiler->omrPortLib); diff --git a/runtime/compiler/z/runtime/CMakeLists.txt b/runtime/compiler/z/runtime/CMakeLists.txt index a9eb8b60cea..46ad5102094 100644 --- a/runtime/compiler/z/runtime/CMakeLists.txt +++ b/runtime/compiler/z/runtime/CMakeLists.txt @@ -65,3 +65,9 @@ j9jit_files( ${CMAKE_CURRENT_BINARY_DIR}/Recompilation.s ${CMAKE_CURRENT_BINARY_DIR}/ValueProf.s ) + +if(J9VM_JIT_EMULATE_ZNEXT) + j9jit_files( + z/runtime/Emulation.cpp + ) +endif() diff --git a/runtime/compiler/z/runtime/Emulation.cpp b/runtime/compiler/z/runtime/Emulation.cpp new file mode 100644 index 00000000000..a25fc14fc0c --- /dev/null +++ b/runtime/compiler/z/runtime/Emulation.cpp 
@@ -0,0 +1,283 @@
+/*******************************************************************************
+ * Copyright IBM Corp. and others 2023
+ *
+ * This program and the accompanying materials are made available under
+ * the terms of the Eclipse Public License 2.0 which accompanies this
+ * distribution and is available at https://www.eclipse.org/legal/epl-2.0/
+ * or the Apache License, Version 2.0 which accompanies this distribution and
+ * is available at https://www.apache.org/licenses/LICENSE-2.0.
+ *
+ * This Source Code may also be made available under the following
+ * Secondary Licenses when the conditions for such availability set
+ * forth in the Eclipse Public License, v. 2.0 are satisfied: GNU
+ * General Public License, version 2 with the GNU Classpath
+ * Exception [1] and GNU General Public License, version 2 with the
+ * OpenJDK Assembly Exception [2].
+ *
+ * [1] https://www.gnu.org/software/classpath/license.html
+ * [2] https://openjdk.org/legal/assembly-exception.html
+ *
+ * SPDX-License-Identifier: EPL-2.0 OR Apache-2.0 OR GPL-2.0-only WITH Classpath-exception-2.0 OR GPL-2.0-only WITH OpenJDK-assembly-exception-1.0
+ *******************************************************************************/
+
+// NOTE(review): the names of these two system headers were lost from the patch
+// text; <stddef.h> (NULL) and <stdint.h> (fixed-width integers) cover what this
+// file uses directly. Confirm against the upstream change.
+#include <stddef.h>
+#include <stdint.h>
+
+#include "j9protos.h"
+#include "j9cfg.h"
+
+#ifdef LINUX
+
+#include "../omr/port/linuxs390/omrsignal_context.h"
+
+#include "infra/Bit.hpp"
+
+/**
+ * Reads two consecutive instruction bytes as one big-endian 16-bit value
+ * without relying on host byte order, alignment or pointer type punning.
+ */
+static uint16_t readBigEndian16(const uint8_t *bytes)
+   {
+   return (uint16_t) ((bytes[0] << 8) | bytes[1]);
+   }
+
+/**
+ * Software emulator for one decoded instruction. decode() inspects the bytes
+ * that end at the given address and returns a heap-allocated emulator for the
+ * instruction it recognizes (or NULL); the caller deletes the result.
+ */
+class InstEmulator
+   {
+   public:
+   // Instances are deleted through InstEmulator*, so the destructor must be virtual.
+   virtual ~InstEmulator() {}
+   static InstEmulator *decode(uint8_t *pc);
+   virtual void emulate(mcontext_t *cpu) {}
+   };
+
+/** Emulates the LXA/LLXA family (opcodes 0xE360 to 0xE369). */
+class LXAEmulator : public InstEmulator
+   {
+   private:
+   uint8_t r1, x2, b2;
+   uint32_t dx2;
+   uint8_t shift;
+   bool isLogical;
+   public:
+   LXAEmulator(uint8_t *start);
+   virtual void emulate(mcontext_t *cpu);
+   };
+
+LXAEmulator::LXAEmulator(uint8_t *start)
+   {
+   r1 = (start[1] & 0xF0) >> 4;
+   x2 = start[1] & 0x0F;
+   b2 = (start[2] & 0xF0) >> 4;
+   // 20-bit displacement: low 12 bits from bytes 2-3, high 8 bits from byte 4
+   uint32_t dxl2 = (uint32_t) (readBigEndian16(start + 2) & 0x0FFF);
+   uint32_t dxh2 = start[4];
+   dx2 = dxl2 | (dxh2 << 12);
+   // The low nibble of the last opcode byte holds the shift amount (bits 1-3)
+   // and the logical (zero-extending) flag (bit 0)
+   shift = (start[5] & 0xE) >> 1;
+   isLogical = (start[5] & 1) == 1;
+   }
+
+void LXAEmulator::emulate(mcontext_t *cpu)
+   {
+   int32_t tmp = (int32_t) dx2;
+
+   // sign extend immediate
+   if (tmp & 0x80000)
+      {
+      tmp |= 0xFFF00000;
+      }
+
+   if (x2 != 0)
+      {
+      // 32-bit wrapping add of the low word of the index register
+      tmp = (int32_t) ((uint32_t) tmp + (uint32_t) cpu->gregs[x2]);
+      }
+
+   // Widen (zero- or sign-extending) and shift in the unsigned domain so that
+   // shifting a negative value is well defined
+   uint64_t addr = isLogical ? (uint64_t) (uint32_t) tmp : (uint64_t) (int64_t) tmp;
+   addr <<= shift;
+   if (b2 != 0)
+      {
+      addr += cpu->gregs[b2];
+      }
+   cpu->gregs[r1] = addr;
+   }
+
+/** Emulates BDEPG (opcode 0xB96D). */
+class BDEPGEmulator : public InstEmulator
+   {
+   private:
+   uint8_t r1, r2, r3;
+   public:
+   BDEPGEmulator(uint8_t *start);
+   virtual void emulate(mcontext_t *cpu);
+   };
+
+BDEPGEmulator::BDEPGEmulator(uint8_t *start)
+   {
+   r1 = (start[3] & 0xF0) >> 4;
+   r2 = start[3] & 0x0F;
+   r3 = (start[2] & 0xF0) >> 4;
+   }
+
+void BDEPGEmulator::emulate(mcontext_t *cpu)
+   {
+   uint64_t val = cpu->gregs[r2];
+   uint64_t mask = cpu->gregs[r3];
+   uint64_t res = 0;
+
+   // Walk the mask from the leftmost bit; every set mask bit receives the next
+   // leftmost unconsumed bit of val at that same position
+   for (int n = 0; mask; n++)
+      {
+      if (mask & (1ULL << 63))
+         {
+         res |= (val & (1ULL << 63)) >> n;
+         val <<= 1;
+         }
+      mask <<= 1;
+      }
+
+   cpu->gregs[r1] = res;
+   }
+
+/** Emulates BEXTG (opcode 0xB96C). */
+class BEXTGEmulator : public InstEmulator
+   {
+   private:
+   uint8_t r1, r2, r3;
+   public:
+   BEXTGEmulator(uint8_t *start);
+   virtual void emulate(mcontext_t *cpu);
+   };
+
+BEXTGEmulator::BEXTGEmulator(uint8_t *start)
+   {
+   r1 = (start[3] & 0xF0) >> 4;
+   r2 = start[3] & 0x0F;
+   r3 = (start[2] & 0xF0) >> 4;
+   }
+
+void BEXTGEmulator::emulate(mcontext_t *cpu)
+   {
+   uint64_t val = cpu->gregs[r2];
+   uint64_t mask = cpu->gregs[r3];
+   uint64_t res = 0;
+
+   // Collect the bits of val selected by mask and pack them, in order, starting
+   // at the leftmost bit of the result
+   for (int k = 0; mask; mask <<= 1, val <<= 1)
+      {
+      if (mask & (1ULL << 63))
+         {
+         res |= (val & (1ULL << 63)) >> k;
+         k++;
+         }
+      }
+
+   cpu->gregs[r1] = res;
+   }
+
+/** Emulates CLZG (opcode 0xB968). */
+class CLZGEmulator : public InstEmulator
+   {
+   private:
+   uint8_t r1, r2;
+   public:
+   CLZGEmulator(uint8_t *start);
+   virtual void emulate(mcontext_t *cpu);
+   };
+
+CLZGEmulator::CLZGEmulator(uint8_t *start)
+   {
+   r1 = (start[3] & 0xF0) >> 4;
+   r2 = start[3] & 0x0F;
+   }
+
+void CLZGEmulator::emulate(mcontext_t *cpu)
+   {
+   cpu->gregs[r1] = (uint64_t)leadingZeroes(cpu->gregs[r2]);
+   }
+
+/** Emulates CTZG (opcode 0xB969). */
+class CTZGEmulator : public InstEmulator
+   {
+   private:
+   uint8_t r1, r2;
+   public:
+   CTZGEmulator(uint8_t *start);
+   virtual void emulate(mcontext_t *cpu);
+   };
+
+CTZGEmulator::CTZGEmulator(uint8_t *start)
+   {
+   r1 = (start[3] & 0xF0) >> 4;
+   r2 = start[3] & 0x0F;
+   }
+
+void CTZGEmulator::emulate(mcontext_t *cpu)
+   {
+   cpu->gregs[r1] = (uint64_t)trailingZeroes(cpu->gregs[r2]);
+   }
+
+InstEmulator *InstEmulator::decode(uint8_t *pc)
+   {
+   // Checking the opcode in the first byte and last byte of the instruction to
+   // see if it is one of the LXA/LLXA instructions (opcodes 0xE360 to 0xE369)
+   if ((pc[-6] == 0xE3) && ((pc[-1] & 0xF0) == 0x60) && ((pc[-1] & 0x0F) < 10))
+      {
+      return new LXAEmulator(pc-6);
+      }
+
+   // Otherwise match the 16-bit opcode of a 4-byte instruction ending at pc
+   switch (readBigEndian16(pc - 4))
+      {
+      case 0xB96D:
+         return new BDEPGEmulator(pc-4);
+      case 0xB96C:
+         return new BEXTGEmulator(pc-4);
+      case 0xB968:
+         return new CLZGEmulator(pc-4);
+      case 0xB969:
+         return new CTZGEmulator(pc-4);
+      default:
+         return NULL;
+      }
+   }
+
+#endif /* LINUX */
+
+/**
+ * Attempts to emulate the instruction that ends at the PSW address of the
+ * signal context. Returns 0 when an instruction was recognized and emulated,
+ * -1 otherwise (always -1 when not built for LINUX).
+ */
+extern "C"
+int jitS390Emulation(J9VMThread* vmThread, void* sigInfo)
+   {
+#ifdef LINUX
+   OMRUnixSignalInfo *unixSigInfo = (OMRUnixSignalInfo*) sigInfo;
+
+   uint8_t *pc = (uint8_t*) unixSigInfo->platformSignalInfo.context->uc_mcontext.psw.addr;
+
+   InstEmulator *inst = InstEmulator::decode(pc);
+   if (inst != NULL)
+      {
+      inst->emulate(&unixSigInfo->platformSignalInfo.context->uc_mcontext);
+      delete inst;
+      return 0;
+      }
+#endif /* LINUX */
+
+   return -1;
+   }