Skip to content

Commit b930ce2

Browse files
committed
[GR-60402] Backport to 24.2: [JDK-8346653] Add vzeroupper upon the entrance of AMD64 sha1 and sha256 stubs.
PullRequest: graal/19685
2 parents 5046f15 + 7eb97e5 commit b930ce2

File tree

2 files changed

+106
-40
lines changed

2 files changed

+106
-40
lines changed

compiler/src/jdk.graal.compiler/src/jdk/graal/compiler/lir/amd64/AMD64SHA1Op.java

+53-19
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
2+
* Copyright (c) 2023, 2024, Oracle and/or its affiliates. All rights reserved.
33
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
44
*
55
* This code is free software; you can redistribute it and/or modify it
@@ -24,8 +24,16 @@
2424
*/
2525
package jdk.graal.compiler.lir.amd64;
2626

27+
import static jdk.graal.compiler.lir.amd64.AMD64LIRHelper.pointerConstant;
28+
import static jdk.graal.compiler.lir.amd64.AMD64LIRHelper.recordExternalAddress;
2729
import static jdk.vm.ci.amd64.AMD64.xmm0;
2830
import static jdk.vm.ci.amd64.AMD64.xmm1;
31+
import static jdk.vm.ci.amd64.AMD64.xmm10;
32+
import static jdk.vm.ci.amd64.AMD64.xmm11;
33+
import static jdk.vm.ci.amd64.AMD64.xmm12;
34+
import static jdk.vm.ci.amd64.AMD64.xmm13;
35+
import static jdk.vm.ci.amd64.AMD64.xmm14;
36+
import static jdk.vm.ci.amd64.AMD64.xmm15;
2937
import static jdk.vm.ci.amd64.AMD64.xmm2;
3038
import static jdk.vm.ci.amd64.AMD64.xmm3;
3139
import static jdk.vm.ci.amd64.AMD64.xmm4;
@@ -35,20 +43,18 @@
3543
import static jdk.vm.ci.amd64.AMD64.xmm8;
3644
import static jdk.vm.ci.amd64.AMD64.xmm9;
3745
import static jdk.vm.ci.code.ValueUtil.asRegister;
38-
import static jdk.graal.compiler.lir.amd64.AMD64LIRHelper.pointerConstant;
39-
import static jdk.graal.compiler.lir.amd64.AMD64LIRHelper.recordExternalAddress;
4046

4147
import jdk.graal.compiler.asm.Label;
4248
import jdk.graal.compiler.asm.amd64.AMD64Address;
4349
import jdk.graal.compiler.asm.amd64.AMD64Assembler.ConditionFlag;
4450
import jdk.graal.compiler.asm.amd64.AMD64MacroAssembler;
51+
import jdk.graal.compiler.core.amd64.AMD64LIRGenerator;
4552
import jdk.graal.compiler.debug.GraalError;
4653
import jdk.graal.compiler.lir.LIRInstructionClass;
4754
import jdk.graal.compiler.lir.SyncPort;
4855
import jdk.graal.compiler.lir.asm.ArrayDataPointerConstant;
4956
import jdk.graal.compiler.lir.asm.CompilationResultBuilder;
50-
import jdk.graal.compiler.lir.gen.LIRGeneratorTool;
51-
57+
import jdk.vm.ci.amd64.AMD64.CPUFeature;
5258
import jdk.vm.ci.amd64.AMD64Kind;
5359
import jdk.vm.ci.code.Register;
5460
import jdk.vm.ci.meta.AllocatableValue;
@@ -76,11 +82,11 @@ public final class AMD64SHA1Op extends AMD64LIRInstruction {
7682
@Temp({OperandFlag.REG}) private Value[] temps;
7783
private final boolean multiBlock;
7884

79-
public AMD64SHA1Op(LIRGeneratorTool tool, AllocatableValue bufValue, AllocatableValue stateValue) {
85+
public AMD64SHA1Op(AMD64LIRGenerator tool, AllocatableValue bufValue, AllocatableValue stateValue) {
8086
this(tool, bufValue, stateValue, Value.ILLEGAL, Value.ILLEGAL, Value.ILLEGAL, false);
8187
}
8288

83-
public AMD64SHA1Op(LIRGeneratorTool tool, AllocatableValue bufValue, AllocatableValue stateValue, AllocatableValue ofsValue,
89+
public AMD64SHA1Op(AMD64LIRGenerator tool, AllocatableValue bufValue, AllocatableValue stateValue, AllocatableValue ofsValue,
8490
AllocatableValue limitValue, AllocatableValue resultValue, boolean multiBlock) {
8591
super(TYPE);
8692

@@ -92,18 +98,40 @@ public AMD64SHA1Op(LIRGeneratorTool tool, AllocatableValue bufValue, Allocatable
9298

9399
this.multiBlock = multiBlock;
94100

95-
this.temps = new Value[]{
96-
xmm0.asValue(),
97-
xmm1.asValue(),
98-
xmm2.asValue(),
99-
xmm3.asValue(),
100-
xmm4.asValue(),
101-
xmm5.asValue(),
102-
xmm6.asValue(),
103-
xmm7.asValue(),
104-
xmm8.asValue(),
105-
xmm9.asValue(),
106-
};
101+
if (tool.supportsCPUFeature(CPUFeature.AVX)) {
102+
// vzeroupper zeroes bits 128 and above of ymm0-ymm15, so all of xmm0-xmm15 must be declared as temps
103+
this.temps = new Value[]{
104+
xmm0.asValue(),
105+
xmm1.asValue(),
106+
xmm2.asValue(),
107+
xmm3.asValue(),
108+
xmm4.asValue(),
109+
xmm5.asValue(),
110+
xmm6.asValue(),
111+
xmm7.asValue(),
112+
xmm8.asValue(),
113+
xmm9.asValue(),
114+
xmm10.asValue(),
115+
xmm11.asValue(),
116+
xmm12.asValue(),
117+
xmm13.asValue(),
118+
xmm14.asValue(),
119+
xmm15.asValue(),
120+
};
121+
} else {
122+
this.temps = new Value[]{
123+
xmm0.asValue(),
124+
xmm1.asValue(),
125+
xmm2.asValue(),
126+
xmm3.asValue(),
127+
xmm4.asValue(),
128+
xmm5.asValue(),
129+
xmm6.asValue(),
130+
xmm7.asValue(),
131+
xmm8.asValue(),
132+
xmm9.asValue(),
133+
};
134+
}
107135

108136
if (multiBlock) {
109137
this.bufTempValue = tool.newVariable(bufValue.getValueKind());
@@ -168,6 +196,12 @@ public void emitCode(CompilationResultBuilder crb, AMD64MacroAssembler masm) {
168196
Label labelDoneHash = new Label();
169197
Label labelLoop0 = new Label();
170198

199+
if (masm.supports(CPUFeature.AVX)) {
200+
// Insert vzeroupper here to avoid performance penalty of SSE-AVX transition between
201+
// previously executed AVX instructions and the following SHA-1 instructions.
202+
masm.vzeroupper();
203+
}
204+
171205
masm.movdqu(abcd, new AMD64Address(state, 0));
172206
masm.pinsrd(e0, new AMD64Address(state, 16), 3);
173207
masm.movdqu(shufMask, recordExternalAddress(crb, upperWordMask));

compiler/src/jdk.graal.compiler/src/jdk/graal/compiler/lir/amd64/AMD64SHA256Op.java

+53-21
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
2+
* Copyright (c) 2023, 2024, Oracle and/or its affiliates. All rights reserved.
33
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
44
*
55
* This code is free software; you can redistribute it and/or modify it
@@ -24,9 +24,17 @@
2424
*/
2525
package jdk.graal.compiler.lir.amd64;
2626

27+
import static jdk.graal.compiler.asm.amd64.AMD64Assembler.ConditionFlag.BelowEqual;
28+
import static jdk.graal.compiler.lir.amd64.AMD64LIRHelper.pointerConstant;
29+
import static jdk.graal.compiler.lir.amd64.AMD64LIRHelper.recordExternalAddress;
2730
import static jdk.vm.ci.amd64.AMD64.xmm0;
2831
import static jdk.vm.ci.amd64.AMD64.xmm1;
2932
import static jdk.vm.ci.amd64.AMD64.xmm10;
33+
import static jdk.vm.ci.amd64.AMD64.xmm11;
34+
import static jdk.vm.ci.amd64.AMD64.xmm12;
35+
import static jdk.vm.ci.amd64.AMD64.xmm13;
36+
import static jdk.vm.ci.amd64.AMD64.xmm14;
37+
import static jdk.vm.ci.amd64.AMD64.xmm15;
3038
import static jdk.vm.ci.amd64.AMD64.xmm2;
3139
import static jdk.vm.ci.amd64.AMD64.xmm3;
3240
import static jdk.vm.ci.amd64.AMD64.xmm4;
@@ -36,20 +44,17 @@
3644
import static jdk.vm.ci.amd64.AMD64.xmm8;
3745
import static jdk.vm.ci.amd64.AMD64.xmm9;
3846
import static jdk.vm.ci.code.ValueUtil.asRegister;
39-
import static jdk.graal.compiler.asm.amd64.AMD64Assembler.ConditionFlag.BelowEqual;
40-
import static jdk.graal.compiler.lir.amd64.AMD64LIRHelper.pointerConstant;
41-
import static jdk.graal.compiler.lir.amd64.AMD64LIRHelper.recordExternalAddress;
4247

4348
import jdk.graal.compiler.asm.Label;
4449
import jdk.graal.compiler.asm.amd64.AMD64Address;
4550
import jdk.graal.compiler.asm.amd64.AMD64MacroAssembler;
51+
import jdk.graal.compiler.core.amd64.AMD64LIRGenerator;
4652
import jdk.graal.compiler.debug.GraalError;
4753
import jdk.graal.compiler.lir.LIRInstructionClass;
4854
import jdk.graal.compiler.lir.SyncPort;
4955
import jdk.graal.compiler.lir.asm.ArrayDataPointerConstant;
5056
import jdk.graal.compiler.lir.asm.CompilationResultBuilder;
51-
import jdk.graal.compiler.lir.gen.LIRGeneratorTool;
52-
57+
import jdk.vm.ci.amd64.AMD64.CPUFeature;
5358
import jdk.vm.ci.amd64.AMD64Kind;
5459
import jdk.vm.ci.code.Register;
5560
import jdk.vm.ci.meta.AllocatableValue;
@@ -79,11 +84,11 @@ public final class AMD64SHA256Op extends AMD64LIRInstruction {
7984

8085
private final boolean multiBlock;
8186

82-
public AMD64SHA256Op(LIRGeneratorTool tool, AllocatableValue bufValue, AllocatableValue stateValue) {
87+
public AMD64SHA256Op(AMD64LIRGenerator tool, AllocatableValue bufValue, AllocatableValue stateValue) {
8388
this(tool, bufValue, stateValue, Value.ILLEGAL, Value.ILLEGAL, Value.ILLEGAL, false);
8489
}
8590

86-
public AMD64SHA256Op(LIRGeneratorTool tool, AllocatableValue bufValue, AllocatableValue stateValue, AllocatableValue ofsValue,
91+
public AMD64SHA256Op(AMD64LIRGenerator tool, AllocatableValue bufValue, AllocatableValue stateValue, AllocatableValue ofsValue,
8792
AllocatableValue limitValue, AllocatableValue resultValue, boolean multiBlock) {
8893
super(TYPE);
8994

@@ -97,19 +102,40 @@ public AMD64SHA256Op(LIRGeneratorTool tool, AllocatableValue bufValue, Allocatab
97102

98103
this.keyTempValue = tool.newVariable(bufValue.getValueKind());
99104

100-
this.temps = new Value[]{
101-
xmm0.asValue(),
102-
xmm1.asValue(),
103-
xmm2.asValue(),
104-
xmm3.asValue(),
105-
xmm4.asValue(),
106-
xmm5.asValue(),
107-
xmm6.asValue(),
108-
xmm7.asValue(),
109-
xmm8.asValue(),
110-
xmm9.asValue(),
111-
xmm10.asValue(),
112-
};
105+
if (tool.supportsCPUFeature(CPUFeature.AVX)) {
106+
// vzeroupper zeroes bits 128 and above of ymm0-ymm15, so all of xmm0-xmm15 must be declared as temps
107+
this.temps = new Value[]{
108+
xmm0.asValue(),
109+
xmm1.asValue(),
110+
xmm2.asValue(),
111+
xmm3.asValue(),
112+
xmm4.asValue(),
113+
xmm5.asValue(),
114+
xmm6.asValue(),
115+
xmm7.asValue(),
116+
xmm8.asValue(),
117+
xmm9.asValue(),
118+
xmm10.asValue(),
119+
xmm11.asValue(),
120+
xmm12.asValue(),
121+
xmm13.asValue(),
122+
xmm14.asValue(),
123+
xmm15.asValue(),
124+
};
125+
} else {
126+
this.temps = new Value[]{
127+
xmm0.asValue(),
128+
xmm1.asValue(),
129+
xmm2.asValue(),
130+
xmm3.asValue(),
131+
xmm4.asValue(),
132+
xmm5.asValue(),
133+
xmm6.asValue(),
134+
xmm7.asValue(),
135+
xmm8.asValue(),
136+
xmm9.asValue(),
137+
};
138+
}
113139

114140
if (multiBlock) {
115141
this.bufTempValue = tool.newVariable(bufValue.getValueKind());
@@ -199,6 +225,12 @@ public void emitCode(CompilationResultBuilder crb, AMD64MacroAssembler masm) {
199225
// keyTemp replaces the hardcoded rax in the original stub.
200226
Register keyTemp = asRegister(keyTempValue);
201227

228+
if (masm.supports(CPUFeature.AVX)) {
229+
// Insert vzeroupper here to avoid performance penalty of SSE-AVX transition between
230+
// previously executed AVX instructions and the following SHA-256 instructions.
231+
masm.vzeroupper();
232+
}
233+
202234
masm.movdqu(state0, new AMD64Address(state, 0));
203235
masm.movdqu(state1, new AMD64Address(state, 16));
204236

0 commit comments

Comments
 (0)