Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[GR-60516] Implement Vector API rearrange operation #10376

Merged
merged 1 commit into from
Dec 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -2327,7 +2327,11 @@ public static class VexRVMOp extends VexOp {
public static final VexRVMOp VSQRTSD = new VexRVMOp("VSQRTSD", VEXPrefixConfig.P_F2, VEXPrefixConfig.M_0F, VEXPrefixConfig.WIG, 0x51, VEXOpAssertion.AVX1_AVX512F_128, EVEXTuple.T1S_64BIT, VEXPrefixConfig.W1);
public static final VexRVMOp VSQRTSS = new VexRVMOp("VSQRTSS", VEXPrefixConfig.P_F3, VEXPrefixConfig.M_0F, VEXPrefixConfig.WIG, 0x51, VEXOpAssertion.AVX1_AVX512F_128, EVEXTuple.T1S_32BIT, VEXPrefixConfig.W0);

public static final VexRVMOp VPERMILPS = new VexRVMOp("VPERMILPS", VEXPrefixConfig.P_66, VEXPrefixConfig.M_0F38, VEXPrefixConfig.W0, 0x0C, VEXOpAssertion.AVX1_AVX512F_VL, EVEXTuple.FVM, VEXPrefixConfig.W0);
public static final VexRVMOp VPERMD = new VexRVMOp("VPERMD", VEXPrefixConfig.P_66, VEXPrefixConfig.M_0F38, VEXPrefixConfig.W0, 0x36, VEXOpAssertion.AVX2_AVX512F_VL_256_512, EVEXTuple.FVM, VEXPrefixConfig.W0);
public static final VexRVMOp VPERMPS = new VexRVMOp("VPERMPS", VEXPrefixConfig.P_66, VEXPrefixConfig.M_0F38, VEXPrefixConfig.W0, 0x16, VEXOpAssertion.AVX2_AVX512F_VL_256_512, EVEXTuple.FVM, VEXPrefixConfig.W0);
public static final VexRVMOp VPERMILPD = new VexRVMOp("VPERMILPD", VEXPrefixConfig.P_66, VEXPrefixConfig.M_0F38, VEXPrefixConfig.W0, 0x0D, VEXOpAssertion.AVX1_AVX512F_VL, EVEXTuple.FVM, VEXPrefixConfig.W1);

public static final VexRVMOp VMOVSS = new VexRVMOp("VMOVSS", VEXPrefixConfig.P_F3, VEXPrefixConfig.M_0F, VEXPrefixConfig.WIG, 0x10, VEXOpAssertion.AVX1_AVX512F_128, EVEXTuple.T1S_32BIT, VEXPrefixConfig.W0);
public static final VexRVMOp VMOVSD = new VexRVMOp("VMOVSD", VEXPrefixConfig.P_F2, VEXPrefixConfig.M_0F, VEXPrefixConfig.WIG, 0x10, VEXOpAssertion.AVX1_AVX512F_128, EVEXTuple.T1S_64BIT, VEXPrefixConfig.W1);
public static final VexRVMOp VMOVHPD = new VexRVMOp("VMOVHPD", VEXPrefixConfig.P_66, VEXPrefixConfig.M_0F, VEXPrefixConfig.WIG, 0x16, VEXOpAssertion.AVX1_AVX512F_128, EVEXTuple.T1S_64BIT, VEXPrefixConfig.W1);
Expand Down Expand Up @@ -2431,8 +2435,14 @@ public static class VexRVMOp extends VexOp {
public static final VexRVMOp EVSQRTSD = new VexRVMOp("EVSQRTSD", VSQRTSD);
public static final VexRVMOp EVSQRTSS = new VexRVMOp("EVSQRTSS", VSQRTSS);

public static final VexRVMOp EVPERMB = new VexRVMOp("EVPERMB", VEXPrefixConfig.P_66, VEXPrefixConfig.M_0F38, VEXPrefixConfig.W0, 0x8D, VEXOpAssertion.AVX512_VBMI_VL, EVEXTuple.FVM, VEXPrefixConfig.W0, true);
public static final VexRVMOp EVPERMW = new VexRVMOp("EVPERMW", VEXPrefixConfig.P_66, VEXPrefixConfig.M_0F38, VEXPrefixConfig.W1, 0x8D, VEXOpAssertion.AVX512BW_VL, EVEXTuple.FVM, VEXPrefixConfig.W1, true);
public static final VexRVMOp EVPERMILPS = new VexRVMOp("EVPERMILPS", VPERMILPS);
public static final VexRVMOp EVPERMD = new VexRVMOp("EVPERMD", VPERMD);
public static final VexRVMOp EVPERMPS = new VexRVMOp("EVPERMPS", VPERMPS);
public static final VexRVMOp EVPERMILPD = new VexRVMOp("EVPERMILPD", VPERMILPD);
public static final VexRVMOp EVPERMQ = new VexRVMOp("EVPERMQ", VEXPrefixConfig.P_66, VEXPrefixConfig.M_0F38, VEXPrefixConfig.W1, 0x36, VEXOpAssertion.AVX512F_VL_256_512, EVEXTuple.FVM, VEXPrefixConfig.W1, true);
public static final VexRVMOp EVPERMPD = new VexRVMOp("EVPERMPD", VEXPrefixConfig.P_66, VEXPrefixConfig.M_0F38, VEXPrefixConfig.W1, 0x16, VEXOpAssertion.AVX512F_VL_256_512, EVEXTuple.FVM, VEXPrefixConfig.W1, true);

public static final VexRVMOp EVPBLENDMB = new VexRVMOp("EVPBLENDMB", VEXPrefixConfig.P_66, VEXPrefixConfig.M_0F38, VEXPrefixConfig.W0, 0x66, VEXOpAssertion.AVX512BW_VL, EVEXTuple.FVM, VEXPrefixConfig.W0, true);
public static final VexRVMOp EVPBLENDMW = new VexRVMOp("EVPBLENDMW", VEXPrefixConfig.P_66, VEXPrefixConfig.M_0F38, VEXPrefixConfig.W1, 0x66, VEXOpAssertion.AVX512BW_VL, EVEXTuple.FVM, VEXPrefixConfig.W1, true);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,13 +30,17 @@
import jdk.graal.compiler.asm.aarch64.AArch64ASIMDAssembler.ASIMDSize;
import jdk.graal.compiler.asm.aarch64.AArch64ASIMDAssembler.ElementSize;
import jdk.graal.compiler.asm.aarch64.AArch64MacroAssembler;
import jdk.graal.compiler.core.common.LIRKind;
import jdk.graal.compiler.debug.GraalError;
import jdk.graal.compiler.lir.asm.CompilationResultBuilder;
import jdk.graal.compiler.lir.LIRInstructionClass;
import jdk.graal.compiler.lir.Opcode;

import jdk.graal.compiler.lir.gen.LIRGeneratorTool;
import jdk.vm.ci.aarch64.AArch64Kind;
import jdk.vm.ci.code.Register;
import jdk.vm.ci.meta.AllocatableValue;
import jdk.vm.ci.meta.Value;

/**
* This enum encapsulates AArch64 instructions which perform permutations.
Expand Down Expand Up @@ -102,4 +106,61 @@ public void emitCode(CompilationResultBuilder crb, AArch64MacroAssembler masm) {

}
}

public static class ASIMDPermuteOp extends AArch64LIRInstruction {
private static final LIRInstructionClass<ASIMDPermuteOp> TYPE = LIRInstructionClass.create(ASIMDPermuteOp.class);

@Def protected AllocatableValue result;
@Alive protected AllocatableValue source;
@Use protected AllocatableValue indices;
@Temp({OperandFlag.REG, OperandFlag.ILLEGAL}) protected AllocatableValue xtmp1;
@Temp({OperandFlag.REG, OperandFlag.ILLEGAL}) protected AllocatableValue xtmp2;

public ASIMDPermuteOp(LIRGeneratorTool tool, AllocatableValue result, AllocatableValue source, AllocatableValue indices) {
super(TYPE);
this.result = result;
this.source = source;
this.indices = indices;
AArch64Kind eKind = ((AArch64Kind) result.getPlatformKind()).getScalar();
this.xtmp1 = eKind == AArch64Kind.BYTE ? Value.ILLEGAL : tool.newVariable(LIRKind.value(AArch64Kind.V128_BYTE));
this.xtmp2 = eKind == AArch64Kind.BYTE ? Value.ILLEGAL : tool.newVariable(LIRKind.value(AArch64Kind.V128_BYTE));
}

@Override
public void emitCode(CompilationResultBuilder crb, AArch64MacroAssembler masm) {
AArch64Kind vKind = (AArch64Kind) result.getPlatformKind();
AArch64Kind eKind = vKind.getScalar();
ASIMDSize vSize = ASIMDSize.fromVectorKind(vKind);
Register xtmp1Reg = xtmp1.equals(Value.ILLEGAL) ? Register.None : asRegister(xtmp1);
Register xtmp2Reg = xtmp2.equals(Value.ILLEGAL) ? Register.None : asRegister(xtmp2);
Register currentIdxReg = asRegister(indices);
// Since NEON only supports byte look up, we repeatedly convert a 2W-bit look up into
// W-bit look up by transforming a 2W-bit index with value v into a pair of W-bit
// indices v * 2, v * 2 + 1 until we reach the element width equal to Byte.SIZE
if (eKind.getSizeInBytes() == AArch64Kind.QWORD.getSizeInBytes()) {
masm.neon.shlVVI(vSize, ElementSize.DoubleWord, xtmp1Reg, currentIdxReg, 1);
masm.neon.shlVVI(vSize, ElementSize.DoubleWord, xtmp2Reg, xtmp1Reg, Integer.SIZE);
masm.neon.orrVVV(vSize, xtmp1Reg, xtmp1Reg, xtmp2Reg);
masm.neon.orrVI(vSize, ElementSize.DoubleWord, xtmp1Reg, 1L << Integer.SIZE);
currentIdxReg = xtmp1Reg;
eKind = AArch64Kind.DWORD;
}
if (eKind.getSizeInBytes() == AArch64Kind.DWORD.getSizeInBytes()) {
masm.neon.shlVVI(vSize, ElementSize.Word, xtmp1Reg, currentIdxReg, 1);
masm.neon.shlVVI(vSize, ElementSize.Word, xtmp2Reg, xtmp1Reg, Short.SIZE);
masm.neon.orrVVV(vSize, xtmp1Reg, xtmp1Reg, xtmp2Reg);
masm.neon.orrVI(vSize, ElementSize.Word, xtmp1Reg, 1 << Short.SIZE);
currentIdxReg = xtmp1Reg;
eKind = AArch64Kind.WORD;
}
if (eKind.getSizeInBytes() == AArch64Kind.WORD.getSizeInBytes()) {
masm.neon.shlVVI(vSize, ElementSize.HalfWord, xtmp1Reg, currentIdxReg, 1);
masm.neon.shlVVI(vSize, ElementSize.HalfWord, xtmp2Reg, xtmp1Reg, Byte.SIZE);
masm.neon.orrVVV(vSize, xtmp1Reg, xtmp1Reg, xtmp2Reg);
masm.neon.orrVI(vSize, ElementSize.HalfWord, xtmp1Reg, 1 << Byte.SIZE);
currentIdxReg = xtmp1Reg;
}
masm.neon.tblVVV(vSize, asRegister(result), asRegister(source), currentIdxReg);
}
}
}
Loading