60 changes: 43 additions & 17 deletions hphp/runtime/vm/jit/relocation-arm.cpp
@@ -290,13 +290,13 @@ InstrSet findLiterals(Instruction* start, Instruction* end) {
*/
constexpr auto kFarJccLen = 3 * kInstructionSize;

TCA farJccTarget(TCA inst) {
TCA farCondBranchTarget(TCA inst) {
auto const b = Instruction::Cast(inst);
auto const ldr = b->NextInstruction();
auto const br = ldr->NextInstruction();
auto const next = br->NextInstruction();

if (b->IsCondBranchImm() &&
if ((b->IsCondBranchImm() || b->IsCompareBranch()) &&
b->ImmPCOffsetTarget() == next &&
ldr->IsLoadLiteral() &&
ldr->Mask(LoadLiteralMask) == LDR_w_lit &&
@@ -320,6 +320,12 @@ ConditionCode farJccCond(TCA inst) {
return arm::convertCC(InvertCondition(static_cast<Condition>(b->Bits(3, 0))));
}

std::pair<Register,bool> farCbDetails(TCA inst) {
auto const b = Instruction::Cast(inst);
assertx(b->IsCompareBranch());
Register reg = Register(b->Rt(), b->Bit(31) ? kXRegSize : kWRegSize);
return {reg, b->Bit(24)};
}

/*
* This function attempts to optimize a "far jcc" pattern, i.e.:
@@ -340,8 +346,8 @@ ConditionCode farJccCond(TCA inst) {
*
* This function returns whether or not the code was optimized.
*/
bool optimizeFarJcc(Env& env, TCA srcAddr, TCA destAddr,
size_t& srcCount, size_t& destCount) {
bool optimizeFarCondBranch(Env& env, TCA srcAddr, TCA destAddr,
size_t& srcCount, size_t& destCount) {
auto const srcFrom = Instruction::Cast(srcAddr);
auto const srcAddrActual = env.srcBlock.toDestAddress(srcAddr);
auto const src = Instruction::Cast(srcAddrActual);
@@ -350,7 +356,7 @@ bool optimizeFarJcc(Env& env, TCA srcAddr, TCA destAddr,
if (env.far.contains(src)) return false;
if (env.end < srcAddr + kFarJccLen) return false;

auto const target = farJccTarget(srcAddrActual);
auto const target = farCondBranchTarget(srcAddrActual);
if (!target) return false;

// We can only rely on the target address to shrink the code sequence if it's
@@ -388,19 +394,39 @@ bool optimizeFarJcc(Env& env, TCA srcAddr, TCA destAddr,
vixl::MacroAssembler a { env.destBlock };
env.destBlock.setFrontier(destAddr);

// This inverts the condition code for us.
auto const cc = arm::convertCC(farJccCond(srcAddrActual));
auto const b = Instruction::Cast(srcAddrActual);
if (b->IsCondBranchImm()) {
// This inverts the condition code for us.
auto const cc = arm::convertCC(farJccCond(srcAddrActual));

if (is_int19(imm)) {
a.b(imm, cc);
if (is_int19(imm)) {
a.b(imm, cc);
} else {
// Branch over the next instruction.
const int nextImm = 2;
a.b(nextImm, vixl::InvertCondition(cc));
// NB: the imm offset was computed relative to destAddr, but we emitted an
// extra branch above, thus the -1 here.
a.b(imm - 1);
destCount++;
}
} else {
// Branch over the next instruction.
const int nextImm = 2;
a.b(nextImm, vixl::InvertCondition(cc));
// NB: the imm offset was computed relative to destAddr, but we emitted an
// extra branch above, thus the -1 here.
a.b(imm - 1);
destCount++;
assertx(b->IsCompareBranch());
std::pair<Register,bool> details = farCbDetails(srcAddrActual);

if (is_int19(imm)) {
if (!details.second) a.cbnz(details.first, imm);
else a.cbz(details.first, imm);
} else {
// Branch over the next instruction.
const int nextImm = 2;
if (!details.second) a.cbz(details.first, nextImm);
else a.cbnz(details.first, nextImm);
// NB: the imm offset was computed relative to destAddr, but we emitted an
// extra branch above, thus the -1 here.
a.b(imm - 1);
destCount++;
}
}

srcCount = kFarJccLen >> kInstructionSizeLog2;
Expand Down Expand Up @@ -977,7 +1003,7 @@ size_t relocateImpl(Env& env) {
}
// Relocate functions are needed for correctness, while optimize
// functions will attempt to improve instruction sequences.
optimizeFarJcc(env, srcAddr, destAddr, srcCount, destCount) ||
optimizeFarCondBranch(env, srcAddr, destAddr, srcCount, destCount) ||
optimizeFarJmp(env, srcAddr, destAddr, srcCount, destCount) ||
relocatePCRelative(env, srcAddr, destAddr, srcCount, destCount) ||
relocateImmediate(env, srcAddr, destAddr, srcCount, destCount);
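The "far" compare-branch shape handled above mirrors the existing far-jcc pattern: a cbz/cbnz that branches over a ldr (32-bit literal) + br pair, which optimizeFarCondBranch shrinks to a single compare-and-branch once the final target fits in the 19-bit immediate. A minimal sketch of that re-emission, using only vixl calls that already appear in this file; the helper name and the sourceWasCbnz flag are illustrative, and the explicit range check stands in for the is_int19() used in the diff:

```cpp
// Sketch only, not the actual HHVM helper. `reg` is the register tested by
// the original compare-branch, `imm` the instruction-count offset from the
// destination address to the target, and `sourceWasCbnz` corresponds to
// bit 24 of the compare-and-branch encoding (1 selects cbnz, 0 selects cbz).
void reemitCondBranchSketch(vixl::MacroAssembler& a, const vixl::Register& reg,
                            int imm, bool sourceWasCbnz) {
  // Same range check as is_int19() above: the target must fit in the signed
  // 19-bit immediate of cbz/cbnz.
  auto const fits = imm >= -(1 << 18) && imm < (1 << 18);

  if (fits) {
    // Near form: one compare-and-branch straight to the target. The sense is
    // inverted because the far form branched *over* the indirect jump.
    if (sourceWasCbnz) a.cbz(reg, imm); else a.cbnz(reg, imm);
  } else {
    // Still out of range: keep the original sense and branch over an
    // unconditional branch (the un-shrunk far form branches over ldr + br).
    const int nextImm = 2;
    if (sourceWasCbnz) a.cbnz(reg, nextImm); else a.cbz(reg, nextImm);
    // imm was computed relative to destAddr; one extra instruction was just
    // emitted, hence the -1.
    a.b(imm - 1);
  }
}
```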
8 changes: 8 additions & 0 deletions hphp/runtime/vm/jit/smashable-instr-arm.cpp
@@ -179,6 +179,14 @@ bool possiblySmashableJcc(TCA inst) {
return b->IsCondBranchImm() && isVeneer(b->ImmPCOffsetTarget());
}

bool possiblySmashableCb(TCA inst) {
using namespace vixl;

auto const b = Instruction::Cast(inst);

return b->IsCompareBranch() && isVeneer(b->ImmPCOffsetTarget());
}

///////////////////////////////////////////////////////////////////////////////

void smashMovq(TCA inst, uint64_t target) {
1 change: 1 addition & 0 deletions hphp/runtime/vm/jit/smashable-instr-arm.h
@@ -68,6 +68,7 @@ void smashInterceptJmp(TCA inst);
bool possiblySmashableMovq(TCA inst);
bool possiblySmashableJmp(TCA inst);
bool possiblySmashableJcc(TCA inst);
bool possiblySmashableCb(TCA inst);

uint64_t smashableMovqImm(TCA inst);
uint32_t smashableCmpqImm(TCA inst);
126 changes: 119 additions & 7 deletions hphp/runtime/vm/jit/vasm-arm.cpp
@@ -216,6 +216,7 @@ struct Vgen {
, next(env.next)
, jmps(env.jmps)
, jccs(env.jccs)
, cmpbrs(env.cmpbrs)
, catches(env.catches)
, vveneers(env.vveneers)
{}
@@ -228,6 +229,8 @@ struct Vgen {
static void handleLiterals(Venv& env);
static void retargetBinds(Venv& env);
static void patch(Venv& env);
void doCompareBranch(vixl::Register r, const Vlabel targets[2],
StringTag tag, bool BranchOnZero);

static void pad(CodeBlock& cb) {
vixl::MacroAssembler a { cb };
@@ -324,6 +327,10 @@ struct Vgen {
a->Sub(vixl::xzr, rVixlScratch0, X(i.s1), UF(i.fl));
a->Mov(X(i.d), rVixlScratch0);
}
void emit(const cbzl& i);
void emit(const cbnzl& i);
void emit(const cbzq& i);
void emit(const cbnzq& i);
void emit(const cmovb& i) { a->Csel(W(i.d), W(i.t), W(i.f), C(i.cc)); }
void emit(const cmovw& i) { a->Csel(W(i.d), W(i.t), W(i.f), C(i.cc)); }
void emit(const cmovl& i) { a->Csel(W(i.d), W(i.t), W(i.f), C(i.cc)); }
@@ -479,6 +486,7 @@ struct Vgen {
const Vlabel next;
jit::vector<Venv::LabelPatch>& jmps;
jit::vector<Venv::LabelPatch>& jccs;
jit::vector<Venv::LabelPatch>& cmpbrs;
jit::vector<Venv::LabelPatch>& catches;
jit::vector<Venv::LabelPatch>& vveneers;
};
@@ -550,10 +558,24 @@ void Vgen::emitVeneers(Venv& env) {
always_assert(is_int28(offset));
at.bl(offset >> kInstructionSizeLog2);

} else if (sourceInst->IsCondBranchImm()) {
auto const cond = static_cast<Condition>(sourceInst->ConditionBranch());
} else if (sourceInst->IsCondBranchImm() || sourceInst->IsCompareBranch()) {
auto getCbDetails = [&]() {
Register reg = Register(sourceInst->Rt(),
sourceInst->Bit(31) ? kXRegSize : kWRegSize);
return std::pair<Register,bool>(reg, sourceInst->Bit(24));
};

if (is_int21(offset)) {
at.b(offset >> kInstructionSizeLog2, cond);
if (sourceInst->IsCompareBranch()) {
auto cb_details = getCbDetails();
if (cb_details.second)
at.cbnz(cb_details.first, offset >> kInstructionSizeLog2);
else
at.cbz(cb_details.first, offset >> kInstructionSizeLog2);
} else {
auto const cond = static_cast<Condition>(sourceInst->ConditionBranch());
at.b(offset >> kInstructionSizeLog2, cond);
}
} else {
// The offset doesn't fit in a conditional jump. Hopefully it still fits
// in an unconditional jump, in which case we add an appendix to the
@@ -577,8 +599,18 @@ void Vgen::emitVeneers(Venv& env) {
at.b(offset >> kInstructionSizeLog2);

// Emit appendix.
auto const appendix = cb->frontier();
av.b(-2 /* veneer starts 2 instructions before the appendix */, cond);
auto const appendix = cb->frontier(); // This is a TCA
int imm19 = -2 /* veneer starts 2 instructions before the appendix */;
if (sourceInst->IsCondBranchImm()) {
auto const cond = static_cast<Condition>(sourceInst->ConditionBranch());
av.b(imm19, cond);
} else {
auto cb_details = getCbDetails();
if (cb_details.second)
av.cbnz(cb_details.first, imm19);
else
av.cbz(cb_details.first, imm19);
}
const int64_t nextOffset = (veneer.source + kInstructionSize) - // NEXT
(vaddr + 3 * kInstructionSize); // addr of "B NEXT"
always_assert(is_int28(nextOffset));
@@ -723,6 +755,22 @@ void Vgen::patch(Venv& env) {
}
patch(addr, target);
}
for (auto const& p : env.cmpbrs) {
auto addr = env.text.toDestAddress(p.instr);
auto const target = env.addrs[p.target];
assertx(target);
if (env.meta.smashableLocations.contains(p.instr)) {
assertx(possiblySmashableCb(addr));
// Update `addr' to point to the veneer.
addr = TCA(vixl::Instruction::Cast(addr)->ImmPCOffsetTarget());
} else {
assertx(Instruction::Cast(addr)->IsCompareBranch());
// A non-smashable cbz/cbnz here begins a far compare-branch sequence; patch
// the next instruction (which should be a LDR).
addr += kInstructionSize;
}
patch(addr, target);
}
for (auto const& p : env.leas) {
auto addr = env.text.toDestAddress(p.instr);
auto const target = env.vaddrs[p.target];
@@ -1043,6 +1091,70 @@ void Vgen::emit(const decqmlocknosf& i) {
a->SetScratchRegisters(rVixlScratch0, rVixlScratch1);
}

void Vgen::doCompareBranch(vixl::Register r, const Vlabel targets[2], StringTag tag,
bool BranchOnZero) {
if (targets[1] != targets[0]) {
if (next == targets[1]) {
const Vlabel new_targets[2] = {targets[1], targets[0]};
return doCompareBranch(r, new_targets, tag, !BranchOnZero);
}
auto taken = targets[1];

// If the taken block is in a different code area than the compare-branch, we
// emit a veneer and jump through it to the taken block. This avoids having to
// flip the branch and penalize the fall-through path. Otherwise, we flip the
// branch and emit a "far" compare-branch sequence that should be optimized
// later during relocation's optimizeFarCondBranch, which will flip the branch back.
if (env.unit.blocks[env.current].area_idx != env.unit.blocks[taken].area_idx) {
auto source = a->frontier();
vveneers.push_back({source, taken});
vixl::Label veneer_addr;
a->bind(&veneer_addr);
if (BranchOnZero) {
a->Cbz(r, &veneer_addr);
} else {
a->Cbnz(r, &veneer_addr);
}
// NB: this will be patched later.
} else {
cmpbrs.push_back({a->frontier(), taken});
vixl::Label skip, data;

// Emit a "far" compare-branch sequence for easy patching later. Static
// relocation might be able to simplify this later (see optimizeFarCondBranch()).
recordAddressImmediate();
if (BranchOnZero)
a->Cbnz(r, &skip);
else
a->Cbz(r, &skip);
recordAddressImmediate();
poolLiteral(*env.cb, env.meta, (uint64_t)makeTarget32(a->frontier()),
32, false);
a->bind(&data); // This will be remapped during the handleLiterals phase.
a->Ldr(rAsm_w, &data);
a->Br(rAsm);
a->bind(&skip);
}
}
emit(jmp{targets[0]});
}

void Vgen::emit(const cbzl& i) {
doCompareBranch(W(i.s), i.targets, i.tag, true);
}

void Vgen::emit(const cbnzl& i) {
doCompareBranch(W(i.s), i.targets, i.tag, false);
}

void Vgen::emit(const cbzq& i) {
doCompareBranch(X(i.s), i.targets, i.tag, true);
}

void Vgen::emit(const cbnzq& i) {
doCompareBranch(X(i.s), i.targets, i.tag, false);
}

void Vgen::emit(const jcc& i) {
if (i.targets[1] != i.targets[0]) {
if (next == i.targets[1]) {
@@ -1054,7 +1166,7 @@ void Vgen::emit(const jcc& i) {
// veneer and jump through it to the taken block. This avoids having to
// flip the branch and penalizing the fall-through path. Otherwise, we flip
// the branch and emit a "far JCC" sequence that should be optimized later
// during relocation's optimizeFarJcc, which will flip the branch back.
// during relocation's optimizeFarCondBranch, which will flip the branch back.
if (env.unit.blocks[env.current].area_idx != env.unit.blocks[taken].area_idx) {
auto source = a->frontier();
vveneers.push_back({source, taken});
@@ -1066,7 +1178,7 @@ void Vgen::emit(const jcc& i) {
vixl::Label skip, data;

// Emit a "far JCC" sequence for easy patching later. Static relocation
// might be able to simplify this later (see optimizeFarJcc()).
// might be able to simplify this later (see optimizeFarCondBranch()).
recordAddressImmediate();
a->B(&skip, vixl::InvertCondition(C(i.cc)));
recordAddressImmediate();
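Vgen::doCompareBranch above picks among the same four lowerings as emit(jcc): nothing but the fall-through jump, a target swap with a flipped cbz/cbnz, a branch to a veneer when the taken block lives in another code area, or the patchable "far" sequence. A simplified, self-contained model of that decision; every type and name below is illustrative rather than HHVM's:

```cpp
#include <cstdint>

// Illustrative model of the lowering choice in Vgen::doCompareBranch above.
enum class CbLowering {
  JumpOnly,      // both targets equal: no conditional branch needed
  SwapAndFlip,   // taken block is next in layout: swap targets, flip cbz/cbnz
  VeneerBranch,  // taken block in another code area: cbz/cbnz to a veneer,
                 // recorded in vveneers and patched later
  FarSequence    // same area: inverted cbz/cbnz over a ldr(literal) + br,
                 // recorded in cmpbrs and possibly shrunk during relocation
};

struct CbContext {
  uint32_t target0;   // targets[0], the fall-through block
  uint32_t target1;   // targets[1], the taken block
  uint32_t next;      // block that follows in the emission order
  bool sameArea;      // is the taken block in the same code area?
};

CbLowering chooseCbLowering(const CbContext& c) {
  if (c.target1 == c.target0) return CbLowering::JumpOnly;
  if (c.next == c.target1)    return CbLowering::SwapAndFlip;
  if (!c.sameArea)            return CbLowering::VeneerBranch;
  return CbLowering::FarSequence;
}
```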
4 changes: 4 additions & 0 deletions hphp/runtime/vm/jit/vasm-info.cpp
@@ -361,6 +361,10 @@ bool effectsImpl(const Vinstr& inst, bool pure) {
case Vinstr::callr:
case Vinstr::calls:
case Vinstr::callstub:
case Vinstr::cbzl:
case Vinstr::cbnzl:
case Vinstr::cbzq:
case Vinstr::cbnzq:
case Vinstr::conjure:
case Vinstr::contenter:
case Vinstr::cqo:
8 changes: 8 additions & 0 deletions hphp/runtime/vm/jit/vasm-instr.cpp
@@ -39,6 +39,10 @@ bool isBlockEnd(const Vinstr& inst) {
case Vinstr::bindjmp:
case Vinstr::fallback:
// control flow
case Vinstr::cbzl:
case Vinstr::cbnzl:
case Vinstr::cbzq:
case Vinstr::cbnzq:
case Vinstr::jcc:
case Vinstr::jmp:
case Vinstr::jmpr:
@@ -309,6 +313,8 @@ Width width(Vinstr::Opcode op) {
case Vinstr::cmpli:
case Vinstr::cmplm:
case Vinstr::cmplim:
case Vinstr::cbzl:
case Vinstr::cbnzl:
case Vinstr::testl:
case Vinstr::testli:
case Vinstr::testlim:
@@ -366,6 +372,8 @@ Width width(Vinstr::Opcode op) {
case Vinstr::cmpqi:
case Vinstr::cmpqm:
case Vinstr::cmpqim:
case Vinstr::cbzq:
case Vinstr::cbnzq:
case Vinstr::testq:
case Vinstr::testqi:
case Vinstr::testqm:
8 changes: 8 additions & 0 deletions hphp/runtime/vm/jit/vasm-instr.h
@@ -367,6 +367,10 @@ struct Vunit;
O(loadpairl, Inone, U(s), D(d0) D(d1))\
O(storepair, Inone, U(s0) U(s1) UW(d), Dn)\
O(storepairl, Inone, U(s0) U(s1) UW(d), Dn)\
O(cbzl, Inone, U(s), Dn)\
O(cbnzl, Inone, U(s), Dn)\
O(cbzq, Inone, U(s), Dn)\
O(cbnzq, Inone, U(s), Dn)\
/* */

/*
@@ -1286,6 +1290,10 @@ struct loadpair { Vptr128 s; Vreg64 d0, d1; };
struct loadpairl { Vptr64 s; Vreg32 d0, d1; };
struct storepair { Vreg64 s0, s1; Vptr128 d; };
struct storepairl { Vreg32 s0, s1; Vptr64 d; };
struct cbzl { Vreg32 s; Vlabel targets[2]; StringTag tag; };
struct cbnzl { Vreg32 s; Vlabel targets[2]; StringTag tag; };
struct cbzq { Vreg64 s; Vlabel targets[2]; StringTag tag; };
struct cbnzq { Vreg64 s; Vlabel targets[2]; StringTag tag; };

///////////////////////////////////////////////////////////////////////////////

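The new ops carry the same payload as jcc minus the status-flags input, so call sites stay small. A hypothetical use (not part of this PR), assuming the usual `v << instr{...}` Vout streaming idiom and that the vasm headers are included:

```cpp
// Hypothetical helper, sketch only: branch to `taken` when `src` is zero,
// otherwise fall through to `next`. targets[0] is the fall-through block and
// targets[1] the taken block, matching jcc's convention.
void emitBranchIfZero(Vout& v, Vreg64 src, Vlabel next, Vlabel taken,
                      StringTag tag) {
  v << cbzq{src, {next, taken}, tag};
}
```

Keeping targets[] in jcc's order (fall-through first, taken second) lets doCompareBranch reuse the existing swap/flip handling unchanged.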
2 changes: 1 addition & 1 deletion hphp/runtime/vm/jit/vasm-internal.h
@@ -67,7 +67,7 @@ struct Venv {
jit::vector<CodeAddress> addrs;
jit::vector<CodeAddress> vaddrs;
jit::vector<AddrPatch> leas;
jit::vector<LabelPatch> jmps, jccs;
jit::vector<LabelPatch> jmps, jccs, cmpbrs;
jit::vector<LabelPatch> catches;
jit::vector<LabelPatch> vveneers;
jit::vector<LdBindRetAddrPatch> ldbindretaddrs;