#define DEBUG_TYPE "x86-cf-opt"
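// This pass shrinks call sequences for size: instead of reserving the
// outgoing-argument area and filling it with MOVs, it emits PUSH
// instructions where doing so is legal and profitable.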
static cl::opt<bool>
    NoX86CFOpt("no-x86-call-frame-opt",
               cl::desc("Avoid optimizing x86 call frames for size"),
               cl::init(false), cl::Hidden);
CallContext() : FrameSetup(nullptr), ArgStoreVector(4, nullptr) {}

int64_t ExpectedDist = 0;

bool NoStackParams = false;

enum InstClassification { Convert, Skip, Exit };

StringRef getPassName() const override { return "X86 Optimize Call Frame"; }

unsigned SlotSize = 0;
unsigned Log2SlotSize = 0;
130 "X86 Call Frame Optimization",
false,
false)
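// Darwin's compact unwind encoding cannot represent the extra stack-pointer
// adjustments this transformation introduces, so functions that need unwind
// information are skipped there.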
if (STI->isTargetDarwin() &&
    (!MF.getLandingPads().empty() ||
     (MF.getFunction().needsUnwindTableEntry() && !TFL->hasFP(MF))))
  return false;

if (STI->isTargetWin64())
  return false;
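// The scan below requires every call-frame setup to be matched by a destroy
// in the same basic block, with no nesting, and rejects any frame large
// enough to require a stack probe.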
unsigned FrameSetupOpcode = TII->getCallFrameSetupOpcode();
unsigned FrameDestroyOpcode = TII->getCallFrameDestroyOpcode();
bool EmitStackProbeCall = STI->getTargetLowering()->hasStackProbeSymbol(MF);
unsigned StackProbeSize = STI->getTargetLowering()->getStackProbeSize(MF);
for (MachineBasicBlock &BB : MF) {
  bool InsideFrameSequence = false;
  for (MachineInstr &MI : BB) {
    if (MI.getOpcode() == FrameSetupOpcode) {
      if (TII->getFrameSize(MI) >= StackProbeSize && EmitStackProbeCall)
        return false;
      if (InsideFrameSequence)
        return false;
      InsideFrameSequence = true;
    } else if (MI.getOpcode() == FrameDestroyOpcode) {
      if (!InsideFrameSequence)
        return false;
      InsideFrameSequence = false;
    }
  }

  // A frame sequence still open at the end of the block cannot be handled.
  if (InsideFrameSequence)
    return false;
}
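// isProfitable estimates the code-size effect of converting the collected
// call sequences; a push sequence only wins when it saves more bytes than the
// extra stack-pointer adjustments it forces.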
bool X86CallFrameOptimization::isProfitable(MachineFunction &MF,
                                            ContextVector &CallSeqVector) {
  if (CannotReserveFrame)
    return true;
  int64_t Advantage = 0;
  for (const auto &CC : CallSeqVector) {
    // Calls that pass no parameters on the stack need no adjustment and do
    // not affect the cost.
    if (CC.NoStackParams)
      continue;
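    // Each argument slot converted from a MOV into a PUSH saves roughly
    // three bytes of encoding.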
    Advantage += (CC.ExpectedDist >> Log2SlotSize) * 3;
  }

  return Advantage >= 0;
bool X86CallFrameOptimization::runOnMachineFunction(MachineFunction &MF) {
  TII = STI->getInstrInfo();
  TFL = STI->getFrameLowering();

  Log2SlotSize = Log2_32(SlotSize);
  if (skipFunction(MF.getFunction()) || !isLegal(MF))
    return false;
  unsigned FrameSetupOpcode = TII->getCallFrameSetupOpcode();

  bool Changed = false;

  ContextVector CallSeqVector;
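  // Collect a CallContext for every call-frame setup in the function before
  // deciding once whether converting them is worthwhile.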
  for (auto &MBB : MF)
    for (auto &MI : MBB)
      if (MI.getOpcode() == FrameSetupOpcode) {
        CallContext Context;
        collectCallInfo(MF, MBB, MI, Context);
        CallSeqVector.push_back(Context);
      }

  if (!isProfitable(MF, CallSeqVector))
    return false;
  for (const auto &CC : CallSeqVector) {
    if (CC.UsePush) {
      adjustCallSequence(MF, CC);
      Changed = true;
    }
  }
X86CallFrameOptimization::InstClassification
X86CallFrameOptimization::classifyInstruction(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
    const X86RegisterInfo &RegInfo, DenseSet<unsigned int> &UsedRegs) {
  switch (MI->getOpcode()) {
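  // "AND mem, 0" and "OR mem, -1" are idioms for storing the constants 0 and
  // -1, so they can be converted into pushes of those immediates.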
  case X86::AND64mi8: {
    const MachineOperand &ImmOp = MI->getOperand(X86::AddrNumOperands);
    return ImmOp.getImm() == 0 ? Convert : Exit;
  }
  case X86::OR64mi8: {
    const MachineOperand &ImmOp = MI->getOperand(X86::AddrNumOperands);
    return ImmOp.getImm() == -1 ? Convert : Exit;
  }
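  // Any other instruction is tolerated (Skip) only if it is not a call, does
  // not write memory, and does not define a register that an
  // already-recorded argument store uses.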
  if (MI->isCall() || MI->mayStore())
    return Exit;

  if (!Reg.isPhysical())
    continue;

  for (unsigned int U : UsedRegs)
    if (RegInfo.regsOverlap(Reg, U))
      return Exit;
assert(I->getOpcode() == TII->getCallFrameSetupOpcode());

Context.FrameSetup = FrameSetup;

unsigned int MaxAdjust = TII->getFrameSize(*FrameSetup) >> Log2SlotSize;
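// MaxAdjust bounds how many slot-sized arguments this call can pass on the
// stack; skip any debug instructions and PIC LEAs that may precede the
// argument stores.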
while (I->getOpcode() == X86::LEA32r || I->isDebugInstr())
  ++I;
auto StackPtrCopyInst = MBB.end();
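// SelectionDAG may have copied the stack pointer into a virtual register
// ahead of the argument stores. If such a COPY exists before the call, use
// that register as the stack pointer and remember the COPY so it can be
// ignored later.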
for (auto J = I; !J->isCall(); ++J)
  if (J->isCopy() && J->getOperand(0).isReg() && J->getOperand(1).isReg() &&
      J->getOperand(1).getReg() == StackPtr) {
    StackPtrCopyInst = J;
    Context.SPCopy = &*J++;
    StackPtr = Context.SPCopy->getOperand(0).getReg();
    break;
  }
Context.ArgStoreVector.resize(MaxAdjust, nullptr);
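// Walk the call sequence, recording each convertible store into the argument
// slot (displacement divided by the slot size) that it writes.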
for (InstClassification Classification = Skip; Classification != Exit; ++I) {
  if (I == StackPtrCopyInst)
    continue;
  Classification = classifyInstruction(MBB, I, RegInfo, UsedRegs);
  if (Classification != Convert)
    continue;
438 "Negative stack displacement when passing parameters");
441 if (StackDisp & (SlotSize - 1))
443 StackDisp >>= Log2SlotSize;
446 "Function call has more parameters than the stack is adjusted for.");
  if (Context.ArgStoreVector[StackDisp] != nullptr)
    return;
  Context.ArgStoreVector[StackDisp] = &*I;

  if (Reg.isPhysical())
    UsedRegs.insert(Reg);
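// The recorded stores must fill a gap-free prefix of the argument slots;
// ExpectedDist accumulates how many bytes of arguments that prefix covers.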
auto MMI = Context.ArgStoreVector.begin(),
     MME = Context.ArgStoreVector.end();
for (; MMI != MME; ++MMI, Context.ExpectedDist += SlotSize)
  if (*MMI == nullptr)
    break;

// If the call passed no parameters on the stack, there is nothing to do.
if (MMI == Context.ArgStoreVector.begin())
  return;
for (; MMI != MME; ++MMI)
  if (*MMI != nullptr) return;
TII->setFrameAdjustment(*FrameSetup, Context.ExpectedDist);

const DebugLoc &DL = FrameSetup->getDebugLoc();
bool Is64Bit = STI->is64Bit();
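// Emit the pushes in reverse slot order: the highest-displacement argument is
// pushed first so that every value ends up at its original offset from the
// final stack pointer.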
for (int Idx = (Context.ExpectedDist >> Log2SlotSize) - 1; Idx >= 0; --Idx) {

  switch (Store->getOpcode()) {
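  // Immediate stores become a PUSH of the immediate; an 8-bit value gets the
  // shorter PUSH*i8 encoding.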
  PushOpcode = Is64Bit ? X86::PUSH64i32 : X86::PUSHi32;
  if (PushOp.isImm()) {
    int64_t Val = PushOp.getImm();
    if (isInt<8>(Val))
      PushOpcode = Is64Bit ? X86::PUSH64i8 : X86::PUSH32i8;
  }
  Push->cloneMemRefs(MF, *Store);
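  // Register stores become PUSHr, or a memory-operand PUSHrmm when the pushed
  // value comes from a load that can be folded and two-memory-operand
  // instructions are not slow on this subtarget.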
  if (Is64Bit && Store->getOpcode() == X86::MOV32mr) {

  bool SlowPUSHrmm = STI->slowTwoMemOps();
  if (!SlowPUSHrmm && (DefMov = canFoldIntoRegPush(FrameSetup, Reg))) {
    PushOpcode = Is64Bit ? X86::PUSH64rmm : X86::PUSH32rmm;

    Push->cloneMergedMemRefs(MF, {DefMov, &*Store});
  } else {
    PushOpcode = Is64Bit ? X86::PUSH64r : X86::PUSH32r;

    Push->cloneMemRefs(MF, *Store);
  }
  TFL->BuildCFI(
      MBB, std::next(Push), DL,
      MCCFIInstruction::createAdjustCfaOffset(nullptr, SlotSize));
Context.SPCopy->eraseFromParent();
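// canFoldIntoRegPush performs a deliberately restricted form of load folding:
// the pushed register must be virtual and defined by a MOV32rm/MOV64rm in the
// same block as the call, and no load-folding barrier may appear between that
// load and the call-frame setup.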
MachineInstr *X86CallFrameOptimization::canFoldIntoRegPush(
    MachineBasicBlock::iterator FrameSetup, Register Reg) {
  if (!Reg.isVirtual())
    return nullptr;
  if ((DefMI.getOpcode() != X86::MOV32rm &&
       DefMI.getOpcode() != X86::MOV64rm) ||
      DefMI.getParent() != FrameSetup->getParent())
    return nullptr;

  if (I->isLoadFoldBarrier())
    return nullptr;
return new X86CallFrameOptimization();