#define DEBUG_TYPE "x86-cf-opt"
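// X86CallFrameOptimization rewrites the MOVs that store call arguments into
// stack slots as PUSH instructions, which is usually smaller code, letting the
// pushes themselves set up the call frame instead of an explicit SP adjustment.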
static cl::opt<bool>
    NoX86CFOpt("no-x86-call-frame-opt",
               cl::desc("Avoid optimizing x86 call frames for size"),
               cl::init(false), cl::Hidden);
  // Per-call-site information collected by collectCallInfo().
  struct CallContext {
    CallContext() : FrameSetup(nullptr), ArgStoreVector(4, nullptr) {}
    // Total displacement of the parameters passed on the stack.
    int64_t ExpectedDist = 0;
    // True if no parameters are passed on the stack at this call site.
    bool NoStackParams = false;
  void adjustCallSequence(MachineFunction &MF, const CallContext &Context);

  enum InstClassification { Convert, Skip, Exit };

  unsigned SlotSize = 0;
  unsigned Log2SlotSize = 0;
char X86CallFrameOptimization::ID = 0;

INITIALIZE_PASS(X86CallFrameOptimization, DEBUG_TYPE,
                "X86 Call Frame Optimization", false, false)
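// isLegal screens out cases where the rewrite is unsafe or unwanted: Darwin
// functions whose compact unwind info cannot encode the extra CFA adjustments,
// Win64, frames large enough to require a stack probe, and call-frame
// setup/destroy markers that are not properly nested in a single basic block.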
bool X86CallFrameOptimization::isLegal(MachineFunction &MF) {
  if (STI->isTargetDarwin() &&
      (!MF.getLandingPads().empty() ||
       (MF.getFunction().needsUnwindTableEntry() && !TFL->hasFP(MF))))
    return false;

  if (STI->isTargetWin64())
    return false;
  unsigned FrameSetupOpcode = TII->getCallFrameSetupOpcode();
  unsigned FrameDestroyOpcode = TII->getCallFrameDestroyOpcode();
  bool EmitStackProbeCall = STI->getTargetLowering()->hasStackProbeSymbol(MF);
  unsigned StackProbeSize = STI->getTargetLowering()->getStackProbeSize(MF);
  // Call-frame setup/destroy markers must be properly nested within each
  // basic block; probe-sized frames are also rejected.
  for (MachineBasicBlock &BB : MF) {
    bool InsideFrameSequence = false;
    for (MachineInstr &MI : BB) {
      if (MI.getOpcode() == FrameSetupOpcode) {
        if (TII->getFrameSize(MI) >= StackProbeSize && EmitStackProbeCall)
          return false;
        if (InsideFrameSequence)
          return false;
        InsideFrameSequence = true;
      } else if (MI.getOpcode() == FrameDestroyOpcode) {
        if (!InsideFrameSequence)
          return false;
        InsideFrameSequence = false;
      }
    }

    if (InsideFrameSequence)
      return false;
  }

  return true;
}
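// isProfitable is a code-size heuristic: converting to pushes is always a win
// when the call frame cannot be reserved (variable-sized objects); otherwise
// each converted slot is credited roughly 3 bytes against the fixed cost of
// adjusting the stack around each call.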
bool X86CallFrameOptimization::isProfitable(MachineFunction &MF,
                                            ContextVector &CallSeqVector) {
  bool CannotReserveFrame = MF.getFrameInfo().hasVarSizedObjects();
  if (CannotReserveFrame)
    return true;

  Align StackAlign = TFL->getStackAlign();
  int64_t Advantage = 0;
  for (const auto &CC : CallSeqVector) {
    // Call sites with no stack parameters need no adjustment either way.
    if (CC.NoStackParams)
      continue;
    // ...
    // Each converted slot saves roughly 3 bytes over the equivalent MOV.
    Advantage += (CC.ExpectedDist >> Log2SlotSize) * 3;
  }
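  // Rough illustration of the per-slot credit above: a 32-bit call passing
  // four word-sized arguments has ExpectedDist = 16 and SlotSize = 4, so its
  // pushes are credited (16 >> 2) * 3 = 12 bytes against the fixed costs.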
  return Advantage >= 0;
}
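// runOnMachineFunction drives the pass: it records a CallContext for every
// call-frame-setup marker, checks profitability over the whole function, and
// only then rewrites the call sequences that were marked as convertible.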
bool X86CallFrameOptimization::runOnMachineFunction(MachineFunction &MF) {
  STI = &MF.getSubtarget<X86Subtarget>();
  TII = STI->getInstrInfo();
  TFL = STI->getFrameLowering();
  MRI = &MF.getRegInfo();

  const X86RegisterInfo &RegInfo =
      *static_cast<const X86RegisterInfo *>(STI->getRegisterInfo());
  SlotSize = RegInfo.getSlotSize();
  assert(isPowerOf2_32(SlotSize) && "Expect power of 2 stack slot size");
  Log2SlotSize = Log2_32(SlotSize);
  if (skipFunction(MF.getFunction()) || !isLegal(MF))
    return false;
  unsigned FrameSetupOpcode = TII->getCallFrameSetupOpcode();

  bool Changed = false;

  ContextVector CallSeqVector;
  for (MachineBasicBlock &MBB : MF)
    for (MachineInstr &MI : MBB)
      if (MI.getOpcode() == FrameSetupOpcode) {
        CallContext Context;
        collectCallInfo(MF, MBB, MI, Context);
        CallSeqVector.push_back(Context);
      }
  if (!isProfitable(MF, CallSeqVector))
    return false;
  for (const auto &CC : CallSeqVector) {
    if (CC.UsePush) {
      adjustCallSequence(MF, CC);
      Changed = true;
    }
  }

  return Changed;
}
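// classifyInstruction looks at one instruction inside a call sequence and
// decides whether it is an argument store that can become a push (Convert),
// an instruction that can safely be stepped over (Skip), or a point where the
// scan has to stop (Exit).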
X86CallFrameOptimization::InstClassification
X86CallFrameOptimization::classifyInstruction(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
    const X86RegisterInfo &RegInfo, DenseSet<unsigned int> &UsedRegs) {
  // The candidate stores are MOVs onto the stack, plus the AND/OR idioms used
  // to store 0 and -1.
  switch (MI->getOpcode()) {
  case X86::AND64mi32: {
    // Converts only when the immediate is 0 (a store of zero).
    // ...
  }
  case X86::OR64mi32: {
    // Converts only when the immediate is -1 (a store of all-ones).
    // ...
  }
  }

  // A call, or anything else that may write memory, ends the scan.
  if (MI->isCall() || MI->mayStore())
    return Exit;

  // While scanning MI's register operands, virtual registers are ignored:
  if (!Reg.isPhysical())
    continue;

  // A def that overlaps a register read by one of the recorded stores would
  // clobber the value before the pushes are emitted, so stop.
  for (unsigned int U : UsedRegs)
    if (RegInfo.regsOverlap(Reg, U))
      return Exit;
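// collectCallInfo walks a single call sequence, from the frame-setup marker to
// the call, recording which instruction fills each argument slot and whether
// the whole sequence can be rewritten with pushes.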
void X86CallFrameOptimization::collectCallInfo(MachineFunction &MF,
                                               MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator I,
                                               CallContext &Context) {
  // We expect to be called at the start of a call sequence.
  assert(I->getOpcode() == TII->getCallFrameSetupOpcode());
  MachineBasicBlock::iterator FrameSetup = I++;
  Context.FrameSetup = FrameSetup;

  // The stack adjustment bounds the number of stack-passed parameters.
  unsigned int MaxAdjust = TII->getFrameSize(*FrameSetup) >> Log2SlotSize;

  // A zero adjustment means there are no stack parameters at all.
  if (!MaxAdjust) {
    Context.NoStackParams = true;
    return;
  }
  // Skip over DEBUG_VALUEs and the LEAs that PIC code can leave here.
  while (I->getOpcode() == X86::LEA32r || I->isDebugInstr())
    ++I;

  Register StackPtr = RegInfo.getStackRegister();
  auto StackPtrCopyInst = MBB.end();
  // SelectionDAG usually copies ESP/RSP into a virtual register; if such a
  // COPY exists before the call, treat its destination as the stack pointer.
  for (auto J = I; !J->isCall(); ++J)
    if (J->isCopy() && J->getOperand(0).isReg() && J->getOperand(1).isReg() &&
        J->getOperand(1).getReg() == StackPtr) {
      StackPtrCopyInst = J;
      Context.SPCopy = &*J++;
      StackPtr = Context.SPCopy->getOperand(0).getReg();
      break;
    }
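  // Scan forward from here, classifying each instruction and recording the
  // store that fills each argument slot.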
  DenseSet<unsigned int> UsedRegs;
  Context.ArgStoreVector.resize(MaxAdjust, nullptr);
  for (InstClassification Classification = Skip; Classification != Exit; ++I) {
    // The COPY of the stack pointer found above is safe to ignore.
    if (I == StackPtrCopyInst)
      continue;

    Classification = classifyInstruction(MBB, I, RegInfo, UsedRegs);
    if (Classification != Convert)
      continue;
438 "Negative stack displacement when passing parameters");
441 if (StackDisp & (SlotSize - 1))
443 StackDisp >>= Log2SlotSize;
445 assert((
size_t)StackDisp < Context.ArgStoreVector.size() &&
446 "Function call has more parameters than the stack is adjusted for.");
    // If the same slot is written twice, give up on this call site.
    if (Context.ArgStoreVector[StackDisp] != nullptr)
      return;
    Context.ArgStoreVector[StackDisp] = &*I;
    // Remember every physical register the store reads; classifyInstruction
    // uses this set to detect later clobbers.
    if (Reg.isPhysical())
      UsedRegs.insert(Reg);
  auto MMI = Context.ArgStoreVector.begin(), MME = Context.ArgStoreVector.end();
  for (; MMI != MME; ++MMI, Context.ExpectedDist += SlotSize)
    if (*MMI == nullptr)
      break;
  if (MMI == Context.ArgStoreVector.begin())
    return; // No stack parameters were found.
  for (; MMI != MME; ++MMI)
    if (*MMI != nullptr)
      return; // A gap in the argument slots: give up on this call site.
  Context.UsePush = true;
}
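// adjustCallSequence performs the rewrite for one call site: the frame-setup
// adjustment is set to the pushed size, each recorded store is replaced by a
// push (immediate, register, or memory form) emitted just before the call, and
// the now-dead copy of the stack pointer is removed.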
void X86CallFrameOptimization::adjustCallSequence(MachineFunction &MF,
                                                  const CallContext &Context) {
  MachineBasicBlock::iterator FrameSetup = Context.FrameSetup;
  MachineBasicBlock &MBB = *(FrameSetup->getParent());
  // Keep the frame-setup marker but record how much the pushes will adjust.
  TII->setFrameAdjustment(*FrameSetup, Context.ExpectedDist);

  const DebugLoc &DL = FrameSetup->getDebugLoc();
  bool Is64Bit = STI->is64Bit();
  // Replace the recorded stores with pushes, walking the slots from the
  // highest displacement down.
  for (int Idx = (Context.ExpectedDist >> Log2SlotSize) - 1; Idx >= 0; --Idx) {
    MachineBasicBlock::iterator Store = *Context.ArgStoreVector[Idx];

    switch (Store->getOpcode()) {
      // Immediate stores become immediate pushes.
      PushOpcode = Is64Bit ? X86::PUSH64i32 : X86::PUSH32i;
      // ...
      Push->cloneMemRefs(MF, *Store);
      // There is no 32-bit push in 64-bit mode, so a 32-bit register store is
      // first widened into a fresh 64-bit virtual register (upper half undef).
      if (Is64Bit && Store->getOpcode() == X86::MOV32mr) {
        Register UndefReg = MRI->createVirtualRegister(&X86::GR64RegClass);
        Reg = MRI->createVirtualRegister(&X86::GR64RegClass);
      bool SlowPUSHrmm = STI->slowTwoMemOps();
      // Push straight from memory when the value is fed by a foldable load
      // (unless instructions with two memory operands are slow on this CPU).
      if (!SlowPUSHrmm && (DefMov = canFoldIntoRegPush(FrameSetup, Reg))) {
        PushOpcode = Is64Bit ? X86::PUSH64rmm : X86::PUSH32rmm;
        // ...
        Push->cloneMergedMemRefs(MF, {DefMov, &*Store});
      } else { // Otherwise emit a plain register push.
        PushOpcode = Is64Bit ? X86::PUSH64r : X86::PUSH32r;
        // ...
        Push->cloneMemRefs(MF, *Store);
    // Without a frame pointer, adjust the CFA offset after each push.
    if (!TFL->hasFP(MF))
      TFL->BuildCFI(MBB, std::next(Push), DL,
                    MCCFIInstruction::createAdjustCfaOffset(nullptr, SlotSize));
  // The copy of the stack pointer is dead once the argument stores are gone.
  if (Context.SPCopy &&
      MRI->use_empty(Context.SPCopy->getOperand(0).getReg()))
    Context.SPCopy->eraseFromParent();
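// canFoldIntoRegPush returns the load that defines Reg when it can be folded
// into a memory push: the register must be virtual with a single non-debug
// use, the defining MOV32rm/MOV64rm must live in the same block as the call
// sequence, and nothing in between may act as a load-fold barrier.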
MachineInstr *X86CallFrameOptimization::canFoldIntoRegPush(
    MachineBasicBlock::iterator FrameSetup, Register Reg) {
  // Only a virtual register with a single non-debug use can be folded.
  if (!Reg.isVirtual())
    return nullptr;
  if (!MRI->hasOneNonDBGUse(Reg))
    return nullptr;

  MachineInstr &DefMI = *MRI->getVRegDef(Reg);

  // The defining instruction must be a plain load in the same block.
  if ((DefMI.getOpcode() != X86::MOV32rm &&
       DefMI.getOpcode() != X86::MOV64rm) ||
      DefMI.getParent() != FrameSetup->getParent())
    return nullptr;

  // Nothing between the load and the call sequence may block load folding.
  for (MachineBasicBlock::iterator I = DefMI.getIterator(), E = FrameSetup;
       I != E; ++I)
    if (I->isLoadFoldBarrier())
      return nullptr;

  return &DefMI;
}
FunctionPass *llvm::createX86CallFrameOptimization() {
  return new X86CallFrameOptimization();
}
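// A rough illustration of the rewrite this pass performs on a 32-bit call
// site; the registers and offsets below are made up for the example:
//
//   Before:                         After:
//     subl  $8, %esp                  pushl %eax
//     movl  %eax, 4(%esp)             pushl %ecx
//     movl  %ecx, (%esp)              calll _f
//     calll _f                        addl  $8, %esp
//     addl  $8, %esp
//
// When a pushed register is fed by a single-use load in the same block,
// canFoldIntoRegPush lets the push read straight from memory (PUSH32rmm /
// PUSH64rmm) instead of going through the register.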