#define DEBUG_TYPE "x86-cf-opt"
static cl::opt<bool>
    NoX86CFOpt("no-x86-call-frame-opt",
               cl::desc("Avoid optimizing x86 call frames for size"),
               cl::init(false), cl::Hidden);
    CallContext()
        : FrameSetup(nullptr), Call(nullptr), SPCopy(nullptr), ExpectedDist(0),
          MovVector(4, nullptr), NoStackParams(false), UsePush(false) {}
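    // A CallContext records everything the pass needs about one call
    // sequence: the frame-setup instruction, the call itself, an optional
    // copy of the stack pointer, the argument stores (MovVector, indexed by
    // stack slot), the stack distance they cover (ExpectedDist), and whether
    // the sequence was proven convertible to pushes (UsePush).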
  enum InstClassification { Convert, Skip, Exit };
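  // The scan over a call-setup sequence classifies each instruction as one
  // of: Convert, a MOV that stores an argument to the stack and can become a
  // PUSH (see classifyInstruction below); Skip, an instruction that does not
  // interfere with the argument stores; or Exit, an instruction that ends
  // the scan, such as the call itself or an unrecognized store.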
  StringRef getPassName() const override { return "X86 Optimize Call Frame"; }
  unsigned Log2SlotSize; // log2 of the stack slot size, set per function
FunctionPass *llvm::createX86CallFrameOptimization() {
  return new X86CallFrameOptimization();
}
bool X86CallFrameOptimization::isLegal(MachineFunction &MF) {
  // Darwin's compact unwind encoding cannot represent the CFA updates these
  // push sequences introduce, so bail if that encoding may be needed.
  if (STI->isTargetDarwin() &&
      (!MF.getLandingPads().empty() ||
       (MF.getFunction()->needsUnwindTableEntry() && !TFL->hasFP(MF))))
    return false;

  // The stack pointer must not change outside the prolog/epilog on Win64.
  if (STI->isTargetWin64())
    return false;
  unsigned FrameSetupOpcode = TII->getCallFrameSetupOpcode();
  unsigned FrameDestroyOpcode = TII->getCallFrameDestroyOpcode();
  for (MachineBasicBlock &BB : MF) {
    bool InsideFrameSequence = false;
    for (MachineInstr &MI : BB) {
      if (MI.getOpcode() == FrameSetupOpcode) {
        if (InsideFrameSequence)
          return false;
        InsideFrameSequence = true;
      } else if (MI.getOpcode() == FrameDestroyOpcode) {
        if (!InsideFrameSequence)
          return false;
        InsideFrameSequence = false;
      }
    }
    if (InsideFrameSequence)
      return false;
  }

  return true;
}
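// In short, isLegal() insists that call-frame setup and destroy markers (the
// ADJCALLSTACKDOWN/ADJCALLSTACKUP pseudos on x86) are properly paired within
// each basic block; the rewrite below assumes every call sequence is a
// single straight-line region.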
bool X86CallFrameOptimization::isProfitable(MachineFunction &MF,
                                            ContextVector &CallSeqVector) {
  // If the call frame cannot be reserved anyway, pushes are always a win.
  bool CannotReserveFrame = MF.getFrameInfo().hasVarSizedObjects();
  if (CannotReserveFrame)
    return true;

  unsigned StackAlign = TFL->getStackAlignment();
  int64_t Advantage = 0;
  for (auto CC : CallSeqVector) {
    // Call sites without stack parameters do not affect the cost.
    if (CC.NoStackParams)
      continue;
    // (The fixed per-call-site costs are elided in this excerpt.)
    if (CC.ExpectedDist % StackAlign)
      Advantage -= 3; // realigning the stack costs one extra sub
    // Each push saves roughly 3 bytes over the equivalent mov.
    Advantage += (CC.ExpectedDist >> Log2SlotSize) * 3;
  }
  return Advantage >= 0;
}
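// An illustration of the weights (not part of the source): with 4-byte
// slots, a call site passing four stack arguments has ExpectedDist == 16, so
// the pushes gain 4 * 3 = 12 bytes against the fixed costs; for instance,
// "movl $42, (%esp)" encodes in 7 bytes where "pushl $42" needs only 2.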
bool X86CallFrameOptimization::runOnMachineFunction(MachineFunction &MF) {
  STI = &MF.getSubtarget<X86Subtarget>();
  TII = STI->getInstrInfo();
  TFL = STI->getFrameLowering();
  MRI = &MF.getRegInfo();

  const X86RegisterInfo &RegInfo =
      *static_cast<const X86RegisterInfo *>(STI->getRegisterInfo());
  SlotSize = RegInfo.getSlotSize();
  assert(isPowerOf2_32(SlotSize) && "Expect power of 2 stack slot size");
  Log2SlotSize = Log2_32(SlotSize);

  if (skipFunction(*MF.getFunction()) || !isLegal(MF))
    return false;

  unsigned FrameSetupOpcode = TII->getCallFrameSetupOpcode();

  bool Changed = false;

  ContextVector CallSeqVector;

  for (auto &MBB : MF)
    for (auto &MI : MBB)
      if (MI.getOpcode() == FrameSetupOpcode) {
        CallContext Context;
        collectCallInfo(MF, MBB, MI, Context);
        CallSeqVector.push_back(Context);
      }

  if (!isProfitable(MF, CallSeqVector))
    return Changed;

  for (auto CC : CallSeqVector) {
    if (CC.UsePush) {
      adjustCallSequence(MF, CC);
      Changed = true;
    }
  }

  return Changed;
}
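// The driver, then: collect a CallContext for every call-frame setup in the
// function, decide profitability once over the whole set (the heuristic
// above is global, not per call site), and only then rewrite the sequences
// that collectCallInfo marked with UsePush.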
X86CallFrameOptimization::InstClassification
X86CallFrameOptimization::classifyInstruction(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
    const X86RegisterInfo &RegInfo, DenseSet<unsigned int> &UsedRegs) {
  if (MI == MBB.end())
    return Exit;

  // The instructions we actually care about are movs onto the stack.
  int Opcode = MI->getOpcode();
  if (Opcode == X86::MOV32mi || Opcode == X86::MOV32mr ||
      Opcode == X86::MOV64mi32 || Opcode == X86::MOV64mr)
    return Convert;

  // A call, or anything else that may write memory, ends the scan.
  if (MI->isCall() || MI->mayStore())
    return Exit;

  // Skip anything else, unless it defines a physical register that one of
  // the recorded argument stores still needs to read.
  for (const MachineOperand &MO : MI->operands()) {
    if (!MO.isReg())
      continue;
    unsigned int Reg = MO.getReg();
    if (!RegInfo.isPhysicalRegister(Reg))
      continue;
    if (MO.isDef())
      for (unsigned int U : UsedRegs)
        if (RegInfo.regsOverlap(Reg, U))
          return Exit;
  }
  return Skip;
}
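// Why only defs matter here: the movs are replaced by pushes emitted in
// reverse order next to the call, so a register read by one of the recorded
// stores must not be redefined between its original position and the call.
// UsedRegs accumulates every physical register those stores read.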
void X86CallFrameOptimization::collectCallInfo(MachineFunction &MF,
                                               MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator I,
                                               CallContext &Context) {
  const X86RegisterInfo &RegInfo =
      *static_cast<const X86RegisterInfo *>(STI->getRegisterInfo());
  unsigned FrameDestroyOpcode = TII->getCallFrameDestroyOpcode();

  // We expect to enter this at the start of a call sequence.
  assert(I->getOpcode() == TII->getCallFrameSetupOpcode());
  MachineBasicBlock::iterator FrameSetup = I++;
  Context.FrameSetup = FrameSetup;

  // The stack adjustment bounds the number of parameters passed on the
  // stack; a zero adjustment means there are none.
  unsigned int MaxAdjust =
      FrameSetup->getOperand(0).getImm() >> Log2SlotSize;
  if (!MaxAdjust) {
    Context.NoStackParams = true;
    return;
  }

  // Skip over DEBUG_VALUEs and, for globals in PIC mode, LEAs.
  while (I->getOpcode() == X86::LEA32r || I->isDebugValue())
    ++I;

  // SelectionDAG may have emitted a copy of the stack pointer.
  unsigned StackPtr = RegInfo.getStackRegister();
  if (I->isCopy() && I->getOperand(0).isReg() && I->getOperand(1).isReg() &&
      I->getOperand(1).getReg() == StackPtr) {
    Context.SPCopy = &*I++;
    StackPtr = Context.SPCopy->getOperand(0).getReg();
  }

  DenseSet<unsigned int> UsedRegs;
  Context.MovVector.resize(MaxAdjust, nullptr);
  InstClassification Classification;
  while ((Classification = classifyInstruction(MBB, I, RegInfo, UsedRegs)) !=
         Exit) {
    if (Classification == Skip) {
      ++I;
      continue;
    }

    // Convert: a store of the form "mov imm/reg, k(%esp)". (Checks that the
    // store's base really is the stack pointer are elided in this excerpt.)
    int64_t StackDisp = I->getOperand(X86::AddrDisp).getImm();
    assert(StackDisp >= 0 &&
           "Negative stack displacement when passing parameters");

    // We really don't want to consider unaligned stores.
    if (StackDisp & (SlotSize - 1))
      return;
    StackDisp >>= Log2SlotSize;
    assert((size_t)StackDisp < Context.MovVector.size() &&
           "Function call has more parameters than the stack is adjusted for.");

    // If the same stack slot is being filled twice, something is fishy.
    if (Context.MovVector[StackDisp] != nullptr)
      return;
    Context.MovVector[StackDisp] = &*I;

    // Remember every physical register the store reads.
    for (const MachineOperand &MO : I->uses()) {
      if (!MO.isReg())
        continue;
      unsigned int Reg = MO.getReg();
      if (RegInfo.isPhysicalRegister(Reg))
        UsedRegs.insert(Reg);
    }
    ++I;
  }

  // The sequence must end in the call, followed by the frame destroy.
  if (I == MBB.end() || !I->isCall())
    return;
  Context.Call = &*I;
  if ((++I)->getOpcode() != FrameDestroyOpcode)
    return;

  // The filled prefix of MovVector gives the expected stack distance.
  auto MMI = Context.MovVector.begin(), MME = Context.MovVector.end();
  for (; MMI != MME; ++MMI, Context.ExpectedDist += SlotSize)
    if (*MMI == nullptr)
      break;
  if (MMI == Context.MovVector.begin())
    return; // the call passes no stack parameters
  // Everything after the first hole must also be a hole.
  for (; MMI != MME; ++MMI)
    if (*MMI != nullptr)
      return;

  Context.UsePush = true;
}
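// Example of the final check: with 4-byte slots and ExpectedDist == 12,
// MovVector must hold stores for slots 0, 1 and 2 and nothing beyond; a hole
// (say, slot 1 never written) leaves UsePush unset and the sequence alone.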
void X86CallFrameOptimization::adjustCallSequence(MachineFunction &MF,
                                                  const CallContext &Context) {
  // Do not remove the FrameSetup instruction, but adjust its parameters;
  // PEI will finalize the handling of this call sequence.
  MachineBasicBlock::iterator FrameSetup = Context.FrameSetup;
  MachineBasicBlock &MBB = *(FrameSetup->getParent());
  FrameSetup->getOperand(1).setImm(Context.ExpectedDist);

  DebugLoc DL = FrameSetup->getDebugLoc();
  bool Is64Bit = STI->is64Bit();
  // Replace the movs with pushes, walking the slots in reverse order.
  for (int Idx = (Context.ExpectedDist >> Log2SlotSize) - 1; Idx >= 0; --Idx) {
    MachineBasicBlock::iterator MOV = *Context.MovVector[Idx];
    MachineOperand PushOp = MOV->getOperand(X86::AddrNumOperands);
    MachineBasicBlock::iterator Push = nullptr;
    unsigned PushOpcode;
    switch (MOV->getOpcode()) {
    default:
      llvm_unreachable("Unexpected Opcode!");
    case X86::MOV32mi:
    case X86::MOV64mi32:
      PushOpcode = Is64Bit ? X86::PUSH64i32 : X86::PUSHi32;
      // An 8-bit immediate takes the shorter push encoding. Note that
      // isImm() can fail even for a MOVmi, since the operand may be a symbol.
      if (PushOp.isImm()) {
        int64_t Val = PushOp.getImm();
        if (isInt<8>(Val))
          PushOpcode = Is64Bit ? X86::PUSH64i8 : X86::PUSH32i8;
      }
      Push = BuildMI(MBB, Context.Call, DL, TII->get(PushOpcode))
                 .addOperand(PushOp);
      break;
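    // For scale (an illustration, not from the source): on 32-bit targets
    // this rewrites e.g.
    //   movl $42, (%esp)    ; c7 04 24 2a 00 00 00  (7 bytes)
    // into
    //   pushl $42           ; 6a 2a                 (2 bytes)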
    case X86::MOV32mr:
    case X86::MOV64mr: {
      unsigned int Reg = PushOp.getReg();

      // If storing a 32-bit vreg on a 64-bit target, extend it to 64 bits
      // before the PUSH64r; the upper 32 bits may stay undef.
      if (Is64Bit && MOV->getOpcode() == X86::MOV32mr) {
        unsigned UndefReg = MRI->createVirtualRegister(&X86::GR64RegClass);
        Reg = MRI->createVirtualRegister(&X86::GR64RegClass);
        BuildMI(MBB, Context.Call, DL, TII->get(X86::IMPLICIT_DEF), UndefReg);
        BuildMI(MBB, Context.Call, DL, TII->get(X86::INSERT_SUBREG), Reg)
            .addReg(UndefReg)
            .addOperand(PushOp)
            .addImm(X86::sub_32bit);
      }

      // A push with a memory operand is slow on Atom/Silvermont, so only
      // fold the defining load into the push on other targets.
      bool SlowPUSHrmm = STI->isAtom() || STI->isSLM();

      MachineInstr *DefMov = nullptr;
      if (!SlowPUSHrmm && (DefMov = canFoldIntoRegPush(FrameSetup, Reg))) {
        PushOpcode = Is64Bit ? X86::PUSH64rmm : X86::PUSH32rmm;
        Push = BuildMI(MBB, Context.Call, DL, TII->get(PushOpcode));
        // (Copying the memory operands over from DefMov is elided here.)
        DefMov->eraseFromParent();
      } else {
        PushOpcode = Is64Bit ? X86::PUSH64r : X86::PUSH32r;
        Push = BuildMI(MBB, Context.Call, DL, TII->get(PushOpcode))
                   .addReg(Reg)
                   .getInstr();
      }
      break;
    }
    }

    // With an SP-based CFA, the CFA offset changes after every push; emit a
    // .cfi_adjust_cfa_offset so the debug info stays accurate.
    if (!TFL->hasFP(MF))
      TFL->BuildCFI(
          MBB, std::next(Push), DL,
          MCCFIInstruction::createAdjustCfaOffset(nullptr, SlotSize));

    MBB.erase(MOV);
  }

  // The stack-pointer copy is no longer used once the movs are gone.
  if (Context.SPCopy &&
      MRI->use_empty(Context.SPCopy->getOperand(0).getReg()))
    Context.SPCopy->eraseFromParent();

  // Make sure PEI does not assume a reserved call frame from here on.
  MF.getInfo<X86MachineFunctionInfo>()->setHasPushSequences(true);
}
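// When the fold succeeds, the rmm case above turns a pair like
//   movl 8(%ebx), %eax
//   pushl %eax
// into a single "pushl 8(%ebx)", provided canFoldIntoRegPush() below proves
// the load can safely move down to the push.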
MachineInstr *X86CallFrameOptimization::canFoldIntoRegPush(
    MachineBasicBlock::iterator FrameSetup, unsigned Reg) {
  // An extremely restricted form of load folding: the value must come from
  // a virtual register with exactly one non-debug use (the push itself)...
  if (!TargetRegisterInfo::isVirtualRegister(Reg))
    return nullptr;
  if (!MRI->hasOneNonDBGUse(Reg))
    return nullptr;

  // ...defined by a plain load in the same block as the call sequence...
  MachineInstr &DefMI = *MRI->getVRegDef(Reg);
  if ((DefMI.getOpcode() != X86::MOV32rm &&
       DefMI.getOpcode() != X86::MOV64rm) ||
      DefMI.getParent() != FrameSetup->getParent())
    return nullptr;

  // ...with no load-fold barrier between the load and the frame setup.
  for (MachineBasicBlock::iterator I = DefMI; I != FrameSetup; ++I)
    if (I->isLoadFoldBarrier())
      return nullptr;

  return &DefMI;
}
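// isLoadFoldBarrier() is deliberately conservative: any instruction that may
// store, is a call, or has unmodeled side effects blocks the load from being
// folded into the push.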