X86CallFrameOptimization.cpp
//===----- X86CallFrameOptimization.cpp - Optimize x86 call sequences -----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines a pass that optimizes call sequences on x86.
// Currently, it converts movs of function parameters onto the stack into
// pushes. This is beneficial for two main reasons:
// 1) The push instruction encoding is much smaller than a stack-ptr-based mov.
// 2) It is possible to push memory arguments directly. So, if the
//    transformation is performed pre-reg-alloc, it can help relieve
//    register pressure.
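//
// As an illustrative sketch (hypothetical code; the symbol _foo is made up
// for the example), a 32-bit call sequence such as
//   movl %eax, 4(%esp)
//   movl $42, (%esp)
//   calll _foo
// can instead be emitted as
//   pushl %eax
//   pushl $42
//   calll _foo
// with the pushes issued in order of decreasing displacement.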
//
//===----------------------------------------------------------------------===//

#include "MCTargetDesc/X86BaseInfo.h"
#include "X86FrameLowering.h"
#include "X86InstrInfo.h"
#include "X86MachineFunctionInfo.h"
#include "X86RegisterInfo.h"
#include "X86Subtarget.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Function.h"
#include "llvm/MC/MCDwarf.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <iterator>

using namespace llvm;

#define DEBUG_TYPE "x86-cf-opt"

static cl::opt<bool>
    NoX86CFOpt("no-x86-call-frame-opt",
               cl::desc("Avoid optimizing x86 call frames for size"),
               cl::init(false), cl::Hidden);

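// Usage note (an assumption about tooling, not stated in this file): as a
// cl::opt, the flag can be given to llc directly or from clang via -mllvm,
// e.g. "llc -no-x86-call-frame-opt foo.ll"; being cl::Hidden, it appears
// under -help-hidden rather than -help.
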
namespace {

class X86CallFrameOptimization : public MachineFunctionPass {
public:
  X86CallFrameOptimization() : MachineFunctionPass(ID) {}

  bool runOnMachineFunction(MachineFunction &MF) override;

  static char ID;

private:
  // Information we know about a particular call site
  struct CallContext {
    CallContext() : FrameSetup(nullptr), ArgStoreVector(4, nullptr) {}

    // Iterator referring to the frame setup instruction
    MachineBasicBlock::iterator FrameSetup;

    // Actual call instruction
    MachineInstr *Call = nullptr;

    // A copy of the stack pointer
    MachineInstr *SPCopy = nullptr;

    // The total displacement of all passed parameters
    int64_t ExpectedDist = 0;

    // The sequence of storing instructions used to pass the parameters
    SmallVector<MachineInstr *, 4> ArgStoreVector;

    // True if this call site has no stack parameters
    bool NoStackParams = false;

    // True if this call site can use push instructions
    bool UsePush = false;
  };

  typedef SmallVector<CallContext, 8> ContextVector;

  bool isLegal(MachineFunction &MF);

  bool isProfitable(MachineFunction &MF, ContextVector &CallSeqVector);

  void collectCallInfo(MachineFunction &MF, MachineBasicBlock &MBB,
                       MachineBasicBlock::iterator I, CallContext &Context);

  void adjustCallSequence(MachineFunction &MF, const CallContext &Context);

  MachineInstr *canFoldIntoRegPush(MachineBasicBlock::iterator FrameSetup,
                                   unsigned Reg);

  enum InstClassification { Convert, Skip, Exit };

  InstClassification classifyInstruction(MachineBasicBlock &MBB,
                                         MachineBasicBlock::iterator MI,
                                         const X86RegisterInfo &RegInfo,
                                         DenseSet<unsigned int> &UsedRegs);

  StringRef getPassName() const override { return "X86 Optimize Call Frame"; }

  const X86InstrInfo *TII;
  const X86FrameLowering *TFL;
  const X86Subtarget *STI;
  MachineRegisterInfo *MRI;
  unsigned SlotSize;
  unsigned Log2SlotSize;
};

} // end anonymous namespace

char X86CallFrameOptimization::ID = 0;

INITIALIZE_PASS(X86CallFrameOptimization, DEBUG_TYPE,
                "X86 Call Frame Optimization", false, false)

// This checks whether the transformation is legal.
// Also returns false in cases where it's potentially legal, but
// we don't even want to try.
bool X86CallFrameOptimization::isLegal(MachineFunction &MF) {
  if (NoX86CFOpt.getValue())
    return false;

  // We can't encode multiple DW_CFA_GNU_args_size or DW_CFA_def_cfa_offset
  // in the compact unwind encoding that Darwin uses. So, bail if there
  // is a danger of that being generated.
  if (STI->isTargetDarwin() &&
      (!MF.getLandingPads().empty() ||
       (MF.getFunction().needsUnwindTableEntry() && !TFL->hasFP(MF))))
    return false;

  // It is not valid to change the stack pointer outside the prolog/epilog
  // on 64-bit Windows.
  if (STI->isTargetWin64())
    return false;

  // You would expect straight-line code between call-frame setup and
  // call-frame destroy. You would be wrong. There are circumstances (e.g.
  // CMOV_GR8 expansion of a select that feeds a function call!) where we can
  // end up with the setup and the destroy in different basic blocks.
  // This is bad, and breaks SP adjustment.
  // So, check that all of the frames in the function are closed inside
  // the same block, and, for good measure, that there are no nested frames.
  //
  // If any call allocates more argument stack memory than the stack
  // probe size, don't do this optimization. Otherwise, this pass
  // would need to synthesize additional stack probe calls to allocate
  // memory for arguments.
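  //
  // For instance (hypothetical numbers): with a 4096-byte stack probe size,
  // a call site whose frame setup allocates 5000 bytes of argument memory
  // must keep the sub/add form, so that stack probing covers every page.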
  unsigned FrameSetupOpcode = TII->getCallFrameSetupOpcode();
  unsigned FrameDestroyOpcode = TII->getCallFrameDestroyOpcode();
  bool UseStackProbe =
      !STI->getTargetLowering()->getStackProbeSymbolName(MF).empty();
  unsigned StackProbeSize = STI->getTargetLowering()->getStackProbeSize(MF);
  for (MachineBasicBlock &BB : MF) {
    bool InsideFrameSequence = false;
    for (MachineInstr &MI : BB) {
      if (MI.getOpcode() == FrameSetupOpcode) {
        if (TII->getFrameSize(MI) >= StackProbeSize && UseStackProbe)
          return false;
        if (InsideFrameSequence)
          return false;
        InsideFrameSequence = true;
      } else if (MI.getOpcode() == FrameDestroyOpcode) {
        if (!InsideFrameSequence)
          return false;
        InsideFrameSequence = false;
      }
    }

    if (InsideFrameSequence)
      return false;
  }

  return true;
}

// Check whether this transformation is profitable for a particular
// function - in terms of code size.
bool X86CallFrameOptimization::isProfitable(MachineFunction &MF,
                                            ContextVector &CallSeqVector) {
  // This transformation is always a win when we do not expect to have
  // a reserved call frame. Under other circumstances, it may be either
  // a win or a loss, and requires a heuristic.
  bool CannotReserveFrame = MF.getFrameInfo().hasVarSizedObjects();
  if (CannotReserveFrame)
    return true;

  unsigned StackAlign = TFL->getStackAlignment();

  int64_t Advantage = 0;
  for (auto CC : CallSeqVector) {
    // Call sites where no parameters are passed on the stack
    // do not affect the cost, since there needs to be no
    // stack adjustment.
    if (CC.NoStackParams)
      continue;

    if (!CC.UsePush) {
      // If we don't use pushes for a particular call site,
      // we pay for not having a reserved call frame with an
      // additional sub/add esp pair. The cost is ~3 bytes per instruction,
      // depending on the size of the constant.
      // TODO: Callee-pop functions should have a smaller penalty, because
      // an add is needed even with a reserved call frame.
      Advantage -= 6;
    } else {
      // We can use pushes. First, account for the fixed costs.
      // We'll need an add after the call.
      Advantage -= 3;
      // If we have to realign the stack, we'll also need a sub before.
      if (CC.ExpectedDist % StackAlign)
        Advantage -= 3;
      // Now, for each push, we save ~3 bytes. For small constants, we
      // actually save more (up to 5 bytes), but 3 should be a good
      // approximation.
      Advantage += (CC.ExpectedDist >> Log2SlotSize) * 3;
    }
  }

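  // Worked example (hypothetical numbers): on a 32-bit target with
  // SlotSize == 4 and StackAlign == 16, a single call site passing four
  // stack arguments has ExpectedDist == 16. Pushes cost 3 bytes for the
  // post-call add (and no realignment sub, since 16 % 16 == 0) while
  // saving 4 * 3 == 12 bytes of movs, for a net Advantage of +9.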
  return Advantage >= 0;
}

bool X86CallFrameOptimization::runOnMachineFunction(MachineFunction &MF) {
  STI = &MF.getSubtarget<X86Subtarget>();
  TII = STI->getInstrInfo();
  TFL = STI->getFrameLowering();
  MRI = &MF.getRegInfo();

  const X86RegisterInfo &RegInfo =
      *static_cast<const X86RegisterInfo *>(STI->getRegisterInfo());
  SlotSize = RegInfo.getSlotSize();
  assert(isPowerOf2_32(SlotSize) && "Expect power of 2 stack slot size");
  Log2SlotSize = Log2_32(SlotSize);

  if (skipFunction(MF.getFunction()) || !isLegal(MF))
    return false;

  unsigned FrameSetupOpcode = TII->getCallFrameSetupOpcode();

  bool Changed = false;

  ContextVector CallSeqVector;

  for (auto &MBB : MF)
    for (auto &MI : MBB)
      if (MI.getOpcode() == FrameSetupOpcode) {
        CallContext Context;
        collectCallInfo(MF, MBB, MI, Context);
        CallSeqVector.push_back(Context);
      }

  if (!isProfitable(MF, CallSeqVector))
    return false;

  for (auto CC : CallSeqVector) {
    if (CC.UsePush) {
      adjustCallSequence(MF, CC);
      Changed = true;
    }
  }

  return Changed;
}

X86CallFrameOptimization::InstClassification
X86CallFrameOptimization::classifyInstruction(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
    const X86RegisterInfo &RegInfo, DenseSet<unsigned int> &UsedRegs) {
  if (MI == MBB.end())
    return Exit;

  // The instructions we actually care about are movs onto the stack or special
  // cases of constant-stores to stack.
  switch (MI->getOpcode()) {
  case X86::AND16mi8:
  case X86::AND32mi8:
  case X86::AND64mi8: {
    MachineOperand ImmOp = MI->getOperand(X86::AddrNumOperands);
    return ImmOp.getImm() == 0 ? Convert : Exit;
  }
  case X86::OR16mi8:
  case X86::OR32mi8:
  case X86::OR64mi8: {
    MachineOperand ImmOp = MI->getOperand(X86::AddrNumOperands);
    return ImmOp.getImm() == -1 ? Convert : Exit;
  }
  case X86::MOV32mi:
  case X86::MOV32mr:
  case X86::MOV64mi32:
  case X86::MOV64mr:
    return Convert;
  }

  // Not all calling conventions have only stack MOVs between the stack
  // adjust and the call.

  // We want to tolerate other instructions, to cover more cases.
  // In particular:
  // a) PCrel calls, where we expect an additional COPY of the basereg.
  // b) Passing frame-index addresses.
  // c) Calling conventions that have inreg parameters. These generate
  //    both copies and movs into registers.
  // To avoid creating lots of special cases, allow any instruction
  // that does not write into memory, does not def or use the stack
  // pointer, and does not def any register that was used by a preceding
  // push.
  // (Reading from memory is allowed, even if referenced through a
  // frame index, since these will get adjusted properly in PEI.)

  // The reason for the last condition is that the pushes can't replace
  // the movs in place, because the order must be reversed.
  // So if we have a MOV32mr that uses EDX, then an instruction that defs
  // EDX, and then the call, after the transformation the push will use
  // the modified version of EDX, and not the original one.
  // Since we are still in SSA form at this point, we only need to
  // make sure we don't clobber any *physical* registers that were
  // used by an earlier mov that will become a push.

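  // An illustrative sketch of the hazard (hypothetical MIR, not taken from a
  // real compilation): the store below becomes the push nearest the call, so
  // the intervening redefinition of $edx must make us Exit.
  //   MOV32mr $esp, 1, $noreg, 0, $noreg, $edx   ; will become "push $edx"
  //   $edx = MOV32ri 0                           ; clobbers $edx
  //   CALLpcrel32 @f
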
  if (MI->isCall() || MI->mayStore())
    return Exit;

  for (const MachineOperand &MO : MI->operands()) {
    if (!MO.isReg())
      continue;
    Register Reg = MO.getReg();
    if (!Register::isPhysicalRegister(Reg))
      continue;
    if (RegInfo.regsOverlap(Reg, RegInfo.getStackRegister()))
      return Exit;
    if (MO.isDef()) {
      for (unsigned int U : UsedRegs)
        if (RegInfo.regsOverlap(Reg, U))
          return Exit;
    }
  }

  return Skip;
}

void X86CallFrameOptimization::collectCallInfo(MachineFunction &MF,
                                               MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator I,
                                               CallContext &Context) {
  // Check that this particular call sequence is amenable to the
  // transformation.
  const X86RegisterInfo &RegInfo =
      *static_cast<const X86RegisterInfo *>(STI->getRegisterInfo());

  // We expect to enter this at the beginning of a call sequence.
  assert(I->getOpcode() == TII->getCallFrameSetupOpcode());
  MachineBasicBlock::iterator FrameSetup = I++;
  Context.FrameSetup = FrameSetup;

  // How much do we adjust the stack? This puts an upper bound on
  // the number of parameters actually passed on it.
  unsigned int MaxAdjust = TII->getFrameSize(*FrameSetup) >> Log2SlotSize;

  // A zero adjustment means no stack parameters.
  if (!MaxAdjust) {
    Context.NoStackParams = true;
    return;
  }

  // Skip over DEBUG_VALUE.
  // For globals in PIC mode, we can have some LEAs here. Skip them as well.
  // TODO: Extend this to something that covers more cases.
  while (I->getOpcode() == X86::LEA32r || I->isDebugInstr())
    ++I;

  Register StackPtr = RegInfo.getStackRegister();
  auto StackPtrCopyInst = MBB.end();
  // SelectionDAG (but not FastISel) inserts a copy of ESP into a virtual
  // register. If it's there, use that virtual register as stack pointer
  // instead. Also, we need to locate this instruction so that we can later
  // safely ignore it while doing the conservative processing of the call
  // chain. The COPY can be located anywhere between the call-frame setup
  // instruction and its first use. We use the call instruction as a boundary
  // because it is usually cheaper to check if an instruction is a call than
  // checking if an instruction uses a register.
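  // Typical shape being matched here (illustrative MIR, hypothetical names):
  //   ADJCALLSTACKDOWN32 8, 0, 0, implicit-def $esp, ...
  //   %sp:gr32 = COPY $esp                     ; the SPCopy located below
  //   MOV32mr %sp, 1, $noreg, 0, $noreg, %a    ; stores use %sp as base
  //   CALLpcrel32 @f, ...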
  for (auto J = I; !J->isCall(); ++J)
    if (J->isCopy() && J->getOperand(0).isReg() && J->getOperand(1).isReg() &&
        J->getOperand(1).getReg() == StackPtr) {
      StackPtrCopyInst = J;
      Context.SPCopy = &*J++;
      StackPtr = Context.SPCopy->getOperand(0).getReg();
      break;
    }

  // Scan the call setup sequence for the pattern we're looking for.
  // We only handle a simple case - a sequence of store instructions that
  // push a sequence of stack-slot-aligned values onto the stack, with
  // no gaps between them.
  if (MaxAdjust > 4)
    Context.ArgStoreVector.resize(MaxAdjust, nullptr);

  DenseSet<unsigned int> UsedRegs;

  for (InstClassification Classification = Skip; Classification != Exit; ++I) {
    // If this is the COPY of the stack pointer, it's ok to ignore.
    if (I == StackPtrCopyInst)
      continue;
    Classification = classifyInstruction(MBB, I, RegInfo, UsedRegs);
    if (Classification != Convert)
      continue;
    // We know the instruction has a supported store opcode.
    // We only want movs of the form:
    //   mov imm/reg, k(%StackPtr)
    // If we run into something else, bail.
    // Note that AddrBaseReg may, counter to its name, not be a register,
    // but rather a frame index.
    // TODO: Support the fi case. This should probably work now that we
    // have the infrastructure to track the stack pointer within a call
    // sequence.
    if (!I->getOperand(X86::AddrBaseReg).isReg() ||
        (I->getOperand(X86::AddrBaseReg).getReg() != StackPtr) ||
        !I->getOperand(X86::AddrScaleAmt).isImm() ||
        (I->getOperand(X86::AddrScaleAmt).getImm() != 1) ||
        (I->getOperand(X86::AddrIndexReg).getReg() != X86::NoRegister) ||
        (I->getOperand(X86::AddrSegmentReg).getReg() != X86::NoRegister) ||
        !I->getOperand(X86::AddrDisp).isImm())
      return;

    int64_t StackDisp = I->getOperand(X86::AddrDisp).getImm();
    assert(StackDisp >= 0 &&
           "Negative stack displacement when passing parameters");

    // We really don't want to consider the unaligned case.
    if (StackDisp & (SlotSize - 1))
      return;
    StackDisp >>= Log2SlotSize;

    assert((size_t)StackDisp < Context.ArgStoreVector.size() &&
           "Function call has more parameters than the stack is adjusted for.");

    // If the same stack slot is being filled twice, something's fishy.
    if (Context.ArgStoreVector[StackDisp] != nullptr)
      return;
    Context.ArgStoreVector[StackDisp] = &*I;

    for (const MachineOperand &MO : I->uses()) {
      if (!MO.isReg())
        continue;
      Register Reg = MO.getReg();
      if (Register::isPhysicalRegister(Reg))
        UsedRegs.insert(Reg);
    }
  }

  --I;

  // We now expect the end of the sequence. If we stopped early,
  // or reached the end of the block without finding a call, bail.
  if (I == MBB.end() || !I->isCall())
    return;

  Context.Call = &*I;
  if ((++I)->getOpcode() != TII->getCallFrameDestroyOpcode())
    return;

  // Now, go through the vector, and see that we don't have any gaps,
  // but only a series of storing instructions.
  auto MMI = Context.ArgStoreVector.begin(), MME = Context.ArgStoreVector.end();
  for (; MMI != MME; ++MMI, Context.ExpectedDist += SlotSize)
    if (*MMI == nullptr)
      break;

  // If the call had no parameters, do nothing.
  if (MMI == Context.ArgStoreVector.begin())
    return;

  // We are either at the last parameter, or a gap.
  // Make sure it's not a gap.
  for (; MMI != MME; ++MMI)
    if (*MMI != nullptr)
      return;

  Context.UsePush = true;
}

void X86CallFrameOptimization::adjustCallSequence(MachineFunction &MF,
                                                  const CallContext &Context) {
  // Ok, we can in fact do the transformation for this call.
  // Do not remove the FrameSetup instruction, but adjust the parameters.
  // PEI will end up finalizing the handling of this.
  MachineBasicBlock::iterator FrameSetup = Context.FrameSetup;
  MachineBasicBlock &MBB = *(FrameSetup->getParent());
  TII->setFrameAdjustment(*FrameSetup, Context.ExpectedDist);

  DebugLoc DL = FrameSetup->getDebugLoc();
  bool Is64Bit = STI->is64Bit();
  // Now, iterate through the vector in reverse order, and replace the stores
  // to stack with pushes. MOVmi/MOVmr doesn't have any defs, so no need to
  // replace uses.
  for (int Idx = (Context.ExpectedDist >> Log2SlotSize) - 1; Idx >= 0; --Idx) {
    MachineBasicBlock::iterator Store = *Context.ArgStoreVector[Idx];
    MachineOperand PushOp = Store->getOperand(X86::AddrNumOperands);
    MachineBasicBlock::iterator Push = nullptr;
    unsigned PushOpcode;
    switch (Store->getOpcode()) {
    default:
      llvm_unreachable("Unexpected Opcode!");
    case X86::AND16mi8:
    case X86::AND32mi8:
    case X86::AND64mi8:
    case X86::OR16mi8:
    case X86::OR32mi8:
    case X86::OR64mi8:
    case X86::MOV32mi:
    case X86::MOV64mi32:
      PushOpcode = Is64Bit ? X86::PUSH64i32 : X86::PUSHi32;
      // If the operand is a small (8-bit) immediate, we can use a
      // PUSH instruction with a shorter encoding.
      // Note that isImm() may fail even though this is a MOVmi, because
      // the operand can also be a symbol.
      if (PushOp.isImm()) {
        int64_t Val = PushOp.getImm();
        if (isInt<8>(Val))
          PushOpcode = Is64Bit ? X86::PUSH64i8 : X86::PUSH32i8;
      }
      Push = BuildMI(MBB, Context.Call, DL, TII->get(PushOpcode)).add(PushOp);
      break;
    case X86::MOV32mr:
    case X86::MOV64mr: {
      Register Reg = PushOp.getReg();

      // If storing a 32-bit vreg on 64-bit targets, extend to a 64-bit vreg
      // in preparation for the PUSH64. The upper 32 bits can be undef.
      if (Is64Bit && Store->getOpcode() == X86::MOV32mr) {
        Register UndefReg = MRI->createVirtualRegister(&X86::GR64RegClass);
        Reg = MRI->createVirtualRegister(&X86::GR64RegClass);
        BuildMI(MBB, Context.Call, DL, TII->get(X86::IMPLICIT_DEF), UndefReg);
        BuildMI(MBB, Context.Call, DL, TII->get(X86::INSERT_SUBREG), Reg)
            .addReg(UndefReg)
            .add(PushOp)
            .addImm(X86::sub_32bit);
      }
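      // Illustrative result of the widening above (hypothetical vreg names):
      //   %undef:gr64 = IMPLICIT_DEF
      //   %wide:gr64 = INSERT_SUBREG %undef, %val:gr32, %subreg.sub_32bit
      // Reg now names the 64-bit value, so the push below is a 64-bit push.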

      // If PUSHrmm is not slow on this target, try to fold the source of the
      // push into the instruction.
      bool SlowPUSHrmm = STI->isAtom() || STI->isSLM();

      // Check that this is legal to fold. Right now, we're extremely
      // conservative about that.
      MachineInstr *DefMov = nullptr;
      if (!SlowPUSHrmm && (DefMov = canFoldIntoRegPush(FrameSetup, Reg))) {
        PushOpcode = Is64Bit ? X86::PUSH64rmm : X86::PUSH32rmm;
        Push = BuildMI(MBB, Context.Call, DL, TII->get(PushOpcode));

        unsigned NumOps = DefMov->getDesc().getNumOperands();
        for (unsigned i = NumOps - X86::AddrNumOperands; i != NumOps; ++i)
          Push->addOperand(DefMov->getOperand(i));

        DefMov->eraseFromParent();
      } else {
        PushOpcode = Is64Bit ? X86::PUSH64r : X86::PUSH32r;
        Push = BuildMI(MBB, Context.Call, DL, TII->get(PushOpcode))
                   .addReg(Reg)
                   .getInstr();
      }
      break;
    }
    }

    // For debugging, when using SP-based CFA, we need to adjust the CFA
    // offset after each push.
    // TODO: This is needed only if we require precise CFA.
    if (!TFL->hasFP(MF))
      TFL->BuildCFI(
          MBB, std::next(Push), DL,
          MCCFIInstruction::createAdjustCfaOffset(nullptr, SlotSize));

    MBB.erase(Store);
  }

  // The stack-pointer copy is no longer used in the call sequences.
  // There should not be any other users, but we can't commit to that, so:
  if (Context.SPCopy && MRI->use_empty(Context.SPCopy->getOperand(0).getReg()))
    Context.SPCopy->eraseFromParent();

  // Once we've done this, we need to make sure PEI doesn't assume a reserved
  // frame.
  X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
  FuncInfo->setHasPushSequences(true);
}

MachineInstr *X86CallFrameOptimization::canFoldIntoRegPush(
    MachineBasicBlock::iterator FrameSetup, unsigned Reg) {
  // Do an extremely restricted form of load folding.
  // ISel will often create patterns like:
  //   movl 4(%edi), %eax
  //   movl 8(%edi), %ecx
  //   movl 12(%edi), %edx
  //   movl %edx, 8(%esp)
  //   movl %ecx, 4(%esp)
  //   movl %eax, (%esp)
  //   call
  // Get rid of those with prejudice.
  if (!Register::isVirtualRegister(Reg))
    return nullptr;

  // Make sure this is the only use of Reg.
  if (!MRI->hasOneNonDBGUse(Reg))
    return nullptr;

  MachineInstr &DefMI = *MRI->getVRegDef(Reg);

  // Make sure the def is a MOV from memory.
  // If the def is in another block, give up.
  if ((DefMI.getOpcode() != X86::MOV32rm &&
       DefMI.getOpcode() != X86::MOV64rm) ||
      DefMI.getParent() != FrameSetup->getParent())
    return nullptr;

  // Make sure we don't have any instructions between DefMI and the
  // push that make folding the load illegal.
  for (MachineBasicBlock::iterator I = DefMI; I != FrameSetup; ++I)
    if (I->isLoadFoldBarrier())
      return nullptr;

  return &DefMI;
}

FunctionPass *llvm::createX86CallFrameOptimization() {
  return new X86CallFrameOptimization();
}
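
// Usage sketch (an assumption mirroring how X86TargetMachine wires up this
// pass; X86PassConfig lives outside this file): the pass is added before
// register allocation, e.g.
//   void X86PassConfig::addPreRegAlloc() {
//     if (getOptLevel() != CodeGenOpt::None)
//       addPass(createX86CallFrameOptimization());
//   }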