CombinerHelper.cpp (LLVM 10.0.0svn)
1 //===-- lib/CodeGen/GlobalISel/GICombinerHelper.cpp -----------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 #include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
9 #include "llvm/CodeGen/GlobalISel/Combiner.h"
10 #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
11 #include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
12 #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
13 #include "llvm/CodeGen/GlobalISel/Utils.h"
14 #include "llvm/CodeGen/MachineDominators.h"
15 #include "llvm/CodeGen/MachineFrameInfo.h"
16 #include "llvm/CodeGen/MachineInstr.h"
17 #include "llvm/CodeGen/MachineRegisterInfo.h"
18 #include "llvm/CodeGen/TargetInstrInfo.h"
19 #include "llvm/CodeGen/TargetLowering.h"
20 #include "llvm/Target/TargetMachine.h"
21 
22 #define DEBUG_TYPE "gi-combiner"
23 
24 using namespace llvm;
25 
26 // Option to allow testing of the combiner while no targets know about indexed
27 // addressing.
28 static cl::opt<bool>
29  ForceLegalIndexing("force-legal-indexing", cl::Hidden, cl::init(false),
30  cl::desc("Force all indexed operations to be "
31  "legal for the GlobalISel combiner"));
32 
33 
34 CombinerHelper::CombinerHelper(GISelChangeObserver &Observer,
35  MachineIRBuilder &B, GISelKnownBits *KB,
36  MachineDominatorTree *MDT)
37  : Builder(B), MRI(Builder.getMF().getRegInfo()), Observer(Observer),
38  KB(KB), MDT(MDT) {
39  (void)this->KB;
40 }
41 
42 void CombinerHelper::replaceRegWith(MachineRegisterInfo &MRI, Register FromReg,
43  Register ToReg) const {
44  Observer.changingAllUsesOfReg(MRI, FromReg);
45 
46  if (MRI.constrainRegAttrs(ToReg, FromReg))
47  MRI.replaceRegWith(FromReg, ToReg);
48  else
49  Builder.buildCopy(ToReg, FromReg);
50 
51  Observer.finishedChangingAllUsesOfReg();
52 }
53 
54 void CombinerHelper::replaceRegOpWith(MachineRegisterInfo &MRI,
55  MachineOperand &FromRegOp,
56  Register ToReg) const {
57  assert(FromRegOp.getParent() && "Expected an operand in an MI");
58  Observer.changingInstr(*FromRegOp.getParent());
59 
60  FromRegOp.setReg(ToReg);
61 
62  Observer.changedInstr(*FromRegOp.getParent());
63 }
64 
65 bool CombinerHelper::tryCombineCopy(MachineInstr &MI) {
66  if (matchCombineCopy(MI)) {
67  applyCombineCopy(MI);
68  return true;
69  }
70  return false;
71 }
72 bool CombinerHelper::matchCombineCopy(MachineInstr &MI) {
73  if (MI.getOpcode() != TargetOpcode::COPY)
74  return false;
75  Register DstReg = MI.getOperand(0).getReg();
76  Register SrcReg = MI.getOperand(1).getReg();
77  LLT DstTy = MRI.getType(DstReg);
78  LLT SrcTy = MRI.getType(SrcReg);
79  // Simple Copy Propagation.
80  // a(sx) = COPY b(sx) -> Replace all uses of a with b.
81  if (DstTy.isValid() && SrcTy.isValid() && DstTy == SrcTy)
82  return true;
83  return false;
84 }
85 void CombinerHelper::applyCombineCopy(MachineInstr &MI) {
86  Register DstReg = MI.getOperand(0).getReg();
87  Register SrcReg = MI.getOperand(1).getReg();
88  MI.eraseFromParent();
89  replaceRegWith(MRI, DstReg, SrcReg);
90 }
91 
92 namespace {
93 
94 /// Select a preference between two uses. CurrentUse is the current preference
95 /// while *ForCandidate is attributes of the candidate under consideration.
96 PreferredTuple ChoosePreferredUse(PreferredTuple &CurrentUse,
97  const LLT &TyForCandidate,
98  unsigned OpcodeForCandidate,
99  MachineInstr *MIForCandidate) {
100  if (!CurrentUse.Ty.isValid()) {
101  if (CurrentUse.ExtendOpcode == OpcodeForCandidate ||
102  CurrentUse.ExtendOpcode == TargetOpcode::G_ANYEXT)
103  return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
104  return CurrentUse;
105  }
106 
107  // We permit the extend to hoist through basic blocks but this is only
108  // sensible if the target has extending loads. If you end up lowering back
109  // into a load and extend during the legalizer then the end result is
110  // hoisting the extend up to the load.
111 
112  // Prefer defined extensions to undefined extensions as these are more
113  // likely to reduce the number of instructions.
114  if (OpcodeForCandidate == TargetOpcode::G_ANYEXT &&
115  CurrentUse.ExtendOpcode != TargetOpcode::G_ANYEXT)
116  return CurrentUse;
117  else if (CurrentUse.ExtendOpcode == TargetOpcode::G_ANYEXT &&
118  OpcodeForCandidate != TargetOpcode::G_ANYEXT)
119  return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
120 
121  // Prefer sign extensions to zero extensions: sign extensions tend to be
122  // more expensive, so folding them into the load is the bigger win.
123  if (CurrentUse.Ty == TyForCandidate) {
124  if (CurrentUse.ExtendOpcode == TargetOpcode::G_SEXT &&
125  OpcodeForCandidate == TargetOpcode::G_ZEXT)
126  return CurrentUse;
127  else if (CurrentUse.ExtendOpcode == TargetOpcode::G_ZEXT &&
128  OpcodeForCandidate == TargetOpcode::G_SEXT)
129  return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
130  }
131 
132  // This is potentially target specific. We've chosen the largest type
133  // because G_TRUNC is usually free. One potential catch with this is that
134  // some targets have a reduced number of larger registers than smaller
135  // registers and this choice potentially increases the live-range for the
136  // larger value.
137  if (TyForCandidate.getSizeInBits() > CurrentUse.Ty.getSizeInBits()) {
138  return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
139  }
140  return CurrentUse;
141 }
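// Illustrative walk-through of ChoosePreferredUse (hypothetical candidates, not
// taken from a real test case). For a plain G_LOAD the preference is seeded
// with G_ANYEXT and an invalid type, so the first candidate, say a G_SEXT to
// s32, is accepted. A later G_ANYEXT candidate is then rejected because
// defined extensions win over G_ANYEXT, and a G_ZEXT to s64 replaces the s32
// G_SEXT because, when the types differ, the larger destination type is
// preferred (the G_TRUNC back to the smaller type is usually free).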
142 
143 /// Find a suitable place to insert some instructions and insert them. This
144 /// function accounts for special cases like inserting before a PHI node.
145 /// The current strategy for inserting before PHI's is to duplicate the
146 /// instructions for each predecessor. However, while that's ok for G_TRUNC
147 /// on most targets since it generally requires no code, other targets/cases may
148 /// want to try harder to find a dominating block.
149 static void InsertInsnsWithoutSideEffectsBeforeUse(
150  MachineIRBuilder &Builder, MachineInstr &DefMI, MachineOperand &UseMO,
151  std::function<void(MachineBasicBlock *, MachineBasicBlock::iterator,
152  MachineOperand &UseMO)>
153  Inserter) {
154  MachineInstr &UseMI = *UseMO.getParent();
155 
156  MachineBasicBlock *InsertBB = UseMI.getParent();
157 
158  // If the use is a PHI then we want the predecessor block instead.
159  if (UseMI.isPHI()) {
160  MachineOperand *PredBB = std::next(&UseMO);
161  InsertBB = PredBB->getMBB();
162  }
163 
164  // If the block is the same block as the def then we want to insert just after
165  // the def instead of at the start of the block.
166  if (InsertBB == DefMI.getParent()) {
167  MachineBasicBlock::iterator InsertPt = &DefMI;
168  Inserter(InsertBB, std::next(InsertPt), UseMO);
169  return;
170  }
171 
172  // Otherwise we want the start of the BB
173  Inserter(InsertBB, InsertBB->getFirstNonPHI(), UseMO);
174 }
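// Sketch of the PHI case handled above (hypothetical MIR): for a use such as
//   %phi:_(s8) = G_PHI %v(s8), %bb.1, %w(s8), %bb.2
// the MachineOperand following the used value operand names the corresponding
// predecessor block (%bb.1 for %v), so the new instructions are inserted at
// that predecessor's first non-PHI position, or directly after the def when
// the def already lives in that block.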
175 } // end anonymous namespace
176 
177 bool CombinerHelper::tryCombineExtendingLoads(MachineInstr &MI) {
178  PreferredTuple Preferred;
179  if (matchCombineExtendingLoads(MI, Preferred)) {
180  applyCombineExtendingLoads(MI, Preferred);
181  return true;
182  }
183  return false;
184 }
185 
186 bool CombinerHelper::matchCombineExtendingLoads(MachineInstr &MI,
187  PreferredTuple &Preferred) {
188  // We match the loads and follow the uses to the extend instead of matching
189  // the extends and following the def to the load. This is because the load
190  // must remain in the same position for correctness (unless we also add code
191  // to find a safe place to sink it) whereas the extend is freely movable.
192  // It also prevents us from duplicating the load for the volatile case or just
193  // for performance.
194 
195  if (MI.getOpcode() != TargetOpcode::G_LOAD &&
196  MI.getOpcode() != TargetOpcode::G_SEXTLOAD &&
197  MI.getOpcode() != TargetOpcode::G_ZEXTLOAD)
198  return false;
199 
200  auto &LoadValue = MI.getOperand(0);
201  assert(LoadValue.isReg() && "Result wasn't a register?");
202 
203  LLT LoadValueTy = MRI.getType(LoadValue.getReg());
204  if (!LoadValueTy.isScalar())
205  return false;
206 
207  // Most architectures are going to legalize <s8 loads into at least a 1 byte
208  // load, and the MMOs can only describe memory accesses in multiples of bytes.
209  // If we try to perform extload combining on those, we can end up with
210  // %a(s8) = extload %ptr (load 1 byte from %ptr)
211  // ... which is an illegal extload instruction.
212  if (LoadValueTy.getSizeInBits() < 8)
213  return false;
214 
215  // For non power-of-2 types, they will very likely be legalized into multiple
216  // loads. Don't bother trying to match them into extending loads.
217  if (!isPowerOf2_32(LoadValueTy.getSizeInBits()))
218  return false;
219 
220  // Find the preferred type aside from the any-extends (unless it's the only
221  // one) and non-extending ops. We'll emit an extending load to that type and
222  // emit a variant of (extend (trunc X)) for the others according to the
223  // relative type sizes. At the same time, pick an extend to use based on the
224  // extend involved in the chosen type.
225  unsigned PreferredOpcode = MI.getOpcode() == TargetOpcode::G_LOAD
226  ? TargetOpcode::G_ANYEXT
227  : MI.getOpcode() == TargetOpcode::G_SEXTLOAD
228  ? TargetOpcode::G_SEXT
229  : TargetOpcode::G_ZEXT;
230  Preferred = {LLT(), PreferredOpcode, nullptr};
231  for (auto &UseMI : MRI.use_instructions(LoadValue.getReg())) {
232  if (UseMI.getOpcode() == TargetOpcode::G_SEXT ||
233  UseMI.getOpcode() == TargetOpcode::G_ZEXT ||
234  UseMI.getOpcode() == TargetOpcode::G_ANYEXT) {
235  Preferred = ChoosePreferredUse(Preferred,
236  MRI.getType(UseMI.getOperand(0).getReg()),
237  UseMI.getOpcode(), &UseMI);
238  }
239  }
240 
241  // There were no extends
242  if (!Preferred.MI)
243  return false;
244  // It should be impossible to choose an extend without selecting a different
245  // type since by definition the result of an extend is larger.
246  assert(Preferred.Ty != LoadValueTy && "Extending to same type?");
247 
248  LLVM_DEBUG(dbgs() << "Preferred use is: " << *Preferred.MI);
249  return true;
250 }
251 
252 void CombinerHelper::applyCombineExtendingLoads(MachineInstr &MI,
253  PreferredTuple &Preferred) {
254  // Rewrite the load to the chosen extending load.
255  Register ChosenDstReg = Preferred.MI->getOperand(0).getReg();
256 
257  // Inserter to insert a truncate back to the original type at a given point
258  // with some basic CSE to limit truncate duplication to one per BB.
259  DenseMap<MachineBasicBlock *, MachineInstr *> EmittedInsns;
260  auto InsertTruncAt = [&](MachineBasicBlock *InsertIntoBB,
261  MachineBasicBlock::iterator InsertBefore,
262  MachineOperand &UseMO) {
263  MachineInstr *PreviouslyEmitted = EmittedInsns.lookup(InsertIntoBB);
264  if (PreviouslyEmitted) {
265  Observer.changingInstr(*UseMO.getParent());
266  UseMO.setReg(PreviouslyEmitted->getOperand(0).getReg());
267  Observer.changedInstr(*UseMO.getParent());
268  return;
269  }
270 
271  Builder.setInsertPt(*InsertIntoBB, InsertBefore);
272  Register NewDstReg = MRI.cloneVirtualRegister(MI.getOperand(0).getReg());
273  MachineInstr *NewMI = Builder.buildTrunc(NewDstReg, ChosenDstReg);
274  EmittedInsns[InsertIntoBB] = NewMI;
275  replaceRegOpWith(MRI, UseMO, NewDstReg);
276  };
277 
278  Observer.changingInstr(MI);
279  MI.setDesc(
280  Builder.getTII().get(Preferred.ExtendOpcode == TargetOpcode::G_SEXT
281  ? TargetOpcode::G_SEXTLOAD
282  : Preferred.ExtendOpcode == TargetOpcode::G_ZEXT
283  ? TargetOpcode::G_ZEXTLOAD
284  : TargetOpcode::G_LOAD));
285 
286  // Rewrite all the uses to fix up the types.
287  auto &LoadValue = MI.getOperand(0);
288  SmallVector<MachineOperand *, 4> Uses;
289  for (auto &UseMO : MRI.use_operands(LoadValue.getReg()))
290  Uses.push_back(&UseMO);
291 
292  for (auto *UseMO : Uses) {
293  MachineInstr *UseMI = UseMO->getParent();
294 
295  // If the extend is compatible with the preferred extend then we should fix
296  // up the type and extend so that it uses the preferred use.
297  if (UseMI->getOpcode() == Preferred.ExtendOpcode ||
298  UseMI->getOpcode() == TargetOpcode::G_ANYEXT) {
299  Register UseDstReg = UseMI->getOperand(0).getReg();
300  MachineOperand &UseSrcMO = UseMI->getOperand(1);
301  const LLT &UseDstTy = MRI.getType(UseDstReg);
302  if (UseDstReg != ChosenDstReg) {
303  if (Preferred.Ty == UseDstTy) {
304  // If the use has the same type as the preferred use, then merge
305  // the vregs and erase the extend. For example:
306  // %1:_(s8) = G_LOAD ...
307  // %2:_(s32) = G_SEXT %1(s8)
308  // %3:_(s32) = G_ANYEXT %1(s8)
309  // ... = ... %3(s32)
310  // rewrites to:
311  // %2:_(s32) = G_SEXTLOAD ...
312  // ... = ... %2(s32)
313  replaceRegWith(MRI, UseDstReg, ChosenDstReg);
314  Observer.erasingInstr(*UseMO->getParent());
315  UseMO->getParent()->eraseFromParent();
316  } else if (Preferred.Ty.getSizeInBits() < UseDstTy.getSizeInBits()) {
317  // If the preferred size is smaller, then keep the extend but extend
318  // from the result of the extending load. For example:
319  // %1:_(s8) = G_LOAD ...
320  // %2:_(s32) = G_SEXT %1(s8)
321  // %3:_(s64) = G_ANYEXT %1(s8)
322  // ... = ... %3(s64)
323  // rewrites to:
324  // %2:_(s32) = G_SEXTLOAD ...
325  // %3:_(s64) = G_ANYEXT %2:_(s32)
326  // ... = ... %3(s64)
327  replaceRegOpWith(MRI, UseSrcMO, ChosenDstReg);
328  } else {
329  // If the preferred size is large, then insert a truncate. For
330  // example:
331  // %1:_(s8) = G_LOAD ...
332  // %2:_(s64) = G_SEXT %1(s8)
333  // %3:_(s32) = G_ZEXT %1(s8)
334  // ... = ... %3(s32)
335  // rewrites to:
336  // %2:_(s64) = G_SEXTLOAD ...
337  // %4:_(s8) = G_TRUNC %2:_(s64)
338  // %3:_(s32) = G_ZEXT %4:_(s8)
339  // ... = ... %3(s32)
340  InsertInsnsWithoutSideEffectsBeforeUse(Builder, MI, *UseMO,
341  InsertTruncAt);
342  }
343  continue;
344  }
345  // The use is (one of) the uses of the preferred use we chose earlier.
346  // We're going to update the load to def this value later so just erase
347  // the old extend.
348  Observer.erasingInstr(*UseMO->getParent());
349  UseMO->getParent()->eraseFromParent();
350  continue;
351  }
352 
353  // The use isn't an extend. Truncate back to the type we originally loaded.
354  // This is free on many targets.
355  InsertInsnsWithoutSideEffectsBeforeUse(Builder, MI, *UseMO, InsertTruncAt);
356  }
357 
358  MI.getOperand(0).setReg(ChosenDstReg);
359  Observer.changedInstr(MI);
360 }
361 
362 bool CombinerHelper::isPredecessor(MachineInstr &DefMI, MachineInstr &UseMI) {
363  assert(DefMI.getParent() == UseMI.getParent());
364  if (&DefMI == &UseMI)
365  return false;
366 
367  // Loop through the basic block until we find one of the instructions.
368  MachineBasicBlock::const_iterator I = DefMI.getParent()->begin();
369  for (; &*I != &DefMI && &*I != &UseMI; ++I)
370  return &*I == &DefMI;
371 
372  llvm_unreachable("Block must contain instructions");
373 }
374 
375 bool CombinerHelper::dominates(MachineInstr &DefMI, MachineInstr &UseMI) {
376  if (MDT)
377  return MDT->dominates(&DefMI, &UseMI);
378  else if (DefMI.getParent() != UseMI.getParent())
379  return false;
380 
381  return isPredecessor(DefMI, UseMI);
382 }
383 
384 bool CombinerHelper::findPostIndexCandidate(MachineInstr &MI, Register &Addr,
385  Register &Base, Register &Offset) {
386  auto &MF = *MI.getParent()->getParent();
387  const auto &TLI = *MF.getSubtarget().getTargetLowering();
388 
389 #ifndef NDEBUG
390  unsigned Opcode = MI.getOpcode();
391  assert(Opcode == TargetOpcode::G_LOAD || Opcode == TargetOpcode::G_SEXTLOAD ||
392  Opcode == TargetOpcode::G_ZEXTLOAD || Opcode == TargetOpcode::G_STORE);
393 #endif
394 
395  Base = MI.getOperand(1).getReg();
396  MachineInstr *BaseDef = MRI.getUniqueVRegDef(Base);
397  if (BaseDef && BaseDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
398  return false;
399 
400  LLVM_DEBUG(dbgs() << "Searching for post-indexing opportunity for: " << MI);
401 
402  for (auto &Use : MRI.use_instructions(Base)) {
403  if (Use.getOpcode() != TargetOpcode::G_GEP)
404  continue;
405 
406  Offset = Use.getOperand(2).getReg();
407  if (!ForceLegalIndexing &&
408  !TLI.isIndexingLegal(MI, Base, Offset, /*IsPre*/ false, MRI)) {
409  LLVM_DEBUG(dbgs() << " Ignoring candidate with illegal addrmode: "
410  << Use);
411  continue;
412  }
413 
414  // Make sure the offset calculation is before the potentially indexed op.
415  // FIXME: we really care about dependency here. The offset calculation might
416  // be movable.
417  MachineInstr *OffsetDef = MRI.getUniqueVRegDef(Offset);
418  if (!OffsetDef || !dominates(*OffsetDef, MI)) {
419  LLVM_DEBUG(dbgs() << " Ignoring candidate with offset after mem-op: "
420  << Use);
421  continue;
422  }
423 
424  // FIXME: check whether all uses of Base are load/store with foldable
425  // addressing modes. If so, using the normal addr-modes is better than
426  // forming an indexed one.
427 
428  bool MemOpDominatesAddrUses = true;
429  for (auto &GEPUse : MRI.use_instructions(Use.getOperand(0).getReg())) {
430  if (!dominates(MI, GEPUse)) {
431  MemOpDominatesAddrUses = false;
432  break;
433  }
434  }
435 
436  if (!MemOpDominatesAddrUses) {
437  LLVM_DEBUG(
438  dbgs() << " Ignoring candidate as memop does not dominate uses: "
439  << Use);
440  continue;
441  }
442 
443  LLVM_DEBUG(dbgs() << " Found match: " << Use);
444  Addr = Use.getOperand(0).getReg();
445  return true;
446  }
447 
448  return false;
449 }
450 
451 bool CombinerHelper::findPreIndexCandidate(MachineInstr &MI, Register &Addr,
452  Register &Base, Register &Offset) {
453  auto &MF = *MI.getParent()->getParent();
454  const auto &TLI = *MF.getSubtarget().getTargetLowering();
455 
456 #ifndef NDEBUG
457  unsigned Opcode = MI.getOpcode();
458  assert(Opcode == TargetOpcode::G_LOAD || Opcode == TargetOpcode::G_SEXTLOAD ||
459  Opcode == TargetOpcode::G_ZEXTLOAD || Opcode == TargetOpcode::G_STORE);
460 #endif
461 
462  Addr = MI.getOperand(1).getReg();
463  MachineInstr *AddrDef = getOpcodeDef(TargetOpcode::G_GEP, Addr, MRI);
464  if (!AddrDef || MRI.hasOneUse(Addr))
465  return false;
466 
467  Base = AddrDef->getOperand(1).getReg();
468  Offset = AddrDef->getOperand(2).getReg();
469 
470  LLVM_DEBUG(dbgs() << "Found potential pre-indexed load_store: " << MI);
471 
472  if (!ForceLegalIndexing &&
473  !TLI.isIndexingLegal(MI, Base, Offset, /*IsPre*/ true, MRI)) {
474  LLVM_DEBUG(dbgs() << " Skipping, not legal for target");
475  return false;
476  }
477 
478  MachineInstr *BaseDef = getDefIgnoringCopies(Base, MRI);
479  if (BaseDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) {
480  LLVM_DEBUG(dbgs() << " Skipping, frame index would need copy anyway.");
481  return false;
482  }
483 
484  if (MI.getOpcode() == TargetOpcode::G_STORE) {
485  // Would require a copy.
486  if (Base == MI.getOperand(0).getReg()) {
487  LLVM_DEBUG(dbgs() << " Skipping, storing base so need copy anyway.");
488  return false;
489  }
490 
491  // We're expecting one use of Addr in MI, but it could also be the
492  // value stored, which isn't actually dominated by the instruction.
493  if (MI.getOperand(0).getReg() == Addr) {
494  LLVM_DEBUG(dbgs() << " Skipping, does not dominate all addr uses");
495  return false;
496  }
497  }
498 
499  // FIXME: check whether all uses of the base pointer are constant GEPs. That
500  // might allow us to end base's liveness here by adjusting the constant.
501 
502  for (auto &UseMI : MRI.use_instructions(Addr)) {
503  if (!dominates(MI, UseMI)) {
504  LLVM_DEBUG(dbgs() << " Skipping, does not dominate all addr uses.");
505  return false;
506  }
507  }
508 
509  return true;
510 }
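// Shape of the pattern findPreIndexCandidate accepts (hypothetical MIR,
// register names illustrative):
//   %addr:_(p0) = G_GEP %base, %offset
//   G_STORE %val(s32), %addr(p0)        ; the candidate memory operation
//   ... further users of %addr ...
// %addr must have users besides the memory operation itself, and each of them
// must be dominated by it, so that they can later consume the written-back
// address produced by the indexed form.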
511 
512 bool CombinerHelper::tryCombineIndexedLoadStore(MachineInstr &MI) {
513  unsigned Opcode = MI.getOpcode();
514  if (Opcode != TargetOpcode::G_LOAD && Opcode != TargetOpcode::G_SEXTLOAD &&
515  Opcode != TargetOpcode::G_ZEXTLOAD && Opcode != TargetOpcode::G_STORE)
516  return false;
517 
518  bool IsStore = Opcode == TargetOpcode::G_STORE;
519  Register Addr, Base, Offset;
520  bool IsPre = findPreIndexCandidate(MI, Addr, Base, Offset);
521  if (!IsPre && !findPostIndexCandidate(MI, Addr, Base, Offset))
522  return false;
523 
524 
525  unsigned NewOpcode;
526  switch (Opcode) {
527  case TargetOpcode::G_LOAD:
528  NewOpcode = TargetOpcode::G_INDEXED_LOAD;
529  break;
530  case TargetOpcode::G_SEXTLOAD:
531  NewOpcode = TargetOpcode::G_INDEXED_SEXTLOAD;
532  break;
533  case TargetOpcode::G_ZEXTLOAD:
534  NewOpcode = TargetOpcode::G_INDEXED_ZEXTLOAD;
535  break;
536  case TargetOpcode::G_STORE:
537  NewOpcode = TargetOpcode::G_INDEXED_STORE;
538  break;
539  default:
540  llvm_unreachable("Unknown load/store opcode");
541  }
542 
543  MachineInstr &AddrDef = *MRI.getUniqueVRegDef(Addr);
544  MachineIRBuilder MIRBuilder(MI);
545  auto MIB = MIRBuilder.buildInstr(NewOpcode);
546  if (IsStore) {
547  MIB.addDef(Addr);
548  MIB.addUse(MI.getOperand(0).getReg());
549  } else {
550  MIB.addDef(MI.getOperand(0).getReg());
551  MIB.addDef(Addr);
552  }
553 
554  MIB.addUse(Base);
555  MIB.addUse(Offset);
556  MIB.addImm(IsPre);
557  MI.eraseFromParent();
558  AddrDef.eraseFromParent();
559 
560  LLVM_DEBUG(dbgs() << "  Combined to indexed operation");
561  return true;
562 }
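// Rough before/after for the post-indexed load case handled above
// (hypothetical MIR, virtual register names are illustrative):
//   %addr:_(p0) = G_GEP %base, %offset
//   %val:_(s32) = G_LOAD %base(p0) :: (load 4)
// becomes, assuming the target reports the addressing mode as legal:
//   %val:_(s32), %addr:_(p0) = G_INDEXED_LOAD %base, %offset, 0
// where the final immediate distinguishes pre-indexing (1) from post-indexing
// (0), matching the MIB.addImm(IsPre) above, and the original G_GEP is erased.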
563 
564 bool CombinerHelper::matchCombineBr(MachineInstr &MI) {
565  assert(MI.getOpcode() == TargetOpcode::G_BR && "Expected a G_BR");
566  // Try to match the following:
567  // bb1:
568  // %c(s32) = G_ICMP pred, %a, %b
569  // %c1(s1) = G_TRUNC %c(s32)
570  // G_BRCOND %c1, %bb2
571  // G_BR %bb3
572  // bb2:
573  // ...
574  // bb3:
575 
576  // The above pattern does not have a fall through to the successor bb2, always
577  // resulting in a branch no matter which path is taken. Here we try to find
578  // and replace that pattern with a conditional branch to bb3 and otherwise
579  // fallthrough to bb2.
580 
581  MachineBasicBlock *MBB = MI.getParent();
582  MachineBasicBlock::iterator BrIt(MI);
583  if (BrIt == MBB->begin())
584  return false;
585  assert(std::next(BrIt) == MBB->end() && "expected G_BR to be a terminator");
586 
587  MachineInstr *BrCond = &*std::prev(BrIt);
588  if (BrCond->getOpcode() != TargetOpcode::G_BRCOND)
589  return false;
590 
591  // Check that the next block is the conditional branch target.
592  if (!MBB->isLayoutSuccessor(BrCond->getOperand(1).getMBB()))
593  return false;
594 
595  MachineInstr *CmpMI = MRI.getVRegDef(BrCond->getOperand(0).getReg());
596  if (!CmpMI || CmpMI->getOpcode() != TargetOpcode::G_ICMP ||
597  !MRI.hasOneUse(CmpMI->getOperand(0).getReg()))
598  return false;
599  return true;
600 }
601 
602 bool CombinerHelper::tryCombineBr(MachineInstr &MI) {
603  if (!matchCombineBr(MI))
604  return false;
605  MachineBasicBlock *BrTarget = MI.getOperand(0).getMBB();
606  MachineBasicBlock::iterator BrIt(MI);
607  MachineInstr *BrCond = &*std::prev(BrIt);
608  MachineInstr *CmpMI = MRI.getVRegDef(BrCond->getOperand(0).getReg());
609 
610  CmpInst::Predicate InversePred = CmpInst::getInversePredicate(
611  (CmpInst::Predicate)CmpMI->getOperand(1).getPredicate());
612 
613  // Invert the G_ICMP condition.
614  Observer.changingInstr(*CmpMI);
615  CmpMI->getOperand(1).setPredicate(InversePred);
616  Observer.changedInstr(*CmpMI);
617 
618  // Change the conditional branch target.
619  Observer.changingInstr(*BrCond);
620  BrCond->getOperand(1).setMBB(BrTarget);
621  Observer.changedInstr(*BrCond);
622  MI.eraseFromParent();
623  return true;
624 }
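// Concrete shape of the rewrite performed by tryCombineBr (hypothetical MIR):
//   bb.0:
//     %c:_(s1) = G_ICMP intpred(eq), %a, %b
//     G_BRCOND %c(s1), %bb.1
//     G_BR %bb.2
//   bb.1:  ; layout successor of bb.0
// becomes
//   bb.0:
//     %c:_(s1) = G_ICMP intpred(ne), %a, %b
//     G_BRCOND %c(s1), %bb.2
//   bb.1:  ; now reached by fallthrough
// i.e. the predicate is inverted, the conditional target is redirected to the
// old unconditional target, and the trailing G_BR is erased.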
625 
627  // On Darwin, -Os means optimize for size without hurting performance, so
628  // only really optimize for size when -Oz (MinSize) is used.
629  if (MF.getTarget().getTargetTriple().isOSDarwin())
630  return MF.getFunction().hasMinSize();
631  return MF.getFunction().hasOptSize();
632 }
633 
634 // Returns a list of types to use for memory op lowering in MemOps. A partial
635 // port of findOptimalMemOpLowering in TargetLowering.
636 static bool findGISelOptimalMemOpLowering(
637  std::vector<LLT> &MemOps, unsigned Limit, uint64_t Size, unsigned DstAlign,
638  unsigned SrcAlign, bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc,
639  bool AllowOverlap, unsigned DstAS, unsigned SrcAS,
640  const AttributeList &FuncAttributes, const TargetLowering &TLI) {
641  // If 'SrcAlign' is zero, that means the memory operation does not need to
642  // load the value, i.e. memset or memcpy from constant string. Otherwise,
643  // it's the inferred alignment of the source. 'DstAlign', on the other hand,
644  // is the specified alignment of the memory operation. If it is zero, that
645  // means it's possible to change the alignment of the destination.
646  // 'MemcpyStrSrc' indicates whether the memcpy source is constant so it does
647  // not need to be loaded.
648  if (SrcAlign != 0 && SrcAlign < DstAlign)
649  return false;
650 
651  LLT Ty = TLI.getOptimalMemOpLLT(Size, DstAlign, SrcAlign, IsMemset,
652  ZeroMemset, MemcpyStrSrc, FuncAttributes);
653 
654  if (Ty == LLT()) {
655  // Use the largest scalar type whose alignment constraints are satisfied.
656  // We only need to check DstAlign here as SrcAlign is always greater or
657  // equal to DstAlign (or zero).
658  Ty = LLT::scalar(64);
659  while (DstAlign && DstAlign < Ty.getSizeInBytes() &&
660  !TLI.allowsMisalignedMemoryAccesses(Ty, DstAS, DstAlign))
661  Ty = LLT::scalar(Ty.getSizeInBytes());
662  assert(Ty.getSizeInBits() > 0 && "Could not find valid type");
663  // FIXME: check for the largest legal type we can load/store to.
664  }
665 
666  unsigned NumMemOps = 0;
667  while (Size != 0) {
668  unsigned TySize = Ty.getSizeInBytes();
669  while (TySize > Size) {
670  // For now, only use non-vector loads / stores for the left-over pieces.
671  LLT NewTy = Ty;
672  // FIXME: check for mem op safety and legality of the types. Not all of
673  // SDAGisms map cleanly to GISel concepts.
674  if (NewTy.isVector())
675  NewTy = NewTy.getSizeInBits() > 64 ? LLT::scalar(64) : LLT::scalar(32);
676  NewTy = LLT::scalar(PowerOf2Floor(NewTy.getSizeInBits() - 1));
677  unsigned NewTySize = NewTy.getSizeInBytes();
678  assert(NewTySize > 0 && "Could not find appropriate type");
679 
680  // If the new LLT cannot cover all of the remaining bits, then consider
681  // issuing a (or a pair of) unaligned and overlapping load / store.
682  bool Fast;
683  // Need to get a VT equivalent for allowMisalignedMemoryAccesses().
684  MVT VT = getMVTForLLT(Ty);
685  if (NumMemOps && AllowOverlap && NewTySize < Size &&
686  TLI.allowsMisalignedMemoryAccesses(
687  VT, DstAS, DstAlign, MachineMemOperand::MONone, &Fast) &&
688  Fast)
689  TySize = Size;
690  else {
691  Ty = NewTy;
692  TySize = NewTySize;
693  }
694  }
695 
696  if (++NumMemOps > Limit)
697  return false;
698 
699  MemOps.push_back(Ty);
700  Size -= TySize;
701  }
702 
703  return true;
704 }
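// Worked example of the loop above (sizes only; the target hooks are assumed
// to report no preferred type and to reject overlapping unaligned stores):
// for Size = 7 with s64 as the starting scalar, the first candidate is too
// wide, so it is shrunk via PowerOf2Floor to s32 (4 bytes, Size -> 3), then
// s16 (Size -> 1), then s8 (Size -> 0), giving MemOps = {s32, s16, s8}. If the
// target instead reports fast misaligned accesses and AllowOverlap is true,
// the 3-byte tail is covered by a second, overlapping s32 store instead:
// MemOps = {s32, s32}.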
705 
706 static Type *getTypeForLLT(LLT Ty, LLVMContext &C) {
707  if (Ty.isVector())
708  return VectorType::get(IntegerType::get(C, Ty.getScalarSizeInBits()),
709  Ty.getNumElements());
710  return IntegerType::get(C, Ty.getSizeInBits());
711 }
712 
713 // Get a vectorized representation of the memset value operand, GISel edition.
714 static Register getMemsetValue(Register Val, LLT Ty, MachineIRBuilder &MIB) {
715  MachineRegisterInfo &MRI = *MIB.getMRI();
716  unsigned NumBits = Ty.getScalarSizeInBits();
717  auto ValVRegAndVal = getConstantVRegValWithLookThrough(Val, MRI);
718  if (!Ty.isVector() && ValVRegAndVal) {
719  unsigned KnownVal = ValVRegAndVal->Value;
720  APInt Scalar = APInt(8, KnownVal);
721  APInt SplatVal = APInt::getSplat(NumBits, Scalar);
722  return MIB.buildConstant(Ty, SplatVal).getReg(0);
723  }
724  // FIXME: for vector types create a G_BUILD_VECTOR.
725  if (Ty.isVector())
726  return Register();
727 
728  // Extend the byte value to the larger type, and then multiply by a magic
729  // value 0x010101... in order to replicate it across every byte.
730  LLT ExtType = Ty.getScalarType();
731  auto ZExt = MIB.buildZExtOrTrunc(ExtType, Val);
732  if (NumBits > 8) {
733  APInt Magic = APInt::getSplat(NumBits, APInt(8, 0x01));
734  auto MagicMI = MIB.buildConstant(ExtType, Magic);
735  Val = MIB.buildMul(ExtType, ZExt, MagicMI).getReg(0);
736  }
737 
738  assert(ExtType == Ty && "Vector memset value type not supported yet");
739  return Val;
740 }
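// Arithmetic behind the splat above (assuming an s32 memset value): a byte
// such as 0xAB is zero-extended to s32 and multiplied by the magic constant
// 0x01010101, giving 0xABABABAB, i.e. the byte replicated into every byte
// lane of the wider scalar. For a known-constant byte the same result is
// produced directly with APInt::getSplat and a single G_CONSTANT.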
741 
742 bool CombinerHelper::optimizeMemset(MachineInstr &MI, Register Dst, Register Val,
743  unsigned KnownLen, unsigned Align,
744  bool IsVolatile) {
745  auto &MF = *MI.getParent()->getParent();
746  const auto &TLI = *MF.getSubtarget().getTargetLowering();
747  auto &DL = MF.getDataLayout();
748  LLVMContext &C = MF.getFunction().getContext();
749 
750  assert(KnownLen != 0 && "Have a zero length memset length!");
751 
752  bool DstAlignCanChange = false;
753  MachineFrameInfo &MFI = MF.getFrameInfo();
754  bool OptSize = shouldLowerMemFuncForSize(MF);
755 
756  MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
757  if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
758  DstAlignCanChange = true;
759 
760  unsigned Limit = TLI.getMaxStoresPerMemset(OptSize);
761  std::vector<LLT> MemOps;
762 
763  const auto &DstMMO = **MI.memoperands_begin();
764  MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
765 
766  auto ValVRegAndVal = getConstantVRegValWithLookThrough(Val, MRI);
767  bool IsZeroVal = ValVRegAndVal && ValVRegAndVal->Value == 0;
768 
769  if (!findGISelOptimalMemOpLowering(
770  MemOps, Limit, KnownLen, (DstAlignCanChange ? 0 : Align), 0,
771  /*IsMemset=*/true,
772  /*ZeroMemset=*/IsZeroVal, /*MemcpyStrSrc=*/false,
773  /*AllowOverlap=*/!IsVolatile, DstPtrInfo.getAddrSpace(), ~0u,
774  MF.getFunction().getAttributes(), TLI))
775  return false;
776 
777  if (DstAlignCanChange) {
778  // Get an estimate of the type from the LLT.
779  Type *IRTy = getTypeForLLT(MemOps[0], C);
780  unsigned NewAlign = (unsigned)DL.getABITypeAlignment(IRTy);
781  if (NewAlign > Align) {
782  unsigned FI = FIDef->getOperand(1).getIndex();
783  // Give the stack frame object a larger alignment if needed.
784  if (MFI.getObjectAlignment(FI) < NewAlign)
785  MFI.setObjectAlignment(FI, NewAlign);
786  Align = NewAlign;
787  }
788  }
789 
790  MachineIRBuilder MIB(MI);
791  // Find the largest store and generate the bit pattern for it.
792  LLT LargestTy = MemOps[0];
793  for (unsigned i = 1; i < MemOps.size(); i++)
794  if (MemOps[i].getSizeInBits() > LargestTy.getSizeInBits())
795  LargestTy = MemOps[i];
796 
797  // The memset stored value is always defined as an s8, so in order to make it
798  // work with larger store types we need to repeat the bit pattern across the
799  // wider type.
800  Register MemSetValue = getMemsetValue(Val, LargestTy, MIB);
801 
802  if (!MemSetValue)
803  return false;
804 
805  // Generate the stores. For each store type in the list, we generate the
806  // matching store of that type to the destination address.
807  LLT PtrTy = MRI.getType(Dst);
808  unsigned DstOff = 0;
809  unsigned Size = KnownLen;
810  for (unsigned I = 0; I < MemOps.size(); I++) {
811  LLT Ty = MemOps[I];
812  unsigned TySize = Ty.getSizeInBytes();
813  if (TySize > Size) {
814  // Issuing an unaligned load / store pair that overlaps with the previous
815  // pair. Adjust the offset accordingly.
816  assert(I == MemOps.size() - 1 && I != 0);
817  DstOff -= TySize - Size;
818  }
819 
820  // If this store is smaller than the largest store see whether we can get
821  // the smaller value for free with a truncate.
822  Register Value = MemSetValue;
823  if (Ty.getSizeInBits() < LargestTy.getSizeInBits()) {
824  MVT VT = getMVTForLLT(Ty);
825  MVT LargestVT = getMVTForLLT(LargestTy);
826  if (!LargestTy.isVector() && !Ty.isVector() &&
827  TLI.isTruncateFree(LargestVT, VT))
828  Value = MIB.buildTrunc(Ty, MemSetValue).getReg(0);
829  else
830  Value = getMemsetValue(Val, Ty, MIB);
831  if (!Value)
832  return false;
833  }
834 
835  auto *StoreMMO =
836  MF.getMachineMemOperand(&DstMMO, DstOff, Ty.getSizeInBytes());
837 
838  Register Ptr = Dst;
839  if (DstOff != 0) {
840  auto Offset =
841  MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), DstOff);
842  Ptr = MIB.buildGEP(PtrTy, Dst, Offset).getReg(0);
843  }
844 
845  MIB.buildStore(Value, Ptr, *StoreMMO);
846  DstOff += Ty.getSizeInBytes();
847  Size -= TySize;
848  }
849 
850  MI.eraseFromParent();
851  return true;
852 }
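// Putting optimizeMemset together on a small, hypothetical case: for
// memset(dst, 0xAB, 16) on a target whose widest chosen type is s64, MemOps
// ends up as {s64, s64}; the value 0xABABABABABABABAB is materialized once
// and stored twice, at offsets 0 and 8, with the second store address formed
// by a G_GEP off the original destination pointer.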
853 
854 
855 bool CombinerHelper::optimizeMemcpy(MachineInstr &MI, Register Dst,
856  Register Src, unsigned KnownLen,
857  unsigned DstAlign, unsigned SrcAlign,
858  bool IsVolatile) {
859  auto &MF = *MI.getParent()->getParent();
860  const auto &TLI = *MF.getSubtarget().getTargetLowering();
861  auto &DL = MF.getDataLayout();
862  LLVMContext &C = MF.getFunction().getContext();
863 
864  assert(KnownLen != 0 && "Have a zero length memcpy length!");
865 
866  bool DstAlignCanChange = false;
867  MachineFrameInfo &MFI = MF.getFrameInfo();
868  bool OptSize = shouldLowerMemFuncForSize(MF);
869  unsigned Align = MinAlign(DstAlign, SrcAlign);
870 
871  MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
872  if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
873  DstAlignCanChange = true;
874 
875  // FIXME: infer better src pointer alignment like SelectionDAG does here.
876  // FIXME: also use the equivalent of isMemSrcFromConstant and alwaysinlining
877  // if the memcpy is in a tail call position.
878 
879  unsigned Limit = TLI.getMaxStoresPerMemcpy(OptSize);
880  std::vector<LLT> MemOps;
881 
882  const auto &DstMMO = **MI.memoperands_begin();
883  const auto &SrcMMO = **std::next(MI.memoperands_begin());
884  MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
885  MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();
886 
887  if (!findGISelOptimalMemOpLowering(
888  MemOps, Limit, KnownLen, (DstAlignCanChange ? 0 : Align), SrcAlign,
889  /*IsMemset=*/false,
890  /*ZeroMemset=*/false, /*MemcpyStrSrc=*/false,
891  /*AllowOverlap=*/!IsVolatile, DstPtrInfo.getAddrSpace(),
892  SrcPtrInfo.getAddrSpace(), MF.getFunction().getAttributes(), TLI))
893  return false;
894 
895  if (DstAlignCanChange) {
896  // Get an estimate of the type from the LLT.
897  Type *IRTy = getTypeForLLT(MemOps[0], C);
898  unsigned NewAlign = (unsigned)DL.getABITypeAlignment(IRTy);
899 
900  // Don't promote to an alignment that would require dynamic stack
901  // realignment.
902  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
903  if (!TRI->needsStackRealignment(MF))
904  while (NewAlign > Align &&
905  DL.exceedsNaturalStackAlignment(llvm::Align(NewAlign)))
906  NewAlign /= 2;
907 
908  if (NewAlign > Align) {
909  unsigned FI = FIDef->getOperand(1).getIndex();
910  // Give the stack frame object a larger alignment if needed.
911  if (MFI.getObjectAlignment(FI) < NewAlign)
912  MFI.setObjectAlignment(FI, NewAlign);
913  Align = NewAlign;
914  }
915  }
916 
917  LLVM_DEBUG(dbgs() << "Inlining memcpy: " << MI << " into loads & stores\n");
918 
919  MachineIRBuilder MIB(MI);
920  // Now we need to emit a pair of load and stores for each of the types we've
921  // collected. I.e. for each type, generate a load from the source pointer of
922  // that type width, and then generate a corresponding store to the dest buffer
923  // of that value loaded. This can result in a sequence of loads and stores
924  // of mixed types, depending on what the target specifies as good types to use.
925  unsigned CurrOffset = 0;
926  LLT PtrTy = MRI.getType(Src);
927  unsigned Size = KnownLen;
928  for (auto CopyTy : MemOps) {
929  // Issuing an unaligned load / store pair that overlaps with the previous
930  // pair. Adjust the offset accordingly.
931  if (CopyTy.getSizeInBytes() > Size)
932  CurrOffset -= CopyTy.getSizeInBytes() - Size;
933 
934  // Construct MMOs for the accesses.
935  auto *LoadMMO =
936  MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());
937  auto *StoreMMO =
938  MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());
939 
940  // Create the load.
941  Register LoadPtr = Src;
942  Register Offset;
943  if (CurrOffset != 0) {
944  Offset = MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset)
945  .getReg(0);
946  LoadPtr = MIB.buildGEP(PtrTy, Src, Offset).getReg(0);
947  }
948  auto LdVal = MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO);
949 
950  // Create the store.
951  Register StorePtr =
952  CurrOffset == 0 ? Dst : MIB.buildGEP(PtrTy, Dst, Offset).getReg(0);
953  MIB.buildStore(LdVal, StorePtr, *StoreMMO);
954  CurrOffset += CopyTy.getSizeInBytes();
955  Size -= CopyTy.getSizeInBytes();
956  }
957 
958  MI.eraseFromParent();
959  return true;
960 }
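// Example of the expansion above (hypothetical, assuming MemOps = {s64, s32}
// for a 12-byte copy): the loop emits a G_LOAD/G_STORE pair of s64 at offset
// 0 and a second pair of s32 at offset 8, reusing the same G_CONSTANT offset
// for both the source and destination G_GEPs of each iteration.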
961 
962 bool CombinerHelper::optimizeMemmove(MachineInstr &MI, Register Dst,
963  Register Src, unsigned KnownLen,
964  unsigned DstAlign, unsigned SrcAlign,
965  bool IsVolatile) {
966  auto &MF = *MI.getParent()->getParent();
967  const auto &TLI = *MF.getSubtarget().getTargetLowering();
968  auto &DL = MF.getDataLayout();
969  LLVMContext &C = MF.getFunction().getContext();
970 
971  assert(KnownLen != 0 && "Have a zero length memmove length!");
972 
973  bool DstAlignCanChange = false;
974  MachineFrameInfo &MFI = MF.getFrameInfo();
975  bool OptSize = shouldLowerMemFuncForSize(MF);
976  unsigned Align = MinAlign(DstAlign, SrcAlign);
977 
978  MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
979  if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
980  DstAlignCanChange = true;
981 
982  unsigned Limit = TLI.getMaxStoresPerMemmove(OptSize);
983  std::vector<LLT> MemOps;
984 
985  const auto &DstMMO = **MI.memoperands_begin();
986  const auto &SrcMMO = **std::next(MI.memoperands_begin());
987  MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
988  MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();
989 
990  // FIXME: SelectionDAG always passes false for 'AllowOverlap', apparently due
991  // to a bug in its findOptimalMemOpLowering implementation. For now do the
992  // same thing here.
993  if (!findGISelOptimalMemOpLowering(
994  MemOps, Limit, KnownLen, (DstAlignCanChange ? 0 : Align), SrcAlign,
995  /*IsMemset=*/false,
996  /*ZeroMemset=*/false, /*MemcpyStrSrc=*/false,
997  /*AllowOverlap=*/false, DstPtrInfo.getAddrSpace(),
998  SrcPtrInfo.getAddrSpace(), MF.getFunction().getAttributes(), TLI))
999  return false;
1000 
1001  if (DstAlignCanChange) {
1002  // Get an estimate of the type from the LLT.
1003  Type *IRTy = getTypeForLLT(MemOps[0], C);
1004  unsigned NewAlign = (unsigned)DL.getABITypeAlignment(IRTy);
1005 
1006  // Don't promote to an alignment that would require dynamic stack
1007  // realignment.
1008  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
1009  if (!TRI->needsStackRealignment(MF))
1010  while (NewAlign > Align &&
1011  DL.exceedsNaturalStackAlignment(llvm::Align(NewAlign)))
1012  NewAlign /= 2;
1013 
1014  if (NewAlign > Align) {
1015  unsigned FI = FIDef->getOperand(1).getIndex();
1016  // Give the stack frame object a larger alignment if needed.
1017  if (MFI.getObjectAlignment(FI) < NewAlign)
1018  MFI.setObjectAlignment(FI, NewAlign);
1019  Align = NewAlign;
1020  }
1021  }
1022 
1023  LLVM_DEBUG(dbgs() << "Inlining memmove: " << MI << " into loads & stores\n");
1024 
1025  MachineIRBuilder MIB(MI);
1026  // Memmove requires that we perform the loads first before issuing the stores.
1027  // Apart from that, this loop is pretty much doing the same thing as the
1028  // memcpy codegen function.
1029  unsigned CurrOffset = 0;
1030  LLT PtrTy = MRI.getType(Src);
1031  SmallVector<Register, 16> LoadVals;
1032  for (auto CopyTy : MemOps) {
1033  // Construct MMO for the load.
1034  auto *LoadMMO =
1035  MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());
1036 
1037  // Create the load.
1038  Register LoadPtr = Src;
1039  if (CurrOffset != 0) {
1040  auto Offset =
1041  MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset);
1042  LoadPtr = MIB.buildGEP(PtrTy, Src, Offset).getReg(0);
1043  }
1044  LoadVals.push_back(MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO).getReg(0));
1045  CurrOffset += CopyTy.getSizeInBytes();
1046  }
1047 
1048  CurrOffset = 0;
1049  for (unsigned I = 0; I < MemOps.size(); ++I) {
1050  LLT CopyTy = MemOps[I];
1051  // Now store the values loaded.
1052  auto *StoreMMO =
1053  MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());
1054 
1055  Register StorePtr = Dst;
1056  if (CurrOffset != 0) {
1057  auto Offset =
1058  MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset);
1059  StorePtr = MIB.buildGEP(PtrTy, Dst, Offset).getReg(0);
1060  }
1061  MIB.buildStore(LoadVals[I], StorePtr, *StoreMMO);
1062  CurrOffset += CopyTy.getSizeInBytes();
1063  }
1064  MI.eraseFromParent();
1065  return true;
1066 }
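// Unlike the memcpy expansion, the loop structure above first issues every
// load and only then the stores, so a copy between overlapping buffers (e.g.
// memmove(p, p + 4, 8)) still reads the original bytes before any of them are
// overwritten. The per-type offsets are otherwise computed exactly as in
// optimizeMemcpy.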
1067 
1068 bool CombinerHelper::tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen) {
1069  // This combine is fairly complex so it's not written with a separate
1070  // matcher function.
1071  assert(MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
1072  Intrinsic::ID ID = (Intrinsic::ID)MI.getIntrinsicID();
1073  assert((ID == Intrinsic::memcpy || ID == Intrinsic::memmove ||
1074  ID == Intrinsic::memset) &&
1075  "Expected a memcpy like intrinsic");
1076 
1077  auto MMOIt = MI.memoperands_begin();
1078  const MachineMemOperand *MemOp = *MMOIt;
1079  bool IsVolatile = MemOp->isVolatile();
1080  // Don't try to optimize volatile.
1081  if (IsVolatile)
1082  return false;
1083 
1084  unsigned DstAlign = MemOp->getBaseAlignment();
1085  unsigned SrcAlign = 0;
1086  Register Dst = MI.getOperand(1).getReg();
1087  Register Src = MI.getOperand(2).getReg();
1088  Register Len = MI.getOperand(3).getReg();
1089 
1090  if (ID != Intrinsic::memset) {
1091  assert(MMOIt != MI.memoperands_end() && "Expected a second MMO on MI");
1092  MemOp = *(++MMOIt);
1093  SrcAlign = MemOp->getBaseAlignment();
1094  }
1095 
1096  // See if this is a constant length copy
1097  auto LenVRegAndVal = getConstantVRegValWithLookThrough(Len, MRI);
1098  if (!LenVRegAndVal)
1099  return false; // Leave it to the legalizer to lower it to a libcall.
1100  unsigned KnownLen = LenVRegAndVal->Value;
1101 
1102  if (KnownLen == 0) {
1103  MI.eraseFromParent();
1104  return true;
1105  }
1106 
1107  if (MaxLen && KnownLen > MaxLen)
1108  return false;
1109 
1110  if (ID == Intrinsic::memcpy)
1111  return optimizeMemcpy(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, IsVolatile);
1112  if (ID == Intrinsic::memmove)
1113  return optimizeMemmove(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, IsVolatile);
1114  if (ID == Intrinsic::memset)
1115  return optimizeMemset(MI, Dst, Src, KnownLen, DstAlign, IsVolatile);
1116  return false;
1117 }
1118 
1119 bool CombinerHelper::tryCombine(MachineInstr &MI) {
1120  if (tryCombineCopy(MI))
1121  return true;
1122  if (tryCombineExtendingLoads(MI))
1123  return true;
1124  if (tryCombineIndexedLoadStore(MI))
1125  return true;
1126  return false;
1127 }