LLVM  10.0.0svn
CombinerHelper.cpp
1 //===-- lib/CodeGen/GlobalISel/CombinerHelper.cpp -------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 #include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
9 #include "llvm/CodeGen/GlobalISel/Combiner.h"
10 #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
11 #include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
12 #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
13 #include "llvm/CodeGen/GlobalISel/Utils.h"
14 #include "llvm/CodeGen/MachineDominators.h"
15 #include "llvm/CodeGen/MachineFrameInfo.h"
16 #include "llvm/CodeGen/MachineInstr.h"
17 #include "llvm/CodeGen/MachineRegisterInfo.h"
18 #include "llvm/CodeGen/TargetInstrInfo.h"
19 #include "llvm/CodeGen/TargetLowering.h"
20 #include "llvm/Target/TargetMachine.h"
21 
22 #define DEBUG_TYPE "gi-combiner"
23 
24 using namespace llvm;
25 
26 // Option to allow testing of the combiner while no targets know about indexed
27 // addressing.
28 static cl::opt<bool>
29  ForceLegalIndexing("force-legal-indexing", cl::Hidden, cl::init(false),
30  cl::desc("Force all indexed operations to be "
31  "legal for the GlobalISel combiner"));
32 
33 
34 CombinerHelper::CombinerHelper(GISelChangeObserver &Observer,
35                                MachineIRBuilder &B, GISelKnownBits *KB,
36                                MachineDominatorTree *MDT)
37     : Builder(B), MRI(Builder.getMF().getRegInfo()), Observer(Observer),
38  KB(KB), MDT(MDT) {
39  (void)this->KB;
40 }
41 
42 void CombinerHelper::replaceRegWith(MachineRegisterInfo &MRI, Register FromReg,
43                                     Register ToReg) const {
44  Observer.changingAllUsesOfReg(MRI, FromReg);
45 
46  if (MRI.constrainRegAttrs(ToReg, FromReg))
47  MRI.replaceRegWith(FromReg, ToReg);
48  else
49  Builder.buildCopy(ToReg, FromReg);
50 
51   Observer.finishedChangingAllUsesOfReg();
52 }
53 
54 void CombinerHelper::replaceRegOpWith(MachineRegisterInfo &MRI,
55                                       MachineOperand &FromRegOp,
56  Register ToReg) const {
57  assert(FromRegOp.getParent() && "Expected an operand in an MI");
58  Observer.changingInstr(*FromRegOp.getParent());
59 
60  FromRegOp.setReg(ToReg);
61 
62  Observer.changedInstr(*FromRegOp.getParent());
63 }
64 
65 bool CombinerHelper::tryCombineCopy(MachineInstr &MI) {
66   if (matchCombineCopy(MI)) {
67  applyCombineCopy(MI);
68  return true;
69  }
70  return false;
71 }
72 bool CombinerHelper::matchCombineCopy(MachineInstr &MI) {
73   if (MI.getOpcode() != TargetOpcode::COPY)
74  return false;
75  Register DstReg = MI.getOperand(0).getReg();
76  Register SrcReg = MI.getOperand(1).getReg();
77  LLT DstTy = MRI.getType(DstReg);
78  LLT SrcTy = MRI.getType(SrcReg);
79  // Simple Copy Propagation.
80  // a(sx) = COPY b(sx) -> Replace all uses of a with b.
81  if (DstTy.isValid() && SrcTy.isValid() && DstTy == SrcTy)
82  return true;
83  return false;
84 }
85 void CombinerHelper::applyCombineCopy(MachineInstr &MI) {
86   Register DstReg = MI.getOperand(0).getReg();
87  Register SrcReg = MI.getOperand(1).getReg();
88  MI.eraseFromParent();
89  replaceRegWith(MRI, DstReg, SrcReg);
90 }
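// For illustration, the copy combine above turns MIR such as
//   %b:_(s32) = G_ADD %x, %y
//   %a:_(s32) = COPY %b(s32)
//   %u:_(s32) = G_MUL %a, %a
// into direct uses of %b (the COPY is erased):
//   %b:_(s32) = G_ADD %x, %y
//   %u:_(s32) = G_MUL %b, %b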
91 
92 bool CombinerHelper::tryCombineConcatVectors(MachineInstr &MI) {
93   bool IsUndef = false;
94   SmallVector<Register, 4> Ops;
95   if (matchCombineConcatVectors(MI, IsUndef, Ops)) {
96  applyCombineConcatVectors(MI, IsUndef, Ops);
97  return true;
98  }
99  return false;
100 }
101 
102 bool CombinerHelper::matchCombineConcatVectors(MachineInstr &MI, bool &IsUndef,
103                                                SmallVectorImpl<Register> &Ops) {
104   assert(MI.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
105  "Invalid instruction");
106  IsUndef = true;
107  MachineInstr *Undef = nullptr;
108 
109  // Walk over all the operands of concat vectors and check if they are
110  // build_vector themselves or undef.
111  // Then collect their operands in Ops.
112  for (const MachineOperand &MO : MI.operands()) {
113  // Skip the instruction definition.
114  if (MO.isDef())
115  continue;
116  Register Reg = MO.getReg();
117  MachineInstr *Def = MRI.getVRegDef(Reg);
118  assert(Def && "Operand not defined");
119  switch (Def->getOpcode()) {
120  case TargetOpcode::G_BUILD_VECTOR:
121  IsUndef = false;
122  // Remember the operands of the build_vector to fold
123  // them into the yet-to-build flattened concat vectors.
124  for (const MachineOperand &BuildVecMO : Def->operands()) {
125  // Skip the definition.
126  if (BuildVecMO.isDef())
127  continue;
128  Ops.push_back(BuildVecMO.getReg());
129  }
130  break;
131  case TargetOpcode::G_IMPLICIT_DEF: {
132  LLT OpType = MRI.getType(Reg);
133  // Keep one undef value for all the undef operands.
134  if (!Undef) {
135     Builder.setInsertPt(*MI.getParent(), MI);
136     Undef = Builder.buildUndef(OpType.getScalarType());
137  }
138  assert(MRI.getType(Undef->getOperand(0).getReg()) ==
139  OpType.getScalarType() &&
140  "All undefs should have the same type");
141  // Break the undef vector in as many scalar elements as needed
142  // for the flattening.
143  for (unsigned EltIdx = 0, EltEnd = OpType.getNumElements();
144  EltIdx != EltEnd; ++EltIdx)
145  Ops.push_back(Undef->getOperand(0).getReg());
146  break;
147  }
148  default:
149  return false;
150  }
151  }
152  return true;
153 }
154 void CombinerHelper::applyCombineConcatVectors(
155     MachineInstr &MI, bool IsUndef, const ArrayRef<Register> Ops) {
156   // We determined that the concat_vectors can be flattened.
157   // Generate the flattened build_vector.
158   Register DstReg = MI.getOperand(0).getReg();
159   Builder.setInsertPt(*MI.getParent(), MI);
160   Register NewDstReg = MRI.cloneVirtualRegister(DstReg);
161 
162   // Note: IsUndef is sort of redundant. We could have determined it by
163   // checking that all Ops are undef. Alternatively, we could have
164   // generated a build_vector of undefs and relied on another combine to
165   // clean that up. For now, given we already gather this information
166   // in tryCombineConcatVectors, just save compile time and issue the
167   // right thing.
168  if (IsUndef)
169  Builder.buildUndef(NewDstReg);
170  else
171  Builder.buildBuildVector(NewDstReg, Ops);
172  MI.eraseFromParent();
173  replaceRegWith(MRI, DstReg, NewDstReg);
174 }
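// For illustration, the concat_vectors combine above flattens MIR such as
//   %v0:_(<2 x s32>) = G_BUILD_VECTOR %a, %b
//   %v1:_(<2 x s32>) = G_BUILD_VECTOR %c, %d
//   %cc:_(<4 x s32>) = G_CONCAT_VECTORS %v0(<2 x s32>), %v1(<2 x s32>)
// into
//   %cc:_(<4 x s32>) = G_BUILD_VECTOR %a, %b, %c, %d
// and an all-undef concat collapses to a single G_IMPLICIT_DEF.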
175 
176 namespace {
177 
178 /// Select a preference between two uses. CurrentUse is the current preference
179 /// while *ForCandidate are the attributes of the candidate under consideration.
180 PreferredTuple ChoosePreferredUse(PreferredTuple &CurrentUse,
181  const LLT &TyForCandidate,
182  unsigned OpcodeForCandidate,
183  MachineInstr *MIForCandidate) {
184  if (!CurrentUse.Ty.isValid()) {
185  if (CurrentUse.ExtendOpcode == OpcodeForCandidate ||
186  CurrentUse.ExtendOpcode == TargetOpcode::G_ANYEXT)
187  return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
188  return CurrentUse;
189  }
190 
191  // We permit the extend to hoist through basic blocks but this is only
192  // sensible if the target has extending loads. If you end up lowering back
193  // into a load and extend during the legalizer then the end result is
194  // hoisting the extend up to the load.
195 
196  // Prefer defined extensions to undefined extensions as these are more
197  // likely to reduce the number of instructions.
198  if (OpcodeForCandidate == TargetOpcode::G_ANYEXT &&
199  CurrentUse.ExtendOpcode != TargetOpcode::G_ANYEXT)
200  return CurrentUse;
201  else if (CurrentUse.ExtendOpcode == TargetOpcode::G_ANYEXT &&
202  OpcodeForCandidate != TargetOpcode::G_ANYEXT)
203  return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
204 
205   // Prefer sign extensions to zero extensions as sign-extensions tend to be
206   // more expensive, so it is better to fold them into the extending load.
207  if (CurrentUse.Ty == TyForCandidate) {
208  if (CurrentUse.ExtendOpcode == TargetOpcode::G_SEXT &&
209  OpcodeForCandidate == TargetOpcode::G_ZEXT)
210  return CurrentUse;
211  else if (CurrentUse.ExtendOpcode == TargetOpcode::G_ZEXT &&
212  OpcodeForCandidate == TargetOpcode::G_SEXT)
213  return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
214  }
215 
216  // This is potentially target specific. We've chosen the largest type
217  // because G_TRUNC is usually free. One potential catch with this is that
218  // some targets have a reduced number of larger registers than smaller
219  // registers and this choice potentially increases the live-range for the
220  // larger value.
221  if (TyForCandidate.getSizeInBits() > CurrentUse.Ty.getSizeInBits()) {
222  return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
223  }
224  return CurrentUse;
225 }
226 
227 /// Find a suitable place to insert some instructions and insert them. This
228 /// function accounts for special cases like inserting before a PHI node.
229 /// The current strategy for inserting before PHI's is to duplicate the
230 /// instructions for each predecessor. However, while that's ok for G_TRUNC
231 /// on most targets since it generally requires no code, other targets/cases may
232 /// want to try harder to find a dominating block.
233 static void InsertInsnsWithoutSideEffectsBeforeUse(
234     MachineIRBuilder &Builder, MachineInstr &DefMI, MachineOperand &UseMO,
235     std::function<void(MachineBasicBlock *, MachineBasicBlock::iterator,
236                        MachineOperand &UseMO)>
237  Inserter) {
238  MachineInstr &UseMI = *UseMO.getParent();
239 
240  MachineBasicBlock *InsertBB = UseMI.getParent();
241 
242  // If the use is a PHI then we want the predecessor block instead.
243  if (UseMI.isPHI()) {
244  MachineOperand *PredBB = std::next(&UseMO);
245  InsertBB = PredBB->getMBB();
246  }
247 
248  // If the block is the same block as the def then we want to insert just after
249  // the def instead of at the start of the block.
250  if (InsertBB == DefMI.getParent()) {
251     MachineBasicBlock::iterator InsertPt(&DefMI);
252     Inserter(InsertBB, std::next(InsertPt), UseMO);
253  return;
254  }
255 
256  // Otherwise we want the start of the BB
257  Inserter(InsertBB, InsertBB->getFirstNonPHI(), UseMO);
258 }
259 } // end anonymous namespace
260 
261 bool CombinerHelper::tryCombineExtendingLoads(MachineInstr &MI) {
262   PreferredTuple Preferred;
263  if (matchCombineExtendingLoads(MI, Preferred)) {
264  applyCombineExtendingLoads(MI, Preferred);
265  return true;
266  }
267  return false;
268 }
269 
270 bool CombinerHelper::matchCombineExtendingLoads(MachineInstr &MI,
271                                                 PreferredTuple &Preferred) {
272  // We match the loads and follow the uses to the extend instead of matching
273  // the extends and following the def to the load. This is because the load
274  // must remain in the same position for correctness (unless we also add code
275  // to find a safe place to sink it) whereas the extend is freely movable.
276  // It also prevents us from duplicating the load for the volatile case or just
277  // for performance.
278 
279  if (MI.getOpcode() != TargetOpcode::G_LOAD &&
280  MI.getOpcode() != TargetOpcode::G_SEXTLOAD &&
281  MI.getOpcode() != TargetOpcode::G_ZEXTLOAD)
282  return false;
283 
284  auto &LoadValue = MI.getOperand(0);
285  assert(LoadValue.isReg() && "Result wasn't a register?");
286 
287  LLT LoadValueTy = MRI.getType(LoadValue.getReg());
288  if (!LoadValueTy.isScalar())
289  return false;
290 
291  // Most architectures are going to legalize <s8 loads into at least a 1 byte
292  // load, and the MMOs can only describe memory accesses in multiples of bytes.
293  // If we try to perform extload combining on those, we can end up with
294  // %a(s8) = extload %ptr (load 1 byte from %ptr)
295  // ... which is an illegal extload instruction.
296  if (LoadValueTy.getSizeInBits() < 8)
297  return false;
298 
299  // For non power-of-2 types, they will very likely be legalized into multiple
300  // loads. Don't bother trying to match them into extending loads.
301  if (!isPowerOf2_32(LoadValueTy.getSizeInBits()))
302  return false;
303 
304   // Find the preferred type aside from the any-extends (unless it's the only
305   // one) and non-extending ops. We'll emit an extending load to that type and
306   // emit a variant of (extend (trunc X)) for the others according to the
307   // relative type sizes. At the same time, pick an extend to use based on the
308   // extend involved in the chosen type.
309  unsigned PreferredOpcode = MI.getOpcode() == TargetOpcode::G_LOAD
310  ? TargetOpcode::G_ANYEXT
311  : MI.getOpcode() == TargetOpcode::G_SEXTLOAD
312  ? TargetOpcode::G_SEXT
313  : TargetOpcode::G_ZEXT;
314  Preferred = {LLT(), PreferredOpcode, nullptr};
315  for (auto &UseMI : MRI.use_instructions(LoadValue.getReg())) {
316  if (UseMI.getOpcode() == TargetOpcode::G_SEXT ||
317  UseMI.getOpcode() == TargetOpcode::G_ZEXT ||
318  UseMI.getOpcode() == TargetOpcode::G_ANYEXT) {
319  Preferred = ChoosePreferredUse(Preferred,
320  MRI.getType(UseMI.getOperand(0).getReg()),
321  UseMI.getOpcode(), &UseMI);
322  }
323  }
324 
325  // There were no extends
326  if (!Preferred.MI)
327  return false;
328   // It should be impossible to choose an extend without selecting a different
329   // type since by definition the result of an extend is larger.
330  assert(Preferred.Ty != LoadValueTy && "Extending to same type?");
331 
332  LLVM_DEBUG(dbgs() << "Preferred use is: " << *Preferred.MI);
333  return true;
334 }
335 
336 void CombinerHelper::applyCombineExtendingLoads(MachineInstr &MI,
337                                                 PreferredTuple &Preferred) {
338  // Rewrite the load to the chosen extending load.
339  Register ChosenDstReg = Preferred.MI->getOperand(0).getReg();
340 
341  // Inserter to insert a truncate back to the original type at a given point
342  // with some basic CSE to limit truncate duplication to one per BB.
343   DenseMap<MachineBasicBlock *, MachineInstr *> EmittedInsns;
344   auto InsertTruncAt = [&](MachineBasicBlock *InsertIntoBB,
345  MachineBasicBlock::iterator InsertBefore,
346  MachineOperand &UseMO) {
347  MachineInstr *PreviouslyEmitted = EmittedInsns.lookup(InsertIntoBB);
348  if (PreviouslyEmitted) {
349  Observer.changingInstr(*UseMO.getParent());
350  UseMO.setReg(PreviouslyEmitted->getOperand(0).getReg());
351  Observer.changedInstr(*UseMO.getParent());
352  return;
353  }
354 
355  Builder.setInsertPt(*InsertIntoBB, InsertBefore);
356  Register NewDstReg = MRI.cloneVirtualRegister(MI.getOperand(0).getReg());
357  MachineInstr *NewMI = Builder.buildTrunc(NewDstReg, ChosenDstReg);
358  EmittedInsns[InsertIntoBB] = NewMI;
359  replaceRegOpWith(MRI, UseMO, NewDstReg);
360  };
361 
362   Observer.changingInstr(MI);
363   MI.setDesc(
364  Builder.getTII().get(Preferred.ExtendOpcode == TargetOpcode::G_SEXT
365  ? TargetOpcode::G_SEXTLOAD
366  : Preferred.ExtendOpcode == TargetOpcode::G_ZEXT
367  ? TargetOpcode::G_ZEXTLOAD
368  : TargetOpcode::G_LOAD));
369 
370  // Rewrite all the uses to fix up the types.
371  auto &LoadValue = MI.getOperand(0);
372   SmallVector<MachineOperand *, 4> Uses;
373   for (auto &UseMO : MRI.use_operands(LoadValue.getReg()))
374  Uses.push_back(&UseMO);
375 
376  for (auto *UseMO : Uses) {
377  MachineInstr *UseMI = UseMO->getParent();
378 
379  // If the extend is compatible with the preferred extend then we should fix
380  // up the type and extend so that it uses the preferred use.
381  if (UseMI->getOpcode() == Preferred.ExtendOpcode ||
382  UseMI->getOpcode() == TargetOpcode::G_ANYEXT) {
383  Register UseDstReg = UseMI->getOperand(0).getReg();
384  MachineOperand &UseSrcMO = UseMI->getOperand(1);
385  const LLT &UseDstTy = MRI.getType(UseDstReg);
386  if (UseDstReg != ChosenDstReg) {
387  if (Preferred.Ty == UseDstTy) {
388  // If the use has the same type as the preferred use, then merge
389  // the vregs and erase the extend. For example:
390  // %1:_(s8) = G_LOAD ...
391  // %2:_(s32) = G_SEXT %1(s8)
392  // %3:_(s32) = G_ANYEXT %1(s8)
393  // ... = ... %3(s32)
394  // rewrites to:
395  // %2:_(s32) = G_SEXTLOAD ...
396  // ... = ... %2(s32)
397  replaceRegWith(MRI, UseDstReg, ChosenDstReg);
398  Observer.erasingInstr(*UseMO->getParent());
399  UseMO->getParent()->eraseFromParent();
400  } else if (Preferred.Ty.getSizeInBits() < UseDstTy.getSizeInBits()) {
401  // If the preferred size is smaller, then keep the extend but extend
402  // from the result of the extending load. For example:
403  // %1:_(s8) = G_LOAD ...
404  // %2:_(s32) = G_SEXT %1(s8)
405  // %3:_(s64) = G_ANYEXT %1(s8)
406  // ... = ... %3(s64)
407  // rewrites to:
408  // %2:_(s32) = G_SEXTLOAD ...
409  // %3:_(s64) = G_ANYEXT %2:_(s32)
410  // ... = ... %3(s64)
411  replaceRegOpWith(MRI, UseSrcMO, ChosenDstReg);
412  } else {
413  // If the preferred size is larger, then insert a truncate. For
414  // example:
415  // %1:_(s8) = G_LOAD ...
416  // %2:_(s64) = G_SEXT %1(s8)
417  // %3:_(s32) = G_ZEXT %1(s8)
418  // ... = ... %3(s32)
419  // rewrites to:
420  // %2:_(s64) = G_SEXTLOAD ...
421  // %4:_(s8) = G_TRUNC %2:_(s64)
422  // %3:_(s32) = G_ZEXT %4:_(s8)
423  // ... = ... %3(s32)
424  InsertInsnsWithoutSideEffectsBeforeUse(Builder, MI, *UseMO,
425  InsertTruncAt);
426  }
427  continue;
428  }
429  // The use is (one of) the uses of the preferred use we chose earlier.
430  // We're going to update the load to def this value later so just erase
431  // the old extend.
432  Observer.erasingInstr(*UseMO->getParent());
433  UseMO->getParent()->eraseFromParent();
434  continue;
435  }
436 
437  // The use isn't an extend. Truncate back to the type we originally loaded.
438  // This is free on many targets.
439  InsertInsnsWithoutSideEffectsBeforeUse(Builder, MI, *UseMO, InsertTruncAt);
440  }
441 
442  MI.getOperand(0).setReg(ChosenDstReg);
443   Observer.changedInstr(MI);
444 }
445 
446 bool CombinerHelper::isPredecessor(MachineInstr &DefMI, MachineInstr &UseMI) {
447   assert(DefMI.getParent() == UseMI.getParent());
448  if (&DefMI == &UseMI)
449  return false;
450 
451   // Loop through the basic block until we find one of the instructions.
452   const MachineBasicBlock &MBB = *DefMI.getParent();
453   for (const MachineInstr &CurMI : MBB)
454     if (&CurMI == &DefMI || &CurMI == &UseMI)
455       return &CurMI == &DefMI;
456   llvm_unreachable("Block must contain instructions");
457 }
458 
459 bool CombinerHelper::dominates(MachineInstr &DefMI, MachineInstr &UseMI) {
460   if (MDT)
461  return MDT->dominates(&DefMI, &UseMI);
462  else if (DefMI.getParent() != UseMI.getParent())
463  return false;
464 
465  return isPredecessor(DefMI, UseMI);
466 }
467 
468 bool CombinerHelper::findPostIndexCandidate(MachineInstr &MI, Register &Addr,
469                                             Register &Base, Register &Offset) {
470   auto &MF = *MI.getParent()->getParent();
471  const auto &TLI = *MF.getSubtarget().getTargetLowering();
472 
473 #ifndef NDEBUG
474  unsigned Opcode = MI.getOpcode();
475  assert(Opcode == TargetOpcode::G_LOAD || Opcode == TargetOpcode::G_SEXTLOAD ||
476  Opcode == TargetOpcode::G_ZEXTLOAD || Opcode == TargetOpcode::G_STORE);
477 #endif
478 
479  Base = MI.getOperand(1).getReg();
480  MachineInstr *BaseDef = MRI.getUniqueVRegDef(Base);
481  if (BaseDef && BaseDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
482  return false;
483 
484  LLVM_DEBUG(dbgs() << "Searching for post-indexing opportunity for: " << MI);
485 
486  for (auto &Use : MRI.use_instructions(Base)) {
487  if (Use.getOpcode() != TargetOpcode::G_GEP)
488  continue;
489 
490  Offset = Use.getOperand(2).getReg();
491  if (!ForceLegalIndexing &&
492  !TLI.isIndexingLegal(MI, Base, Offset, /*IsPre*/ false, MRI)) {
493  LLVM_DEBUG(dbgs() << " Ignoring candidate with illegal addrmode: "
494  << Use);
495  continue;
496  }
497 
498  // Make sure the offset calculation is before the potentially indexed op.
499  // FIXME: we really care about dependency here. The offset calculation might
500  // be movable.
501  MachineInstr *OffsetDef = MRI.getUniqueVRegDef(Offset);
502  if (!OffsetDef || !dominates(*OffsetDef, MI)) {
503  LLVM_DEBUG(dbgs() << " Ignoring candidate with offset after mem-op: "
504  << Use);
505  continue;
506  }
507 
508  // FIXME: check whether all uses of Base are load/store with foldable
509  // addressing modes. If so, using the normal addr-modes is better than
510  // forming an indexed one.
511 
512  bool MemOpDominatesAddrUses = true;
513  for (auto &GEPUse : MRI.use_instructions(Use.getOperand(0).getReg())) {
514  if (!dominates(MI, GEPUse)) {
515  MemOpDominatesAddrUses = false;
516  break;
517  }
518  }
519 
520  if (!MemOpDominatesAddrUses) {
521  LLVM_DEBUG(
522  dbgs() << " Ignoring candidate as memop does not dominate uses: "
523  << Use);
524  continue;
525  }
526 
527  LLVM_DEBUG(dbgs() << " Found match: " << Use);
528  Addr = Use.getOperand(0).getReg();
529  return true;
530  }
531 
532  return false;
533 }
534 
535 bool CombinerHelper::findPreIndexCandidate(MachineInstr &MI, Register &Addr,
536  Register &Base, Register &Offset) {
537  auto &MF = *MI.getParent()->getParent();
538  const auto &TLI = *MF.getSubtarget().getTargetLowering();
539 
540 #ifndef NDEBUG
541  unsigned Opcode = MI.getOpcode();
542  assert(Opcode == TargetOpcode::G_LOAD || Opcode == TargetOpcode::G_SEXTLOAD ||
543  Opcode == TargetOpcode::G_ZEXTLOAD || Opcode == TargetOpcode::G_STORE);
544 #endif
545 
546  Addr = MI.getOperand(1).getReg();
547  MachineInstr *AddrDef = getOpcodeDef(TargetOpcode::G_GEP, Addr, MRI);
548  if (!AddrDef || MRI.hasOneUse(Addr))
549  return false;
550 
551  Base = AddrDef->getOperand(1).getReg();
552  Offset = AddrDef->getOperand(2).getReg();
553 
554  LLVM_DEBUG(dbgs() << "Found potential pre-indexed load_store: " << MI);
555 
556  if (!ForceLegalIndexing &&
557  !TLI.isIndexingLegal(MI, Base, Offset, /*IsPre*/ true, MRI)) {
558  LLVM_DEBUG(dbgs() << " Skipping, not legal for target");
559  return false;
560  }
561 
562  MachineInstr *BaseDef = getDefIgnoringCopies(Base, MRI);
563  if (BaseDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) {
564  LLVM_DEBUG(dbgs() << " Skipping, frame index would need copy anyway.");
565  return false;
566  }
567 
568  if (MI.getOpcode() == TargetOpcode::G_STORE) {
569  // Would require a copy.
570  if (Base == MI.getOperand(0).getReg()) {
571  LLVM_DEBUG(dbgs() << " Skipping, storing base so need copy anyway.");
572  return false;
573  }
574 
575  // We're expecting one use of Addr in MI, but it could also be the
576  // value stored, which isn't actually dominated by the instruction.
577  if (MI.getOperand(0).getReg() == Addr) {
578  LLVM_DEBUG(dbgs() << " Skipping, does not dominate all addr uses");
579  return false;
580  }
581  }
582 
583  // FIXME: check whether all uses of the base pointer are constant GEPs. That
584  // might allow us to end base's liveness here by adjusting the constant.
585 
586  for (auto &UseMI : MRI.use_instructions(Addr)) {
587  if (!dominates(MI, UseMI)) {
588  LLVM_DEBUG(dbgs() << " Skipping, does not dominate all addr uses.");
589  return false;
590  }
591  }
592 
593  return true;
594 }
595 
596 bool CombinerHelper::tryCombineIndexedLoadStore(MachineInstr &MI) {
597   unsigned Opcode = MI.getOpcode();
598  if (Opcode != TargetOpcode::G_LOAD && Opcode != TargetOpcode::G_SEXTLOAD &&
599  Opcode != TargetOpcode::G_ZEXTLOAD && Opcode != TargetOpcode::G_STORE)
600  return false;
601 
602  bool IsStore = Opcode == TargetOpcode::G_STORE;
603  Register Addr, Base, Offset;
604  bool IsPre = findPreIndexCandidate(MI, Addr, Base, Offset);
605  if (!IsPre && !findPostIndexCandidate(MI, Addr, Base, Offset))
606  return false;
607 
608 
609  unsigned NewOpcode;
610  switch (Opcode) {
611  case TargetOpcode::G_LOAD:
612  NewOpcode = TargetOpcode::G_INDEXED_LOAD;
613  break;
614  case TargetOpcode::G_SEXTLOAD:
615  NewOpcode = TargetOpcode::G_INDEXED_SEXTLOAD;
616  break;
617  case TargetOpcode::G_ZEXTLOAD:
618  NewOpcode = TargetOpcode::G_INDEXED_ZEXTLOAD;
619  break;
620  case TargetOpcode::G_STORE:
621  NewOpcode = TargetOpcode::G_INDEXED_STORE;
622  break;
623  default:
624  llvm_unreachable("Unknown load/store opcode");
625  }
626 
627  MachineInstr &AddrDef = *MRI.getUniqueVRegDef(Addr);
628  MachineIRBuilder MIRBuilder(MI);
629  auto MIB = MIRBuilder.buildInstr(NewOpcode);
630  if (IsStore) {
631  MIB.addDef(Addr);
632  MIB.addUse(MI.getOperand(0).getReg());
633  } else {
634  MIB.addDef(MI.getOperand(0).getReg());
635  MIB.addDef(Addr);
636  }
637 
638  MIB.addUse(Base);
639  MIB.addUse(Offset);
640  MIB.addImm(IsPre);
641  MI.eraseFromParent();
642  AddrDef.eraseFromParent();
643 
644   LLVM_DEBUG(dbgs() << "    Combined to indexed operation");
645  return true;
646 }
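// For illustration, on a target where post-indexed addressing is legal (or
// under -force-legal-indexing), the combine above rewrites
//   %val:_(s32) = G_LOAD %base(p0)
//   %newbase:_(p0) = G_GEP %base, %offset(s64)
// into a single writeback operation
//   %val:_(s32), %newbase:_(p0) = G_INDEXED_LOAD %base(p0), %offset(s64), 0
// where the trailing immediate is the IsPre flag (0 = post-indexed).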
647 
648 bool CombinerHelper::matchElideBrByInvertingCond(MachineInstr &MI) {
649   if (MI.getOpcode() != TargetOpcode::G_BR)
650  return false;
651 
652  // Try to match the following:
653  // bb1:
654  // %c(s32) = G_ICMP pred, %a, %b
655  // %c1(s1) = G_TRUNC %c(s32)
656  // G_BRCOND %c1, %bb2
657  // G_BR %bb3
658  // bb2:
659  // ...
660  // bb3:
661 
662  // The above pattern does not have a fall through to the successor bb2, always
663  // resulting in a branch no matter which path is taken. Here we try to find
664  // and replace that pattern with a conditional branch to bb3, falling
665  // through to bb2 otherwise.
666 
667  MachineBasicBlock *MBB = MI.getParent();
668   MachineBasicBlock::iterator BrIt(MI);
669   if (BrIt == MBB->begin())
670  return false;
671  assert(std::next(BrIt) == MBB->end() && "expected G_BR to be a terminator");
672 
673  MachineInstr *BrCond = &*std::prev(BrIt);
674  if (BrCond->getOpcode() != TargetOpcode::G_BRCOND)
675  return false;
676 
677  // Check that the next block is the conditional branch target.
678  if (!MBB->isLayoutSuccessor(BrCond->getOperand(1).getMBB()))
679  return false;
680 
681  MachineInstr *CmpMI = MRI.getVRegDef(BrCond->getOperand(0).getReg());
682  if (!CmpMI || CmpMI->getOpcode() != TargetOpcode::G_ICMP ||
683  !MRI.hasOneUse(CmpMI->getOperand(0).getReg()))
684  return false;
685  return true;
686 }
687 
688 bool CombinerHelper::tryElideBrByInvertingCond(MachineInstr &MI) {
689   if (!matchElideBrByInvertingCond(MI))
690     return false;
691   applyElideBrByInvertingCond(MI);
692   return true;
693 }
694 
695 void CombinerHelper::applyElideBrByInvertingCond(MachineInstr &MI) {
696   MachineBasicBlock *BrTarget = MI.getOperand(0).getMBB();
697   MachineBasicBlock::iterator BrIt(MI);
698   MachineInstr *BrCond = &*std::prev(BrIt);
699   MachineInstr *CmpMI = MRI.getVRegDef(BrCond->getOperand(0).getReg());
700 
701   CmpInst::Predicate InversePred = CmpInst::getInversePredicate(
702       (CmpInst::Predicate)CmpMI->getOperand(1).getPredicate());
703 
704  // Invert the G_ICMP condition.
705  Observer.changingInstr(*CmpMI);
706  CmpMI->getOperand(1).setPredicate(InversePred);
707  Observer.changedInstr(*CmpMI);
708 
709  // Change the conditional branch target.
710  Observer.changingInstr(*BrCond);
711  BrCond->getOperand(1).setMBB(BrTarget);
712  Observer.changedInstr(*BrCond);
713  MI.eraseFromParent();
714 }
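// For illustration, given the pattern matched above, e.g.
//   %c:_(s1) = G_ICMP intpred(eq), %a, %b
//   G_BRCOND %c(s1), %bb.2
//   G_BR %bb.3
// the apply step inverts the predicate and retargets the conditional branch:
//   %c:_(s1) = G_ICMP intpred(ne), %a, %b
//   G_BRCOND %c(s1), %bb.3
// letting control fall through to the layout successor %bb.2.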
715 
716 static bool shouldLowerMemFuncForSize(const MachineFunction &MF) {
717   // On Darwin, -Os means optimize for size without hurting performance, so
718  // only really optimize for size when -Oz (MinSize) is used.
719  if (MF.getTarget().getTargetTriple().isOSDarwin())
720  return MF.getFunction().hasMinSize();
721  return MF.getFunction().hasOptSize();
722 }
723 
724 // Returns a list of types to use for memory op lowering in MemOps. A partial
725 // port of findOptimalMemOpLowering in TargetLowering.
726 static bool findGISelOptimalMemOpLowering(
727     std::vector<LLT> &MemOps, unsigned Limit, uint64_t Size, unsigned DstAlign,
728  unsigned SrcAlign, bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc,
729  bool AllowOverlap, unsigned DstAS, unsigned SrcAS,
730  const AttributeList &FuncAttributes, const TargetLowering &TLI) {
731  // If 'SrcAlign' is zero, that means the memory operation does not need to
732  // load the value, i.e. memset or memcpy from constant string. Otherwise,
733  // it's the inferred alignment of the source. 'DstAlign', on the other hand,
734  // is the specified alignment of the memory operation. If it is zero, that
735  // means it's possible to change the alignment of the destination.
736  // 'MemcpyStrSrc' indicates whether the memcpy source is constant so it does
737  // not need to be loaded.
738  if (SrcAlign != 0 && SrcAlign < DstAlign)
739  return false;
740 
741  LLT Ty = TLI.getOptimalMemOpLLT(Size, DstAlign, SrcAlign, IsMemset,
742  ZeroMemset, MemcpyStrSrc, FuncAttributes);
743 
744  if (Ty == LLT()) {
745  // Use the largest scalar type whose alignment constraints are satisfied.
746  // We only need to check DstAlign here as SrcAlign is always greater or
747  // equal to DstAlign (or zero).
748  Ty = LLT::scalar(64);
749  while (DstAlign && DstAlign < Ty.getSizeInBytes() &&
750  !TLI.allowsMisalignedMemoryAccesses(Ty, DstAS, DstAlign))
751  Ty = LLT::scalar(Ty.getSizeInBytes());
752  assert(Ty.getSizeInBits() > 0 && "Could not find valid type");
753  // FIXME: check for the largest legal type we can load/store to.
754  }
755 
756  unsigned NumMemOps = 0;
757  while (Size != 0) {
758  unsigned TySize = Ty.getSizeInBytes();
759  while (TySize > Size) {
760       // For now, only use non-vector loads/stores for the left-over pieces.
761  LLT NewTy = Ty;
762  // FIXME: check for mem op safety and legality of the types. Not all of
763  // SDAGisms map cleanly to GISel concepts.
764  if (NewTy.isVector())
765  NewTy = NewTy.getSizeInBits() > 64 ? LLT::scalar(64) : LLT::scalar(32);
766  NewTy = LLT::scalar(PowerOf2Floor(NewTy.getSizeInBits() - 1));
767  unsigned NewTySize = NewTy.getSizeInBytes();
768  assert(NewTySize > 0 && "Could not find appropriate type");
769 
770  // If the new LLT cannot cover all of the remaining bits, then consider
771  // issuing a (or a pair of) unaligned and overlapping load / store.
772  bool Fast;
773  // Need to get a VT equivalent for allowMisalignedMemoryAccesses().
774  MVT VT = getMVTForLLT(Ty);
775  if (NumMemOps && AllowOverlap && NewTySize < Size &&
776         TLI.allowsMisalignedMemoryAccesses(
777             VT, DstAS, DstAlign, MachineMemOperand::MONone, &Fast) &&
778  Fast)
779  TySize = Size;
780  else {
781  Ty = NewTy;
782  TySize = NewTySize;
783  }
784  }
785 
786  if (++NumMemOps > Limit)
787  return false;
788 
789  MemOps.push_back(Ty);
790  Size -= TySize;
791  }
792 
793  return true;
794 }
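// Worked example: for a 7-byte non-volatile memset on a hypothetical target
// whose optimal type is s32, the loop above first emits s32 (7 -> 3 bytes
// left); with AllowOverlap and fast unaligned stores the remaining 3 bytes
// are covered by one overlapping s32, giving MemOps = {s32, s32}. Without
// overlap the type is shrunk instead, giving MemOps = {s32, s16, s8}.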
795 
796 static Type *getTypeForLLT(LLT Ty, LLVMContext &C) {
797   if (Ty.isVector())
798     return VectorType::get(IntegerType::get(C, Ty.getScalarSizeInBits()),
799                            Ty.getNumElements());
800  return IntegerType::get(C, Ty.getSizeInBits());
801 }
802 
803 // Get a vectorized representation of the memset value operand, GISel edition.
804 static Register getMemsetValue(Register Val, LLT Ty, MachineIRBuilder &MIB) {
805   MachineRegisterInfo &MRI = *MIB.getMRI();
806  unsigned NumBits = Ty.getScalarSizeInBits();
807  auto ValVRegAndVal = getConstantVRegValWithLookThrough(Val, MRI);
808  if (!Ty.isVector() && ValVRegAndVal) {
809  unsigned KnownVal = ValVRegAndVal->Value;
810  APInt Scalar = APInt(8, KnownVal);
811  APInt SplatVal = APInt::getSplat(NumBits, Scalar);
812  return MIB.buildConstant(Ty, SplatVal).getReg(0);
813  }
814  // FIXME: for vector types create a G_BUILD_VECTOR.
815  if (Ty.isVector())
816  return Register();
817 
818  // Extend the byte value to the larger type, and then multiply by a magic
819  // value 0x010101... in order to replicate it across every byte.
820  LLT ExtType = Ty.getScalarType();
821  auto ZExt = MIB.buildZExtOrTrunc(ExtType, Val);
822  if (NumBits > 8) {
823  APInt Magic = APInt::getSplat(NumBits, APInt(8, 0x01));
824  auto MagicMI = MIB.buildConstant(ExtType, Magic);
825  Val = MIB.buildMul(ExtType, ZExt, MagicMI).getReg(0);
826  }
827 
828  assert(ExtType == Ty && "Vector memset value type not supported yet");
829  return Val;
830 }
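// Worked example: for a constant memset value of 0xAB and an s32 store type,
// the constant path above splats the byte to 0xABABABAB directly; for a
// non-constant value, the G_ZEXT followed by G_MUL with the magic constant
// 0x01010101 reproduces the same byte replication at run time.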
831 
832 bool CombinerHelper::optimizeMemset(MachineInstr &MI, Register Dst, Register Val,
833  unsigned KnownLen, unsigned Align,
834  bool IsVolatile) {
835  auto &MF = *MI.getParent()->getParent();
836  const auto &TLI = *MF.getSubtarget().getTargetLowering();
837  auto &DL = MF.getDataLayout();
838   LLVMContext &C = MF.getFunction().getContext();
839 
840  assert(KnownLen != 0 && "Have a zero length memset length!");
841 
842  bool DstAlignCanChange = false;
843  MachineFrameInfo &MFI = MF.getFrameInfo();
844  bool OptSize = shouldLowerMemFuncForSize(MF);
845 
846  MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
847  if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
848  DstAlignCanChange = true;
849 
850  unsigned Limit = TLI.getMaxStoresPerMemset(OptSize);
851  std::vector<LLT> MemOps;
852 
853  const auto &DstMMO = **MI.memoperands_begin();
854  MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
855 
856  auto ValVRegAndVal = getConstantVRegValWithLookThrough(Val, MRI);
857  bool IsZeroVal = ValVRegAndVal && ValVRegAndVal->Value == 0;
858 
859   if (!findGISelOptimalMemOpLowering(
860           MemOps, Limit, KnownLen, (DstAlignCanChange ? 0 : Align), 0,
861  /*IsMemset=*/true,
862  /*ZeroMemset=*/IsZeroVal, /*MemcpyStrSrc=*/false,
863  /*AllowOverlap=*/!IsVolatile, DstPtrInfo.getAddrSpace(), ~0u,
864  MF.getFunction().getAttributes(), TLI))
865  return false;
866 
867  if (DstAlignCanChange) {
868  // Get an estimate of the type from the LLT.
869  Type *IRTy = getTypeForLLT(MemOps[0], C);
870  unsigned NewAlign = (unsigned)DL.getABITypeAlignment(IRTy);
871  if (NewAlign > Align) {
872  Align = NewAlign;
873  unsigned FI = FIDef->getOperand(1).getIndex();
874  // Give the stack frame object a larger alignment if needed.
875  if (MFI.getObjectAlignment(FI) < Align)
876  MFI.setObjectAlignment(FI, Align);
877  }
878  }
879 
880  MachineIRBuilder MIB(MI);
881  // Find the largest store and generate the bit pattern for it.
882  LLT LargestTy = MemOps[0];
883  for (unsigned i = 1; i < MemOps.size(); i++)
884  if (MemOps[i].getSizeInBits() > LargestTy.getSizeInBits())
885  LargestTy = MemOps[i];
886 
887  // The memset stored value is always defined as an s8, so in order to make it
888  // work with larger store types we need to repeat the bit pattern across the
889  // wider type.
890  Register MemSetValue = getMemsetValue(Val, LargestTy, MIB);
891 
892  if (!MemSetValue)
893  return false;
894 
895  // Generate the stores. For each store type in the list, we generate the
896  // matching store of that type to the destination address.
897  LLT PtrTy = MRI.getType(Dst);
898  unsigned DstOff = 0;
899  unsigned Size = KnownLen;
900  for (unsigned I = 0; I < MemOps.size(); I++) {
901  LLT Ty = MemOps[I];
902  unsigned TySize = Ty.getSizeInBytes();
903  if (TySize > Size) {
904  // Issuing an unaligned load / store pair that overlaps with the previous
905  // pair. Adjust the offset accordingly.
906  assert(I == MemOps.size() - 1 && I != 0);
907  DstOff -= TySize - Size;
908  }
909 
910  // If this store is smaller than the largest store see whether we can get
911  // the smaller value for free with a truncate.
912  Register Value = MemSetValue;
913  if (Ty.getSizeInBits() < LargestTy.getSizeInBits()) {
914  MVT VT = getMVTForLLT(Ty);
915  MVT LargestVT = getMVTForLLT(LargestTy);
916  if (!LargestTy.isVector() && !Ty.isVector() &&
917  TLI.isTruncateFree(LargestVT, VT))
918  Value = MIB.buildTrunc(Ty, MemSetValue).getReg(0);
919  else
920  Value = getMemsetValue(Val, Ty, MIB);
921  if (!Value)
922  return false;
923  }
924 
925  auto *StoreMMO =
926  MF.getMachineMemOperand(&DstMMO, DstOff, Ty.getSizeInBytes());
927 
928  Register Ptr = Dst;
929  if (DstOff != 0) {
930  auto Offset =
931  MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), DstOff);
932  Ptr = MIB.buildGEP(PtrTy, Dst, Offset).getReg(0);
933  }
934 
935  MIB.buildStore(Value, Ptr, *StoreMMO);
936  DstOff += Ty.getSizeInBytes();
937  Size -= TySize;
938  }
939 
940  MI.eraseFromParent();
941  return true;
942 }
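// For illustration, an 8-byte memset whose chosen memop type is s64 lowers
// to roughly
//   %splat:_(s64) = ... byte-replicated value from getMemsetValue ...
//   G_STORE %splat(s64), %dst(p0) :: (store 8)
// with further stores at G_GEP'd offsets (possibly one trailing overlapping
// store) for longer or non-power-of-2 lengths.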
943 
944 
945 bool CombinerHelper::optimizeMemcpy(MachineInstr &MI, Register Dst,
946  Register Src, unsigned KnownLen,
947  unsigned DstAlign, unsigned SrcAlign,
948  bool IsVolatile) {
949  auto &MF = *MI.getParent()->getParent();
950  const auto &TLI = *MF.getSubtarget().getTargetLowering();
951  auto &DL = MF.getDataLayout();
952   LLVMContext &C = MF.getFunction().getContext();
953 
954  assert(KnownLen != 0 && "Have a zero length memcpy length!");
955 
956  bool DstAlignCanChange = false;
957  MachineFrameInfo &MFI = MF.getFrameInfo();
958  bool OptSize = shouldLowerMemFuncForSize(MF);
959  unsigned Alignment = MinAlign(DstAlign, SrcAlign);
960 
961  MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
962  if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
963  DstAlignCanChange = true;
964 
965  // FIXME: infer better src pointer alignment like SelectionDAG does here.
966  // FIXME: also use the equivalent of isMemSrcFromConstant and alwaysinlining
967  // if the memcpy is in a tail call position.
968 
969  unsigned Limit = TLI.getMaxStoresPerMemcpy(OptSize);
970  std::vector<LLT> MemOps;
971 
972  const auto &DstMMO = **MI.memoperands_begin();
973  const auto &SrcMMO = **std::next(MI.memoperands_begin());
974  MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
975  MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();
976 
977   if (!findGISelOptimalMemOpLowering(
978           MemOps, Limit, KnownLen, (DstAlignCanChange ? 0 : Alignment),
979  SrcAlign,
980  /*IsMemset=*/false,
981  /*ZeroMemset=*/false, /*MemcpyStrSrc=*/false,
982  /*AllowOverlap=*/!IsVolatile, DstPtrInfo.getAddrSpace(),
983  SrcPtrInfo.getAddrSpace(), MF.getFunction().getAttributes(), TLI))
984  return false;
985 
986  if (DstAlignCanChange) {
987  // Get an estimate of the type from the LLT.
988  Type *IRTy = getTypeForLLT(MemOps[0], C);
989  unsigned NewAlign = (unsigned)DL.getABITypeAlignment(IRTy);
990 
991  // Don't promote to an alignment that would require dynamic stack
992  // realignment.
993  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
994  if (!TRI->needsStackRealignment(MF))
995  while (NewAlign > Alignment &&
996  DL.exceedsNaturalStackAlignment(Align(NewAlign)))
997  NewAlign /= 2;
998 
999  if (NewAlign > Alignment) {
1000  Alignment = NewAlign;
1001  unsigned FI = FIDef->getOperand(1).getIndex();
1002  // Give the stack frame object a larger alignment if needed.
1003  if (MFI.getObjectAlignment(FI) < Alignment)
1004  MFI.setObjectAlignment(FI, Alignment);
1005  }
1006  }
1007 
1008  LLVM_DEBUG(dbgs() << "Inlining memcpy: " << MI << " into loads & stores\n");
1009 
1010  MachineIRBuilder MIB(MI);
1011  // Now we need to emit a load/store pair for each of the types we've
1012  // collected. I.e. for each type, generate a load from the source pointer of
1013  // that type width, and then generate a corresponding store to the dest buffer
1014  // of that value loaded. This can result in a sequence of loads and stores of
1015  // mixed types, depending on what the target specifies as good types to use.
1016  unsigned CurrOffset = 0;
1017  LLT PtrTy = MRI.getType(Src);
1018  unsigned Size = KnownLen;
1019  for (auto CopyTy : MemOps) {
1020  // Issuing an unaligned load / store pair that overlaps with the previous
1021  // pair. Adjust the offset accordingly.
1022  if (CopyTy.getSizeInBytes() > Size)
1023  CurrOffset -= CopyTy.getSizeInBytes() - Size;
1024 
1025  // Construct MMOs for the accesses.
1026  auto *LoadMMO =
1027  MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());
1028  auto *StoreMMO =
1029  MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());
1030 
1031  // Create the load.
1032  Register LoadPtr = Src;
1033  Register Offset;
1034  if (CurrOffset != 0) {
1035  Offset = MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset)
1036  .getReg(0);
1037  LoadPtr = MIB.buildGEP(PtrTy, Src, Offset).getReg(0);
1038  }
1039  auto LdVal = MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO);
1040 
1041  // Create the store.
1042  Register StorePtr =
1043  CurrOffset == 0 ? Dst : MIB.buildGEP(PtrTy, Dst, Offset).getReg(0);
1044  MIB.buildStore(LdVal, StorePtr, *StoreMMO);
1045  CurrOffset += CopyTy.getSizeInBytes();
1046  Size -= CopyTy.getSizeInBytes();
1047  }
1048 
1049  MI.eraseFromParent();
1050  return true;
1051 }
1052 
1053 bool CombinerHelper::optimizeMemmove(MachineInstr &MI, Register Dst,
1054  Register Src, unsigned KnownLen,
1055  unsigned DstAlign, unsigned SrcAlign,
1056  bool IsVolatile) {
1057  auto &MF = *MI.getParent()->getParent();
1058  const auto &TLI = *MF.getSubtarget().getTargetLowering();
1059  auto &DL = MF.getDataLayout();
1060  LLVMContext &C = MF.getFunction().getContext();
1061 
1062  assert(KnownLen != 0 && "Have a zero length memmove length!");
1063 
1064  bool DstAlignCanChange = false;
1065  MachineFrameInfo &MFI = MF.getFrameInfo();
1066  bool OptSize = shouldLowerMemFuncForSize(MF);
1067  unsigned Alignment = MinAlign(DstAlign, SrcAlign);
1068 
1069  MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
1070  if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
1071  DstAlignCanChange = true;
1072 
1073  unsigned Limit = TLI.getMaxStoresPerMemmove(OptSize);
1074  std::vector<LLT> MemOps;
1075 
1076  const auto &DstMMO = **MI.memoperands_begin();
1077  const auto &SrcMMO = **std::next(MI.memoperands_begin());
1078  MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
1079  MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();
1080 
1081  // FIXME: SelectionDAG always passes false for 'AllowOverlap', apparently due
1082  // to a bug in its findOptimalMemOpLowering implementation. For now do the
1083  // same thing here.
1084   if (!findGISelOptimalMemOpLowering(
1085           MemOps, Limit, KnownLen, (DstAlignCanChange ? 0 : Alignment),
1086  SrcAlign,
1087  /*IsMemset=*/false,
1088  /*ZeroMemset=*/false, /*MemcpyStrSrc=*/false,
1089  /*AllowOverlap=*/false, DstPtrInfo.getAddrSpace(),
1090  SrcPtrInfo.getAddrSpace(), MF.getFunction().getAttributes(), TLI))
1091  return false;
1092 
1093  if (DstAlignCanChange) {
1094  // Get an estimate of the type from the LLT.
1095  Type *IRTy = getTypeForLLT(MemOps[0], C);
1096  unsigned NewAlign = (unsigned)DL.getABITypeAlignment(IRTy);
1097 
1098  // Don't promote to an alignment that would require dynamic stack
1099  // realignment.
1100  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
1101  if (!TRI->needsStackRealignment(MF))
1102  while (NewAlign > Alignment &&
1103  DL.exceedsNaturalStackAlignment(Align(NewAlign)))
1104  NewAlign /= 2;
1105 
1106  if (NewAlign > Alignment) {
1107  Alignment = NewAlign;
1108  unsigned FI = FIDef->getOperand(1).getIndex();
1109  // Give the stack frame object a larger alignment if needed.
1110  if (MFI.getObjectAlignment(FI) < Alignment)
1111  MFI.setObjectAlignment(FI, Alignment);
1112  }
1113  }
1114 
1115  LLVM_DEBUG(dbgs() << "Inlining memmove: " << MI << " into loads & stores\n");
1116 
1117  MachineIRBuilder MIB(MI);
1118  // Memmove requires that we perform the loads first before issuing the stores.
1119  // Apart from that, this loop is pretty much doing the same thing as the
1120  // memcpy codegen function.
1121  unsigned CurrOffset = 0;
1122  LLT PtrTy = MRI.getType(Src);
1123  SmallVector<Register, 16> LoadVals;
1124  for (auto CopyTy : MemOps) {
1125  // Construct MMO for the load.
1126  auto *LoadMMO =
1127  MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());
1128 
1129  // Create the load.
1130  Register LoadPtr = Src;
1131  if (CurrOffset != 0) {
1132  auto Offset =
1133  MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset);
1134  LoadPtr = MIB.buildGEP(PtrTy, Src, Offset).getReg(0);
1135  }
1136  LoadVals.push_back(MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO).getReg(0));
1137  CurrOffset += CopyTy.getSizeInBytes();
1138  }
1139 
1140  CurrOffset = 0;
1141  for (unsigned I = 0; I < MemOps.size(); ++I) {
1142  LLT CopyTy = MemOps[I];
1143  // Now store the values loaded.
1144  auto *StoreMMO =
1145  MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());
1146 
1147  Register StorePtr = Dst;
1148  if (CurrOffset != 0) {
1149  auto Offset =
1150  MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset);
1151  StorePtr = MIB.buildGEP(PtrTy, Dst, Offset).getReg(0);
1152  }
1153  MIB.buildStore(LoadVals[I], StorePtr, *StoreMMO);
1154  CurrOffset += CopyTy.getSizeInBytes();
1155  }
1156  MI.eraseFromParent();
1157  return true;
1158 }
1159 
1160 bool CombinerHelper::tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen) {
1161   // This combine is fairly complex so it's not written with a separate
1162  // matcher function.
1163  assert(MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
1164   Intrinsic::ID ID = (Intrinsic::ID)MI.getIntrinsicID();
1165   assert((ID == Intrinsic::memcpy || ID == Intrinsic::memmove ||
1166  ID == Intrinsic::memset) &&
1167  "Expected a memcpy like intrinsic");
1168 
1169  auto MMOIt = MI.memoperands_begin();
1170  const MachineMemOperand *MemOp = *MMOIt;
1171  bool IsVolatile = MemOp->isVolatile();
1172  // Don't try to optimize volatile.
1173  if (IsVolatile)
1174  return false;
1175 
1176  unsigned DstAlign = MemOp->getBaseAlignment();
1177  unsigned SrcAlign = 0;
1178  Register Dst = MI.getOperand(1).getReg();
1179  Register Src = MI.getOperand(2).getReg();
1180  Register Len = MI.getOperand(3).getReg();
1181 
1182  if (ID != Intrinsic::memset) {
1183  assert(MMOIt != MI.memoperands_end() && "Expected a second MMO on MI");
1184  MemOp = *(++MMOIt);
1185  SrcAlign = MemOp->getBaseAlignment();
1186  }
1187 
1188  // See if this is a constant length copy
1189  auto LenVRegAndVal = getConstantVRegValWithLookThrough(Len, MRI);
1190  if (!LenVRegAndVal)
1191  return false; // Leave it to the legalizer to lower it to a libcall.
1192  unsigned KnownLen = LenVRegAndVal->Value;
1193 
1194  if (KnownLen == 0) {
1195  MI.eraseFromParent();
1196  return true;
1197  }
1198 
1199  if (MaxLen && KnownLen > MaxLen)
1200  return false;
1201 
1202  if (ID == Intrinsic::memcpy)
1203  return optimizeMemcpy(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, IsVolatile);
1204  if (ID == Intrinsic::memmove)
1205  return optimizeMemmove(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, IsVolatile);
1206  if (ID == Intrinsic::memset)
1207  return optimizeMemset(MI, Dst, Src, KnownLen, DstAlign, IsVolatile);
1208  return false;
1209 }
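// Usage sketch (hypothetical call site): a target's pre-legalizer combiner
// typically constructs a CombinerHelper and forwards to these entry points
// from its CombinerInfo::combine() callback, e.g.
//   CombinerHelper Helper(Observer, B, KB, MDT);
//   if (Helper.tryCombine(MI))
//     return true;
//   if (MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS)
//     return Helper.tryCombineMemCpyFamily(MI);
//   return false;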
1210 
1211 bool CombinerHelper::tryCombine(MachineInstr &MI) {
1212   if (tryCombineCopy(MI))
1213  return true;
1214  if (tryCombineExtendingLoads(MI))
1215  return true;
1216   if (tryCombineIndexedLoadStore(MI))
1217     return true;
1218  return false;
1219 }