LLVM  10.0.0svn
CombinerHelper.cpp
1 //===-- lib/CodeGen/GlobalISel/CombinerHelper.cpp -------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
21 
22 #define DEBUG_TYPE "gi-combiner"
23 
24 using namespace llvm;
25 
26 // Option to allow testing of the combiner while no targets know about indexed
27 // addressing.
28 static cl::opt<bool>
29  ForceLegalIndexing("force-legal-indexing", cl::Hidden, cl::init(false),
30  cl::desc("Force all indexed operations to be "
31  "legal for the GlobalISel combiner"));
32 
33 
34 CombinerHelper::CombinerHelper(GISelChangeObserver &Observer,
35                                MachineIRBuilder &B, GISelKnownBits *KB,
36                                MachineDominatorTree *MDT)
37     : Builder(B), MRI(Builder.getMF().getRegInfo()), Observer(Observer),
38  KB(KB), MDT(MDT) {
39  (void)this->KB;
40 }
41 
42 void CombinerHelper::replaceRegWith(MachineRegisterInfo &MRI, Register FromReg,
43                                     Register ToReg) const {
44  Observer.changingAllUsesOfReg(MRI, FromReg);
45 
46  if (MRI.constrainRegAttrs(ToReg, FromReg))
47  MRI.replaceRegWith(FromReg, ToReg);
48  else
49  Builder.buildCopy(ToReg, FromReg);
50 
51  Observer.finishedChangingAllUsesOfReg();
52 }
53 
54 void CombinerHelper::replaceRegOpWith(MachineRegisterInfo &MRI,
55                                       MachineOperand &FromRegOp,
56  Register ToReg) const {
57  assert(FromRegOp.getParent() && "Expected an operand in an MI");
58  Observer.changingInstr(*FromRegOp.getParent());
59 
60  FromRegOp.setReg(ToReg);
61 
62  Observer.changedInstr(*FromRegOp.getParent());
63 }
64 
65 bool CombinerHelper::tryCombineCopy(MachineInstr &MI) {
66  if (matchCombineCopy(MI)) {
67  applyCombineCopy(MI);
68  return true;
69  }
70  return false;
71 }
72 bool CombinerHelper::matchCombineCopy(MachineInstr &MI) {
73  if (MI.getOpcode() != TargetOpcode::COPY)
74  return false;
75  Register DstReg = MI.getOperand(0).getReg();
76  Register SrcReg = MI.getOperand(1).getReg();
77  LLT DstTy = MRI.getType(DstReg);
78  LLT SrcTy = MRI.getType(SrcReg);
79  // Simple Copy Propagation.
80  // a(sx) = COPY b(sx) -> Replace all uses of a with b.
81  if (DstTy.isValid() && SrcTy.isValid() && DstTy == SrcTy)
82  return true;
83  return false;
84 }
85 void CombinerHelper::applyCombineCopy(MachineInstr &MI) {
86  Register DstReg = MI.getOperand(0).getReg();
87  Register SrcReg = MI.getOperand(1).getReg();
88  MI.eraseFromParent();
89  replaceRegWith(MRI, DstReg, SrcReg);
90 }
91 
92 bool CombinerHelper::tryCombineConcatVectors(MachineInstr &MI) {
93  bool IsUndef = false;
94  SmallVector<Register, 4> Ops;
95  if (matchCombineConcatVectors(MI, IsUndef, Ops)) {
96  applyCombineConcatVectors(MI, IsUndef, Ops);
97  return true;
98  }
99  return false;
100 }
101 
102 bool CombinerHelper::matchCombineConcatVectors(MachineInstr &MI, bool &IsUndef,
103                                                SmallVectorImpl<Register> &Ops) {
104  assert(MI.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
105  "Invalid instruction");
106  IsUndef = true;
107  MachineInstr *Undef = nullptr;
108 
109  // Walk over all the operands of concat vectors and check if they are
110  // build_vectors themselves or undef.
111  // Then collect their operands in Ops.
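  // For example:
  //   %x:_(<2 x s32>) = G_BUILD_VECTOR %a(s32), %b(s32)
  //   %y:_(<2 x s32>) = G_IMPLICIT_DEF
  //   %z:_(<4 x s32>) = G_CONCAT_VECTORS %x(<2 x s32>), %y(<2 x s32>)
  // collects Ops = {%a, %b, undef, undef} and leaves IsUndef == false, since at
  // least one operand is a build_vector.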
112  for (const MachineOperand &MO : MI.operands()) {
113  // Skip the instruction definition.
114  if (MO.isDef())
115  continue;
116  Register Reg = MO.getReg();
117  MachineInstr *Def = MRI.getVRegDef(Reg);
118  assert(Def && "Operand not defined");
119  switch (Def->getOpcode()) {
120  case TargetOpcode::G_BUILD_VECTOR:
121  IsUndef = false;
122  // Remember the operands of the build_vector to fold
123  // them into the yet-to-build flattened concat vectors.
124  for (const MachineOperand &BuildVecMO : Def->operands()) {
125  // Skip the definition.
126  if (BuildVecMO.isDef())
127  continue;
128  Ops.push_back(BuildVecMO.getReg());
129  }
130  break;
131  case TargetOpcode::G_IMPLICIT_DEF: {
132  LLT OpType = MRI.getType(Reg);
133  // Keep one undef value for all the undef operands.
134  if (!Undef) {
135  Builder.setInsertPt(*MI.getParent(), MI);
136  Undef = Builder.buildUndef(OpType.getScalarType());
137  }
138  assert(MRI.getType(Undef->getOperand(0).getReg()) ==
139  OpType.getScalarType() &&
140  "All undefs should have the same type");
141  // Break the undef vector into as many scalar elements as needed
142  // for the flattening.
143  for (unsigned EltIdx = 0, EltEnd = OpType.getNumElements();
144  EltIdx != EltEnd; ++EltIdx)
145  Ops.push_back(Undef->getOperand(0).getReg());
146  break;
147  }
148  default:
149  return false;
150  }
151  }
152  return true;
153 }
154 void CombinerHelper::applyCombineConcatVectors(
155     MachineInstr &MI, bool IsUndef, const ArrayRef<Register> Ops) {
156  // We determined that the concat_vectors can be flattened.
157  // Generate the flattened build_vector.
158  Register DstReg = MI.getOperand(0).getReg();
159  Builder.setInsertPt(*MI.getParent(), MI);
160  Register NewDstReg = MRI.cloneVirtualRegister(DstReg);
161 
162  // Note: IsUndef is sort of redundant. We could have determined it by
163  // checking that all of the Ops are undef. Alternatively, we could have
164  // generated a build_vector of undefs and relied on another combine to
165  // clean that up. For now, given we already gather this information
166  // in tryCombineConcatVectors, just save compile time and issue the
167  // right thing.
168  if (IsUndef)
169  Builder.buildUndef(NewDstReg);
170  else
171  Builder.buildBuildVector(NewDstReg, Ops);
172  MI.eraseFromParent();
173  replaceRegWith(MRI, DstReg, NewDstReg);
174 }
175 
176 bool CombinerHelper::tryCombineShuffleVector(MachineInstr &MI) {
177  SmallVector<Register, 4> Ops;
178  if (matchCombineShuffleVector(MI, Ops)) {
179  applyCombineShuffleVector(MI, Ops);
180  return true;
181  }
182  return false;
183 }
184 
185 bool CombinerHelper::matchCombineShuffleVector(MachineInstr &MI,
186                                                SmallVectorImpl<Register> &Ops) {
187  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR &&
188  "Invalid instruction kind");
189  LLT DstType = MRI.getType(MI.getOperand(0).getReg());
190  Register Src1 = MI.getOperand(1).getReg();
191  LLT SrcType = MRI.getType(Src1);
192  unsigned DstNumElts = DstType.getNumElements();
193  unsigned SrcNumElts = SrcType.getNumElements();
194 
195  // If the resulting vector is smaller than the size of the source
196  // vectors being concatenated, we won't be able to replace the
197  // shuffle vector with a concat_vectors.
198  //
199  // Note: We may still be able to produce a concat_vectors fed by
200  // extract_vector_elt and so on. It is less clear that this would
201  // actually be better, so don't bother for now.
202  if (DstNumElts < 2 * SrcNumElts)
203  return false;
204 
205  // Check that the shuffle mask can be broken evenly between the
206  // different sources.
207  if (DstNumElts % SrcNumElts != 0)
208  return false;
209 
210  // Mask length is a multiple of the source vector length.
211  // Check if the shuffle is some kind of concatenation of the input
212  // vectors.
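  // For example, with <2 x s32> sources, the mask <0, 1, 2, 3> is a plain
  // concatenation of Src1 and Src2, while <2, 3, -1, -1> is a concatenation of
  // Src2 and an implicit_def, and <1, 0, 2, 3> is rejected because the first
  // piece is not a sequential run of a single source.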
213  unsigned NumConcat = DstNumElts / SrcNumElts;
214  SmallVector<int, 8> ConcatSrcs(NumConcat, -1);
217  for (unsigned i = 0; i != DstNumElts; ++i) {
218  int Idx = Mask[i];
219  // Undef value.
220  if (Idx < 0)
221  continue;
222  // Ensure the indices in each SrcType sized piece are sequential and that
223  // the same source is used for the whole piece.
224  if ((Idx % SrcNumElts != (i % SrcNumElts)) ||
225  (ConcatSrcs[i / SrcNumElts] >= 0 &&
226  ConcatSrcs[i / SrcNumElts] != (int)(Idx / SrcNumElts)))
227  return false;
228  // Remember which source this index came from.
229  ConcatSrcs[i / SrcNumElts] = Idx / SrcNumElts;
230  }
231 
232  // The shuffle is concatenating multiple vectors together.
233  // Collect the different operands for that.
234  Register UndefReg;
235  Register Src2 = MI.getOperand(2).getReg();
236  for (auto Src : ConcatSrcs) {
237  if (Src < 0) {
238  if (!UndefReg) {
239  Builder.setInsertPt(*MI.getParent(), MI);
240  UndefReg = Builder.buildUndef(SrcType).getReg(0);
241  }
242  Ops.push_back(UndefReg);
243  } else if (Src == 0)
244  Ops.push_back(Src1);
245  else
246  Ops.push_back(Src2);
247  }
248  return true;
249 }
250 
251 void CombinerHelper::applyCombineShuffleVector(MachineInstr &MI,
252                                                const ArrayRef<Register> Ops) {
253  Register DstReg = MI.getOperand(0).getReg();
254  Builder.setInsertPt(*MI.getParent(), MI);
255  Register NewDstReg = MRI.cloneVirtualRegister(DstReg);
256 
257  Builder.buildConcatVectors(NewDstReg, Ops);
258 
259  MI.eraseFromParent();
260  replaceRegWith(MRI, DstReg, NewDstReg);
261 }
262 
263 namespace {
264 
265 /// Select a preference between two uses. CurrentUse is the current preference
266 /// while *ForCandidate are the attributes of the candidate under consideration.
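/// For example, a use that sign-extends the loaded value to s64 is preferred
/// over one that sign-extends it to s32, and at equal sizes a G_SEXT or G_ZEXT
/// use is preferred over a G_ANYEXT use.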
267 PreferredTuple ChoosePreferredUse(PreferredTuple &CurrentUse,
268  const LLT &TyForCandidate,
269  unsigned OpcodeForCandidate,
270  MachineInstr *MIForCandidate) {
271  if (!CurrentUse.Ty.isValid()) {
272  if (CurrentUse.ExtendOpcode == OpcodeForCandidate ||
273  CurrentUse.ExtendOpcode == TargetOpcode::G_ANYEXT)
274  return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
275  return CurrentUse;
276  }
277 
278  // We permit the extend to hoist through basic blocks but this is only
279  // sensible if the target has extending loads. If you end up lowering back
280  // into a load and extend during the legalizer then the end result is
281  // hoisting the extend up to the load.
282 
283  // Prefer defined extensions to undefined extensions as these are more
284  // likely to reduce the number of instructions.
285  if (OpcodeForCandidate == TargetOpcode::G_ANYEXT &&
286  CurrentUse.ExtendOpcode != TargetOpcode::G_ANYEXT)
287  return CurrentUse;
288  else if (CurrentUse.ExtendOpcode == TargetOpcode::G_ANYEXT &&
289  OpcodeForCandidate != TargetOpcode::G_ANYEXT)
290  return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
291 
292  // Prefer sign extensions to zero extensions as sign-extensions tend to be
293  // more expensive, so there is more to gain from folding them into the load.
294  if (CurrentUse.Ty == TyForCandidate) {
295  if (CurrentUse.ExtendOpcode == TargetOpcode::G_SEXT &&
296  OpcodeForCandidate == TargetOpcode::G_ZEXT)
297  return CurrentUse;
298  else if (CurrentUse.ExtendOpcode == TargetOpcode::G_ZEXT &&
299  OpcodeForCandidate == TargetOpcode::G_SEXT)
300  return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
301  }
302 
303  // This is potentially target specific. We've chosen the largest type
304  // because G_TRUNC is usually free. One potential catch with this is that
305  // some targets have a reduced number of larger registers than smaller
306  // registers and this choice potentially increases the live-range for the
307  // larger value.
308  if (TyForCandidate.getSizeInBits() > CurrentUse.Ty.getSizeInBits()) {
309  return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
310  }
311  return CurrentUse;
312 }
313 
314 /// Find a suitable place to insert some instructions and insert them. This
315 /// function accounts for special cases like inserting before a PHI node.
316 /// The current strategy for inserting before PHIs is to duplicate the
317 /// instructions for each predecessor. However, while that's ok for G_TRUNC
318 /// on most targets since it generally requires no code, other targets/cases may
319 /// want to try harder to find a dominating block.
320 static void InsertInsnsWithoutSideEffectsBeforeUse(
321     MachineIRBuilder &Builder, MachineInstr &DefMI, MachineOperand &UseMO,
322     std::function<void(MachineBasicBlock *, MachineBasicBlock::iterator,
323                        MachineOperand &UseMO)>
324  Inserter) {
325  MachineInstr &UseMI = *UseMO.getParent();
326 
327  MachineBasicBlock *InsertBB = UseMI.getParent();
328 
329  // If the use is a PHI then we want the predecessor block instead.
330  if (UseMI.isPHI()) {
331  MachineOperand *PredBB = std::next(&UseMO);
332  InsertBB = PredBB->getMBB();
333  }
334 
335  // If the block is the same block as the def then we want to insert just after
336  // the def instead of at the start of the block.
337  if (InsertBB == DefMI.getParent()) {
338  MachineBasicBlock::iterator InsertPt = &DefMI;
339  Inserter(InsertBB, std::next(InsertPt), UseMO);
340  return;
341  }
342 
343  // Otherwise we want the start of the BB
344  Inserter(InsertBB, InsertBB->getFirstNonPHI(), UseMO);
345 }
346 } // end anonymous namespace
347 
348 bool CombinerHelper::tryCombineExtendingLoads(MachineInstr &MI) {
349  PreferredTuple Preferred;
350  if (matchCombineExtendingLoads(MI, Preferred)) {
351  applyCombineExtendingLoads(MI, Preferred);
352  return true;
353  }
354  return false;
355 }
356 
357 bool CombinerHelper::matchCombineExtendingLoads(MachineInstr &MI,
358                                                 PreferredTuple &Preferred) {
359  // We match the loads and follow the uses to the extend instead of matching
360  // the extends and following the def to the load. This is because the load
361  // must remain in the same position for correctness (unless we also add code
362  // to find a safe place to sink it) whereas the extend is freely movable.
363  // It also prevents us from duplicating the load, which would be wrong for
364  // the volatile case and simply wasteful otherwise.
365 
366  if (MI.getOpcode() != TargetOpcode::G_LOAD &&
367  MI.getOpcode() != TargetOpcode::G_SEXTLOAD &&
368  MI.getOpcode() != TargetOpcode::G_ZEXTLOAD)
369  return false;
370 
371  auto &LoadValue = MI.getOperand(0);
372  assert(LoadValue.isReg() && "Result wasn't a register?");
373 
374  LLT LoadValueTy = MRI.getType(LoadValue.getReg());
375  if (!LoadValueTy.isScalar())
376  return false;
377 
378  // Most architectures are going to legalize loads narrower than s8 into at
379  // least a one byte load, and the MMOs can only describe memory accesses in
380  // multiples of bytes. If we try to perform extload combining on those, we
381  // can end up with %a(s8) = extload %ptr (load 1 byte from %ptr)
382  // ... which is an illegal extload instruction.
383  if (LoadValueTy.getSizeInBits() < 8)
384  return false;
385 
386  // For non power-of-2 types, they will very likely be legalized into multiple
387  // loads. Don't bother trying to match them into extending loads.
388  if (!isPowerOf2_32(LoadValueTy.getSizeInBits()))
389  return false;
390 
391  // Find the preferred type aside from the any-extends (unless it's the only
392  // one) and non-extending ops. We'll emit an extending load to that type and
393  // emit a variant of (extend (trunc X)) for the others according to the
394  // relative type sizes. At the same time, pick an extend to use based on the
395  // extend involved in the chosen type.
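  // E.g. for a plain G_LOAD the search starts from G_ANYEXT so that any defined
  // extension found among the uses can take over as the preferred one, whereas
  // a G_SEXTLOAD or G_ZEXTLOAD seeds the search with its own extension kind.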
396  unsigned PreferredOpcode = MI.getOpcode() == TargetOpcode::G_LOAD
397  ? TargetOpcode::G_ANYEXT
398  : MI.getOpcode() == TargetOpcode::G_SEXTLOAD
399  ? TargetOpcode::G_SEXT
400  : TargetOpcode::G_ZEXT;
401  Preferred = {LLT(), PreferredOpcode, nullptr};
402  for (auto &UseMI : MRI.use_instructions(LoadValue.getReg())) {
403  if (UseMI.getOpcode() == TargetOpcode::G_SEXT ||
404  UseMI.getOpcode() == TargetOpcode::G_ZEXT ||
405  UseMI.getOpcode() == TargetOpcode::G_ANYEXT) {
406  Preferred = ChoosePreferredUse(Preferred,
407  MRI.getType(UseMI.getOperand(0).getReg()),
408  UseMI.getOpcode(), &UseMI);
409  }
410  }
411 
412  // There were no extends
413  if (!Preferred.MI)
414  return false;
415  // It should be impossible to choose an extend without selecting a different
416  // type since by definition the result of an extend is larger.
417  assert(Preferred.Ty != LoadValueTy && "Extending to same type?");
418 
419  LLVM_DEBUG(dbgs() << "Preferred use is: " << *Preferred.MI);
420  return true;
421 }
422 
424  PreferredTuple &Preferred) {
425  // Rewrite the load to the chosen extending load.
426  Register ChosenDstReg = Preferred.MI->getOperand(0).getReg();
427 
428  // Inserter to insert a truncate back to the original type at a given point
429  // with some basic CSE to limit truncate duplication to one per BB.
431  auto InsertTruncAt = [&](MachineBasicBlock *InsertIntoBB,
432  MachineBasicBlock::iterator InsertBefore,
433  MachineOperand &UseMO) {
434  MachineInstr *PreviouslyEmitted = EmittedInsns.lookup(InsertIntoBB);
435  if (PreviouslyEmitted) {
436  Observer.changingInstr(*UseMO.getParent());
437  UseMO.setReg(PreviouslyEmitted->getOperand(0).getReg());
438  Observer.changedInstr(*UseMO.getParent());
439  return;
440  }
441 
442  Builder.setInsertPt(*InsertIntoBB, InsertBefore);
443  Register NewDstReg = MRI.cloneVirtualRegister(MI.getOperand(0).getReg());
444  MachineInstr *NewMI = Builder.buildTrunc(NewDstReg, ChosenDstReg);
445  EmittedInsns[InsertIntoBB] = NewMI;
446  replaceRegOpWith(MRI, UseMO, NewDstReg);
447  };
448 
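  // Rewrite the load itself into the matching extending load (or back into a
  // plain G_LOAD when the preferred use is an any-extend).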
449  Observer.changingInstr(MI);
450  MI.setDesc(
451  Builder.getTII().get(Preferred.ExtendOpcode == TargetOpcode::G_SEXT
452  ? TargetOpcode::G_SEXTLOAD
453  : Preferred.ExtendOpcode == TargetOpcode::G_ZEXT
454  ? TargetOpcode::G_ZEXTLOAD
455  : TargetOpcode::G_LOAD));
456 
457  // Rewrite all the uses to fix up the types.
458  auto &LoadValue = MI.getOperand(0);
459  SmallVector<MachineOperand *, 4> Uses;
460  for (auto &UseMO : MRI.use_operands(LoadValue.getReg()))
461  Uses.push_back(&UseMO);
462 
463  for (auto *UseMO : Uses) {
464  MachineInstr *UseMI = UseMO->getParent();
465 
466  // If the extend is compatible with the preferred extend then we should fix
467  // up the type and extend so that it uses the preferred use.
468  if (UseMI->getOpcode() == Preferred.ExtendOpcode ||
469  UseMI->getOpcode() == TargetOpcode::G_ANYEXT) {
470  Register UseDstReg = UseMI->getOperand(0).getReg();
471  MachineOperand &UseSrcMO = UseMI->getOperand(1);
472  const LLT &UseDstTy = MRI.getType(UseDstReg);
473  if (UseDstReg != ChosenDstReg) {
474  if (Preferred.Ty == UseDstTy) {
475  // If the use has the same type as the preferred use, then merge
476  // the vregs and erase the extend. For example:
477  // %1:_(s8) = G_LOAD ...
478  // %2:_(s32) = G_SEXT %1(s8)
479  // %3:_(s32) = G_ANYEXT %1(s8)
480  // ... = ... %3(s32)
481  // rewrites to:
482  // %2:_(s32) = G_SEXTLOAD ...
483  // ... = ... %2(s32)
484  replaceRegWith(MRI, UseDstReg, ChosenDstReg);
485  Observer.erasingInstr(*UseMO->getParent());
486  UseMO->getParent()->eraseFromParent();
487  } else if (Preferred.Ty.getSizeInBits() < UseDstTy.getSizeInBits()) {
488  // If the preferred size is smaller, then keep the extend but extend
489  // from the result of the extending load. For example:
490  // %1:_(s8) = G_LOAD ...
491  // %2:_(s32) = G_SEXT %1(s8)
492  // %3:_(s64) = G_ANYEXT %1(s8)
493  // ... = ... %3(s64)
494  // rewrites to:
495  // %2:_(s32) = G_SEXTLOAD ...
496  // %3:_(s64) = G_ANYEXT %2:_(s32)
497  // ... = ... %3(s64)
498  replaceRegOpWith(MRI, UseSrcMO, ChosenDstReg);
499  } else {
500  // If the preferred size is larger, then insert a truncate. For
501  // example:
502  //   %1:_(s8) = G_LOAD ...
503  //   %2:_(s64) = G_SEXT %1(s8)
504  //   %3:_(s32) = G_ZEXT %1(s8)
505  //   ... = ... %3(s32)
506  // rewrites to:
507  //   %2:_(s64) = G_SEXTLOAD ...
508  //   %4:_(s8) = G_TRUNC %2:_(s64)
509  //   %3:_(s32) = G_ZEXT %4:_(s8)
510  //   ... = ... %3(s32)
511  InsertInsnsWithoutSideEffectsBeforeUse(Builder, MI, *UseMO,
512  InsertTruncAt);
513  }
514  continue;
515  }
516  // The use is (one of) the uses of the preferred use we chose earlier.
517  // We're going to update the load to def this value later so just erase
518  // the old extend.
519  Observer.erasingInstr(*UseMO->getParent());
520  UseMO->getParent()->eraseFromParent();
521  continue;
522  }
523 
524  // The use isn't an extend. Truncate back to the type we originally loaded.
525  // This is free on many targets.
526  InsertInsnsWithoutSideEffectsBeforeUse(Builder, MI, *UseMO, InsertTruncAt);
527  }
528 
529  MI.getOperand(0).setReg(ChosenDstReg);
530  Observer.changedInstr(MI);
531 }
532 
533 bool CombinerHelper::isPredecessor(MachineInstr &DefMI, MachineInstr &UseMI) {
534  assert(DefMI.getParent() == UseMI.getParent());
535  if (&DefMI == &UseMI)
536  return false;
537 
538  // Loop through the basic block until we find one of the instructions.
539  MachineBasicBlock::const_iterator I = DefMI.getParent()->begin();
540  for (; &*I != &DefMI && &*I != &UseMI; ++I)
541  ;
542 
543  return &*I == &DefMI; // Both are in this block, so one of them was reached.
544 }
545 
546 bool CombinerHelper::dominates(MachineInstr &DefMI, MachineInstr &UseMI) {
547  if (MDT)
548  return MDT->dominates(&DefMI, &UseMI);
549  else if (DefMI.getParent() != UseMI.getParent())
550  return false;
551 
552  return isPredecessor(DefMI, UseMI);
553 }
554 
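// Post-indexed addressing folds an address increment that happens after the
// memory access into the access itself, e.g.
//   %val:_(s32) = G_LOAD %base(p0)
//   %next:_(p0) = G_GEP %base, %offset(s64)
// can become a single G_INDEXED_LOAD that defines both %val and %next.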
555 bool CombinerHelper::findPostIndexCandidate(MachineInstr &MI, Register &Addr,
556                                             Register &Base, Register &Offset) {
557  auto &MF = *MI.getParent()->getParent();
558  const auto &TLI = *MF.getSubtarget().getTargetLowering();
559 
560 #ifndef NDEBUG
561  unsigned Opcode = MI.getOpcode();
562  assert(Opcode == TargetOpcode::G_LOAD || Opcode == TargetOpcode::G_SEXTLOAD ||
563  Opcode == TargetOpcode::G_ZEXTLOAD || Opcode == TargetOpcode::G_STORE);
564 #endif
565 
566  Base = MI.getOperand(1).getReg();
567  MachineInstr *BaseDef = MRI.getUniqueVRegDef(Base);
568  if (BaseDef && BaseDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
569  return false;
570 
571  LLVM_DEBUG(dbgs() << "Searching for post-indexing opportunity for: " << MI);
572 
573  for (auto &Use : MRI.use_instructions(Base)) {
574  if (Use.getOpcode() != TargetOpcode::G_GEP)
575  continue;
576 
577  Offset = Use.getOperand(2).getReg();
578  if (!ForceLegalIndexing &&
579  !TLI.isIndexingLegal(MI, Base, Offset, /*IsPre*/ false, MRI)) {
580  LLVM_DEBUG(dbgs() << " Ignoring candidate with illegal addrmode: "
581  << Use);
582  continue;
583  }
584 
585  // Make sure the offset calculation is before the potentially indexed op.
586  // FIXME: what we really care about here is the dependency; the offset
587  // calculation might be movable.
588  MachineInstr *OffsetDef = MRI.getUniqueVRegDef(Offset);
589  if (!OffsetDef || !dominates(*OffsetDef, MI)) {
590  LLVM_DEBUG(dbgs() << " Ignoring candidate with offset after mem-op: "
591  << Use);
592  continue;
593  }
594 
595  // FIXME: check whether all uses of Base are load/store with foldable
596  // addressing modes. If so, using the normal addr-modes is better than
597  // forming an indexed one.
598 
599  bool MemOpDominatesAddrUses = true;
600  for (auto &GEPUse : MRI.use_instructions(Use.getOperand(0).getReg())) {
601  if (!dominates(MI, GEPUse)) {
602  MemOpDominatesAddrUses = false;
603  break;
604  }
605  }
606 
607  if (!MemOpDominatesAddrUses) {
608  LLVM_DEBUG(
609  dbgs() << " Ignoring candidate as memop does not dominate uses: "
610  << Use);
611  continue;
612  }
613 
614  LLVM_DEBUG(dbgs() << " Found match: " << Use);
615  Addr = Use.getOperand(0).getReg();
616  return true;
617  }
618 
619  return false;
620 }
621 
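// Pre-indexed addressing instead applies to accesses whose address is itself
// produced by a G_GEP, e.g.
//   %addr:_(p0) = G_GEP %base, %offset(s64)
//   G_STORE %val(s32), %addr(p0)
// can become a G_INDEXED_STORE that also defines %addr.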
622 bool CombinerHelper::findPreIndexCandidate(MachineInstr &MI, Register &Addr,
623  Register &Base, Register &Offset) {
624  auto &MF = *MI.getParent()->getParent();
625  const auto &TLI = *MF.getSubtarget().getTargetLowering();
626 
627 #ifndef NDEBUG
628  unsigned Opcode = MI.getOpcode();
629  assert(Opcode == TargetOpcode::G_LOAD || Opcode == TargetOpcode::G_SEXTLOAD ||
630  Opcode == TargetOpcode::G_ZEXTLOAD || Opcode == TargetOpcode::G_STORE);
631 #endif
632 
633  Addr = MI.getOperand(1).getReg();
634  MachineInstr *AddrDef = getOpcodeDef(TargetOpcode::G_GEP, Addr, MRI);
635  if (!AddrDef || MRI.hasOneUse(Addr))
636  return false;
637 
638  Base = AddrDef->getOperand(1).getReg();
639  Offset = AddrDef->getOperand(2).getReg();
640 
641  LLVM_DEBUG(dbgs() << "Found potential pre-indexed load_store: " << MI);
642 
643  if (!ForceLegalIndexing &&
644  !TLI.isIndexingLegal(MI, Base, Offset, /*IsPre*/ true, MRI)) {
645  LLVM_DEBUG(dbgs() << " Skipping, not legal for target");
646  return false;
647  }
648 
649  MachineInstr *BaseDef = getDefIgnoringCopies(Base, MRI);
650  if (BaseDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) {
651  LLVM_DEBUG(dbgs() << " Skipping, frame index would need copy anyway.");
652  return false;
653  }
654 
655  if (MI.getOpcode() == TargetOpcode::G_STORE) {
656  // Would require a copy.
657  if (Base == MI.getOperand(0).getReg()) {
658  LLVM_DEBUG(dbgs() << " Skipping, storing base so need copy anyway.");
659  return false;
660  }
661 
662  // We're expecting one use of Addr in MI, but it could also be the
663  // value stored, which isn't actually dominated by the instruction.
664  if (MI.getOperand(0).getReg() == Addr) {
665  LLVM_DEBUG(dbgs() << " Skipping, does not dominate all addr uses");
666  return false;
667  }
668  }
669 
670  // FIXME: check whether all uses of the base pointer are constant GEPs. That
671  // might allow us to end base's liveness here by adjusting the constant.
672 
673  for (auto &UseMI : MRI.use_instructions(Addr)) {
674  if (!dominates(MI, UseMI)) {
675  LLVM_DEBUG(dbgs() << " Skipping, does not dominate all addr uses.");
676  return false;
677  }
678  }
679 
680  return true;
681 }
682 
683 bool CombinerHelper::tryCombineIndexedLoadStore(MachineInstr &MI) {
684  unsigned Opcode = MI.getOpcode();
685  if (Opcode != TargetOpcode::G_LOAD && Opcode != TargetOpcode::G_SEXTLOAD &&
686  Opcode != TargetOpcode::G_ZEXTLOAD && Opcode != TargetOpcode::G_STORE)
687  return false;
688 
689  bool IsStore = Opcode == TargetOpcode::G_STORE;
690  Register Addr, Base, Offset;
691  bool IsPre = findPreIndexCandidate(MI, Addr, Base, Offset);
692  if (!IsPre && !findPostIndexCandidate(MI, Addr, Base, Offset))
693  return false;
694 
695 
696  unsigned NewOpcode;
697  switch (Opcode) {
698  case TargetOpcode::G_LOAD:
699  NewOpcode = TargetOpcode::G_INDEXED_LOAD;
700  break;
701  case TargetOpcode::G_SEXTLOAD:
702  NewOpcode = TargetOpcode::G_INDEXED_SEXTLOAD;
703  break;
704  case TargetOpcode::G_ZEXTLOAD:
705  NewOpcode = TargetOpcode::G_INDEXED_ZEXTLOAD;
706  break;
707  case TargetOpcode::G_STORE:
708  NewOpcode = TargetOpcode::G_INDEXED_STORE;
709  break;
710  default:
711  llvm_unreachable("Unknown load/store opcode");
712  }
713 
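  // The indexed variants define the written-back address (and, for loads, the
  // loaded value) and take the base, the offset and an immediate that is 1 for
  // pre-indexing and 0 for post-indexing.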
714  MachineInstr &AddrDef = *MRI.getUniqueVRegDef(Addr);
715  MachineIRBuilder MIRBuilder(MI);
716  auto MIB = MIRBuilder.buildInstr(NewOpcode);
717  if (IsStore) {
718  MIB.addDef(Addr);
719  MIB.addUse(MI.getOperand(0).getReg());
720  } else {
721  MIB.addDef(MI.getOperand(0).getReg());
722  MIB.addDef(Addr);
723  }
724 
725  MIB.addUse(Base);
726  MIB.addUse(Offset);
727  MIB.addImm(IsPre);
728  MI.eraseFromParent();
729  AddrDef.eraseFromParent();
730 
731  LLVM_DEBUG(dbgs() << " Combined into indexed operation");
732  return true;
733 }
734 
735 bool CombinerHelper::matchElideBrByInvertingCond(MachineInstr &MI) {
736  if (MI.getOpcode() != TargetOpcode::G_BR)
737  return false;
738 
739  // Try to match the following:
740  // bb1:
741  // %c(s32) = G_ICMP pred, %a, %b
742  // %c1(s1) = G_TRUNC %c(s32)
743  // G_BRCOND %c1, %bb2
744  // G_BR %bb3
745  // bb2:
746  // ...
747  // bb3:
748 
749  // The above pattern does not have a fallthrough to the successor bb2, so a
750  // branch is always taken no matter which path is chosen. Here we try to find
751  // that pattern and replace it with a conditional branch to bb3 plus a
752  // fallthrough to bb2.
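  // i.e. rewrite it to:
  // bb1:
  //   %c(s32) = G_ICMP <inverted pred>, %a, %b
  //   %c1(s1) = G_TRUNC %c(s32)
  //   G_BRCOND %c1, %bb3
  // bb2:
  //   ...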
753 
754  MachineBasicBlock *MBB = MI.getParent();
755  MachineBasicBlock::iterator BrIt(MI);
756  if (BrIt == MBB->begin())
757  return false;
758  assert(std::next(BrIt) == MBB->end() && "expected G_BR to be a terminator");
759 
760  MachineInstr *BrCond = &*std::prev(BrIt);
761  if (BrCond->getOpcode() != TargetOpcode::G_BRCOND)
762  return false;
763 
764  // Check that the next block is the conditional branch target.
765  if (!MBB->isLayoutSuccessor(BrCond->getOperand(1).getMBB()))
766  return false;
767 
768  MachineInstr *CmpMI = MRI.getVRegDef(BrCond->getOperand(0).getReg());
769  if (!CmpMI || CmpMI->getOpcode() != TargetOpcode::G_ICMP ||
770  !MRI.hasOneUse(CmpMI->getOperand(0).getReg()))
771  return false;
772  return true;
773 }
774 
775 bool CombinerHelper::tryElideBrByInvertingCond(MachineInstr &MI) {
776  if (!matchElideBrByInvertingCond(MI))
777  return false;
778  applyElideBrByInvertingCond(MI);
779  return true;
780 }
781 
782 void CombinerHelper::applyElideBrByInvertingCond(MachineInstr &MI) {
783  MachineBasicBlock *BrTarget = MI.getOperand(0).getMBB();
784  MachineBasicBlock::iterator BrIt(MI);
785  MachineInstr *BrCond = &*std::prev(BrIt);
786  MachineInstr *CmpMI = MRI.getVRegDef(BrCond->getOperand(0).getReg());
787 
788  CmpInst::Predicate InversePred = CmpInst::getInversePredicate(
789      (CmpInst::Predicate)CmpMI->getOperand(1).getPredicate());
790 
791  // Invert the G_ICMP condition.
792  Observer.changingInstr(*CmpMI);
793  CmpMI->getOperand(1).setPredicate(InversePred);
794  Observer.changedInstr(*CmpMI);
795 
796  // Change the conditional branch target.
797  Observer.changingInstr(*BrCond);
798  BrCond->getOperand(1).setMBB(BrTarget);
799  Observer.changedInstr(*BrCond);
800  MI.eraseFromParent();
801 }
802 
803 static bool shouldLowerMemFuncForSize(const MachineFunction &MF) {
804  // On Darwin, -Os means optimize for size without hurting performance, so
805  // only really optimize for size when -Oz (MinSize) is used.
806  if (MF.getTarget().getTargetTriple().isOSDarwin())
807  return MF.getFunction().hasMinSize();
808  return MF.getFunction().hasOptSize();
809 }
810 
811 // Returns a list of types to use for memory op lowering in MemOps. A partial
812 // port of findOptimalMemOpLowering in TargetLowering.
813 static bool findGISelOptimalMemOpLowering(
814     std::vector<LLT> &MemOps, unsigned Limit, uint64_t Size, unsigned DstAlign,
815  unsigned SrcAlign, bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc,
816  bool AllowOverlap, unsigned DstAS, unsigned SrcAS,
817  const AttributeList &FuncAttributes, const TargetLowering &TLI) {
818  // If 'SrcAlign' is zero, that means the memory operation does not need to
819  // load the value, i.e. memset or memcpy from constant string. Otherwise,
820  // it's the inferred alignment of the source. 'DstAlign', on the other hand,
821  // is the specified alignment of the memory operation. If it is zero, that
822  // means it's possible to change the alignment of the destination.
823  // 'MemcpyStrSrc' indicates whether the memcpy source is constant so it does
824  // not need to be loaded.
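  // For example (assuming a target whose getOptimalMemOpLLT returns s64 and
  // which reports fast misaligned accesses), a 15 byte operation is lowered to
  // MemOps = {s64, s64} when overlapping accesses are allowed, i.e. a second
  // 8 byte access covering the trailing 7 bytes, and to {s64, s32, s16, s8}
  // otherwise.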
825  if (SrcAlign != 0 && SrcAlign < DstAlign)
826  return false;
827 
828  LLT Ty = TLI.getOptimalMemOpLLT(Size, DstAlign, SrcAlign, IsMemset,
829  ZeroMemset, MemcpyStrSrc, FuncAttributes);
830 
831  if (Ty == LLT()) {
832  // Use the largest scalar type whose alignment constraints are satisfied.
833  // We only need to check DstAlign here as SrcAlign is always greater or
834  // equal to DstAlign (or zero).
835  Ty = LLT::scalar(64);
836  while (DstAlign && DstAlign < Ty.getSizeInBytes() &&
837  !TLI.allowsMisalignedMemoryAccesses(Ty, DstAS, DstAlign))
838  Ty = LLT::scalar(Ty.getSizeInBytes());
839  assert(Ty.getSizeInBits() > 0 && "Could not find valid type");
840  // FIXME: check for the largest legal type we can load/store to.
841  }
842 
843  unsigned NumMemOps = 0;
844  while (Size != 0) {
845  unsigned TySize = Ty.getSizeInBytes();
846  while (TySize > Size) {
847  // For now, only use non-vector loads / stores for the left-over pieces.
848  LLT NewTy = Ty;
849  // FIXME: check for mem op safety and legality of the types. Not all of
850  // SDAGisms map cleanly to GISel concepts.
851  if (NewTy.isVector())
852  NewTy = NewTy.getSizeInBits() > 64 ? LLT::scalar(64) : LLT::scalar(32);
853  NewTy = LLT::scalar(PowerOf2Floor(NewTy.getSizeInBits() - 1));
854  unsigned NewTySize = NewTy.getSizeInBytes();
855  assert(NewTySize > 0 && "Could not find appropriate type");
856 
857  // If the new LLT cannot cover all of the remaining bits, then consider
858  // issuing a (or a pair of) unaligned and overlapping load / store.
859  bool Fast;
860  // Need to get a VT equivalent for allowMisalignedMemoryAccesses().
861  MVT VT = getMVTForLLT(Ty);
862  if (NumMemOps && AllowOverlap && NewTySize < Size &&
863  TLI.allowsMisalignedMemoryAccesses(
864  VT, DstAS, DstAlign, MachineMemOperand::MONone, &Fast) &&
865  Fast)
866  TySize = Size;
867  else {
868  Ty = NewTy;
869  TySize = NewTySize;
870  }
871  }
872 
873  if (++NumMemOps > Limit)
874  return false;
875 
876  MemOps.push_back(Ty);
877  Size -= TySize;
878  }
879 
880  return true;
881 }
882 
883 static Type *getTypeForLLT(LLT Ty, LLVMContext &C) {
884  if (Ty.isVector())
885  return VectorType::get(IntegerType::get(C, Ty.getScalarSizeInBits()),
886  Ty.getNumElements());
887  return IntegerType::get(C, Ty.getSizeInBits());
888 }
889 
890 // Get a vectorized representation of the memset value operand, GISel edition.
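// For example, a memset value of 0xAB replicated into an s32 store type
// becomes the constant 0xABABABAB, either built directly when the value is a
// known constant or via a multiply by the magic constant 0x01010101 otherwise.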
891 static Register getMemsetValue(Register Val, LLT Ty, MachineIRBuilder &MIB) {
892  MachineRegisterInfo &MRI = *MIB.getMRI();
893  unsigned NumBits = Ty.getScalarSizeInBits();
894  auto ValVRegAndVal = getConstantVRegValWithLookThrough(Val, MRI);
895  if (!Ty.isVector() && ValVRegAndVal) {
896  unsigned KnownVal = ValVRegAndVal->Value;
897  APInt Scalar = APInt(8, KnownVal);
898  APInt SplatVal = APInt::getSplat(NumBits, Scalar);
899  return MIB.buildConstant(Ty, SplatVal).getReg(0);
900  }
901  // FIXME: for vector types create a G_BUILD_VECTOR.
902  if (Ty.isVector())
903  return Register();
904 
905  // Extend the byte value to the larger type, and then multiply by a magic
906  // value 0x010101... in order to replicate it across every byte.
907  LLT ExtType = Ty.getScalarType();
908  auto ZExt = MIB.buildZExtOrTrunc(ExtType, Val);
909  if (NumBits > 8) {
910  APInt Magic = APInt::getSplat(NumBits, APInt(8, 0x01));
911  auto MagicMI = MIB.buildConstant(ExtType, Magic);
912  Val = MIB.buildMul(ExtType, ZExt, MagicMI).getReg(0);
913  }
914 
915  assert(ExtType == Ty && "Vector memset value type not supported yet");
916  return Val;
917 }
918 
919 bool CombinerHelper::optimizeMemset(MachineInstr &MI, Register Dst, Register Val,
920  unsigned KnownLen, unsigned Align,
921  bool IsVolatile) {
922  auto &MF = *MI.getParent()->getParent();
923  const auto &TLI = *MF.getSubtarget().getTargetLowering();
924  auto &DL = MF.getDataLayout();
925  LLVMContext &C = MF.getFunction().getContext();
926 
927  assert(KnownLen != 0 && "Have a zero length memset length!");
928 
929  bool DstAlignCanChange = false;
930  MachineFrameInfo &MFI = MF.getFrameInfo();
931  bool OptSize = shouldLowerMemFuncForSize(MF);
932 
933  MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
934  if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
935  DstAlignCanChange = true;
936 
937  unsigned Limit = TLI.getMaxStoresPerMemset(OptSize);
938  std::vector<LLT> MemOps;
939 
940  const auto &DstMMO = **MI.memoperands_begin();
941  MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
942 
943  auto ValVRegAndVal = getConstantVRegValWithLookThrough(Val, MRI);
944  bool IsZeroVal = ValVRegAndVal && ValVRegAndVal->Value == 0;
945 
946  if (!findGISelOptimalMemOpLowering(
947  MemOps, Limit, KnownLen, (DstAlignCanChange ? 0 : Align), 0,
948  /*IsMemset=*/true,
949  /*ZeroMemset=*/IsZeroVal, /*MemcpyStrSrc=*/false,
950  /*AllowOverlap=*/!IsVolatile, DstPtrInfo.getAddrSpace(), ~0u,
951  MF.getFunction().getAttributes(), TLI))
952  return false;
953 
954  if (DstAlignCanChange) {
955  // Get an estimate of the type from the LLT.
956  Type *IRTy = getTypeForLLT(MemOps[0], C);
957  unsigned NewAlign = (unsigned)DL.getABITypeAlignment(IRTy);
958  if (NewAlign > Align) {
959  Align = NewAlign;
960  unsigned FI = FIDef->getOperand(1).getIndex();
961  // Give the stack frame object a larger alignment if needed.
962  if (MFI.getObjectAlignment(FI) < Align)
963  MFI.setObjectAlignment(FI, Align);
964  }
965  }
966 
967  MachineIRBuilder MIB(MI);
968  // Find the largest store and generate the bit pattern for it.
969  LLT LargestTy = MemOps[0];
970  for (unsigned i = 1; i < MemOps.size(); i++)
971  if (MemOps[i].getSizeInBits() > LargestTy.getSizeInBits())
972  LargestTy = MemOps[i];
973 
974  // The memset stored value is always defined as an s8, so in order to make it
975  // work with larger store types we need to repeat the bit pattern across the
976  // wider type.
977  Register MemSetValue = getMemsetValue(Val, LargestTy, MIB);
978 
979  if (!MemSetValue)
980  return false;
981 
982  // Generate the stores. For each store type in the list, we generate the
983  // matching store of that type to the destination address.
984  LLT PtrTy = MRI.getType(Dst);
985  unsigned DstOff = 0;
986  unsigned Size = KnownLen;
987  for (unsigned I = 0; I < MemOps.size(); I++) {
988  LLT Ty = MemOps[I];
989  unsigned TySize = Ty.getSizeInBytes();
990  if (TySize > Size) {
991  // Issuing an unaligned load / store pair that overlaps with the previous
992  // pair. Adjust the offset accordingly.
993  assert(I == MemOps.size() - 1 && I != 0);
994  DstOff -= TySize - Size;
995  }
996 
997  // If this store is smaller than the largest store, see whether we can get
998  // the smaller value for free with a truncate.
999  Register Value = MemSetValue;
1000  if (Ty.getSizeInBits() < LargestTy.getSizeInBits()) {
1001  MVT VT = getMVTForLLT(Ty);
1002  MVT LargestVT = getMVTForLLT(LargestTy);
1003  if (!LargestTy.isVector() && !Ty.isVector() &&
1004  TLI.isTruncateFree(LargestVT, VT))
1005  Value = MIB.buildTrunc(Ty, MemSetValue).getReg(0);
1006  else
1007  Value = getMemsetValue(Val, Ty, MIB);
1008  if (!Value)
1009  return false;
1010  }
1011 
1012  auto *StoreMMO =
1013  MF.getMachineMemOperand(&DstMMO, DstOff, Ty.getSizeInBytes());
1014 
1015  Register Ptr = Dst;
1016  if (DstOff != 0) {
1017  auto Offset =
1018  MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), DstOff);
1019  Ptr = MIB.buildGEP(PtrTy, Dst, Offset).getReg(0);
1020  }
1021 
1022  MIB.buildStore(Value, Ptr, *StoreMMO);
1023  DstOff += Ty.getSizeInBytes();
1024  Size -= TySize;
1025  }
1026 
1027  MI.eraseFromParent();
1028  return true;
1029 }
1030 
1031 
1032 bool CombinerHelper::optimizeMemcpy(MachineInstr &MI, Register Dst,
1033  Register Src, unsigned KnownLen,
1034  unsigned DstAlign, unsigned SrcAlign,
1035  bool IsVolatile) {
1036  auto &MF = *MI.getParent()->getParent();
1037  const auto &TLI = *MF.getSubtarget().getTargetLowering();
1038  auto &DL = MF.getDataLayout();
1039  LLVMContext &C = MF.getFunction().getContext();
1040 
1041  assert(KnownLen != 0 && "Have a zero length memcpy length!");
1042 
1043  bool DstAlignCanChange = false;
1044  MachineFrameInfo &MFI = MF.getFrameInfo();
1045  bool OptSize = shouldLowerMemFuncForSize(MF);
1046  unsigned Alignment = MinAlign(DstAlign, SrcAlign);
1047 
1048  MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
1049  if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
1050  DstAlignCanChange = true;
1051 
1052  // FIXME: infer better src pointer alignment like SelectionDAG does here.
1053  // FIXME: also use the equivalent of isMemSrcFromConstant and alwaysinlining
1054  // if the memcpy is in a tail call position.
1055 
1056  unsigned Limit = TLI.getMaxStoresPerMemcpy(OptSize);
1057  std::vector<LLT> MemOps;
1058 
1059  const auto &DstMMO = **MI.memoperands_begin();
1060  const auto &SrcMMO = **std::next(MI.memoperands_begin());
1061  MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
1062  MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();
1063 
1064  if (!findGISelOptimalMemOpLowering(
1065  MemOps, Limit, KnownLen, (DstAlignCanChange ? 0 : Alignment),
1066  SrcAlign,
1067  /*IsMemset=*/false,
1068  /*ZeroMemset=*/false, /*MemcpyStrSrc=*/false,
1069  /*AllowOverlap=*/!IsVolatile, DstPtrInfo.getAddrSpace(),
1070  SrcPtrInfo.getAddrSpace(), MF.getFunction().getAttributes(), TLI))
1071  return false;
1072 
1073  if (DstAlignCanChange) {
1074  // Get an estimate of the type from the LLT.
1075  Type *IRTy = getTypeForLLT(MemOps[0], C);
1076  unsigned NewAlign = (unsigned)DL.getABITypeAlignment(IRTy);
1077 
1078  // Don't promote to an alignment that would require dynamic stack
1079  // realignment.
1080  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
1081  if (!TRI->needsStackRealignment(MF))
1082  while (NewAlign > Alignment &&
1083  DL.exceedsNaturalStackAlignment(Align(NewAlign)))
1084  NewAlign /= 2;
1085 
1086  if (NewAlign > Alignment) {
1087  Alignment = NewAlign;
1088  unsigned FI = FIDef->getOperand(1).getIndex();
1089  // Give the stack frame object a larger alignment if needed.
1090  if (MFI.getObjectAlignment(FI) < Alignment)
1091  MFI.setObjectAlignment(FI, Alignment);
1092  }
1093  }
1094 
1095  LLVM_DEBUG(dbgs() << "Inlining memcpy: " << MI << " into loads & stores\n");
1096 
1097  MachineIRBuilder MIB(MI);
1098  // Now we need to emit a load/store pair for each of the types we've
1099  // collected. I.e. for each type, generate a load from the source pointer of
1100  // that type width, and then generate a corresponding store to the dest buffer
1101  // of that value loaded. This can result in a sequence of loads and stores of
1102  // mixed types, depending on what the target specifies as good types to use.
1103  unsigned CurrOffset = 0;
1104  LLT PtrTy = MRI.getType(Src);
1105  unsigned Size = KnownLen;
1106  for (auto CopyTy : MemOps) {
1107  // Issuing an unaligned load / store pair that overlaps with the previous
1108  // pair. Adjust the offset accordingly.
1109  if (CopyTy.getSizeInBytes() > Size)
1110  CurrOffset -= CopyTy.getSizeInBytes() - Size;
1111 
1112  // Construct MMOs for the accesses.
1113  auto *LoadMMO =
1114  MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());
1115  auto *StoreMMO =
1116  MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());
1117 
1118  // Create the load.
1119  Register LoadPtr = Src;
1120  Register Offset;
1121  if (CurrOffset != 0) {
1122  Offset = MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset)
1123  .getReg(0);
1124  LoadPtr = MIB.buildGEP(PtrTy, Src, Offset).getReg(0);
1125  }
1126  auto LdVal = MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO);
1127 
1128  // Create the store.
1129  Register StorePtr =
1130  CurrOffset == 0 ? Dst : MIB.buildGEP(PtrTy, Dst, Offset).getReg(0);
1131  MIB.buildStore(LdVal, StorePtr, *StoreMMO);
1132  CurrOffset += CopyTy.getSizeInBytes();
1133  Size -= CopyTy.getSizeInBytes();
1134  }
1135 
1136  MI.eraseFromParent();
1137  return true;
1138 }
1139 
1140 bool CombinerHelper::optimizeMemmove(MachineInstr &MI, Register Dst,
1141  Register Src, unsigned KnownLen,
1142  unsigned DstAlign, unsigned SrcAlign,
1143  bool IsVolatile) {
1144  auto &MF = *MI.getParent()->getParent();
1145  const auto &TLI = *MF.getSubtarget().getTargetLowering();
1146  auto &DL = MF.getDataLayout();
1147  LLVMContext &C = MF.getFunction().getContext();
1148 
1149  assert(KnownLen != 0 && "Have a zero length memmove length!");
1150 
1151  bool DstAlignCanChange = false;
1152  MachineFrameInfo &MFI = MF.getFrameInfo();
1153  bool OptSize = shouldLowerMemFuncForSize(MF);
1154  unsigned Alignment = MinAlign(DstAlign, SrcAlign);
1155 
1156  MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
1157  if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
1158  DstAlignCanChange = true;
1159 
1160  unsigned Limit = TLI.getMaxStoresPerMemmove(OptSize);
1161  std::vector<LLT> MemOps;
1162 
1163  const auto &DstMMO = **MI.memoperands_begin();
1164  const auto &SrcMMO = **std::next(MI.memoperands_begin());
1165  MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
1166  MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();
1167 
1168  // FIXME: SelectionDAG always passes false for 'AllowOverlap', apparently due
1169  // to a bug in its findOptimalMemOpLowering implementation. For now do the
1170  // same thing here.
1171  if (!findGISelOptimalMemOpLowering(
1172  MemOps, Limit, KnownLen, (DstAlignCanChange ? 0 : Alignment),
1173  SrcAlign,
1174  /*IsMemset=*/false,
1175  /*ZeroMemset=*/false, /*MemcpyStrSrc=*/false,
1176  /*AllowOverlap=*/false, DstPtrInfo.getAddrSpace(),
1177  SrcPtrInfo.getAddrSpace(), MF.getFunction().getAttributes(), TLI))
1178  return false;
1179 
1180  if (DstAlignCanChange) {
1181  // Get an estimate of the type from the LLT.
1182  Type *IRTy = getTypeForLLT(MemOps[0], C);
1183  unsigned NewAlign = (unsigned)DL.getABITypeAlignment(IRTy);
1184 
1185  // Don't promote to an alignment that would require dynamic stack
1186  // realignment.
1187  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
1188  if (!TRI->needsStackRealignment(MF))
1189  while (NewAlign > Alignment &&
1190  DL.exceedsNaturalStackAlignment(Align(NewAlign)))
1191  NewAlign /= 2;
1192 
1193  if (NewAlign > Alignment) {
1194  Alignment = NewAlign;
1195  unsigned FI = FIDef->getOperand(1).getIndex();
1196  // Give the stack frame object a larger alignment if needed.
1197  if (MFI.getObjectAlignment(FI) < Alignment)
1198  MFI.setObjectAlignment(FI, Alignment);
1199  }
1200  }
1201 
1202  LLVM_DEBUG(dbgs() << "Inlining memmove: " << MI << " into loads & stores\n");
1203 
1204  MachineIRBuilder MIB(MI);
1205  // Memmove requires that we perform the loads first before issuing the stores.
1206  // Apart from that, this loop is pretty much doing the same thing as the
1207  // memcpy codegen function.
1208  unsigned CurrOffset = 0;
1209  LLT PtrTy = MRI.getType(Src);
1210  SmallVector<Register, 16> LoadVals;
1211  for (auto CopyTy : MemOps) {
1212  // Construct MMO for the load.
1213  auto *LoadMMO =
1214  MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());
1215 
1216  // Create the load.
1217  Register LoadPtr = Src;
1218  if (CurrOffset != 0) {
1219  auto Offset =
1220  MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset);
1221  LoadPtr = MIB.buildGEP(PtrTy, Src, Offset).getReg(0);
1222  }
1223  LoadVals.push_back(MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO).getReg(0));
1224  CurrOffset += CopyTy.getSizeInBytes();
1225  }
1226 
1227  CurrOffset = 0;
1228  for (unsigned I = 0; I < MemOps.size(); ++I) {
1229  LLT CopyTy = MemOps[I];
1230  // Now store the values loaded.
1231  auto *StoreMMO =
1232  MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());
1233 
1234  Register StorePtr = Dst;
1235  if (CurrOffset != 0) {
1236  auto Offset =
1237  MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset);
1238  StorePtr = MIB.buildGEP(PtrTy, Dst, Offset).getReg(0);
1239  }
1240  MIB.buildStore(LoadVals[I], StorePtr, *StoreMMO);
1241  CurrOffset += CopyTy.getSizeInBytes();
1242  }
1243  MI.eraseFromParent();
1244  return true;
1245 }
1246 
1247 bool CombinerHelper::tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen) {
1248  // This combine is fairly complex so it's not written with a separate
1249  // matcher function.
1250  assert(MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
1251  Intrinsic::ID ID = (Intrinsic::ID)MI.getIntrinsicID();
1252  assert((ID == Intrinsic::memcpy || ID == Intrinsic::memmove ||
1253  ID == Intrinsic::memset) &&
1254  "Expected a memcpy like intrinsic");
1255 
1256  auto MMOIt = MI.memoperands_begin();
1257  const MachineMemOperand *MemOp = *MMOIt;
1258  bool IsVolatile = MemOp->isVolatile();
1259  // Don't try to optimize volatile.
1260  if (IsVolatile)
1261  return false;
1262 
1263  unsigned DstAlign = MemOp->getBaseAlignment();
1264  unsigned SrcAlign = 0;
1265  Register Dst = MI.getOperand(1).getReg();
1266  Register Src = MI.getOperand(2).getReg();
1267  Register Len = MI.getOperand(3).getReg();
1268 
1269  if (ID != Intrinsic::memset) {
1270  assert(MMOIt != MI.memoperands_end() && "Expected a second MMO on MI");
1271  MemOp = *(++MMOIt);
1272  SrcAlign = MemOp->getBaseAlignment();
1273  }
1274 
1275  // See if this is a constant length copy
1276  auto LenVRegAndVal = getConstantVRegValWithLookThrough(Len, MRI);
1277  if (!LenVRegAndVal)
1278  return false; // Leave it to the legalizer to lower it to a libcall.
1279  unsigned KnownLen = LenVRegAndVal->Value;
1280 
1281  if (KnownLen == 0) {
1282  MI.eraseFromParent();
1283  return true;
1284  }
1285 
1286  if (MaxLen && KnownLen > MaxLen)
1287  return false;
1288 
1289  if (ID == Intrinsic::memcpy)
1290  return optimizeMemcpy(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, IsVolatile);
1291  if (ID == Intrinsic::memmove)
1292  return optimizeMemmove(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, IsVolatile);
1293  if (ID == Intrinsic::memset)
1294  return optimizeMemset(MI, Dst, Src, KnownLen, DstAlign, IsVolatile);
1295  return false;
1296 }
1297 
1298 bool CombinerHelper::tryCombine(MachineInstr &MI) {
1299  if (tryCombineCopy(MI))
1300  return true;
1301  if (tryCombineExtendingLoads(MI))
1302  return true;
1303  if (tryCombineIndexedLoadStore(MI))
1304  return true;
1305  return false;
1306 }