VectorCombine.cpp
1 //===------- VectorCombine.cpp - Optimize partial vector operations -------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass optimizes scalar/vector interactions using target cost models. The
10 // transforms implemented here may not fit in traditional loop-based or SLP
11 // vectorization passes.
12 //
13 //===----------------------------------------------------------------------===//
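//
// As an illustrative sketch, one kind of rewrite performed below is:
//   %e0 = extractelement <4 x i32> %x, i32 0
//   %e1 = extractelement <4 x i32> %y, i32 0
//   %r  = add i32 %e0, %e1
// -->
//   %v  = add <4 x i32> %x, %y
//   %r  = extractelement <4 x i32> %v, i32 0
// applied only when the target cost model does not consider the vector form
// more expensive than the scalar form.
//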
14 
15 #include "llvm/Transforms/Vectorize/VectorCombine.h"
16 #include "llvm/ADT/Statistic.h"
17 #include "llvm/Analysis/AssumptionCache.h"
18 #include "llvm/Analysis/BasicAliasAnalysis.h"
19 #include "llvm/Analysis/GlobalsModRef.h"
20 #include "llvm/Analysis/Loads.h"
21 #include "llvm/Analysis/TargetTransformInfo.h"
22 #include "llvm/Analysis/ValueTracking.h"
23 #include "llvm/Analysis/VectorUtils.h"
24 #include "llvm/IR/Dominators.h"
25 #include "llvm/IR/Function.h"
26 #include "llvm/IR/IRBuilder.h"
27 #include "llvm/IR/PatternMatch.h"
28 #include "llvm/InitializePasses.h"
29 #include "llvm/Pass.h"
30 #include "llvm/Support/CommandLine.h"
33 
34 #define DEBUG_TYPE "vector-combine"
35 #include "llvm/Transforms/Utils/InstructionWorklist.h"
36 
37 using namespace llvm;
38 using namespace llvm::PatternMatch;
39 
40 STATISTIC(NumVecLoad, "Number of vector loads formed");
41 STATISTIC(NumVecCmp, "Number of vector compares formed");
42 STATISTIC(NumVecBO, "Number of vector binops formed");
43 STATISTIC(NumVecCmpBO, "Number of vector compare + binop formed");
44 STATISTIC(NumShufOfBitcast, "Number of shuffles moved after bitcast");
45 STATISTIC(NumScalarBO, "Number of scalar binops formed");
46 STATISTIC(NumScalarCmp, "Number of scalar compares formed");
47 
48 static cl::opt<bool> DisableVectorCombine(
49  "disable-vector-combine", cl::init(false), cl::Hidden,
50  cl::desc("Disable all vector combine transforms"));
51 
52 static cl::opt<bool> DisableBinopExtractShuffle(
53  "disable-binop-extract-shuffle", cl::init(false), cl::Hidden,
54  cl::desc("Disable binop extract to shuffle transforms"));
55 
56 static cl::opt<unsigned> MaxInstrsToScan(
57  "vector-combine-max-scan-instrs", cl::init(30), cl::Hidden,
58  cl::desc("Max number of instructions to scan for vector combining."));
59 
60 static const unsigned InvalidIndex = std::numeric_limits<unsigned>::max();
61 
62 namespace {
63 class VectorCombine {
64 public:
65  VectorCombine(Function &F, const TargetTransformInfo &TTI,
66  const DominatorTree &DT, AAResults &AA, AssumptionCache &AC,
67  bool ScalarizationOnly)
68  : F(F), Builder(F.getContext()), TTI(TTI), DT(DT), AA(AA), AC(AC),
69  ScalarizationOnly(ScalarizationOnly) {}
70 
71  bool run();
72 
73 private:
74  Function &F;
76  const TargetTransformInfo &TTI;
77  const DominatorTree &DT;
78  AAResults &AA;
79  AssumptionCache &AC;
80 
81  /// If true only perform scalarization combines and do not introduce new
82  /// vector operations.
83  bool ScalarizationOnly;
84 
85  InstructionWorklist Worklist;
86 
87  bool vectorizeLoadInsert(Instruction &I);
88  ExtractElementInst *getShuffleExtract(ExtractElementInst *Ext0,
89  ExtractElementInst *Ext1,
90  unsigned PreferredExtractIndex) const;
91  bool isExtractExtractCheap(ExtractElementInst *Ext0, ExtractElementInst *Ext1,
92  const Instruction &I,
93  ExtractElementInst *&ConvertToShuffle,
94  unsigned PreferredExtractIndex);
95  void foldExtExtCmp(ExtractElementInst *Ext0, ExtractElementInst *Ext1,
96  Instruction &I);
97  void foldExtExtBinop(ExtractElementInst *Ext0, ExtractElementInst *Ext1,
98  Instruction &I);
99  bool foldExtractExtract(Instruction &I);
100  bool foldBitcastShuf(Instruction &I);
101  bool scalarizeBinopOrCmp(Instruction &I);
102  bool foldExtractedCmps(Instruction &I);
103  bool foldSingleElementStore(Instruction &I);
104  bool scalarizeLoadExtract(Instruction &I);
105  bool foldShuffleOfBinops(Instruction &I);
106  bool foldShuffleFromReductions(Instruction &I);
107  bool foldSelectShuffle(Instruction &I, bool FromReduction = false);
108 
109  void replaceValue(Value &Old, Value &New) {
110  Old.replaceAllUsesWith(&New);
111  if (auto *NewI = dyn_cast<Instruction>(&New)) {
112  New.takeName(&Old);
113  Worklist.pushUsersToWorkList(*NewI);
114  Worklist.pushValue(NewI);
115  }
116  Worklist.pushValue(&Old);
117  }
118 
119  void eraseInstruction(Instruction &I) {
120  for (Value *Op : I.operands())
121  Worklist.pushValue(Op);
122  Worklist.remove(&I);
123  I.eraseFromParent();
124  }
125 };
126 } // namespace
127 
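// Widen a scalar load that feeds an insertelement into a full vector load when
// that is safe and profitable. Illustrative sketch (assuming a 128-bit vector
// register and a dereferenceable source pointer):
//   %s = load float, float* %p, align 4
//   %i = insertelement <4 x float> undef, float %s, i32 0
// -->
//   %v = load <4 x float>, <4 x float>* %p, align 4   ; after a pointer bitcast
//   %i = shufflevector <4 x float> %v, <4 x float> poison,
//        <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>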
128 bool VectorCombine::vectorizeLoadInsert(Instruction &I) {
129  // Match insert into fixed vector of scalar value.
130  // TODO: Handle non-zero insert index.
131  auto *Ty = dyn_cast<FixedVectorType>(I.getType());
132  Value *Scalar;
133  if (!Ty || !match(&I, m_InsertElt(m_Undef(), m_Value(Scalar), m_ZeroInt())) ||
134  !Scalar->hasOneUse())
135  return false;
136 
137  // Optionally match an extract from another vector.
138  Value *X;
139  bool HasExtract = match(Scalar, m_ExtractElt(m_Value(X), m_ZeroInt()));
140  if (!HasExtract)
141  X = Scalar;
142 
143  // Match source value as load of scalar or vector.
144  // Do not vectorize scalar load (widening) if atomic/volatile or under
145  // asan/hwasan/memtag/tsan. The widened load may load data from dirty regions
146  // or create data races non-existent in the source.
147  auto *Load = dyn_cast<LoadInst>(X);
148  if (!Load || !Load->isSimple() || !Load->hasOneUse() ||
149  Load->getFunction()->hasFnAttribute(Attribute::SanitizeMemTag) ||
150  mustSuppressSpeculation(*Load))
151  return false;
152 
153  const DataLayout &DL = I.getModule()->getDataLayout();
154  Value *SrcPtr = Load->getPointerOperand()->stripPointerCasts();
155  assert(isa<PointerType>(SrcPtr->getType()) && "Expected a pointer type");
156 
157  unsigned AS = Load->getPointerAddressSpace();
158 
159  // We are potentially transforming byte-sized (8-bit) memory accesses, so make
160  // sure we have all of our type-based constraints in place for this target.
161  Type *ScalarTy = Scalar->getType();
162  uint64_t ScalarSize = ScalarTy->getPrimitiveSizeInBits();
163  unsigned MinVectorSize = TTI.getMinVectorRegisterBitWidth();
164  if (!ScalarSize || !MinVectorSize || MinVectorSize % ScalarSize != 0 ||
165  ScalarSize % 8 != 0)
166  return false;
167 
168  // Check safety of replacing the scalar load with a larger vector load.
169  // We use minimal alignment (maximum flexibility) because we only care about
170  // the dereferenceable region. When calculating cost and creating a new op,
171  // we may use a larger value based on alignment attributes.
172  unsigned MinVecNumElts = MinVectorSize / ScalarSize;
173  auto *MinVecTy = VectorType::get(ScalarTy, MinVecNumElts, false);
174  unsigned OffsetEltIndex = 0;
175  Align Alignment = Load->getAlign();
176  if (!isSafeToLoadUnconditionally(SrcPtr, MinVecTy, Align(1), DL, Load, &DT)) {
177  // It is not safe to load directly from the pointer, but we can still peek
178  // through gep offsets and check if it safe to load from a base address with
179  // updated alignment. If it is, we can shuffle the element(s) into place
180  // after loading.
181  unsigned OffsetBitWidth = DL.getIndexTypeSizeInBits(SrcPtr->getType());
182  APInt Offset(OffsetBitWidth, 0);
183  SrcPtr = SrcPtr->stripAndAccumulateInBoundsConstantOffsets(DL, Offset);
184 
185  // We want to shuffle the result down from a high element of a vector, so
186  // the offset must be positive.
187  if (Offset.isNegative())
188  return false;
189 
190  // The offset must be a multiple of the scalar element to shuffle cleanly
191  // in the element's size.
192  uint64_t ScalarSizeInBytes = ScalarSize / 8;
193  if (Offset.urem(ScalarSizeInBytes) != 0)
194  return false;
195 
196  // If we load MinVecNumElts, will our target element still be loaded?
197  OffsetEltIndex = Offset.udiv(ScalarSizeInBytes).getZExtValue();
198  if (OffsetEltIndex >= MinVecNumElts)
199  return false;
200 
201  if (!isSafeToLoadUnconditionally(SrcPtr, MinVecTy, Align(1), DL, Load, &DT))
202  return false;
203 
204  // Update alignment with offset value. Note that the offset could be negated
205  // to more accurately represent "(new) SrcPtr - Offset = (old) SrcPtr", but
206  // negation does not change the result of the alignment calculation.
207  Alignment = commonAlignment(Alignment, Offset.getZExtValue());
208  }
209 
210  // Original pattern: insertelt undef, load [free casts of] PtrOp, 0
211  // Use the greater of the alignment on the load or its source pointer.
212  Alignment = std::max(SrcPtr->getPointerAlignment(DL), Alignment);
213  Type *LoadTy = Load->getType();
214  InstructionCost OldCost =
215  TTI.getMemoryOpCost(Instruction::Load, LoadTy, Alignment, AS);
216  APInt DemandedElts = APInt::getOneBitSet(MinVecNumElts, 0);
217  OldCost += TTI.getScalarizationOverhead(MinVecTy, DemandedElts,
218  /* Insert */ true, HasExtract);
219 
220  // New pattern: load VecPtr
221  InstructionCost NewCost =
222  TTI.getMemoryOpCost(Instruction::Load, MinVecTy, Alignment, AS);
223  // Optionally, we are shuffling the loaded vector element(s) into place.
224  // For the mask set everything but element 0 to undef to prevent poison from
225  // propagating from the extra loaded memory. This will also optionally
226  // shrink/grow the vector from the loaded size to the output size.
227  // We assume this operation has no cost in codegen if there was no offset.
228  // Note that we could use freeze to avoid poison problems, but then we might
229  // still need a shuffle to change the vector size.
230  unsigned OutputNumElts = Ty->getNumElements();
231  SmallVector<int, 16> Mask(OutputNumElts, UndefMaskElem);
232  assert(OffsetEltIndex < MinVecNumElts && "Address offset too big");
233  Mask[0] = OffsetEltIndex;
234  if (OffsetEltIndex)
235  NewCost += TTI.getShuffleCost(TTI::SK_PermuteSingleSrc, MinVecTy, Mask);
236 
237  // We can aggressively convert to the vector form because the backend can
238  // invert this transform if it does not result in a performance win.
239  if (OldCost < NewCost || !NewCost.isValid())
240  return false;
241 
242  // It is safe and potentially profitable to load a vector directly:
243  // inselt undef, load Scalar, 0 --> load VecPtr
244  IRBuilder<> Builder(Load);
245  Value *CastedPtr = Builder.CreatePointerBitCastOrAddrSpaceCast(
246  SrcPtr, MinVecTy->getPointerTo(AS));
247  Value *VecLd = Builder.CreateAlignedLoad(MinVecTy, CastedPtr, Alignment);
248  VecLd = Builder.CreateShuffleVector(VecLd, Mask);
249 
250  replaceValue(I, *VecLd);
251  ++NumVecLoad;
252  return true;
253 }
254 
255 /// Determine which, if any, of the inputs should be replaced by a shuffle
256 /// followed by extract from a different index.
257 ExtractElementInst *VectorCombine::getShuffleExtract(
258  ExtractElementInst *Ext0, ExtractElementInst *Ext1,
259  unsigned PreferredExtractIndex = InvalidIndex) const {
260  auto *Index0C = dyn_cast<ConstantInt>(Ext0->getIndexOperand());
261  auto *Index1C = dyn_cast<ConstantInt>(Ext1->getIndexOperand());
262  assert(Index0C && Index1C && "Expected constant extract indexes");
263 
264  unsigned Index0 = Index0C->getZExtValue();
265  unsigned Index1 = Index1C->getZExtValue();
266 
267  // If the extract indexes are identical, no shuffle is needed.
268  if (Index0 == Index1)
269  return nullptr;
270 
271  Type *VecTy = Ext0->getVectorOperand()->getType();
272  assert(VecTy == Ext1->getVectorOperand()->getType() && "Need matching types");
273  InstructionCost Cost0 =
274  TTI.getVectorInstrCost(Ext0->getOpcode(), VecTy, Index0);
275  InstructionCost Cost1 =
276  TTI.getVectorInstrCost(Ext1->getOpcode(), VecTy, Index1);
277 
278  // If both costs are invalid no shuffle is needed
279  if (!Cost0.isValid() && !Cost1.isValid())
280  return nullptr;
281 
282  // We are extracting from 2 different indexes, so one operand must be shuffled
283  // before performing a vector operation and/or extract. The more expensive
284  // extract will be replaced by a shuffle.
285  if (Cost0 > Cost1)
286  return Ext0;
287  if (Cost1 > Cost0)
288  return Ext1;
289 
290  // If the costs are equal and there is a preferred extract index, shuffle the
291  // opposite operand.
292  if (PreferredExtractIndex == Index0)
293  return Ext1;
294  if (PreferredExtractIndex == Index1)
295  return Ext0;
296 
297  // Otherwise, replace the extract with the higher index.
298  return Index0 > Index1 ? Ext0 : Ext1;
299 }
300 
301 /// Compare the relative costs of 2 extracts followed by scalar operation vs.
302 /// vector operation(s) followed by extract. Return true if the existing
303 /// instructions are cheaper than a vector alternative. Otherwise, return false
304 /// and if one of the extracts should be transformed to a shufflevector, set
305 /// \p ConvertToShuffle to that extract instruction.
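/// Illustrative cost comparison (hypothetical numbers): if each extract costs
/// 2 and both the scalar and vector ops cost 1, then for two single-use
/// extracts from different vectors at the same index, OldCost = 2 + 2 + 1 = 5
/// and NewCost = 1 + 2 = 3, so this returns false and the caller vectorizes.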
306 bool VectorCombine::isExtractExtractCheap(ExtractElementInst *Ext0,
307  ExtractElementInst *Ext1,
308  const Instruction &I,
309  ExtractElementInst *&ConvertToShuffle,
310  unsigned PreferredExtractIndex) {
311  auto *Ext0IndexC = dyn_cast<ConstantInt>(Ext0->getOperand(1));
312  auto *Ext1IndexC = dyn_cast<ConstantInt>(Ext1->getOperand(1));
313  assert(Ext0IndexC && Ext1IndexC && "Expected constant extract indexes");
314 
315  unsigned Opcode = I.getOpcode();
316  Type *ScalarTy = Ext0->getType();
317  auto *VecTy = cast<VectorType>(Ext0->getOperand(0)->getType());
318  InstructionCost ScalarOpCost, VectorOpCost;
319 
320  // Get cost estimates for scalar and vector versions of the operation.
321  bool IsBinOp = Instruction::isBinaryOp(Opcode);
322  if (IsBinOp) {
323  ScalarOpCost = TTI.getArithmeticInstrCost(Opcode, ScalarTy);
324  VectorOpCost = TTI.getArithmeticInstrCost(Opcode, VecTy);
325  } else {
326  assert((Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) &&
327  "Expected a compare");
328  CmpInst::Predicate Pred = cast<CmpInst>(I).getPredicate();
329  ScalarOpCost = TTI.getCmpSelInstrCost(
330  Opcode, ScalarTy, CmpInst::makeCmpResultType(ScalarTy), Pred);
331  VectorOpCost = TTI.getCmpSelInstrCost(
332  Opcode, VecTy, CmpInst::makeCmpResultType(VecTy), Pred);
333  }
334 
335  // Get cost estimates for the extract elements. These costs will factor into
336  // both sequences.
337  unsigned Ext0Index = Ext0IndexC->getZExtValue();
338  unsigned Ext1Index = Ext1IndexC->getZExtValue();
339 
340  InstructionCost Extract0Cost =
341  TTI.getVectorInstrCost(Instruction::ExtractElement, VecTy, Ext0Index);
342  InstructionCost Extract1Cost =
343  TTI.getVectorInstrCost(Instruction::ExtractElement, VecTy, Ext1Index);
344 
345  // A more expensive extract will always be replaced by a splat shuffle.
346  // For example, if Ext0 is more expensive:
347  // opcode (extelt V0, Ext0), (ext V1, Ext1) -->
348  // extelt (opcode (splat V0, Ext0), V1), Ext1
349  // TODO: Evaluate whether that always results in lowest cost. Alternatively,
350  // check the cost of creating a broadcast shuffle and shuffling both
351  // operands to element 0.
352  InstructionCost CheapExtractCost = std::min(Extract0Cost, Extract1Cost);
353 
354  // Extra uses of the extracts mean that we include those costs in the
355  // vector total because those instructions will not be eliminated.
356  InstructionCost OldCost, NewCost;
357  if (Ext0->getOperand(0) == Ext1->getOperand(0) && Ext0Index == Ext1Index) {
358  // Handle a special case. If the 2 extracts are identical, adjust the
359  // formulas to account for that. The extra use charge allows for either the
360  // CSE'd pattern or an unoptimized form with identical values:
361  // opcode (extelt V, C), (extelt V, C) --> extelt (opcode V, V), C
362  bool HasUseTax = Ext0 == Ext1 ? !Ext0->hasNUses(2)
363  : !Ext0->hasOneUse() || !Ext1->hasOneUse();
364  OldCost = CheapExtractCost + ScalarOpCost;
365  NewCost = VectorOpCost + CheapExtractCost + HasUseTax * CheapExtractCost;
366  } else {
367  // Handle the general case. Each extract is actually a different value:
368  // opcode (extelt V0, C0), (extelt V1, C1) --> extelt (opcode V0, V1), C
369  OldCost = Extract0Cost + Extract1Cost + ScalarOpCost;
370  NewCost = VectorOpCost + CheapExtractCost +
371  !Ext0->hasOneUse() * Extract0Cost +
372  !Ext1->hasOneUse() * Extract1Cost;
373  }
374 
375  ConvertToShuffle = getShuffleExtract(Ext0, Ext1, PreferredExtractIndex);
376  if (ConvertToShuffle) {
377  if (IsBinOp && DisableBinopExtractShuffle)
378  return true;
379 
380  // If we are extracting from 2 different indexes, then one operand must be
381  // shuffled before performing the vector operation. The shuffle mask is
382  // undefined except for 1 lane that is being translated to the remaining
383  // extraction lane. Therefore, it is a splat shuffle. Ex:
384  // ShufMask = { undef, undef, 0, undef }
385  // TODO: The cost model has an option for a "broadcast" shuffle
386  // (splat-from-element-0), but no option for a more general splat.
387  NewCost +=
388  TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, VecTy);
389  }
390 
391  // Aggressively form a vector op if the cost is equal because the transform
392  // may enable further optimization.
393  // Codegen can reverse this transform (scalarize) if it was not profitable.
394  return OldCost < NewCost;
395 }
396 
397 /// Create a shuffle that translates (shifts) 1 element from the input vector
398 /// to a new element location.
399 static Value *createShiftShuffle(Value *Vec, unsigned OldIndex,
400  unsigned NewIndex, IRBuilder<> &Builder) {
401  // The shuffle mask is undefined except for 1 lane that is being translated
402  // to the new element index. Example for OldIndex == 2 and NewIndex == 0:
403  // ShufMask = { 2, undef, undef, undef }
404  auto *VecTy = cast<FixedVectorType>(Vec->getType());
405  SmallVector<int, 32> ShufMask(VecTy->getNumElements(), UndefMaskElem);
406  ShufMask[NewIndex] = OldIndex;
407  return Builder.CreateShuffleVector(Vec, ShufMask, "shift");
408 }
409 
410 /// Given an extract element instruction with constant index operand, shuffle
411 /// the source vector (shift the scalar element) to a NewIndex for extraction.
412 /// Return null if the input can be constant folded, so that we are not creating
413 /// unnecessary instructions.
414 static ExtractElementInst *translateExtract(ExtractElementInst *ExtElt,
415  unsigned NewIndex,
416  IRBuilder<> &Builder) {
417  // If the extract can be constant-folded, this code is unsimplified. Defer
418  // to other passes to handle that.
419  Value *X = ExtElt->getVectorOperand();
420  Value *C = ExtElt->getIndexOperand();
421  assert(isa<ConstantInt>(C) && "Expected a constant index operand");
422  if (isa<Constant>(X))
423  return nullptr;
424 
425  Value *Shuf = createShiftShuffle(X, cast<ConstantInt>(C)->getZExtValue(),
426  NewIndex, Builder);
427  return cast<ExtractElementInst>(Builder.CreateExtractElement(Shuf, NewIndex));
428 }
429 
430 /// Try to reduce extract element costs by converting scalar compares to vector
431 /// compares followed by extract.
432 /// cmp (ext0 V0, C), (ext1 V1, C)
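/// Illustrative sketch:
///   %e0 = extractelement <4 x i32> %a, i32 1
///   %e1 = extractelement <4 x i32> %b, i32 1
///   %c  = icmp sgt i32 %e0, %e1
/// -->
///   %vc = icmp sgt <4 x i32> %a, %b
///   %c  = extractelement <4 x i1> %vc, i32 1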
433 void VectorCombine::foldExtExtCmp(ExtractElementInst *Ext0,
434  ExtractElementInst *Ext1, Instruction &I) {
435  assert(isa<CmpInst>(&I) && "Expected a compare");
436  assert(cast<ConstantInt>(Ext0->getIndexOperand())->getZExtValue() ==
437  cast<ConstantInt>(Ext1->getIndexOperand())->getZExtValue() &&
438  "Expected matching constant extract indexes");
439 
440  // cmp Pred (extelt V0, C), (extelt V1, C) --> extelt (cmp Pred V0, V1), C
441  ++NumVecCmp;
442  CmpInst::Predicate Pred = cast<CmpInst>(&I)->getPredicate();
443  Value *V0 = Ext0->getVectorOperand(), *V1 = Ext1->getVectorOperand();
444  Value *VecCmp = Builder.CreateCmp(Pred, V0, V1);
445  Value *NewExt = Builder.CreateExtractElement(VecCmp, Ext0->getIndexOperand());
446  replaceValue(I, *NewExt);
447 }
448 
449 /// Try to reduce extract element costs by converting scalar binops to vector
450 /// binops followed by extract.
451 /// bo (ext0 V0, C), (ext1 V1, C)
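/// Illustrative sketch:
///   %e0 = extractelement <2 x double> %a, i32 0
///   %e1 = extractelement <2 x double> %b, i32 0
///   %r  = fmul double %e0, %e1
/// -->
///   %v  = fmul <2 x double> %a, %b
///   %r  = extractelement <2 x double> %v, i32 0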
452 void VectorCombine::foldExtExtBinop(ExtractElementInst *Ext0,
453  ExtractElementInst *Ext1, Instruction &I) {
454  assert(isa<BinaryOperator>(&I) && "Expected a binary operator");
455  assert(cast<ConstantInt>(Ext0->getIndexOperand())->getZExtValue() ==
456  cast<ConstantInt>(Ext1->getIndexOperand())->getZExtValue() &&
457  "Expected matching constant extract indexes");
458 
459  // bo (extelt V0, C), (extelt V1, C) --> extelt (bo V0, V1), C
460  ++NumVecBO;
461  Value *V0 = Ext0->getVectorOperand(), *V1 = Ext1->getVectorOperand();
462  Value *VecBO =
463  Builder.CreateBinOp(cast<BinaryOperator>(&I)->getOpcode(), V0, V1);
464 
465  // All IR flags are safe to back-propagate because any potential poison
466  // created in unused vector elements is discarded by the extract.
467  if (auto *VecBOInst = dyn_cast<Instruction>(VecBO))
468  VecBOInst->copyIRFlags(&I);
469 
470  Value *NewExt = Builder.CreateExtractElement(VecBO, Ext0->getIndexOperand());
471  replaceValue(I, *NewExt);
472 }
473 
474 /// Match an instruction with extracted vector operands.
475 bool VectorCombine::foldExtractExtract(Instruction &I) {
476  // It is not safe to transform things like div, urem, etc. because we may
477  // create undefined behavior when executing those on unknown vector elements.
478  if (!isSafeToSpeculativelyExecute(&I))
479  return false;
480 
481  Instruction *I0, *I1;
482  CmpInst::Predicate Pred = CmpInst::BAD_ICMP_PREDICATE;
483  if (!match(&I, m_Cmp(Pred, m_Instruction(I0), m_Instruction(I1))) &&
484  !match(&I, m_BinOp(m_Instruction(I0), m_Instruction(I1))))
485  return false;
486 
487  Value *V0, *V1;
488  uint64_t C0, C1;
489  if (!match(I0, m_ExtractElt(m_Value(V0), m_ConstantInt(C0))) ||
490  !match(I1, m_ExtractElt(m_Value(V1), m_ConstantInt(C1))) ||
491  V0->getType() != V1->getType())
492  return false;
493 
494  // If the scalar value 'I' is going to be re-inserted into a vector, then try
495  // to create an extract to that same element. The extract/insert can be
496  // reduced to a "select shuffle".
497  // TODO: If we add a larger pattern match that starts from an insert, this
498  // probably becomes unnecessary.
499  auto *Ext0 = cast<ExtractElementInst>(I0);
500  auto *Ext1 = cast<ExtractElementInst>(I1);
501  uint64_t InsertIndex = InvalidIndex;
502  if (I.hasOneUse())
503  match(I.user_back(),
504  m_InsertElt(m_Value(), m_Value(), m_ConstantInt(InsertIndex)));
505 
506  ExtractElementInst *ExtractToChange;
507  if (isExtractExtractCheap(Ext0, Ext1, I, ExtractToChange, InsertIndex))
508  return false;
509 
510  if (ExtractToChange) {
511  unsigned CheapExtractIdx = ExtractToChange == Ext0 ? C1 : C0;
512  ExtractElementInst *NewExtract =
513  translateExtract(ExtractToChange, CheapExtractIdx, Builder);
514  if (!NewExtract)
515  return false;
516  if (ExtractToChange == Ext0)
517  Ext0 = NewExtract;
518  else
519  Ext1 = NewExtract;
520  }
521 
522  if (Pred != CmpInst::BAD_ICMP_PREDICATE)
523  foldExtExtCmp(Ext0, Ext1, I);
524  else
525  foldExtExtBinop(Ext0, Ext1, I);
526 
527  Worklist.push(Ext0);
528  Worklist.push(Ext1);
529  return true;
530 }
531 
532 /// If this is a bitcast of a shuffle, try to bitcast the source vector to the
533 /// destination type followed by shuffle. This can enable further transforms by
534 /// moving bitcasts or shuffles together.
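/// Illustrative sketch for the narrow-to-wide case (the mask must select
/// consecutive elements):
///   %s = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
///   %b = bitcast <4 x i16> %s to <2 x i32>
/// -->
///   %c = bitcast <4 x i16> %v to <2 x i32>
///   %b = shufflevector <2 x i32> %c, <2 x i32> undef, <2 x i32> <i32 1, i32 0>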
535 bool VectorCombine::foldBitcastShuf(Instruction &I) {
536  Value *V;
537  ArrayRef<int> Mask;
538  if (!match(&I, m_BitCast(
539  m_OneUse(m_Shuffle(m_Value(V), m_Undef(), m_Mask(Mask))))))
540  return false;
541 
542  // 1) Do not fold bitcast shuffle for scalable type. First, shuffle cost for
543  // scalable type is unknown; Second, we cannot reason if the narrowed shuffle
544  // mask for scalable type is a splat or not.
545  // 2) Disallow non-vector casts and length-changing shuffles.
546  // TODO: We could allow any shuffle.
547  auto *DestTy = dyn_cast<FixedVectorType>(I.getType());
548  auto *SrcTy = dyn_cast<FixedVectorType>(V->getType());
549  if (!SrcTy || !DestTy || I.getOperand(0)->getType() != SrcTy)
550  return false;
551 
552  unsigned DestNumElts = DestTy->getNumElements();
553  unsigned SrcNumElts = SrcTy->getNumElements();
554  SmallVector<int, 16> NewMask;
555  if (SrcNumElts <= DestNumElts) {
556  // The bitcast is from wide to narrow/equal elements. The shuffle mask can
557  // always be expanded to the equivalent form choosing narrower elements.
558  assert(DestNumElts % SrcNumElts == 0 && "Unexpected shuffle mask");
559  unsigned ScaleFactor = DestNumElts / SrcNumElts;
560  narrowShuffleMaskElts(ScaleFactor, Mask, NewMask);
561  } else {
562  // The bitcast is from narrow elements to wide elements. The shuffle mask
563  // must choose consecutive elements to allow casting first.
564  assert(SrcNumElts % DestNumElts == 0 && "Unexpected shuffle mask");
565  unsigned ScaleFactor = SrcNumElts / DestNumElts;
566  if (!widenShuffleMaskElts(ScaleFactor, Mask, NewMask))
567  return false;
568  }
569 
570  // The new shuffle must not cost more than the old shuffle. The bitcast is
571  // moved ahead of the shuffle, so assume that it has the same cost as before.
572  InstructionCost DestCost = TTI.getShuffleCost(
573  TargetTransformInfo::SK_PermuteSingleSrc, DestTy, NewMask);
574  InstructionCost SrcCost =
575  TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, SrcTy, Mask);
576  if (DestCost > SrcCost || !DestCost.isValid())
577  return false;
578 
579  // bitcast (shuf V, MaskC) --> shuf (bitcast V), MaskC'
580  ++NumShufOfBitcast;
581  Value *CastV = Builder.CreateBitCast(V, DestTy);
582  Value *Shuf = Builder.CreateShuffleVector(CastV, NewMask);
583  replaceValue(I, *Shuf);
584  return true;
585 }
586 
587 /// Match a vector binop or compare instruction with at least one inserted
588 /// scalar operand and convert to scalar binop/cmp followed by insertelement.
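/// Illustrative sketch:
///   %i = insertelement <4 x i32> <i32 1, i32 2, i32 3, i32 4>, i32 %x, i32 2
///   %r = add <4 x i32> %i, <i32 10, i32 20, i32 30, i32 40>
/// -->
///   %s = add i32 %x, 30
///   %r = insertelement <4 x i32> <i32 11, i32 22, i32 33, i32 44>, i32 %s, i32 2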
589 bool VectorCombine::scalarizeBinopOrCmp(Instruction &I) {
590  CmpInst::Predicate Pred = CmpInst::BAD_ICMP_PREDICATE;
591  Value *Ins0, *Ins1;
592  if (!match(&I, m_BinOp(m_Value(Ins0), m_Value(Ins1))) &&
593  !match(&I, m_Cmp(Pred, m_Value(Ins0), m_Value(Ins1))))
594  return false;
595 
596  // Do not convert the vector condition of a vector select into a scalar
597  // condition. That may cause problems for codegen because of differences in
598  // boolean formats and register-file transfers.
599  // TODO: Can we account for that in the cost model?
600  bool IsCmp = Pred != CmpInst::Predicate::BAD_ICMP_PREDICATE;
601  if (IsCmp)
602  for (User *U : I.users())
603  if (match(U, m_Select(m_Specific(&I), m_Value(), m_Value())))
604  return false;
605 
606  // Match against one or both scalar values being inserted into constant
607  // vectors:
608  // vec_op VecC0, (inselt VecC1, V1, Index)
609  // vec_op (inselt VecC0, V0, Index), VecC1
610  // vec_op (inselt VecC0, V0, Index), (inselt VecC1, V1, Index)
611  // TODO: Deal with mismatched index constants and variable indexes?
612  Constant *VecC0 = nullptr, *VecC1 = nullptr;
613  Value *V0 = nullptr, *V1 = nullptr;
614  uint64_t Index0 = 0, Index1 = 0;
615  if (!match(Ins0, m_InsertElt(m_Constant(VecC0), m_Value(V0),
616  m_ConstantInt(Index0))) &&
617  !match(Ins0, m_Constant(VecC0)))
618  return false;
619  if (!match(Ins1, m_InsertElt(m_Constant(VecC1), m_Value(V1),
620  m_ConstantInt(Index1))) &&
621  !match(Ins1, m_Constant(VecC1)))
622  return false;
623 
624  bool IsConst0 = !V0;
625  bool IsConst1 = !V1;
626  if (IsConst0 && IsConst1)
627  return false;
628  if (!IsConst0 && !IsConst1 && Index0 != Index1)
629  return false;
630 
631  // Bail for single insertion if it is a load.
632  // TODO: Handle this once getVectorInstrCost can cost for load/stores.
633  auto *I0 = dyn_cast_or_null<Instruction>(V0);
634  auto *I1 = dyn_cast_or_null<Instruction>(V1);
635  if ((IsConst0 && I1 && I1->mayReadFromMemory()) ||
636  (IsConst1 && I0 && I0->mayReadFromMemory()))
637  return false;
638 
639  uint64_t Index = IsConst0 ? Index1 : Index0;
640  Type *ScalarTy = IsConst0 ? V1->getType() : V0->getType();
641  Type *VecTy = I.getType();
642  assert(VecTy->isVectorTy() &&
643  (IsConst0 || IsConst1 || V0->getType() == V1->getType()) &&
644  (ScalarTy->isIntegerTy() || ScalarTy->isFloatingPointTy() ||
645  ScalarTy->isPointerTy()) &&
646  "Unexpected types for insert element into binop or cmp");
647 
648  unsigned Opcode = I.getOpcode();
649  InstructionCost ScalarOpCost, VectorOpCost;
650  if (IsCmp) {
651  CmpInst::Predicate Pred = cast<CmpInst>(I).getPredicate();
652  ScalarOpCost = TTI.getCmpSelInstrCost(
653  Opcode, ScalarTy, CmpInst::makeCmpResultType(ScalarTy), Pred);
654  VectorOpCost = TTI.getCmpSelInstrCost(
655  Opcode, VecTy, CmpInst::makeCmpResultType(VecTy), Pred);
656  } else {
657  ScalarOpCost = TTI.getArithmeticInstrCost(Opcode, ScalarTy);
658  VectorOpCost = TTI.getArithmeticInstrCost(Opcode, VecTy);
659  }
660 
661  // Get cost estimate for the insert element. This cost will factor into
662  // both sequences.
663  InstructionCost InsertCost =
664  TTI.getVectorInstrCost(Instruction::InsertElement, VecTy, Index);
665  InstructionCost OldCost =
666  (IsConst0 ? 0 : InsertCost) + (IsConst1 ? 0 : InsertCost) + VectorOpCost;
667  InstructionCost NewCost = ScalarOpCost + InsertCost +
668  (IsConst0 ? 0 : !Ins0->hasOneUse() * InsertCost) +
669  (IsConst1 ? 0 : !Ins1->hasOneUse() * InsertCost);
670 
671  // We want to scalarize unless the vector variant actually has lower cost.
672  if (OldCost < NewCost || !NewCost.isValid())
673  return false;
674 
675  // vec_op (inselt VecC0, V0, Index), (inselt VecC1, V1, Index) -->
676  // inselt NewVecC, (scalar_op V0, V1), Index
677  if (IsCmp)
678  ++NumScalarCmp;
679  else
680  ++NumScalarBO;
681 
682  // For constant cases, extract the scalar element, this should constant fold.
683  if (IsConst0)
684  V0 = ConstantExpr::getExtractElement(VecC0, Builder.getInt64(Index));
685  if (IsConst1)
686  V1 = ConstantExpr::getExtractElement(VecC1, Builder.getInt64(Index));
687 
688  Value *Scalar =
689  IsCmp ? Builder.CreateCmp(Pred, V0, V1)
690  : Builder.CreateBinOp((Instruction::BinaryOps)Opcode, V0, V1);
691 
692  Scalar->setName(I.getName() + ".scalar");
693 
694  // All IR flags are safe to back-propagate. There is no potential for extra
695  // poison to be created by the scalar instruction.
696  if (auto *ScalarInst = dyn_cast<Instruction>(Scalar))
697  ScalarInst->copyIRFlags(&I);
698 
699  // Fold the vector constants in the original vectors into a new base vector.
700  Constant *NewVecC = IsCmp ? ConstantExpr::getCompare(Pred, VecC0, VecC1)
701  : ConstantExpr::get(Opcode, VecC0, VecC1);
702  Value *Insert = Builder.CreateInsertElement(NewVecC, Scalar, Index);
703  replaceValue(I, *Insert);
704  return true;
705 }
706 
707 /// Try to combine a scalar binop + 2 scalar compares of extracted elements of
708 /// a vector into vector operations followed by extract. Note: The SLP pass
709 /// may miss this pattern because of implementation problems.
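/// Illustrative sketch (assuming the lane-3 extract is the more expensive one,
/// so that side is the one converted to a shuffle):
///   %e0 = extractelement <4 x i32> %x, i32 0
///   %e1 = extractelement <4 x i32> %x, i32 3
///   %c0 = icmp sgt i32 %e0, 42
///   %c1 = icmp sgt i32 %e1, 7
///   %r  = and i1 %c0, %c1
/// -->
///   %vc = icmp sgt <4 x i32> %x, <i32 42, i32 undef, i32 undef, i32 7>
///   %sh = shufflevector <4 x i1> %vc, <4 x i1> undef, <4 x i32> <i32 3, i32 undef, i32 undef, i32 undef>
///   %vb = and <4 x i1> %vc, %sh
///   %r  = extractelement <4 x i1> %vb, i32 0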
710 bool VectorCombine::foldExtractedCmps(Instruction &I) {
711  // We are looking for a scalar binop of booleans.
712  // binop i1 (cmp Pred I0, C0), (cmp Pred I1, C1)
713  if (!I.isBinaryOp() || !I.getType()->isIntegerTy(1))
714  return false;
715 
716  // The compare predicates should match, and each compare should have a
717  // constant operand.
718  // TODO: Relax the one-use constraints.
719  Value *B0 = I.getOperand(0), *B1 = I.getOperand(1);
720  Instruction *I0, *I1;
721  Constant *C0, *C1;
722  CmpInst::Predicate P0, P1;
723  if (!match(B0, m_OneUse(m_Cmp(P0, m_Instruction(I0), m_Constant(C0)))) ||
724  !match(B1, m_OneUse(m_Cmp(P1, m_Instruction(I1), m_Constant(C1)))) ||
725  P0 != P1)
726  return false;
727 
728  // The compare operands must be extracts of the same vector with constant
729  // extract indexes.
730  // TODO: Relax the one-use constraints.
731  Value *X;
732  uint64_t Index0, Index1;
733  if (!match(I0, m_OneUse(m_ExtractElt(m_Value(X), m_ConstantInt(Index0)))) ||
734  !match(I1, m_OneUse(m_ExtractElt(m_Specific(X), m_ConstantInt(Index1)))))
735  return false;
736 
737  auto *Ext0 = cast<ExtractElementInst>(I0);
738  auto *Ext1 = cast<ExtractElementInst>(I1);
739  ExtractElementInst *ConvertToShuf = getShuffleExtract(Ext0, Ext1);
740  if (!ConvertToShuf)
741  return false;
742 
743  // The original scalar pattern is:
744  // binop i1 (cmp Pred (ext X, Index0), C0), (cmp Pred (ext X, Index1), C1)
745  CmpInst::Predicate Pred = P0;
746  unsigned CmpOpcode = CmpInst::isFPPredicate(Pred) ? Instruction::FCmp
747  : Instruction::ICmp;
748  auto *VecTy = dyn_cast<FixedVectorType>(X->getType());
749  if (!VecTy)
750  return false;
751 
752  InstructionCost OldCost =
753  TTI.getVectorInstrCost(Ext0->getOpcode(), VecTy, Index0);
754  OldCost += TTI.getVectorInstrCost(Ext1->getOpcode(), VecTy, Index1);
755  OldCost +=
756  TTI.getCmpSelInstrCost(CmpOpcode, I0->getType(),
757  CmpInst::makeCmpResultType(I0->getType()), Pred) *
758  2;
759  OldCost += TTI.getArithmeticInstrCost(I.getOpcode(), I.getType());
760 
761  // The proposed vector pattern is:
762  // vcmp = cmp Pred X, VecC
763  // ext (binop vNi1 vcmp, (shuffle vcmp, Index1)), Index0
764  int CheapIndex = ConvertToShuf == Ext0 ? Index1 : Index0;
765  int ExpensiveIndex = ConvertToShuf == Ext0 ? Index0 : Index1;
766  auto *CmpTy = cast<FixedVectorType>(CmpInst::makeCmpResultType(X->getType()));
767  InstructionCost NewCost = TTI.getCmpSelInstrCost(
768  CmpOpcode, X->getType(), CmpInst::makeCmpResultType(X->getType()), Pred);
769  SmallVector<int, 32> ShufMask(VecTy->getNumElements(), UndefMaskElem);
770  ShufMask[CheapIndex] = ExpensiveIndex;
771  NewCost += TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, CmpTy,
772  ShufMask);
773  NewCost += TTI.getArithmeticInstrCost(I.getOpcode(), CmpTy);
774  NewCost += TTI.getVectorInstrCost(Ext0->getOpcode(), CmpTy, CheapIndex);
775 
776  // Aggressively form vector ops if the cost is equal because the transform
777  // may enable further optimization.
778  // Codegen can reverse this transform (scalarize) if it was not profitable.
779  if (OldCost < NewCost || !NewCost.isValid())
780  return false;
781 
782  // Create a vector constant from the 2 scalar constants.
783  SmallVector<Constant *, 32> CmpC(VecTy->getNumElements(),
784  UndefValue::get(VecTy->getElementType()));
785  CmpC[Index0] = C0;
786  CmpC[Index1] = C1;
787  Value *VCmp = Builder.CreateCmp(Pred, X, ConstantVector::get(CmpC));
788 
789  Value *Shuf = createShiftShuffle(VCmp, ExpensiveIndex, CheapIndex, Builder);
790  Value *VecLogic = Builder.CreateBinOp(cast<BinaryOperator>(I).getOpcode(),
791  VCmp, Shuf);
792  Value *NewExt = Builder.CreateExtractElement(VecLogic, CheapIndex);
793  replaceValue(I, *NewExt);
794  ++NumVecCmpBO;
795  return true;
796 }
797 
798 // Check if memory loc modified between two instrs in the same BB
799 static bool isMemModifiedBetween(BasicBlock::iterator Begin,
800  BasicBlock::iterator End,
801  const MemoryLocation &Loc, AAResults &AA) {
802  unsigned NumScanned = 0;
803  return std::any_of(Begin, End, [&](const Instruction &Instr) {
804  return isModSet(AA.getModRefInfo(&Instr, Loc)) ||
805  ++NumScanned > MaxInstrsToScan;
806  });
807 }
808 
809 /// Helper class to indicate whether a vector index can be safely scalarized and
810 /// if a freeze needs to be inserted.
811 class ScalarizationResult {
812  enum class StatusTy { Unsafe, Safe, SafeWithFreeze };
813 
814  StatusTy Status;
815  Value *ToFreeze;
816 
817  ScalarizationResult(StatusTy Status, Value *ToFreeze = nullptr)
818  : Status(Status), ToFreeze(ToFreeze) {}
819 
820 public:
822  ~ScalarizationResult() {
823  assert(!ToFreeze && "freeze() not called with ToFreeze being set");
824  }
825 
826  static ScalarizationResult unsafe() { return {StatusTy::Unsafe}; }
827  static ScalarizationResult safe() { return {StatusTy::Safe}; }
828  static ScalarizationResult safeWithFreeze(Value *ToFreeze) {
829  return {StatusTy::SafeWithFreeze, ToFreeze};
830  }
831 
832  /// Returns true if the index can be scalarized without requiring a freeze.
833  bool isSafe() const { return Status == StatusTy::Safe; }
834  /// Returns true if the index cannot be scalarized.
835  bool isUnsafe() const { return Status == StatusTy::Unsafe; }
836  /// Returns true if the index can be scalarized, but requires inserting a
837  /// freeze.
838  bool isSafeWithFreeze() const { return Status == StatusTy::SafeWithFreeze; }
839 
840  /// Reset the state to Unsafe and clear ToFreeze if set.
841  void discard() {
842  ToFreeze = nullptr;
843  Status = StatusTy::Unsafe;
844  }
845 
846  /// Freeze the ToFreeze and update the use in \p User to use it.
847  void freeze(IRBuilder<> &Builder, Instruction &UserI) {
848  assert(isSafeWithFreeze() &&
849  "should only be used when freezing is required");
850  assert(is_contained(ToFreeze->users(), &UserI) &&
851  "UserI must be a user of ToFreeze");
852  IRBuilder<>::InsertPointGuard Guard(Builder);
853  Builder.SetInsertPoint(cast<Instruction>(&UserI));
854  Value *Frozen =
855  Builder.CreateFreeze(ToFreeze, ToFreeze->getName() + ".frozen");
856  for (Use &U : make_early_inc_range((UserI.operands())))
857  if (U.get() == ToFreeze)
858  U.set(Frozen);
859 
860  ToFreeze = nullptr;
861  }
862 };
863 
864 /// Check if it is legal to scalarize a memory access to \p VecTy at index \p
865 /// Idx. \p Idx must access a valid vector element.
866 static ScalarizationResult canScalarizeAccess(FixedVectorType *VecTy,
867  Value *Idx, Instruction *CtxI,
868  AssumptionCache &AC,
869  const DominatorTree &DT) {
870  if (auto *C = dyn_cast<ConstantInt>(Idx)) {
871  if (C->getValue().ult(VecTy->getNumElements()))
872  return ScalarizationResult::safe();
873  return ScalarizationResult::unsafe();
874  }
875 
876  unsigned IntWidth = Idx->getType()->getScalarSizeInBits();
877  APInt Zero(IntWidth, 0);
878  APInt MaxElts(IntWidth, VecTy->getNumElements());
879  ConstantRange ValidIndices(Zero, MaxElts);
880  ConstantRange IdxRange(IntWidth, true);
881 
882  if (isGuaranteedNotToBePoison(Idx, &AC)) {
883  if (ValidIndices.contains(computeConstantRange(Idx, /* ForSigned */ false,
884  true, &AC, CtxI, &DT)))
885  return ScalarizationResult::safe();
886  return ScalarizationResult::unsafe();
887  }
888 
889  // If the index may be poison, check if we can insert a freeze before the
890  // range of the index is restricted.
891  Value *IdxBase;
892  ConstantInt *CI;
893  if (match(Idx, m_And(m_Value(IdxBase), m_ConstantInt(CI)))) {
894  IdxRange = IdxRange.binaryAnd(CI->getValue());
895  } else if (match(Idx, m_URem(m_Value(IdxBase), m_ConstantInt(CI)))) {
896  IdxRange = IdxRange.urem(CI->getValue());
897  }
898 
899  if (ValidIndices.contains(IdxRange))
900  return ScalarizationResult::safeWithFreeze(IdxBase);
901  return ScalarizationResult::unsafe();
902 }
903 
904 /// The memory operation on a vector of \p ScalarType had alignment of
905 /// \p VectorAlignment. Compute the maximal, but conservatively correct,
906 /// alignment that will be valid for the memory operation on a single scalar
907 /// element of the same type with index \p Idx.
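/// Illustrative arithmetic: for a <4 x i32> access with vector alignment 16,
/// the element at constant index 1 sits at byte offset 1 * 4, so
/// commonAlignment(16, 4) yields align 4; with a non-constant index only the
/// element store size can be assumed, which also gives align 4 here.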
908 static Align computeAlignmentAfterScalarization(Align VectorAlignment,
909  Type *ScalarType, Value *Idx,
910  const DataLayout &DL) {
911  if (auto *C = dyn_cast<ConstantInt>(Idx))
912  return commonAlignment(VectorAlignment,
913  C->getZExtValue() * DL.getTypeStoreSize(ScalarType));
914  return commonAlignment(VectorAlignment, DL.getTypeStoreSize(ScalarType));
915 }
916 
917 // Combine patterns like:
918 // %0 = load <4 x i32>, <4 x i32>* %a
919 // %1 = insertelement <4 x i32> %0, i32 %b, i32 1
920 // store <4 x i32> %1, <4 x i32>* %a
921 // to:
922 // %0 = bitcast <4 x i32>* %a to i32*
923 // %1 = getelementptr inbounds i32, i32* %0, i64 0, i64 1
924 // store i32 %b, i32* %1
925 bool VectorCombine::foldSingleElementStore(Instruction &I) {
926  StoreInst *SI = dyn_cast<StoreInst>(&I);
927  if (!SI || !SI->isSimple() ||
928  !isa<FixedVectorType>(SI->getValueOperand()->getType()))
929  return false;
930 
931  // TODO: Combine more complicated patterns (multiple insert) by referencing
932  // TargetTransformInfo.
933  Instruction *Source;
934  Value *NewElement;
935  Value *Idx;
936  if (!match(SI->getValueOperand(),
937  m_InsertElt(m_Instruction(Source), m_Value(NewElement),
938  m_Value(Idx))))
939  return false;
940 
941  if (auto *Load = dyn_cast<LoadInst>(Source)) {
942  auto VecTy = cast<FixedVectorType>(SI->getValueOperand()->getType());
943  const DataLayout &DL = I.getModule()->getDataLayout();
944  Value *SrcAddr = Load->getPointerOperand()->stripPointerCasts();
945  // Don't optimize for atomic/volatile load or store. Ensure memory is not
946  // modified between, vector type matches store size, and index is inbounds.
947  if (!Load->isSimple() || Load->getParent() != SI->getParent() ||
948  !DL.typeSizeEqualsStoreSize(Load->getType()) ||
949  SrcAddr != SI->getPointerOperand()->stripPointerCasts())
950  return false;
951 
952  auto ScalarizableIdx = canScalarizeAccess(VecTy, Idx, Load, AC, DT);
953  if (ScalarizableIdx.isUnsafe() ||
954  isMemModifiedBetween(Load->getIterator(), SI->getIterator(),
955  MemoryLocation::get(SI), AA))
956  return false;
957 
958  if (ScalarizableIdx.isSafeWithFreeze())
959  ScalarizableIdx.freeze(Builder, *cast<Instruction>(Idx));
960  Value *GEP = Builder.CreateInBoundsGEP(
961  SI->getValueOperand()->getType(), SI->getPointerOperand(),
962  {ConstantInt::get(Idx->getType(), 0), Idx});
963  StoreInst *NSI = Builder.CreateStore(NewElement, GEP);
964  NSI->copyMetadata(*SI);
965  Align ScalarOpAlignment = computeAlignmentAfterScalarization(
966  std::max(SI->getAlign(), Load->getAlign()), NewElement->getType(), Idx,
967  DL);
968  NSI->setAlignment(ScalarOpAlignment);
969  replaceValue(I, *NSI);
970  eraseInstruction(I);
971  return true;
972  }
973 
974  return false;
975 }
976 
977 /// Try to scalarize vector loads feeding extractelement instructions.
978 bool VectorCombine::scalarizeLoadExtract(Instruction &I) {
979  Value *Ptr;
980  if (!match(&I, m_Load(m_Value(Ptr))))
981  return false;
982 
983  auto *LI = cast<LoadInst>(&I);
984  const DataLayout &DL = I.getModule()->getDataLayout();
985  if (LI->isVolatile() || !DL.typeSizeEqualsStoreSize(LI->getType()))
986  return false;
987 
988  auto *FixedVT = dyn_cast<FixedVectorType>(LI->getType());
989  if (!FixedVT)
990  return false;
991 
992  InstructionCost OriginalCost =
993  TTI.getMemoryOpCost(Instruction::Load, LI->getType(), LI->getAlign(),
994  LI->getPointerAddressSpace());
995  InstructionCost ScalarizedCost = 0;
996 
997  Instruction *LastCheckedInst = LI;
998  unsigned NumInstChecked = 0;
999  // Check if all users of the load are extracts with no memory modifications
1000  // between the load and the extract. Compute the cost of both the original
1001  // code and the scalarized version.
1002  for (User *U : LI->users()) {
1003  auto *UI = dyn_cast<ExtractElementInst>(U);
1004  if (!UI || UI->getParent() != LI->getParent())
1005  return false;
1006 
1007  if (!isGuaranteedNotToBePoison(UI->getOperand(1), &AC, LI, &DT))
1008  return false;
1009 
1010  // Check if any instruction between the load and the extract may modify
1011  // memory.
1012  if (LastCheckedInst->comesBefore(UI)) {
1013  for (Instruction &I :
1014  make_range(std::next(LI->getIterator()), UI->getIterator())) {
1015  // Bail out if we reached the check limit or the instruction may write
1016  // to memory.
1017  if (NumInstChecked == MaxInstrsToScan || I.mayWriteToMemory())
1018  return false;
1019  NumInstChecked++;
1020  }
1021  LastCheckedInst = UI;
1022  }
1023 
1024  auto ScalarIdx = canScalarizeAccess(FixedVT, UI->getOperand(1), &I, AC, DT);
1025  if (!ScalarIdx.isSafe()) {
1026  // TODO: Freeze index if it is safe to do so.
1027  ScalarIdx.discard();
1028  return false;
1029  }
1030 
1031  auto *Index = dyn_cast<ConstantInt>(UI->getOperand(1));
1032  OriginalCost +=
1033  TTI.getVectorInstrCost(Instruction::ExtractElement, LI->getType(),
1034  Index ? Index->getZExtValue() : -1);
1035  ScalarizedCost +=
1036  TTI.getMemoryOpCost(Instruction::Load, FixedVT->getElementType(),
1037  Align(1), LI->getPointerAddressSpace());
1038  ScalarizedCost += TTI.getAddressComputationCost(FixedVT->getElementType());
1039  }
1040 
1041  if (ScalarizedCost >= OriginalCost)
1042  return false;
1043 
1044  // Replace extracts with narrow scalar loads.
1045  for (User *U : LI->users()) {
1046  auto *EI = cast<ExtractElementInst>(U);
1047  Builder.SetInsertPoint(EI);
1048 
1049  Value *Idx = EI->getOperand(1);
1050  Value *GEP =
1051  Builder.CreateInBoundsGEP(FixedVT, Ptr, {Builder.getInt32(0), Idx});
1052  auto *NewLoad = cast<LoadInst>(Builder.CreateLoad(
1053  FixedVT->getElementType(), GEP, EI->getName() + ".scalar"));
1054 
1055  Align ScalarOpAlignment = computeAlignmentAfterScalarization(
1056  LI->getAlign(), FixedVT->getElementType(), Idx, DL);
1057  NewLoad->setAlignment(ScalarOpAlignment);
1058 
1059  replaceValue(*EI, *NewLoad);
1060  }
1061 
1062  return true;
1063 }
1064 
1065 /// Try to convert "shuffle (binop), (binop)" with a shared binop operand into
1066 /// "binop (shuffle), (shuffle)".
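/// Illustrative sketch for the shared-first-operand case (X == Z), where
/// UnaryMask is Mask rewritten to index only the single shared source:
///   %a = add <4 x i32> %x, %y
///   %b = add <4 x i32> %x, %w
///   %s = shufflevector <4 x i32> %a, <4 x i32> %b, Mask
/// -->
///   %s0 = shufflevector <4 x i32> %x, <4 x i32> poison, UnaryMask
///   %s1 = shufflevector <4 x i32> %y, <4 x i32> %w, Mask
///   %s  = add <4 x i32> %s0, %s1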
1067 bool VectorCombine::foldShuffleOfBinops(Instruction &I) {
1068  auto *VecTy = dyn_cast<FixedVectorType>(I.getType());
1069  if (!VecTy)
1070  return false;
1071 
1072  BinaryOperator *B0, *B1;
1073  ArrayRef<int> Mask;
1074  if (!match(&I, m_Shuffle(m_OneUse(m_BinOp(B0)), m_OneUse(m_BinOp(B1)),
1075  m_Mask(Mask))) ||
1076  B0->getOpcode() != B1->getOpcode() || B0->getType() != VecTy)
1077  return false;
1078 
1079  // Try to replace a binop with a shuffle if the shuffle is not costly.
1080  // The new shuffle will choose from a single, common operand, so it may be
1081  // cheaper than the existing two-operand shuffle.
1082  SmallVector<int> UnaryMask = createUnaryMask(Mask, Mask.size());
1083  Instruction::BinaryOps Opcode = B0->getOpcode();
1084  InstructionCost BinopCost = TTI.getArithmeticInstrCost(Opcode, VecTy);
1085  InstructionCost ShufCost = TTI.getShuffleCost(
1086  TargetTransformInfo::SK_PermuteSingleSrc, VecTy, UnaryMask);
1087  if (ShufCost > BinopCost)
1088  return false;
1089 
1090  // If we have something like "add X, Y" and "add Z, X", swap ops to match.
1091  Value *X = B0->getOperand(0), *Y = B0->getOperand(1);
1092  Value *Z = B1->getOperand(0), *W = B1->getOperand(1);
1093  if (BinaryOperator::isCommutative(Opcode) && X != Z && Y != W)
1094  std::swap(X, Y);
1095 
1096  Value *Shuf0, *Shuf1;
1097  if (X == Z) {
1098  // shuf (bo X, Y), (bo X, W) --> bo (shuf X), (shuf Y, W)
1099  Shuf0 = Builder.CreateShuffleVector(X, UnaryMask);
1100  Shuf1 = Builder.CreateShuffleVector(Y, W, Mask);
1101  } else if (Y == W) {
1102  // shuf (bo X, Y), (bo Z, Y) --> bo (shuf X, Z), (shuf Y)
1103  Shuf0 = Builder.CreateShuffleVector(X, Z, Mask);
1104  Shuf1 = Builder.CreateShuffleVector(Y, UnaryMask);
1105  } else {
1106  return false;
1107  }
1108 
1109  Value *NewBO = Builder.CreateBinOp(Opcode, Shuf0, Shuf1);
1110  // Intersect flags from the old binops.
1111  if (auto *NewInst = dyn_cast<Instruction>(NewBO)) {
1112  NewInst->copyIRFlags(B0);
1113  NewInst->andIRFlags(B1);
1114  }
1115  replaceValue(I, *NewBO);
1116  return true;
1117 }
1118 
1119 /// Given a commutative reduction, the order of the input lanes does not alter
1120 /// the results. We can use this to remove certain shuffles feeding the
1121 /// reduction, removing the need to shuffle at all.
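/// Illustrative sketch:
///   %s = shufflevector <4 x i32> %v, <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
///   %r = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %s)
/// The add reduction is insensitive to lane order, so the reversing shuffle can
/// be rewritten with a sorted (here identity) mask and then removed entirely.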
1122 bool VectorCombine::foldShuffleFromReductions(Instruction &I) {
1123  auto *II = dyn_cast<IntrinsicInst>(&I);
1124  if (!II)
1125  return false;
1126  switch (II->getIntrinsicID()) {
1127  case Intrinsic::vector_reduce_add:
1128  case Intrinsic::vector_reduce_mul:
1129  case Intrinsic::vector_reduce_and:
1130  case Intrinsic::vector_reduce_or:
1131  case Intrinsic::vector_reduce_xor:
1132  case Intrinsic::vector_reduce_smin:
1133  case Intrinsic::vector_reduce_smax:
1134  case Intrinsic::vector_reduce_umin:
1135  case Intrinsic::vector_reduce_umax:
1136  break;
1137  default:
1138  return false;
1139  }
1140 
1141  // Find all the inputs when looking through operations that do not alter the
1142  // lane order (binops, for example). Currently we look for a single shuffle,
1143  // and can ignore splat values.
1144  std::queue<Value *> Worklist;
1145  SmallPtrSet<Value *, 4> Visited;
1146  ShuffleVectorInst *Shuffle = nullptr;
1147  if (auto *Op = dyn_cast<Instruction>(I.getOperand(0)))
1148  Worklist.push(Op);
1149 
1150  while (!Worklist.empty()) {
1151  Value *CV = Worklist.front();
1152  Worklist.pop();
1153  if (Visited.contains(CV))
1154  continue;
1155 
1156  // Splats don't change the order, so can be safely ignored.
1157  if (isSplatValue(CV))
1158  continue;
1159 
1160  Visited.insert(CV);
1161 
1162  if (auto *CI = dyn_cast<Instruction>(CV)) {
1163  if (CI->isBinaryOp()) {
1164  for (auto *Op : CI->operand_values())
1165  Worklist.push(Op);
1166  continue;
1167  } else if (auto *SV = dyn_cast<ShuffleVectorInst>(CI)) {
1168  if (Shuffle && Shuffle != SV)
1169  return false;
1170  Shuffle = SV;
1171  continue;
1172  }
1173  }
1174 
1175  // Anything else is currently an unknown node.
1176  return false;
1177  }
1178 
1179  if (!Shuffle)
1180  return false;
1181 
1182  // Check all uses of the binary ops and shuffles are also included in the
1183  // lane-invariant operations (Visited should be the list of lanewise
1184  // instructions, including the shuffle that we found).
1185  for (auto *V : Visited)
1186  for (auto *U : V->users())
1187  if (!Visited.contains(U) && U != &I)
1188  return false;
1189 
1190  FixedVectorType *VecType =
1191  dyn_cast<FixedVectorType>(II->getOperand(0)->getType());
1192  if (!VecType)
1193  return false;
1194  FixedVectorType *ShuffleInputType =
1195  dyn_cast<FixedVectorType>(Shuffle->getOperand(0)->getType());
1196  if (!ShuffleInputType)
1197  return false;
1198  int NumInputElts = ShuffleInputType->getNumElements();
1199 
1200  // Find the mask from sorting the lanes into order. This is most likely to
1201  // become an identity or concat mask. Undef elements are pushed to the end.
1202  SmallVector<int> ConcatMask;
1203  Shuffle->getShuffleMask(ConcatMask);
1204  sort(ConcatMask, [](int X, int Y) { return (unsigned)X < (unsigned)Y; });
1205  bool UsesSecondVec =
1206  any_of(ConcatMask, [&](int M) { return M >= NumInputElts; });
1207  InstructionCost OldCost = TTI.getShuffleCost(
1208  UsesSecondVec ? TTI::SK_PermuteTwoSrc : TTI::SK_PermuteSingleSrc, VecType,
1209  Shuffle->getShuffleMask());
1210  InstructionCost NewCost = TTI.getShuffleCost(
1211  UsesSecondVec ? TTI::SK_PermuteTwoSrc : TTI::SK_PermuteSingleSrc, VecType,
1212  ConcatMask);
1213 
1214  LLVM_DEBUG(dbgs() << "Found a reduction feeding from a shuffle: " << *Shuffle
1215  << "\n");
1216  LLVM_DEBUG(dbgs() << " OldCost: " << OldCost << " vs NewCost: " << NewCost
1217  << "\n");
1218  if (NewCost < OldCost) {
1219  Builder.SetInsertPoint(Shuffle);
1220  Value *NewShuffle = Builder.CreateShuffleVector(
1221  Shuffle->getOperand(0), Shuffle->getOperand(1), ConcatMask);
1222  LLVM_DEBUG(dbgs() << "Created new shuffle: " << *NewShuffle << "\n");
1223  replaceValue(*Shuffle, *NewShuffle);
1224  }
1225 
1226  // See if we can re-use foldSelectShuffle, getting it to reduce the size of
1227  // the shuffle into a nicer order, as it can ignore the order of the shuffles.
1228  return foldSelectShuffle(*Shuffle, true);
1229 }
1230 
1231 /// This method looks for groups of shuffles acting on binops, of the form:
1232 /// %x = shuffle ...
1233 /// %y = shuffle ...
1234 /// %a = binop %x, %y
1235 /// %b = binop %x, %y
1236 /// shuffle %a, %b, selectmask
1237 /// We may, especially if the shuffle is wider than legal, be able to convert
1238 /// the shuffle to a form where only parts of a and b need to be computed. On
1239 /// architectures with no obvious "select" shuffle, this can reduce the total
1240 /// number of operations if the target reports them as cheaper.
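/// Conceptually (an illustrative summary, not an exact algorithm statement):
/// the lanes actually demanded from %a and %b are packed to the front by new
/// input shuffles, the binops are re-created on those narrower packed vectors,
/// and a final "reconstruct" shuffle per original user restores the original
/// lane order, all guarded by a before/after cost comparison.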
1241 bool VectorCombine::foldSelectShuffle(Instruction &I, bool FromReduction) {
1242  auto *SVI = dyn_cast<ShuffleVectorInst>(&I);
1243  auto *VT = dyn_cast<FixedVectorType>(I.getType());
1244  if (!SVI || !VT)
1245  return false;
1246  auto *Op0 = dyn_cast<Instruction>(SVI->getOperand(0));
1247  auto *Op1 = dyn_cast<Instruction>(SVI->getOperand(1));
1248  if (!Op0 || !Op1 || Op0 == Op1 || !Op0->isBinaryOp() || !Op1->isBinaryOp() ||
1249  VT != Op0->getType())
1250  return false;
1251  auto *SVI0A = dyn_cast<ShuffleVectorInst>(Op0->getOperand(0));
1252  auto *SVI0B = dyn_cast<ShuffleVectorInst>(Op0->getOperand(1));
1253  auto *SVI1A = dyn_cast<ShuffleVectorInst>(Op1->getOperand(0));
1254  auto *SVI1B = dyn_cast<ShuffleVectorInst>(Op1->getOperand(1));
1255  auto checkSVNonOpUses = [&](Instruction *I) {
1256  if (!I || I->getOperand(0)->getType() != VT)
1257  return true;
1258  return any_of(I->users(), [&](User *U) { return U != Op0 && U != Op1; });
1259  };
1260  if (checkSVNonOpUses(SVI0A) || checkSVNonOpUses(SVI0B) ||
1261  checkSVNonOpUses(SVI1A) || checkSVNonOpUses(SVI1B))
1262  return false;
1263 
1264  // Collect all the uses that are shuffles that we can transform together. We
1265  // may not have a single shuffle, but a group that can all be transformed
1266  // together profitably.
1267  SmallVector<ShuffleVectorInst *> Shuffles;
1268  auto collectShuffles = [&](Instruction *I) {
1269  for (auto *U : I->users()) {
1270  auto *SV = dyn_cast<ShuffleVectorInst>(U);
1271  if (!SV || SV->getType() != VT)
1272  return false;
1273  if (!llvm::is_contained(Shuffles, SV))
1274  Shuffles.push_back(SV);
1275  }
1276  return true;
1277  };
1278  if (!collectShuffles(Op0) || !collectShuffles(Op1))
1279  return false;
1280  // From a reduction, we need to be processing a single shuffle, otherwise the
1281  // other uses will not be lane-invariant.
1282  if (FromReduction && Shuffles.size() > 1)
1283  return false;
1284 
1285  // For each of the output shuffles, we try to sort all the first vector
1286  // elements to the beginning, followed by the second array elements at the
1287  // end. If the binops are legalized to smaller vectors, this may reduce total
1288  // number of binops. We compute the ReconstructMask mask needed to convert
1289  // back to the original lane order.
1290  SmallVector<int> V1, V2;
1291  SmallVector<SmallVector<int>> ReconstructMasks;
1292  int MaxV1Elt = 0, MaxV2Elt = 0;
1293  unsigned NumElts = VT->getNumElements();
1294  for (ShuffleVectorInst *SVN : Shuffles) {
1295  SmallVector<int> Mask;
1296  SVN->getShuffleMask(Mask);
1297 
1298  // Check the operands are the same as the original, or reversed (in which
1299  // case we need to commute the mask).
1300  Value *SVOp0 = SVN->getOperand(0);
1301  Value *SVOp1 = SVN->getOperand(1);
1302  if (SVOp0 == Op1 && SVOp1 == Op0) {
1303  std::swap(SVOp0, SVOp1);
1304  ShuffleVectorInst::commuteShuffleMask(Mask, NumElts);
1305  }
1306  if (SVOp0 != Op0 || SVOp1 != Op1)
1307  return false;
1308 
1309  // Calculate the reconstruction mask for this shuffle, as the mask needed to
1310  // take the packed values from Op0/Op1 and reconstructing to the original
1311  // order.
1312  SmallVector<int> ReconstructMask;
1313  for (unsigned I = 0; I < Mask.size(); I++) {
1314  if (Mask[I] < 0) {
1315  ReconstructMask.push_back(-1);
1316  } else if (Mask[I] < static_cast<int>(NumElts)) {
1317  MaxV1Elt = std::max(MaxV1Elt, Mask[I]);
1318  auto It = find(V1, Mask[I]);
1319  if (It != V1.end())
1320  ReconstructMask.push_back(It - V1.begin());
1321  else {
1322  ReconstructMask.push_back(V1.size());
1323  V1.push_back(Mask[I]);
1324  }
1325  } else {
1326  MaxV2Elt = std::max<int>(MaxV2Elt, Mask[I] - NumElts);
1327  auto It = find(V2, Mask[I] - NumElts);
1328  if (It != V2.end())
1329  ReconstructMask.push_back(NumElts + It - V2.begin());
1330  else {
1331  ReconstructMask.push_back(NumElts + V2.size());
1332  V2.push_back(Mask[I] - NumElts);
1333  }
1334  }
1335  }
1336 
1337  // For reductions, we know that the lane ordering out doesn't alter the
1338  // result. In-order can help simplify the shuffle away.
1339  if (FromReduction)
1340  sort(ReconstructMask);
1341  ReconstructMasks.push_back(ReconstructMask);
1342  }
1343 
1344  // If the maximum elements used from V1 and V2 are not larger than the new
1345  // vectors, the vectors are already packed and performing the optimization
1346  // again will likely not help any further. This also prevents us from getting
1347  // stuck in a cycle in case the costs do not also rule it out.
1348  if (V1.empty() || V2.empty() ||
1349  (MaxV1Elt == static_cast<int>(V1.size()) - 1 &&
1350  MaxV2Elt == static_cast<int>(V2.size()) - 1))
1351  return false;
1352 
1353  // Calculate the masks needed for the new input shuffles, which get padded
1354  // with undef
1355  SmallVector<int> V1A, V1B, V2A, V2B;
1356  for (unsigned I = 0; I < V1.size(); I++) {
1357  V1A.push_back(SVI0A->getMaskValue(V1[I]));
1358  V1B.push_back(SVI0B->getMaskValue(V1[I]));
1359  }
1360  for (unsigned I = 0; I < V2.size(); I++) {
1361  V2A.push_back(SVI1A->getMaskValue(V2[I]));
1362  V2B.push_back(SVI1B->getMaskValue(V2[I]));
1363  }
1364  while (V1A.size() < NumElts) {
1365  V1A.push_back(UndefMaskElem);
1366  V1B.push_back(UndefMaskElem);
1367  }
1368  while (V2A.size() < NumElts) {
1369  V2A.push_back(UndefMaskElem);
1370  V2B.push_back(UndefMaskElem);
1371  }
1372 
1373  auto AddShuffleCost = [&](InstructionCost C, ShuffleVectorInst *SV) {
1374  return C +
1375  TTI.getShuffleCost(TTI::SK_PermuteTwoSrc, VT, SV->getShuffleMask());
1376  };
1377  auto AddShuffleMaskCost = [&](InstructionCost C, ArrayRef<int> Mask) {
1378  return C + TTI.getShuffleCost(TTI::SK_PermuteTwoSrc, VT, Mask);
1379  };
1380 
1381  // Get the costs of the shuffles + binops before and after with the new
1382  // shuffle masks.
1383  InstructionCost CostBefore =
1384  TTI.getArithmeticInstrCost(Op0->getOpcode(), VT) +
1385  TTI.getArithmeticInstrCost(Op1->getOpcode(), VT);
1386  CostBefore += std::accumulate(Shuffles.begin(), Shuffles.end(),
1387  InstructionCost(0), AddShuffleCost);
1388  // This set helps us only cost each unique shuffle once.
1389  SmallPtrSet<ShuffleVectorInst *, 4> InputShuffles(
1390  {SVI0A, SVI0B, SVI1A, SVI1B});
1391  CostBefore += std::accumulate(InputShuffles.begin(), InputShuffles.end(),
1392  InstructionCost(0), AddShuffleCost);
1393 
1394  // The new binops will be unused for lanes past the used shuffle lengths.
1395  // These types attempt to get the correct cost for that from the target.
1396  FixedVectorType *Op0SmallVT =
1397  FixedVectorType::get(VT->getScalarType(), V1.size());
1398  FixedVectorType *Op1SmallVT =
1399  FixedVectorType::get(VT->getScalarType(), V2.size());
1400  InstructionCost CostAfter =
1401  TTI.getArithmeticInstrCost(Op0->getOpcode(), Op0SmallVT) +
1402  TTI.getArithmeticInstrCost(Op1->getOpcode(), Op1SmallVT);
1403  CostAfter += std::accumulate(ReconstructMasks.begin(), ReconstructMasks.end(),
1404  InstructionCost(0), AddShuffleMaskCost);
1405  std::set<SmallVector<int>> OutputShuffleMasks({V1A, V1B, V2A, V2B});
1406  CostAfter +=
1407  std::accumulate(OutputShuffleMasks.begin(), OutputShuffleMasks.end(),
1408  InstructionCost(0), AddShuffleMaskCost);
1409 
1410  if (CostBefore <= CostAfter)
1411  return false;
1412 
1413  // The cost model has passed, create the new instructions.
1414  Builder.SetInsertPoint(SVI0A);
1415  Value *NSV0A = Builder.CreateShuffleVector(SVI0A->getOperand(0),
1416  SVI0A->getOperand(1), V1A);
1417  Builder.SetInsertPoint(SVI0B);
1418  Value *NSV0B = Builder.CreateShuffleVector(SVI0B->getOperand(0),
1419  SVI0B->getOperand(1), V1B);
1420  Builder.SetInsertPoint(SVI1A);
1421  Value *NSV1A = Builder.CreateShuffleVector(SVI1A->getOperand(0),
1422  SVI1A->getOperand(1), V2A);
1423  Builder.SetInsertPoint(SVI1B);
1424  Value *NSV1B = Builder.CreateShuffleVector(SVI1B->getOperand(0),
1425  SVI1B->getOperand(1), V2B);
1426  Builder.SetInsertPoint(Op0);
1427  Value *NOp0 = Builder.CreateBinOp((Instruction::BinaryOps)Op0->getOpcode(),
1428  NSV0A, NSV0B);
1429  if (auto *I = dyn_cast<Instruction>(NOp0))
1430  I->copyIRFlags(Op0, true);
1431  Builder.SetInsertPoint(Op1);
1432  Value *NOp1 = Builder.CreateBinOp((Instruction::BinaryOps)Op1->getOpcode(),
1433  NSV1A, NSV1B);
1434  if (auto *I = dyn_cast<Instruction>(NOp1))
1435  I->copyIRFlags(Op1, true);
1436 
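  // Rebuild each original output shuffle from the two new binops using the
  // reconstruction masks computed earlier.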
1437  for (int S = 0, E = ReconstructMasks.size(); S != E; S++) {
1438  Builder.SetInsertPoint(Shuffles[S]);
1439  Value *NSV = Builder.CreateShuffleVector(NOp0, NOp1, ReconstructMasks[S]);
1440  replaceValue(*Shuffles[S], *NSV);
1441  }
1442 
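  // Queue the new input shuffles and the replaced output shuffles so that
  // follow-on combines can revisit and clean them up.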
1443  Worklist.pushValue(NSV0A);
1444  Worklist.pushValue(NSV0B);
1445  Worklist.pushValue(NSV1A);
1446  Worklist.pushValue(NSV1B);
1447  for (auto *S : Shuffles)
1448  Worklist.add(S);
1449  return true;
1450 }
1451 
1452 /// This is the entry point for all transforms. Pass manager differences are
1453 /// handled in the callers of this function.
1454 bool VectorCombine::run() {
1455  if (DisableVectorCombine)
1456  return false;
1457 
1458  // Don't attempt vectorization if the target does not support vectors.
1459  if (!TTI.getNumberOfRegisters(TTI.getRegisterClassForType(/*Vector*/ true)))
1460  return false;
1461 
1462  bool MadeChange = false;
1463  auto FoldInst = [this, &MadeChange](Instruction &I) {
1464  Builder.SetInsertPoint(&I);
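  // Folds that may introduce new vector instructions are skipped when only
  // scalarization combines were requested.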
1465  if (!ScalarizationOnly) {
1466  MadeChange |= vectorizeLoadInsert(I);
1467  MadeChange |= foldExtractExtract(I);
1468  MadeChange |= foldBitcastShuf(I);
1469  MadeChange |= foldExtractedCmps(I);
1470  MadeChange |= foldShuffleOfBinops(I);
1471  MadeChange |= foldShuffleFromReductions(I);
1472  MadeChange |= foldSelectShuffle(I);
1473  }
1474  MadeChange |= scalarizeBinopOrCmp(I);
1475  MadeChange |= scalarizeLoadExtract(I);
1476  MadeChange |= foldSingleElementStore(I);
1477  };
1478  for (BasicBlock &BB : F) {
1479  // Ignore unreachable basic blocks.
1480  if (!DT.isReachableFromEntry(&BB))
1481  continue;
1482  // Use an early-increment range so that we can erase instructions in the loop.
1483  for (Instruction &I : make_early_inc_range(BB)) {
1484  if (I.isDebugOrPseudoInst())
1485  continue;
1486  FoldInst(I);
1487  }
1488  }
1489 
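  // Drain the worklist populated by the folds above: erase instructions that
  // have become trivially dead and re-run the folds on the rest.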
1490  while (!Worklist.isEmpty()) {
1491  Instruction *I = Worklist.removeOne();
1492  if (!I)
1493  continue;
1494 
1495  if (isInstructionTriviallyDead(I)) {
1496  eraseInstruction(*I);
1497  continue;
1498  }
1499 
1500  FoldInst(*I);
1501  }
1502 
1503  return MadeChange;
1504 }
1505 
1506 // Pass manager boilerplate below here.
1507 
1508 namespace {
1509 class VectorCombineLegacyPass : public FunctionPass {
1510 public:
1511  static char ID;
1512  VectorCombineLegacyPass() : FunctionPass(ID) {
1513  initializeVectorCombineLegacyPassPass(*PassRegistry::getPassRegistry());
1514  }
1515 
1516  void getAnalysisUsage(AnalysisUsage &AU) const override {
1517  AU.addRequired<AssumptionCacheTracker>();
1518  AU.addRequired<DominatorTreeWrapperPass>();
1519  AU.addRequired<TargetTransformInfoWrapperPass>();
1520  AU.addRequired<AAResultsWrapperPass>();
1521  AU.setPreservesCFG();
1522  AU.addPreserved<DominatorTreeWrapperPass>();
1523  AU.addPreserved<GlobalsAAWrapperPass>();
1524  AU.addPreserved<AAResultsWrapperPass>();
1525  AU.addPreserved<BasicAAWrapperPass>();
1526  FunctionPass::getAnalysisUsage(AU);
1527  }
1528 
1529  bool runOnFunction(Function &F) override {
1530  if (skipFunction(F))
1531  return false;
1532  auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
1533  auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
1534  auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1535  auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
1536  VectorCombine Combiner(F, TTI, DT, AA, AC, false);
1537  return Combiner.run();
1538  }
1539 };
1540 } // namespace
1541 
1542 char VectorCombineLegacyPass::ID = 0;
1543 INITIALIZE_PASS_BEGIN(VectorCombineLegacyPass, "vector-combine",
1544  "Optimize scalar/vector ops", false,
1545  false)
1546 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
1547 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
1548 INITIALIZE_PASS_END(VectorCombineLegacyPass, "vector-combine",
1549  "Optimize scalar/vector ops", false, false)
1550 Pass *llvm::createVectorCombinePass() {
1551  return new VectorCombineLegacyPass();
1552 }
1553 
1554 PreservedAnalyses VectorCombinePass::run(Function &F,
1555  FunctionAnalysisManager &FAM) {
1556  auto &AC = FAM.getResult<AssumptionAnalysis>(F);
1557  TargetTransformInfo &TTI = FAM.getResult<TargetIRAnalysis>(F);
1558  DominatorTree &DT = FAM.getResult<DominatorTreeAnalysis>(F);
1559  AAResults &AA = FAM.getResult<AAManager>(F);
1560  VectorCombine Combiner(F, TTI, DT, AA, AC, ScalarizationOnly);
1561  if (!Combiner.run())
1562  return PreservedAnalyses::all();
1563  PreservedAnalyses PA;
1564  PA.preserveSet<CFGAnalyses>();
1565  return PA;
1566 }
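
A minimal usage sketch, assuming the standard new-pass-manager headers: the pass can be scheduled directly on a function pipeline. The addVectorCombine helper below is hypothetical, and the comment on the scalarization-only mode refers to the ScalarizationOnly flag threaded through the code above rather than to a documented constructor signature.

  #include "llvm/IR/PassManager.h"
  #include "llvm/Transforms/Vectorize/VectorCombine.h"

  // Hypothetical helper: append vector-combine to an existing function pipeline.
  static void addVectorCombine(llvm::FunctionPassManager &FPM) {
    // A default-constructed VectorCombinePass runs the full set of folds; the
    // scalarization-only mode seen above is selected at construction time.
    FPM.addPass(llvm::VectorCombinePass());
  }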