LLVM  14.0.0git
VectorCombine.cpp
1 //===------- VectorCombine.cpp - Optimize partial vector operations -------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass optimizes scalar/vector interactions using target cost models. The
10 // transforms implemented here may not fit in traditional loop-based or SLP
11 // vectorization passes.
12 //
13 //===----------------------------------------------------------------------===//
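// Editor's note (not part of the original file): the pass is registered under
// the name "vector-combine", so it can be exercised in isolation with, e.g.,
//   opt -passes=vector-combine input.ll -S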
14 
15 #include "llvm/Transforms/Vectorize/VectorCombine.h"
16 #include "llvm/ADT/Statistic.h"
17 #include "llvm/Analysis/AssumptionCache.h"
18 #include "llvm/Analysis/BasicAliasAnalysis.h"
19 #include "llvm/Analysis/GlobalsModRef.h"
20 #include "llvm/Analysis/Loads.h"
21 #include "llvm/Analysis/TargetTransformInfo.h"
22 #include "llvm/Analysis/ValueTracking.h"
23 #include "llvm/Analysis/VectorUtils.h"
24 #include "llvm/IR/Dominators.h"
25 #include "llvm/IR/Function.h"
26 #include "llvm/IR/IRBuilder.h"
27 #include "llvm/IR/PatternMatch.h"
28 #include "llvm/InitializePasses.h"
29 #include "llvm/Pass.h"
30 #include "llvm/Support/CommandLine.h"
31 #include "llvm/Transforms/Utils/Local.h"
32 #include "llvm/Transforms/Vectorize.h"
33 
34 #define DEBUG_TYPE "vector-combine"
35 #include "llvm/Transforms/Utils/InstructionWorklist.h"
36 
37 using namespace llvm;
38 using namespace llvm::PatternMatch;
39 
40 STATISTIC(NumVecLoad, "Number of vector loads formed");
41 STATISTIC(NumVecCmp, "Number of vector compares formed");
42 STATISTIC(NumVecBO, "Number of vector binops formed");
43 STATISTIC(NumVecCmpBO, "Number of vector compare + binop formed");
44 STATISTIC(NumShufOfBitcast, "Number of shuffles moved after bitcast");
45 STATISTIC(NumScalarBO, "Number of scalar binops formed");
46 STATISTIC(NumScalarCmp, "Number of scalar compares formed");
47 
48 static cl::opt<bool> DisableVectorCombine(
49  "disable-vector-combine", cl::init(false), cl::Hidden,
50  cl::desc("Disable all vector combine transforms"));
51 
52 static cl::opt<bool> DisableBinopExtractShuffle(
53  "disable-binop-extract-shuffle", cl::init(false), cl::Hidden,
54  cl::desc("Disable binop extract to shuffle transforms"));
55 
56 static cl::opt<unsigned> MaxInstrsToScan(
57  "vector-combine-max-scan-instrs", cl::init(30), cl::Hidden,
58  cl::desc("Max number of instructions to scan for vector combining."));
59 
60 static const unsigned InvalidIndex = std::numeric_limits<unsigned>::max();
61 
62 namespace {
63 class VectorCombine {
64 public:
65  VectorCombine(Function &F, const TargetTransformInfo &TTI,
66  const DominatorTree &DT, AAResults &AA, AssumptionCache &AC,
67  bool ScalarizationOnly)
68  : F(F), Builder(F.getContext()), TTI(TTI), DT(DT), AA(AA), AC(AC),
69  ScalarizationOnly(ScalarizationOnly) {}
70 
71  bool run();
72 
73 private:
74  Function &F;
75  IRBuilder<> Builder;
76  const TargetTransformInfo &TTI;
77  const DominatorTree &DT;
78  AAResults &AA;
79  AssumptionCache &AC;
80 
81  /// If true only perform scalarization combines and do not introduce new
82  /// vector operations.
83  bool ScalarizationOnly;
84 
85  InstructionWorklist Worklist;
86 
87  bool vectorizeLoadInsert(Instruction &I);
88  ExtractElementInst *getShuffleExtract(ExtractElementInst *Ext0,
89  ExtractElementInst *Ext1,
90  unsigned PreferredExtractIndex) const;
91  bool isExtractExtractCheap(ExtractElementInst *Ext0, ExtractElementInst *Ext1,
92  const Instruction &I,
93  ExtractElementInst *&ConvertToShuffle,
94  unsigned PreferredExtractIndex);
95  void foldExtExtCmp(ExtractElementInst *Ext0, ExtractElementInst *Ext1,
96  Instruction &I);
97  void foldExtExtBinop(ExtractElementInst *Ext0, ExtractElementInst *Ext1,
98  Instruction &I);
99  bool foldExtractExtract(Instruction &I);
100  bool foldBitcastShuf(Instruction &I);
101  bool scalarizeBinopOrCmp(Instruction &I);
102  bool foldExtractedCmps(Instruction &I);
103  bool foldSingleElementStore(Instruction &I);
104  bool scalarizeLoadExtract(Instruction &I);
105  bool foldShuffleOfBinops(Instruction &I);
106 
107  void replaceValue(Value &Old, Value &New) {
108  Old.replaceAllUsesWith(&New);
109  New.takeName(&Old);
110  if (auto *NewI = dyn_cast<Instruction>(&New)) {
111  Worklist.pushUsersToWorkList(*NewI);
112  Worklist.pushValue(NewI);
113  }
114  Worklist.pushValue(&Old);
115  }
116 
117  void eraseInstruction(Instruction &I) {
118  for (Value *Op : I.operands())
119  Worklist.pushValue(Op);
120  Worklist.remove(&I);
121  I.eraseFromParent();
122  }
123 };
124 } // namespace
125 
126 bool VectorCombine::vectorizeLoadInsert(Instruction &I) {
127  // Match insert into fixed vector of scalar value.
128  // TODO: Handle non-zero insert index.
129  auto *Ty = dyn_cast<FixedVectorType>(I.getType());
130  Value *Scalar;
131  if (!Ty || !match(&I, m_InsertElt(m_Undef(), m_Value(Scalar), m_ZeroInt())) ||
132  !Scalar->hasOneUse())
133  return false;
134 
135  // Optionally match an extract from another vector.
136  Value *X;
137  bool HasExtract = match(Scalar, m_ExtractElt(m_Value(X), m_ZeroInt()));
138  if (!HasExtract)
139  X = Scalar;
140 
141  // Match source value as load of scalar or vector.
142  // Do not vectorize scalar load (widening) if atomic/volatile or under
143  // asan/hwasan/memtag/tsan. The widened load may load data from dirty regions
144  // or create data races non-existent in the source.
145  auto *Load = dyn_cast<LoadInst>(X);
146  if (!Load || !Load->isSimple() || !Load->hasOneUse() ||
147  Load->getFunction()->hasFnAttribute(Attribute::SanitizeMemTag) ||
148  mustSuppressSpeculation(*Load))
149  return false;
150 
151  const DataLayout &DL = I.getModule()->getDataLayout();
152  Value *SrcPtr = Load->getPointerOperand()->stripPointerCasts();
153  assert(isa<PointerType>(SrcPtr->getType()) && "Expected a pointer type");
154 
155  // If original AS != Load's AS, we can't bitcast the original pointer and have
156  // to use Load's operand instead. Ideally we would want to strip pointer casts
157  // without changing AS, but there's no API to do that ATM.
158  unsigned AS = Load->getPointerAddressSpace();
159  if (AS != SrcPtr->getType()->getPointerAddressSpace())
160  SrcPtr = Load->getPointerOperand();
161 
162  // We are potentially transforming byte-sized (8-bit) memory accesses, so make
163  // sure we have all of our type-based constraints in place for this target.
164  Type *ScalarTy = Scalar->getType();
165  uint64_t ScalarSize = ScalarTy->getPrimitiveSizeInBits();
166  unsigned MinVectorSize = TTI.getMinVectorRegisterBitWidth();
167  if (!ScalarSize || !MinVectorSize || MinVectorSize % ScalarSize != 0 ||
168  ScalarSize % 8 != 0)
169  return false;
170 
171  // Check safety of replacing the scalar load with a larger vector load.
172  // We use minimal alignment (maximum flexibility) because we only care about
173  // the dereferenceable region. When calculating cost and creating a new op,
174  // we may use a larger value based on alignment attributes.
175  unsigned MinVecNumElts = MinVectorSize / ScalarSize;
176  auto *MinVecTy = VectorType::get(ScalarTy, MinVecNumElts, false);
177  unsigned OffsetEltIndex = 0;
178  Align Alignment = Load->getAlign();
179  if (!isSafeToLoadUnconditionally(SrcPtr, MinVecTy, Align(1), DL, Load, &DT)) {
180  // It is not safe to load directly from the pointer, but we can still peek
181  // through gep offsets and check if it safe to load from a base address with
182  // updated alignment. If it is, we can shuffle the element(s) into place
183  // after loading.
184  unsigned OffsetBitWidth = DL.getIndexTypeSizeInBits(SrcPtr->getType());
185  APInt Offset(OffsetBitWidth, 0);
186  SrcPtr = SrcPtr->stripAndAccumulateInBoundsConstantOffsets(DL, Offset);
187 
188  // We want to shuffle the result down from a high element of a vector, so
189  // the offset must be positive.
190  if (Offset.isNegative())
191  return false;
192 
193  // The offset must be a multiple of the scalar element to shuffle cleanly
194  // in the element's size.
195  uint64_t ScalarSizeInBytes = ScalarSize / 8;
196  if (Offset.urem(ScalarSizeInBytes) != 0)
197  return false;
198 
199  // If we load MinVecNumElts, will our target element still be loaded?
200  OffsetEltIndex = Offset.udiv(ScalarSizeInBytes).getZExtValue();
201  if (OffsetEltIndex >= MinVecNumElts)
202  return false;
203 
204  if (!isSafeToLoadUnconditionally(SrcPtr, MinVecTy, Align(1), DL, Load, &DT))
205  return false;
206 
207  // Update alignment with offset value. Note that the offset could be negated
208  // to more accurately represent "(new) SrcPtr - Offset = (old) SrcPtr", but
209  // negation does not change the result of the alignment calculation.
210  Alignment = commonAlignment(Alignment, Offset.getZExtValue());
211  }
212 
213  // Original pattern: insertelt undef, load [free casts of] PtrOp, 0
214  // Use the greater of the alignment on the load or its source pointer.
215  Alignment = std::max(SrcPtr->getPointerAlignment(DL), Alignment);
216  Type *LoadTy = Load->getType();
217  InstructionCost OldCost =
218  TTI.getMemoryOpCost(Instruction::Load, LoadTy, Alignment, AS);
219  APInt DemandedElts = APInt::getOneBitSet(MinVecNumElts, 0);
220  OldCost += TTI.getScalarizationOverhead(MinVecTy, DemandedElts,
221  /* Insert */ true, HasExtract);
222 
223  // New pattern: load VecPtr
224  InstructionCost NewCost =
225  TTI.getMemoryOpCost(Instruction::Load, MinVecTy, Alignment, AS);
226  // Optionally, we are shuffling the loaded vector element(s) into place.
227  // For the mask set everything but element 0 to undef to prevent poison from
228  // propagating from the extra loaded memory. This will also optionally
229  // shrink/grow the vector from the loaded size to the output size.
230  // We assume this operation has no cost in codegen if there was no offset.
231  // Note that we could use freeze to avoid poison problems, but then we might
232  // still need a shuffle to change the vector size.
233  unsigned OutputNumElts = Ty->getNumElements();
234  SmallVector<int, 16> Mask(OutputNumElts, UndefMaskElem);
235  assert(OffsetEltIndex < MinVecNumElts && "Address offset too big");
236  Mask[0] = OffsetEltIndex;
237  if (OffsetEltIndex)
238  NewCost += TTI.getShuffleCost(TTI::SK_PermuteSingleSrc, MinVecTy, Mask);
239 
240  // We can aggressively convert to the vector form because the backend can
241  // invert this transform if it does not result in a performance win.
242  if (OldCost < NewCost || !NewCost.isValid())
243  return false;
244 
245  // It is safe and potentially profitable to load a vector directly:
246  // inselt undef, load Scalar, 0 --> load VecPtr
247  IRBuilder<> Builder(Load);
248  Value *CastedPtr = Builder.CreateBitCast(SrcPtr, MinVecTy->getPointerTo(AS));
249  Value *VecLd = Builder.CreateAlignedLoad(MinVecTy, CastedPtr, Alignment);
250  VecLd = Builder.CreateShuffleVector(VecLd, Mask);
251 
252  replaceValue(I, *VecLd);
253  ++NumVecLoad;
254  return true;
255 }
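// Illustrative IR for the transform above (editor-added sketch; exact types
// depend on the target's minimum vector register width). Assuming 128-bit
// vector registers, a pattern such as:
//   %s = load float, float* %p, align 4
//   %i = insertelement <4 x float> undef, float %s, i32 0
// may be rewritten to:
//   %vp = bitcast float* %p to <4 x float>*
//   %v  = load <4 x float>, <4 x float>* %vp, align 4
// followed by a shufflevector that keeps lane 0 and leaves the other lanes
// undefined, provided the wider load is known dereferenceable and the cost
// model agrees.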
256 
257 /// Determine which, if any, of the inputs should be replaced by a shuffle
258 /// followed by extract from a different index.
259 ExtractElementInst *VectorCombine::getShuffleExtract(
260  ExtractElementInst *Ext0, ExtractElementInst *Ext1,
261  unsigned PreferredExtractIndex = InvalidIndex) const {
262  assert(isa<ConstantInt>(Ext0->getIndexOperand()) &&
263  isa<ConstantInt>(Ext1->getIndexOperand()) &&
264  "Expected constant extract indexes");
265 
266  unsigned Index0 = cast<ConstantInt>(Ext0->getIndexOperand())->getZExtValue();
267  unsigned Index1 = cast<ConstantInt>(Ext1->getIndexOperand())->getZExtValue();
268 
269  // If the extract indexes are identical, no shuffle is needed.
270  if (Index0 == Index1)
271  return nullptr;
272 
273  Type *VecTy = Ext0->getVectorOperand()->getType();
274  assert(VecTy == Ext1->getVectorOperand()->getType() && "Need matching types");
275  InstructionCost Cost0 =
276  TTI.getVectorInstrCost(Ext0->getOpcode(), VecTy, Index0);
277  InstructionCost Cost1 =
278  TTI.getVectorInstrCost(Ext1->getOpcode(), VecTy, Index1);
279 
280  // If both costs are invalid no shuffle is needed
281  if (!Cost0.isValid() && !Cost1.isValid())
282  return nullptr;
283 
284  // We are extracting from 2 different indexes, so one operand must be shuffled
285  // before performing a vector operation and/or extract. The more expensive
286  // extract will be replaced by a shuffle.
287  if (Cost0 > Cost1)
288  return Ext0;
289  if (Cost1 > Cost0)
290  return Ext1;
291 
292  // If the costs are equal and there is a preferred extract index, shuffle the
293  // opposite operand.
294  if (PreferredExtractIndex == Index0)
295  return Ext1;
296  if (PreferredExtractIndex == Index1)
297  return Ext0;
298 
299  // Otherwise, replace the extract with the higher index.
300  return Index0 > Index1 ? Ext0 : Ext1;
301 }
302 
303 /// Compare the relative costs of 2 extracts followed by scalar operation vs.
304 /// vector operation(s) followed by extract. Return true if the existing
305 /// instructions are cheaper than a vector alternative. Otherwise, return false
306 /// and if one of the extracts should be transformed to a shufflevector, set
307 /// \p ConvertToShuffle to that extract instruction.
308 bool VectorCombine::isExtractExtractCheap(ExtractElementInst *Ext0,
309  ExtractElementInst *Ext1,
310  const Instruction &I,
311  ExtractElementInst *&ConvertToShuffle,
312  unsigned PreferredExtractIndex) {
313  assert(isa<ConstantInt>(Ext0->getOperand(1)) &&
314  isa<ConstantInt>(Ext1->getOperand(1)) &&
315  "Expected constant extract indexes");
316  unsigned Opcode = I.getOpcode();
317  Type *ScalarTy = Ext0->getType();
318  auto *VecTy = cast<VectorType>(Ext0->getOperand(0)->getType());
319  InstructionCost ScalarOpCost, VectorOpCost;
320 
321  // Get cost estimates for scalar and vector versions of the operation.
322  bool IsBinOp = Instruction::isBinaryOp(Opcode);
323  if (IsBinOp) {
324  ScalarOpCost = TTI.getArithmeticInstrCost(Opcode, ScalarTy);
325  VectorOpCost = TTI.getArithmeticInstrCost(Opcode, VecTy);
326  } else {
327  assert((Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) &&
328  "Expected a compare");
329  CmpInst::Predicate Pred = cast<CmpInst>(I).getPredicate();
330  ScalarOpCost = TTI.getCmpSelInstrCost(
331  Opcode, ScalarTy, CmpInst::makeCmpResultType(ScalarTy), Pred);
332  VectorOpCost = TTI.getCmpSelInstrCost(
333  Opcode, VecTy, CmpInst::makeCmpResultType(VecTy), Pred);
334  }
335 
336  // Get cost estimates for the extract elements. These costs will factor into
337  // both sequences.
338  unsigned Ext0Index = cast<ConstantInt>(Ext0->getOperand(1))->getZExtValue();
339  unsigned Ext1Index = cast<ConstantInt>(Ext1->getOperand(1))->getZExtValue();
340 
341  InstructionCost Extract0Cost =
342  TTI.getVectorInstrCost(Instruction::ExtractElement, VecTy, Ext0Index);
343  InstructionCost Extract1Cost =
344  TTI.getVectorInstrCost(Instruction::ExtractElement, VecTy, Ext1Index);
345 
346  // A more expensive extract will always be replaced by a splat shuffle.
347  // For example, if Ext0 is more expensive:
348  // opcode (extelt V0, Ext0), (ext V1, Ext1) -->
349  // extelt (opcode (splat V0, Ext0), V1), Ext1
350  // TODO: Evaluate whether that always results in lowest cost. Alternatively,
351  // check the cost of creating a broadcast shuffle and shuffling both
352  // operands to element 0.
353  InstructionCost CheapExtractCost = std::min(Extract0Cost, Extract1Cost);
354 
355  // Extra uses of the extracts mean that we include those costs in the
356  // vector total because those instructions will not be eliminated.
357  InstructionCost OldCost, NewCost;
358  if (Ext0->getOperand(0) == Ext1->getOperand(0) && Ext0Index == Ext1Index) {
359  // Handle a special case. If the 2 extracts are identical, adjust the
360  // formulas to account for that. The extra use charge allows for either the
361  // CSE'd pattern or an unoptimized form with identical values:
362  // opcode (extelt V, C), (extelt V, C) --> extelt (opcode V, V), C
363  bool HasUseTax = Ext0 == Ext1 ? !Ext0->hasNUses(2)
364  : !Ext0->hasOneUse() || !Ext1->hasOneUse();
365  OldCost = CheapExtractCost + ScalarOpCost;
366  NewCost = VectorOpCost + CheapExtractCost + HasUseTax * CheapExtractCost;
367  } else {
368  // Handle the general case. Each extract is actually a different value:
369  // opcode (extelt V0, C0), (extelt V1, C1) --> extelt (opcode V0, V1), C
370  OldCost = Extract0Cost + Extract1Cost + ScalarOpCost;
371  NewCost = VectorOpCost + CheapExtractCost +
372  !Ext0->hasOneUse() * Extract0Cost +
373  !Ext1->hasOneUse() * Extract1Cost;
374  }
375 
376  ConvertToShuffle = getShuffleExtract(Ext0, Ext1, PreferredExtractIndex);
377  if (ConvertToShuffle) {
378  if (IsBinOp && DisableBinopExtractShuffle)
379  return true;
380 
381  // If we are extracting from 2 different indexes, then one operand must be
382  // shuffled before performing the vector operation. The shuffle mask is
383  // undefined except for 1 lane that is being translated to the remaining
384  // extraction lane. Therefore, it is a splat shuffle. Ex:
385  // ShufMask = { undef, undef, 0, undef }
386  // TODO: The cost model has an option for a "broadcast" shuffle
387  // (splat-from-element-0), but no option for a more general splat.
388  NewCost +=
389  TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, VecTy);
390  }
391 
392  // Aggressively form a vector op if the cost is equal because the transform
393  // may enable further optimization.
394  // Codegen can reverse this transform (scalarize) if it was not profitable.
395  return OldCost < NewCost;
396 }
397 
398 /// Create a shuffle that translates (shifts) 1 element from the input vector
399 /// to a new element location.
400 static Value *createShiftShuffle(Value *Vec, unsigned OldIndex,
401  unsigned NewIndex, IRBuilder<> &Builder) {
402  // The shuffle mask is undefined except for 1 lane that is being translated
403  // to the new element index. Example for OldIndex == 2 and NewIndex == 0:
404  // ShufMask = { 2, undef, undef, undef }
405  auto *VecTy = cast<FixedVectorType>(Vec->getType());
406  SmallVector<int, 32> ShufMask(VecTy->getNumElements(), UndefMaskElem);
407  ShufMask[NewIndex] = OldIndex;
408  return Builder.CreateShuffleVector(Vec, ShufMask, "shift");
409 }
410 
411 /// Given an extract element instruction with constant index operand, shuffle
412 /// the source vector (shift the scalar element) to a NewIndex for extraction.
413 /// Return null if the input can be constant folded, so that we are not creating
414 /// unnecessary instructions.
415 static ExtractElementInst *translateExtract(ExtractElementInst *ExtElt,
416  unsigned NewIndex,
417  IRBuilder<> &Builder) {
418  // If the extract can be constant-folded, this code is unsimplified. Defer
419  // to other passes to handle that.
420  Value *X = ExtElt->getVectorOperand();
421  Value *C = ExtElt->getIndexOperand();
422  assert(isa<ConstantInt>(C) && "Expected a constant index operand");
423  if (isa<Constant>(X))
424  return nullptr;
425 
426  Value *Shuf = createShiftShuffle(X, cast<ConstantInt>(C)->getZExtValue(),
427  NewIndex, Builder);
428  return cast<ExtractElementInst>(Builder.CreateExtractElement(Shuf, NewIndex));
429 }
430 
431 /// Try to reduce extract element costs by converting scalar compares to vector
432 /// compares followed by extract.
433 /// cmp (ext0 V0, C), (ext1 V1, C)
434 void VectorCombine::foldExtExtCmp(ExtractElementInst *Ext0,
435  ExtractElementInst *Ext1, Instruction &I) {
436  assert(isa<CmpInst>(&I) && "Expected a compare");
437  assert(cast<ConstantInt>(Ext0->getIndexOperand())->getZExtValue() ==
438  cast<ConstantInt>(Ext1->getIndexOperand())->getZExtValue() &&
439  "Expected matching constant extract indexes");
440 
441  // cmp Pred (extelt V0, C), (extelt V1, C) --> extelt (cmp Pred V0, V1), C
442  ++NumVecCmp;
443  CmpInst::Predicate Pred = cast<CmpInst>(&I)->getPredicate();
444  Value *V0 = Ext0->getVectorOperand(), *V1 = Ext1->getVectorOperand();
445  Value *VecCmp = Builder.CreateCmp(Pred, V0, V1);
446  Value *NewExt = Builder.CreateExtractElement(VecCmp, Ext0->getIndexOperand());
447  replaceValue(I, *NewExt);
448 }
449 
450 /// Try to reduce extract element costs by converting scalar binops to vector
451 /// binops followed by extract.
452 /// bo (ext0 V0, C), (ext1 V1, C)
453 void VectorCombine::foldExtExtBinop(ExtractElementInst *Ext0,
454  ExtractElementInst *Ext1, Instruction &I) {
455  assert(isa<BinaryOperator>(&I) && "Expected a binary operator");
456  assert(cast<ConstantInt>(Ext0->getIndexOperand())->getZExtValue() ==
457  cast<ConstantInt>(Ext1->getIndexOperand())->getZExtValue() &&
458  "Expected matching constant extract indexes");
459 
460  // bo (extelt V0, C), (extelt V1, C) --> extelt (bo V0, V1), C
461  ++NumVecBO;
462  Value *V0 = Ext0->getVectorOperand(), *V1 = Ext1->getVectorOperand();
463  Value *VecBO =
464  Builder.CreateBinOp(cast<BinaryOperator>(&I)->getOpcode(), V0, V1);
465 
466  // All IR flags are safe to back-propagate because any potential poison
467  // created in unused vector elements is discarded by the extract.
468  if (auto *VecBOInst = dyn_cast<Instruction>(VecBO))
469  VecBOInst->copyIRFlags(&I);
470 
471  Value *NewExt = Builder.CreateExtractElement(VecBO, Ext0->getIndexOperand());
472  replaceValue(I, *NewExt);
473 }
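// Illustrative IR (editor-added) for the extract/extract fold with matching
// extract indexes:
//   %a = extractelement <4 x i32> %x, i32 2
//   %b = extractelement <4 x i32> %y, i32 2
//   %r = add i32 %a, %b
// becomes, when the cost model favors it:
//   %v = add <4 x i32> %x, %y
//   %r = extractelement <4 x i32> %v, i32 2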
474 
475 /// Match an instruction with extracted vector operands.
476 bool VectorCombine::foldExtractExtract(Instruction &I) {
477  // It is not safe to transform things like div, urem, etc. because we may
478  // create undefined behavior when executing those on unknown vector elements.
479  if (!isSafeToSpeculativelyExecute(&I))
480  return false;
481 
482  Instruction *I0, *I1;
483  CmpInst::Predicate Pred = CmpInst::BAD_ICMP_PREDICATE;
484  if (!match(&I, m_Cmp(Pred, m_Instruction(I0), m_Instruction(I1))) &&
485  !match(&I, m_BinOp(m_Instruction(I0), m_Instruction(I1))))
486  return false;
487 
488  Value *V0, *V1;
489  uint64_t C0, C1;
490  if (!match(I0, m_ExtractElt(m_Value(V0), m_ConstantInt(C0))) ||
491  !match(I1, m_ExtractElt(m_Value(V1), m_ConstantInt(C1))) ||
492  V0->getType() != V1->getType())
493  return false;
494 
495  // If the scalar value 'I' is going to be re-inserted into a vector, then try
496  // to create an extract to that same element. The extract/insert can be
497  // reduced to a "select shuffle".
498  // TODO: If we add a larger pattern match that starts from an insert, this
499  // probably becomes unnecessary.
500  auto *Ext0 = cast<ExtractElementInst>(I0);
501  auto *Ext1 = cast<ExtractElementInst>(I1);
502  uint64_t InsertIndex = InvalidIndex;
503  if (I.hasOneUse())
504  match(I.user_back(),
505  m_InsertElt(m_Value(), m_Value(), m_ConstantInt(InsertIndex)));
506 
507  ExtractElementInst *ExtractToChange;
508  if (isExtractExtractCheap(Ext0, Ext1, I, ExtractToChange, InsertIndex))
509  return false;
510 
511  if (ExtractToChange) {
512  unsigned CheapExtractIdx = ExtractToChange == Ext0 ? C1 : C0;
513  ExtractElementInst *NewExtract =
514  translateExtract(ExtractToChange, CheapExtractIdx, Builder);
515  if (!NewExtract)
516  return false;
517  if (ExtractToChange == Ext0)
518  Ext0 = NewExtract;
519  else
520  Ext1 = NewExtract;
521  }
522 
523  if (Pred != CmpInst::BAD_ICMP_PREDICATE)
524  foldExtExtCmp(Ext0, Ext1, I);
525  else
526  foldExtExtBinop(Ext0, Ext1, I);
527 
528  Worklist.push(Ext0);
529  Worklist.push(Ext1);
530  return true;
531 }
532 
533 /// If this is a bitcast of a shuffle, try to bitcast the source vector to the
534 /// destination type followed by shuffle. This can enable further transforms by
535 /// moving bitcasts or shuffles together.
536 bool VectorCombine::foldBitcastShuf(Instruction &I) {
537  Value *V;
538  ArrayRef<int> Mask;
539  if (!match(&I, m_BitCast(
540  m_OneUse(m_Shuffle(m_Value(V), m_Undef(), m_Mask(Mask))))))
541  return false;
542 
543  // 1) Do not fold bitcast shuffle for scalable type. First, shuffle cost for
544  // scalable type is unknown; Second, we cannot reason if the narrowed shuffle
545  // mask for scalable type is a splat or not.
546  // 2) Disallow non-vector casts and length-changing shuffles.
547  // TODO: We could allow any shuffle.
548  auto *DestTy = dyn_cast<FixedVectorType>(I.getType());
549  auto *SrcTy = dyn_cast<FixedVectorType>(V->getType());
550  if (!SrcTy || !DestTy || I.getOperand(0)->getType() != SrcTy)
551  return false;
552 
553  unsigned DestNumElts = DestTy->getNumElements();
554  unsigned SrcNumElts = SrcTy->getNumElements();
555  SmallVector<int, 16> NewMask;
556  if (SrcNumElts <= DestNumElts) {
557  // The bitcast is from wide to narrow/equal elements. The shuffle mask can
558  // always be expanded to the equivalent form choosing narrower elements.
559  assert(DestNumElts % SrcNumElts == 0 && "Unexpected shuffle mask");
560  unsigned ScaleFactor = DestNumElts / SrcNumElts;
561  narrowShuffleMaskElts(ScaleFactor, Mask, NewMask);
562  } else {
563  // The bitcast is from narrow elements to wide elements. The shuffle mask
564  // must choose consecutive elements to allow casting first.
565  assert(SrcNumElts % DestNumElts == 0 && "Unexpected shuffle mask");
566  unsigned ScaleFactor = SrcNumElts / DestNumElts;
567  if (!widenShuffleMaskElts(ScaleFactor, Mask, NewMask))
568  return false;
569  }
570 
571  // The new shuffle must not cost more than the old shuffle. The bitcast is
572  // moved ahead of the shuffle, so assume that it has the same cost as before.
573  InstructionCost DestCost = TTI.getShuffleCost(
574  TargetTransformInfo::SK_PermuteSingleSrc, DestTy, NewMask);
575  InstructionCost SrcCost =
576  TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, SrcTy, Mask);
577  if (DestCost > SrcCost || !DestCost.isValid())
578  return false;
579 
580  // bitcast (shuf V, MaskC) --> shuf (bitcast V), MaskC'
581  ++NumShufOfBitcast;
582  Value *CastV = Builder.CreateBitCast(V, DestTy);
583  Value *Shuf = Builder.CreateShuffleVector(CastV, NewMask);
584  replaceValue(I, *Shuf);
585  return true;
586 }
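// Illustrative IR (editor-added) for the bitcast-of-shuffle fold, narrowing
// from <4 x i32> to <8 x i16> (the mask indices are scaled by 2):
//   %s = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
//   %b = bitcast <4 x i32> %s to <8 x i16>
// may become:
//   %b = bitcast <4 x i32> %v to <8 x i16>
//   %s = shufflevector <8 x i16> %b, <8 x i16> undef,
//        <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 6, i32 7, i32 4, i32 5>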
587 
588 /// Match a vector binop or compare instruction with at least one inserted
589 /// scalar operand and convert to scalar binop/cmp followed by insertelement.
590 bool VectorCombine::scalarizeBinopOrCmp(Instruction &I) {
591  CmpInst::Predicate Pred = CmpInst::BAD_ICMP_PREDICATE;
592  Value *Ins0, *Ins1;
593  if (!match(&I, m_BinOp(m_Value(Ins0), m_Value(Ins1))) &&
594  !match(&I, m_Cmp(Pred, m_Value(Ins0), m_Value(Ins1))))
595  return false;
596 
597  // Do not convert the vector condition of a vector select into a scalar
598  // condition. That may cause problems for codegen because of differences in
599  // boolean formats and register-file transfers.
600  // TODO: Can we account for that in the cost model?
601  bool IsCmp = Pred != CmpInst::Predicate::BAD_ICMP_PREDICATE;
602  if (IsCmp)
603  for (User *U : I.users())
604  if (match(U, m_Select(m_Specific(&I), m_Value(), m_Value())))
605  return false;
606 
607  // Match against one or both scalar values being inserted into constant
608  // vectors:
609  // vec_op VecC0, (inselt VecC1, V1, Index)
610  // vec_op (inselt VecC0, V0, Index), VecC1
611  // vec_op (inselt VecC0, V0, Index), (inselt VecC1, V1, Index)
612  // TODO: Deal with mismatched index constants and variable indexes?
613  Constant *VecC0 = nullptr, *VecC1 = nullptr;
614  Value *V0 = nullptr, *V1 = nullptr;
615  uint64_t Index0 = 0, Index1 = 0;
616  if (!match(Ins0, m_InsertElt(m_Constant(VecC0), m_Value(V0),
617  m_ConstantInt(Index0))) &&
618  !match(Ins0, m_Constant(VecC0)))
619  return false;
620  if (!match(Ins1, m_InsertElt(m_Constant(VecC1), m_Value(V1),
621  m_ConstantInt(Index1))) &&
622  !match(Ins1, m_Constant(VecC1)))
623  return false;
624 
625  bool IsConst0 = !V0;
626  bool IsConst1 = !V1;
627  if (IsConst0 && IsConst1)
628  return false;
629  if (!IsConst0 && !IsConst1 && Index0 != Index1)
630  return false;
631 
632  // Bail for single insertion if it is a load.
633  // TODO: Handle this once getVectorInstrCost can cost for load/stores.
634  auto *I0 = dyn_cast_or_null<Instruction>(V0);
635  auto *I1 = dyn_cast_or_null<Instruction>(V1);
636  if ((IsConst0 && I1 && I1->mayReadFromMemory()) ||
637  (IsConst1 && I0 && I0->mayReadFromMemory()))
638  return false;
639 
640  uint64_t Index = IsConst0 ? Index1 : Index0;
641  Type *ScalarTy = IsConst0 ? V1->getType() : V0->getType();
642  Type *VecTy = I.getType();
643  assert(VecTy->isVectorTy() &&
644  (IsConst0 || IsConst1 || V0->getType() == V1->getType()) &&
645  (ScalarTy->isIntegerTy() || ScalarTy->isFloatingPointTy() ||
646  ScalarTy->isPointerTy()) &&
647  "Unexpected types for insert element into binop or cmp");
648 
649  unsigned Opcode = I.getOpcode();
650  InstructionCost ScalarOpCost, VectorOpCost;
651  if (IsCmp) {
652  CmpInst::Predicate Pred = cast<CmpInst>(I).getPredicate();
653  ScalarOpCost = TTI.getCmpSelInstrCost(
654  Opcode, ScalarTy, CmpInst::makeCmpResultType(ScalarTy), Pred);
655  VectorOpCost = TTI.getCmpSelInstrCost(
656  Opcode, VecTy, CmpInst::makeCmpResultType(VecTy), Pred);
657  } else {
658  ScalarOpCost = TTI.getArithmeticInstrCost(Opcode, ScalarTy);
659  VectorOpCost = TTI.getArithmeticInstrCost(Opcode, VecTy);
660  }
661 
662  // Get cost estimate for the insert element. This cost will factor into
663  // both sequences.
664  InstructionCost InsertCost =
665  TTI.getVectorInstrCost(Instruction::InsertElement, VecTy, Index);
666  InstructionCost OldCost =
667  (IsConst0 ? 0 : InsertCost) + (IsConst1 ? 0 : InsertCost) + VectorOpCost;
668  InstructionCost NewCost = ScalarOpCost + InsertCost +
669  (IsConst0 ? 0 : !Ins0->hasOneUse() * InsertCost) +
670  (IsConst1 ? 0 : !Ins1->hasOneUse() * InsertCost);
671 
672  // We want to scalarize unless the vector variant actually has lower cost.
673  if (OldCost < NewCost || !NewCost.isValid())
674  return false;
675 
676  // vec_op (inselt VecC0, V0, Index), (inselt VecC1, V1, Index) -->
677  // inselt NewVecC, (scalar_op V0, V1), Index
678  if (IsCmp)
679  ++NumScalarCmp;
680  else
681  ++NumScalarBO;
682 
683  // For constant cases, extract the scalar element, this should constant fold.
684  if (IsConst0)
685  V0 = ConstantExpr::getExtractElement(VecC0, Builder.getInt64(Index));
686  if (IsConst1)
687  V1 = ConstantExpr::getExtractElement(VecC1, Builder.getInt64(Index));
688 
689  Value *Scalar =
690  IsCmp ? Builder.CreateCmp(Pred, V0, V1)
691  : Builder.CreateBinOp((Instruction::BinaryOps)Opcode, V0, V1);
692 
693  Scalar->setName(I.getName() + ".scalar");
694 
695  // All IR flags are safe to back-propagate. There is no potential for extra
696  // poison to be created by the scalar instruction.
697  if (auto *ScalarInst = dyn_cast<Instruction>(Scalar))
698  ScalarInst->copyIRFlags(&I);
699 
700  // Fold the vector constants in the original vectors into a new base vector.
701  Constant *NewVecC = IsCmp ? ConstantExpr::getCompare(Pred, VecC0, VecC1)
702  : ConstantExpr::get(Opcode, VecC0, VecC1);
703  Value *Insert = Builder.CreateInsertElement(NewVecC, Scalar, Index);
704  replaceValue(I, *Insert);
705  return true;
706 }
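// Illustrative IR (editor-added) for scalarization of a binop whose operands
// are scalars inserted into constant vectors:
//   %i = insertelement <4 x i32> <i32 1, i32 2, i32 3, i32 4>, i32 %x, i32 0
//   %r = add <4 x i32> %i, <i32 10, i32 20, i32 30, i32 40>
// may become:
//   %s = add i32 %x, 10
//   %r = insertelement <4 x i32> <i32 11, i32 22, i32 33, i32 44>, i32 %s, i32 0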
707 
708 /// Try to combine a scalar binop + 2 scalar compares of extracted elements of
709 /// a vector into vector operations followed by extract. Note: The SLP pass
710 /// may miss this pattern because of implementation problems.
711 bool VectorCombine::foldExtractedCmps(Instruction &I) {
712  // We are looking for a scalar binop of booleans.
713  // binop i1 (cmp Pred I0, C0), (cmp Pred I1, C1)
714  if (!I.isBinaryOp() || !I.getType()->isIntegerTy(1))
715  return false;
716 
717  // The compare predicates should match, and each compare should have a
718  // constant operand.
719  // TODO: Relax the one-use constraints.
720  Value *B0 = I.getOperand(0), *B1 = I.getOperand(1);
721  Instruction *I0, *I1;
722  Constant *C0, *C1;
723  CmpInst::Predicate P0, P1;
724  if (!match(B0, m_OneUse(m_Cmp(P0, m_Instruction(I0), m_Constant(C0)))) ||
725  !match(B1, m_OneUse(m_Cmp(P1, m_Instruction(I1), m_Constant(C1)))) ||
726  P0 != P1)
727  return false;
728 
729  // The compare operands must be extracts of the same vector with constant
730  // extract indexes.
731  // TODO: Relax the one-use constraints.
732  Value *X;
733  uint64_t Index0, Index1;
734  if (!match(I0, m_OneUse(m_ExtractElt(m_Value(X), m_ConstantInt(Index0)))) ||
735  !match(I1, m_OneUse(m_ExtractElt(m_Specific(X), m_ConstantInt(Index1)))))
736  return false;
737 
738  auto *Ext0 = cast<ExtractElementInst>(I0);
739  auto *Ext1 = cast<ExtractElementInst>(I1);
740  ExtractElementInst *ConvertToShuf = getShuffleExtract(Ext0, Ext1);
741  if (!ConvertToShuf)
742  return false;
743 
744  // The original scalar pattern is:
745  // binop i1 (cmp Pred (ext X, Index0), C0), (cmp Pred (ext X, Index1), C1)
746  CmpInst::Predicate Pred = P0;
747  unsigned CmpOpcode = CmpInst::isFPPredicate(Pred) ? Instruction::FCmp
748  : Instruction::ICmp;
749  auto *VecTy = dyn_cast<FixedVectorType>(X->getType());
750  if (!VecTy)
751  return false;
752 
753  InstructionCost OldCost =
754  TTI.getVectorInstrCost(Ext0->getOpcode(), VecTy, Index0);
755  OldCost += TTI.getVectorInstrCost(Ext1->getOpcode(), VecTy, Index1);
756  OldCost +=
757  TTI.getCmpSelInstrCost(CmpOpcode, I0->getType(),
758  CmpInst::makeCmpResultType(I0->getType()), Pred) *
759  2;
760  OldCost += TTI.getArithmeticInstrCost(I.getOpcode(), I.getType());
761 
762  // The proposed vector pattern is:
763  // vcmp = cmp Pred X, VecC
764  // ext (binop vNi1 vcmp, (shuffle vcmp, Index1)), Index0
765  int CheapIndex = ConvertToShuf == Ext0 ? Index1 : Index0;
766  int ExpensiveIndex = ConvertToShuf == Ext0 ? Index0 : Index1;
767  auto *CmpTy = cast<FixedVectorType>(CmpInst::makeCmpResultType(X->getType()));
768  InstructionCost NewCost = TTI.getCmpSelInstrCost(
769  CmpOpcode, X->getType(), CmpInst::makeCmpResultType(X->getType()), Pred);
770  SmallVector<int, 32> ShufMask(VecTy->getNumElements(), UndefMaskElem);
771  ShufMask[CheapIndex] = ExpensiveIndex;
772  NewCost += TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, CmpTy,
773  ShufMask);
774  NewCost += TTI.getArithmeticInstrCost(I.getOpcode(), CmpTy);
775  NewCost += TTI.getVectorInstrCost(Ext0->getOpcode(), CmpTy, CheapIndex);
776 
777  // Aggressively form vector ops if the cost is equal because the transform
778  // may enable further optimization.
779  // Codegen can reverse this transform (scalarize) if it was not profitable.
780  if (OldCost < NewCost || !NewCost.isValid())
781  return false;
782 
783  // Create a vector constant from the 2 scalar constants.
784  SmallVector<Constant *, 32> CmpC(VecTy->getNumElements(),
785  UndefValue::get(VecTy->getElementType()));
786  CmpC[Index0] = C0;
787  CmpC[Index1] = C1;
788  Value *VCmp = Builder.CreateCmp(Pred, X, ConstantVector::get(CmpC));
789 
790  Value *Shuf = createShiftShuffle(VCmp, ExpensiveIndex, CheapIndex, Builder);
791  Value *VecLogic = Builder.CreateBinOp(cast<BinaryOperator>(I).getOpcode(),
792  VCmp, Shuf);
793  Value *NewExt = Builder.CreateExtractElement(VecLogic, CheapIndex);
794  replaceValue(I, *NewExt);
795  ++NumVecCmpBO;
796  return true;
797 }
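// Illustrative IR (editor-added) for the combined compare + binop fold:
//   %e0 = extractelement <4 x i32> %x, i32 0
//   %e1 = extractelement <4 x i32> %x, i32 1
//   %c0 = icmp sgt i32 %e0, 10
//   %c1 = icmp sgt i32 %e1, 20
//   %r  = and i1 %c0, %c1
// may become a single vector icmp of %x against <i32 10, i32 20, i32 undef, i32 undef>,
// a shuffle that moves one compare result into the other's lane, a <4 x i1>
// 'and', and a single extract of the surviving lane.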
798 
799 // Check if memory loc modified between two instrs in the same BB
800 static bool isMemModifiedBetween(BasicBlock::iterator Begin,
801  BasicBlock::iterator End,
802  const MemoryLocation &Loc, AAResults &AA) {
803  unsigned NumScanned = 0;
804  return std::any_of(Begin, End, [&](const Instruction &Instr) {
805  return isModSet(AA.getModRefInfo(&Instr, Loc)) ||
806  ++NumScanned > MaxInstrsToScan;
807  });
808 }
809 
810 /// Helper class to indicate whether a vector index can be safely scalarized and
811 /// if a freeze needs to be inserted.
812 class ScalarizationResult {
813  enum class StatusTy { Unsafe, Safe, SafeWithFreeze };
814 
815  StatusTy Status;
816  Value *ToFreeze;
817 
818  ScalarizationResult(StatusTy Status, Value *ToFreeze = nullptr)
819  : Status(Status), ToFreeze(ToFreeze) {}
820 
821 public:
822  ScalarizationResult(const ScalarizationResult &Other) = default;
823  ~ScalarizationResult() {
824  assert(!ToFreeze && "freeze() not called with ToFreeze being set");
825  }
826 
827  static ScalarizationResult unsafe() { return {StatusTy::Unsafe}; }
828  static ScalarizationResult safe() { return {StatusTy::Safe}; }
829  static ScalarizationResult safeWithFreeze(Value *ToFreeze) {
830  return {StatusTy::SafeWithFreeze, ToFreeze};
831  }
832 
833  /// Returns true if the index can be scalarize without requiring a freeze.
834  bool isSafe() const { return Status == StatusTy::Safe; }
835  /// Returns true if the index cannot be scalarized.
836  bool isUnsafe() const { return Status == StatusTy::Unsafe; }
837  /// Returns true if the index can be scalarize, but requires inserting a
838  /// freeze.
839  bool isSafeWithFreeze() const { return Status == StatusTy::SafeWithFreeze; }
840 
841  /// Reset the state of Unsafe and clear ToFreze if set.
842  void discard() {
843  ToFreeze = nullptr;
844  Status = StatusTy::Unsafe;
845  }
846 
847  /// Freeze the ToFreeze and update the use in \p User to use it.
848  void freeze(IRBuilder<> &Builder, Instruction &UserI) {
849  assert(isSafeWithFreeze() &&
850  "should only be used when freezing is required");
851  assert(is_contained(ToFreeze->users(), &UserI) &&
852  "UserI must be a user of ToFreeze");
853  IRBuilder<>::InsertPointGuard Guard(Builder);
854  Builder.SetInsertPoint(cast<Instruction>(&UserI));
855  Value *Frozen =
856  Builder.CreateFreeze(ToFreeze, ToFreeze->getName() + ".frozen");
857  for (Use &U : make_early_inc_range((UserI.operands())))
858  if (U.get() == ToFreeze)
859  U.set(Frozen);
860 
861  ToFreeze = nullptr;
862  }
863 };
864 
865 /// Check if it is legal to scalarize a memory access to \p VecTy at index \p
866 /// Idx. \p Idx must access a valid vector element.
867 static ScalarizationResult canScalarizeAccess(FixedVectorType *VecTy,
868  Value *Idx, Instruction *CtxI,
869  AssumptionCache &AC,
870  const DominatorTree &DT) {
871  if (auto *C = dyn_cast<ConstantInt>(Idx)) {
872  if (C->getValue().ult(VecTy->getNumElements()))
873  return ScalarizationResult::safe();
874  return ScalarizationResult::unsafe();
875  }
876 
877  unsigned IntWidth = Idx->getType()->getScalarSizeInBits();
878  APInt Zero(IntWidth, 0);
879  APInt MaxElts(IntWidth, VecTy->getNumElements());
880  ConstantRange ValidIndices(Zero, MaxElts);
881  ConstantRange IdxRange(IntWidth, true);
882 
883  if (isGuaranteedNotToBePoison(Idx, &AC)) {
884  if (ValidIndices.contains(computeConstantRange(Idx, /* ForSigned */ false,
885  true, &AC, CtxI, &DT)))
886  return ScalarizationResult::safe();
887  return ScalarizationResult::unsafe();
888  }
889 
890  // If the index may be poison, check if we can insert a freeze before the
891  // range of the index is restricted.
892  Value *IdxBase;
893  ConstantInt *CI;
894  if (match(Idx, m_And(m_Value(IdxBase), m_ConstantInt(CI)))) {
895  IdxRange = IdxRange.binaryAnd(CI->getValue());
896  } else if (match(Idx, m_URem(m_Value(IdxBase), m_ConstantInt(CI)))) {
897  IdxRange = IdxRange.urem(CI->getValue());
898  }
899 
900  if (ValidIndices.contains(IdxRange))
901  return ScalarizationResult::safeWithFreeze(IdxBase);
902  return ScalarizationResult::unsafe();
903 }
904 
905 /// The memory operation on a vector of \p ScalarType had alignment of
906 /// \p VectorAlignment. Compute the maximal, but conservatively correct,
907 /// alignment that will be valid for the memory operation on a single scalar
908 /// element of the same type with index \p Idx.
909 static Align computeAlignmentAfterScalarization(Align VectorAlignment,
910  Type *ScalarType, Value *Idx,
911  const DataLayout &DL) {
912  if (auto *C = dyn_cast<ConstantInt>(Idx))
913  return commonAlignment(VectorAlignment,
914  C->getZExtValue() * DL.getTypeStoreSize(ScalarType));
915  return commonAlignment(VectorAlignment, DL.getTypeStoreSize(ScalarType));
916 }
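// Editor-added worked example: for a <4 x i32> access with vector alignment 16,
// a constant index of 2 gives a byte offset of 8, so the scalar access can keep
// alignment commonAlignment(16, 8) = 8; with an unknown index only the element
// size can be assumed, giving commonAlignment(16, 4) = 4.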
917 
918 // Combine patterns like:
919 // %0 = load <4 x i32>, <4 x i32>* %a
920 // %1 = insertelement <4 x i32> %0, i32 %b, i32 1
921 // store <4 x i32> %1, <4 x i32>* %a
922 // to:
923 // %0 = bitcast <4 x i32>* %a to i32*
924 // %1 = getelementptr inbounds i32, i32* %0, i64 0, i64 1
925 // store i32 %b, i32* %1
926 bool VectorCombine::foldSingleElementStore(Instruction &I) {
927  StoreInst *SI = dyn_cast<StoreInst>(&I);
928  if (!SI || !SI->isSimple() ||
929  !isa<FixedVectorType>(SI->getValueOperand()->getType()))
930  return false;
931 
932  // TODO: Combine more complicated patterns (multiple insert) by referencing
933  // TargetTransformInfo.
934  Instruction *Source;
935  Value *NewElement;
936  Value *Idx;
937  if (!match(SI->getValueOperand(),
938  m_InsertElt(m_Instruction(Source), m_Value(NewElement),
939  m_Value(Idx))))
940  return false;
941 
942  if (auto *Load = dyn_cast<LoadInst>(Source)) {
943  auto VecTy = cast<FixedVectorType>(SI->getValueOperand()->getType());
944  const DataLayout &DL = I.getModule()->getDataLayout();
945  Value *SrcAddr = Load->getPointerOperand()->stripPointerCasts();
946  // Don't optimize for atomic/volatile load or store. Ensure memory is not
947  // modified between, vector type matches store size, and index is inbounds.
948  if (!Load->isSimple() || Load->getParent() != SI->getParent() ||
949  !DL.typeSizeEqualsStoreSize(Load->getType()) ||
950  SrcAddr != SI->getPointerOperand()->stripPointerCasts())
951  return false;
952 
953  auto ScalarizableIdx = canScalarizeAccess(VecTy, Idx, Load, AC, DT);
954  if (ScalarizableIdx.isUnsafe() ||
955  isMemModifiedBetween(Load->getIterator(), SI->getIterator(),
956  MemoryLocation::get(SI), AA))
957  return false;
958 
959  if (ScalarizableIdx.isSafeWithFreeze())
960  ScalarizableIdx.freeze(Builder, *cast<Instruction>(Idx));
961  Value *GEP = Builder.CreateInBoundsGEP(
962  SI->getValueOperand()->getType(), SI->getPointerOperand(),
963  {ConstantInt::get(Idx->getType(), 0), Idx});
964  StoreInst *NSI = Builder.CreateStore(NewElement, GEP);
965  NSI->copyMetadata(*SI);
966  Align ScalarOpAlignment = computeAlignmentAfterScalarization(
967  std::max(SI->getAlign(), Load->getAlign()), NewElement->getType(), Idx,
968  DL);
969  NSI->setAlignment(ScalarOpAlignment);
970  replaceValue(I, *NSI);
971  eraseInstruction(I);
972  return true;
973  }
974 
975  return false;
976 }
977 
978 /// Try to scalarize vector loads feeding extractelement instructions.
979 bool VectorCombine::scalarizeLoadExtract(Instruction &I) {
980  Value *Ptr;
981  if (!match(&I, m_Load(m_Value(Ptr))))
982  return false;
983 
984  auto *LI = cast<LoadInst>(&I);
985  const DataLayout &DL = I.getModule()->getDataLayout();
986  if (LI->isVolatile() || !DL.typeSizeEqualsStoreSize(LI->getType()))
987  return false;
988 
989  auto *FixedVT = dyn_cast<FixedVectorType>(LI->getType());
990  if (!FixedVT)
991  return false;
992 
993  InstructionCost OriginalCost =
994  TTI.getMemoryOpCost(Instruction::Load, LI->getType(), LI->getAlign(),
995  LI->getPointerAddressSpace());
996  InstructionCost ScalarizedCost = 0;
997 
998  Instruction *LastCheckedInst = LI;
999  unsigned NumInstChecked = 0;
1000  // Check if all users of the load are extracts with no memory modifications
1001  // between the load and the extract. Compute the cost of both the original
1002  // code and the scalarized version.
1003  for (User *U : LI->users()) {
1004  auto *UI = dyn_cast<ExtractElementInst>(U);
1005  if (!UI || UI->getParent() != LI->getParent())
1006  return false;
1007 
1008  if (!isGuaranteedNotToBePoison(UI->getOperand(1), &AC, LI, &DT))
1009  return false;
1010 
1011  // Check if any instruction between the load and the extract may modify
1012  // memory.
1013  if (LastCheckedInst->comesBefore(UI)) {
1014  for (Instruction &I :
1015  make_range(std::next(LI->getIterator()), UI->getIterator())) {
1016  // Bail out if we reached the check limit or the instruction may write
1017  // to memory.
1018  if (NumInstChecked == MaxInstrsToScan || I.mayWriteToMemory())
1019  return false;
1020  NumInstChecked++;
1021  }
1022  }
1023 
1024  if (!LastCheckedInst)
1025  LastCheckedInst = UI;
1026  else if (LastCheckedInst->comesBefore(UI))
1027  LastCheckedInst = UI;
1028 
1029  auto ScalarIdx = canScalarizeAccess(FixedVT, UI->getOperand(1), &I, AC, DT);
1030  if (!ScalarIdx.isSafe()) {
1031  // TODO: Freeze index if it is safe to do so.
1032  ScalarIdx.discard();
1033  return false;
1034  }
1035 
1036  auto *Index = dyn_cast<ConstantInt>(UI->getOperand(1));
1037  OriginalCost +=
1038  TTI.getVectorInstrCost(Instruction::ExtractElement, LI->getType(),
1039  Index ? Index->getZExtValue() : -1);
1040  ScalarizedCost +=
1041  TTI.getMemoryOpCost(Instruction::Load, FixedVT->getElementType(),
1042  Align(1), LI->getPointerAddressSpace());
1043  ScalarizedCost += TTI.getAddressComputationCost(FixedVT->getElementType());
1044  }
1045 
1046  if (ScalarizedCost >= OriginalCost)
1047  return false;
1048 
1049  // Replace extracts with narrow scalar loads.
1050  for (User *U : LI->users()) {
1051  auto *EI = cast<ExtractElementInst>(U);
1052  Builder.SetInsertPoint(EI);
1053 
1054  Value *Idx = EI->getOperand(1);
1055  Value *GEP =
1056  Builder.CreateInBoundsGEP(FixedVT, Ptr, {Builder.getInt32(0), Idx});
1057  auto *NewLoad = cast<LoadInst>(Builder.CreateLoad(
1058  FixedVT->getElementType(), GEP, EI->getName() + ".scalar"));
1059 
1060  Align ScalarOpAlignment = computeAlignmentAfterScalarization(
1061  LI->getAlign(), FixedVT->getElementType(), Idx, DL);
1062  NewLoad->setAlignment(ScalarOpAlignment);
1063 
1064  replaceValue(*EI, *NewLoad);
1065  }
1066 
1067  return true;
1068 }
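// Illustrative IR (editor-added) for load-extract scalarization:
//   %v = load <4 x i32>, <4 x i32>* %p
//   %e = extractelement <4 x i32> %v, i32 %i
// may become, when %i is known to be in range and no stores intervene:
//   %gep = getelementptr inbounds <4 x i32>, <4 x i32>* %p, i32 0, i32 %i
//   %e   = load i32, i32* %gep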
1069 
1070 /// Try to convert "shuffle (binop), (binop)" with a shared binop operand into
1071 /// "binop (shuffle), (shuffle)".
1072 bool VectorCombine::foldShuffleOfBinops(Instruction &I) {
1073  auto *VecTy = dyn_cast<FixedVectorType>(I.getType());
1074  if (!VecTy)
1075  return false;
1076 
1077  BinaryOperator *B0, *B1;
1078  ArrayRef<int> Mask;
1079  if (!match(&I, m_Shuffle(m_OneUse(m_BinOp(B0)), m_OneUse(m_BinOp(B1)),
1080  m_Mask(Mask))) ||
1081  B0->getOpcode() != B1->getOpcode() || B0->getType() != VecTy)
1082  return false;
1083 
1084  // Try to replace a binop with a shuffle if the shuffle is not costly.
1085  // The new shuffle will choose from a single, common operand, so it may be
1086  // cheaper than the existing two-operand shuffle.
1087  SmallVector<int> UnaryMask = createUnaryMask(Mask, Mask.size());
1088  Instruction::BinaryOps Opcode = B0->getOpcode();
1089  InstructionCost BinopCost = TTI.getArithmeticInstrCost(Opcode, VecTy);
1090  InstructionCost ShufCost = TTI.getShuffleCost(
1091  TargetTransformInfo::SK_PermuteSingleSrc, VecTy, UnaryMask);
1092  if (ShufCost > BinopCost)
1093  return false;
1094 
1095  // If we have something like "add X, Y" and "add Z, X", swap ops to match.
1096  Value *X = B0->getOperand(0), *Y = B0->getOperand(1);
1097  Value *Z = B1->getOperand(0), *W = B1->getOperand(1);
1098  if (BinaryOperator::isCommutative(Opcode) && X != Z && Y != W)
1099  std::swap(X, Y);
1100 
1101  Value *Shuf0, *Shuf1;
1102  if (X == Z) {
1103  // shuf (bo X, Y), (bo X, W) --> bo (shuf X), (shuf Y, W)
1104  Shuf0 = Builder.CreateShuffleVector(X, UnaryMask);
1105  Shuf1 = Builder.CreateShuffleVector(Y, W, Mask);
1106  } else if (Y == W) {
1107  // shuf (bo X, Y), (bo Z, Y) --> bo (shuf X, Z), (shuf Y)
1108  Shuf0 = Builder.CreateShuffleVector(X, Z, Mask);
1109  Shuf1 = Builder.CreateShuffleVector(Y, UnaryMask);
1110  } else {
1111  return false;
1112  }
1113 
1114  Value *NewBO = Builder.CreateBinOp(Opcode, Shuf0, Shuf1);
1115  // Intersect flags from the old binops.
1116  if (auto *NewInst = dyn_cast<Instruction>(NewBO)) {
1117  NewInst->copyIRFlags(B0);
1118  NewInst->andIRFlags(B1);
1119  }
1120  replaceValue(I, *NewBO);
1121  return true;
1122 }
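// Illustrative IR (editor-added) for the shuffle-of-binops fold with a shared
// first operand:
//   %b0 = add <4 x i32> %x, %y
//   %b1 = add <4 x i32> %x, %w
//   %s  = shufflevector <4 x i32> %b0, <4 x i32> %b1, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
// may become:
//   %s0 = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
//   %s1 = shufflevector <4 x i32> %y, <4 x i32> %w, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
//   %s  = add <4 x i32> %s0, %s1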
1123 
1124 /// This is the entry point for all transforms. Pass manager differences are
1125 /// handled in the callers of this function.
1126 bool VectorCombine::run() {
1127  if (DisableVectorCombine)
1128  return false;
1129 
1130  // Don't attempt vectorization if the target does not support vectors.
1131  if (!TTI.getNumberOfRegisters(TTI.getRegisterClassForType(/*Vector*/ true)))
1132  return false;
1133 
1134  bool MadeChange = false;
1135  auto FoldInst = [this, &MadeChange](Instruction &I) {
1136  Builder.SetInsertPoint(&I);
1137  if (!ScalarizationOnly) {
1138  MadeChange |= vectorizeLoadInsert(I);
1139  MadeChange |= foldExtractExtract(I);
1140  MadeChange |= foldBitcastShuf(I);
1141  MadeChange |= foldExtractedCmps(I);
1142  MadeChange |= foldShuffleOfBinops(I);
1143  }
1144  MadeChange |= scalarizeBinopOrCmp(I);
1145  MadeChange |= scalarizeLoadExtract(I);
1146  MadeChange |= foldSingleElementStore(I);
1147  };
1148  for (BasicBlock &BB : F) {
1149  // Ignore unreachable basic blocks.
1150  if (!DT.isReachableFromEntry(&BB))
1151  continue;
1152  // Use early increment range so that we can erase instructions in loop.
1153  for (Instruction &I : make_early_inc_range(BB)) {
1154  if (I.isDebugOrPseudoInst())
1155  continue;
1156  FoldInst(I);
1157  }
1158  }
1159 
1160  while (!Worklist.isEmpty()) {
1161  Instruction *I = Worklist.removeOne();
1162  if (!I)
1163  continue;
1164 
1165  if (isInstructionTriviallyDead(I)) {
1166  eraseInstruction(*I);
1167  continue;
1168  }
1169 
1170  FoldInst(*I);
1171  }
1172 
1173  return MadeChange;
1174 }
1175 
1176 // Pass manager boilerplate below here.
1177 
1178 namespace {
1179 class VectorCombineLegacyPass : public FunctionPass {
1180 public:
1181  static char ID;
1182  VectorCombineLegacyPass() : FunctionPass(ID) {
1183  initializeVectorCombineLegacyPassPass(*PassRegistry::getPassRegistry());
1184  }
1185 
1186  void getAnalysisUsage(AnalysisUsage &AU) const override {
1187  AU.addRequired<AssumptionCacheTracker>();
1188  AU.addRequired<DominatorTreeWrapperPass>();
1189  AU.addRequired<TargetTransformInfoWrapperPass>();
1190  AU.addRequired<AAResultsWrapperPass>();
1191  AU.setPreservesCFG();
1192  AU.addPreserved<DominatorTreeWrapperPass>();
1193  AU.addPreserved<GlobalsAAWrapperPass>();
1194  AU.addPreserved<AAResultsWrapperPass>();
1195  AU.addPreserved<BasicAAWrapperPass>();
1196  FunctionPass::getAnalysisUsage(AU);
1197  }
1198 
1199  bool runOnFunction(Function &F) override {
1200  if (skipFunction(F))
1201  return false;
1202  auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
1203  auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
1204  auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1205  auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
1206  VectorCombine Combiner(F, TTI, DT, AA, AC, false);
1207  return Combiner.run();
1208  }
1209 };
1210 } // namespace
1211 
1212 char VectorCombineLegacyPass::ID = 0;
1213 INITIALIZE_PASS_BEGIN(VectorCombineLegacyPass, "vector-combine",
1214  "Optimize scalar/vector ops", false,
1215  false)
1216 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
1217 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
1218 INITIALIZE_PASS_END(VectorCombineLegacyPass, "vector-combine",
1219  "Optimize scalar/vector ops", false, false)
1220 Pass *llvm::createVectorCombinePass() {
1221  return new VectorCombineLegacyPass();
1222 }
1223 
1224 PreservedAnalyses VectorCombinePass::run(Function &F,
1225  FunctionAnalysisManager &FAM) {
1226  auto &AC = FAM.getResult<AssumptionAnalysis>(F);
1227  TargetTransformInfo &TTI = FAM.getResult<TargetIRAnalysis>(F);
1228  DominatorTree &DT = FAM.getResult<DominatorTreeAnalysis>(F);
1229  AAResults &AA = FAM.getResult<AAManager>(F);
1230  VectorCombine Combiner(F, TTI, DT, AA, AC, ScalarizationOnly);
1231  if (!Combiner.run())
1232  return PreservedAnalyses::all();
1233  PreservedAnalyses PA;
1234  PA.preserveSet<CFGAnalyses>();
1235  return PA;
1236 }
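// Editor-added sketch (not in the original file): with the new pass manager the
// pass can be added to a pipeline directly. This assumes the VectorCombinePass
// declaration in llvm/Transforms/Vectorize/VectorCombine.h with a defaulted
// constructor:
//   FunctionPassManager FPM;
//   FPM.addPass(VectorCombinePass());
//   FPM.run(F, FAM);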
llvm::InstructionCost
Definition: InstructionCost.h:29
llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:155
AssumptionCache.h
llvm::AAManager
A manager for alias analyses.
Definition: AliasAnalysis.h:1287
llvm::mustSuppressSpeculation
bool mustSuppressSpeculation(const LoadInst &LI)
Return true if speculation of the given load must be suppressed to avoid ordering or interfering with...
Definition: ValueTracking.cpp:4620
llvm::TargetIRAnalysis
Analysis pass providing the TargetTransformInfo.
Definition: TargetTransformInfo.h:2418
B1
llvm::MemoryLocation::get
static MemoryLocation get(const LoadInst *LI)
Return a location with information about the memory reference by the given instruction.
Definition: MemoryLocation.cpp:37
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AllocatorList.h:23
llvm::ConstantExpr::getExtractElement
static Constant * getExtractElement(Constant *Vec, Constant *Idx, Type *OnlyIfReducedTy=nullptr)
Definition: Constants.cpp:2601
llvm::Value::getPointerAlignment
Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
Definition: Value.cpp:918
llvm::make_range
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
Definition: iterator_range.h:53
llvm::TargetTransformInfo::getMinVectorRegisterBitWidth
unsigned getMinVectorRegisterBitWidth() const
Definition: TargetTransformInfo.cpp:614
ScalarizationResult::~ScalarizationResult
~ScalarizationResult()
Definition: VectorCombine.cpp:823
llvm::PatternMatch::m_Mask
Definition: PatternMatch.h:1524
llvm::DataLayout
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:113
llvm::TargetTransformInfo::getShuffleCost
InstructionCost getShuffleCost(ShuffleKind Kind, VectorType *Tp, ArrayRef< int > Mask=None, int Index=0, VectorType *SubTp=nullptr) const
Definition: TargetTransformInfo.cpp:745
llvm::Value::hasOneUse
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
llvm::ConstantRange::binaryAnd
ConstantRange binaryAnd(const ConstantRange &Other) const
Return a new range representing the possible values resulting from a binary-and of a value in this ra...
Definition: ConstantRange.cpp:1372
Insert
Vector Rotate Left Mask Mask Insert
Definition: README_P9.txt:112
llvm::CmpInst::Predicate
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:721
llvm::User::operands
op_range operands()
Definition: User.h:242
llvm::BasicBlock::iterator
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:90
llvm::isSafeToSpeculativelyExecute
bool isSafeToSpeculativelyExecute(const Value *V, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr)
Return true if the instruction does not have any effects besides calculating the result and does not ...
Definition: ValueTracking.cpp:4632
llvm::Type::isPointerTy
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:217
llvm::AnalysisManager::getResult
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:783
llvm::TargetTransformInfo::getCmpSelInstrCost
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, const Instruction *I=nullptr) const
Definition: TargetTransformInfo.cpp:827
Loads.h
llvm::ExtractElementInst
This instruction extracts a single (scalar) element from a VectorType value.
Definition: Instructions.h:1876
llvm::Function
Definition: Function.h:62
Pass.h
llvm::TargetTransformInfo::getMemoryOpCost
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, const Instruction *I=nullptr) const
Definition: TargetTransformInfo.cpp:855
ScalarizationResult::isUnsafe
bool isUnsafe() const
Returns true if the index cannot be scalarized.
Definition: VectorCombine.cpp:836
canScalarizeAccess
static ScalarizationResult canScalarizeAccess(FixedVectorType *VecTy, Value *Idx, Instruction *CtxI, AssumptionCache &AC, const DominatorTree &DT)
Check if it is legal to scalarize a memory access to VecTy at index Idx.
Definition: VectorCombine.cpp:867
C1
instcombine should handle this C2 when C1
Definition: README.txt:263
llvm::PatternMatch::m_InsertElt
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
Definition: PatternMatch.h:1494
llvm::ConstantInt::getValue
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition: Constants.h:133
llvm::TargetTransformInfo::getAddressComputationCost
InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *SE=nullptr, const SCEV *Ptr=nullptr) const
Definition: TargetTransformInfo.cpp:917
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1177
Statistic.h
llvm::RISCVFenceField::W
@ W
Definition: RISCVBaseInfo.h:208
llvm::VectorCombinePass::run
PreservedAnalyses run(Function &F, FunctionAnalysisManager &)
Definition: VectorCombine.cpp:1224
llvm::PatternMatch::m_Load
OneOps_match< OpTy, Instruction::Load > m_Load(const OpTy &Op)
Matches LoadInst.
Definition: PatternMatch.h:1573
llvm::CmpInst::makeCmpResultType
static Type * makeCmpResultType(Type *opnd_type)
Create a result type for fcmp/icmp.
Definition: InstrTypes.h:1046
llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition: TargetTransformInfo.h:168
llvm::Type::getPointerAddressSpace
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
Definition: DerivedTypes.h:734
llvm::IRBuilder<>
llvm::InstructionWorklist::isEmpty
bool isEmpty() const
Definition: InstructionWorklist.h:39
ValueTracking.h
Local.h
llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:151
FAM
FunctionAnalysisManager FAM
Definition: PassBuilderBindings.cpp:59
GlobalsModRef.h
VectorCombine.h
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:143
llvm::narrowShuffleMaskElts
void narrowShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Replace each shuffle mask index with the scaled sequential indices for an equivalent mask of narrowed...
Definition: VectorUtils.cpp:427
llvm::createVectorCombinePass
Pass * createVectorCombinePass()
Definition: VectorCombine.cpp:1220
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
llvm::StoreInst::setAlignment
void setAlignment(Align Align)
Definition: Instructions.h:362
llvm::Instruction::comesBefore
bool comesBefore(const Instruction *Other) const
Given an instruction Other in the same basic block as this instruction, return true if this instructi...
Definition: Instruction.cpp:111
llvm::createUnaryMask
llvm::SmallVector< int, 16 > createUnaryMask(ArrayRef< int > Mask, unsigned NumElts)
Given a shuffle mask for a binary shuffle, create the equivalent shuffle mask assuming both operands ...
Definition: VectorUtils.cpp:833
llvm::PatternMatch::m_BinOp
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
Definition: PatternMatch.h:84
Offset
uint64_t Offset
Definition: ELFObjHandler.cpp:80
llvm::Instruction::copyMetadata
void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
Definition: Instruction.cpp:839
INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(VectorCombineLegacyPass, "vector-combine", "Optimize scalar/vector ops", false, false) INITIALIZE_PASS_END(VectorCombineLegacyPass
InvalidIndex
static const unsigned InvalidIndex
Definition: VectorCombine.cpp:60
llvm::PatternMatch::m_BitCast
CastClass_match< OpTy, Instruction::BitCast > m_BitCast(const OpTy &Op)
Matches BitCast.
Definition: PatternMatch.h:1603
llvm::Type::isFloatingPointTy
bool isFloatingPointTy() const
Return true if this is one of the six floating-point types.
Definition: Type.h:162
BasicAliasAnalysis.h
MaxInstrsToScan
static cl::opt< unsigned > MaxInstrsToScan("vector-combine-max-scan-instrs", cl::init(30), cl::Hidden, cl::desc("Max number of instructions to scan for vector combining."))
llvm::FixedVectorType
Class to represent fixed width SIMD vectors.
Definition: DerivedTypes.h:525
llvm::BitmaskEnumDetail::Mask
std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:80
llvm::TargetTransformInfo::SK_PermuteSingleSrc
@ SK_PermuteSingleSrc
Shuffle elements of single source vector with any shuffle mask.
Definition: TargetTransformInfo.h:879
F
#define F(x, y, z)
Definition: MD5.cpp:55
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
llvm::UndefMaskElem
constexpr int UndefMaskElem
Definition: Instructions.h:2000
llvm::PatternMatch::m_OneUse
OneUse_match< T > m_OneUse(const T &SubPattern)
Definition: PatternMatch.h:67
CommandLine.h
llvm::FixedVectorType::getNumElements
unsigned getNumElements() const
Definition: DerivedTypes.h:568
llvm::ConstantInt
This is the shared class of boolean and integer constants.
Definition: Constants.h:79
llvm::Instruction::getOpcode
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:160
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:31
llvm::PatternMatch::m_Select
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
Definition: PatternMatch.h:1472
llvm::CmpInst::isFPPredicate
bool isFPPredicate() const
Definition: InstrTypes.h:828
llvm::PatternMatch::match
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
llvm::AAResults
Definition: AliasAnalysis.h:507
llvm::User
Definition: User.h:44
createShiftShuffle
static Value * createShiftShuffle(Value *Vec, unsigned OldIndex, unsigned NewIndex, IRBuilder<> &Builder)
Create a shuffle that translates (shifts) 1 element from the input vector to a new element location.
Definition: VectorCombine.cpp:400
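This helper is local to VectorCombine.cpp; a minimal sketch of the idea (not the exact body): build a single-source mask whose NewIndex lane reads lane OldIndex and whose remaining lanes are undef.

#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/IRBuilder.h"
using namespace llvm;

static Value *shiftShuffleSketch(Value *Vec, unsigned OldIndex,
                                 unsigned NewIndex, IRBuilder<> &Builder) {
  auto *VecTy = cast<FixedVectorType>(Vec->getType());
  SmallVector<int, 32> Mask(VecTy->getNumElements(), -1); // undef lanes
  Mask[NewIndex] = OldIndex;
  return Builder.CreateShuffleVector(Vec, Mask);
}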
Y
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
InstructionWorklist.h
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
getOpcode
static Optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition: VPlanSLP.cpp:199
llvm::Type::isVectorTy
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:226
llvm::InstructionWorklist::remove
void remove(Instruction *I)
Remove I from the worklist if it exists.
Definition: InstructionWorklist.h:85
llvm::PatternMatch::m_Instruction
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
Definition: PatternMatch.h:726
false
Definition: StackSlotColoring.cpp:142
llvm::BinaryOperator::getOpcode
BinaryOps getOpcode() const
Definition: InstrTypes.h:394
llvm::PatternMatch::m_ConstantInt
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
Definition: PatternMatch.h:145
llvm::Instruction
Definition: Instruction.h:45
llvm::Type::getScalarSizeInBits
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition: Type.cpp:191
llvm::DominatorTreeWrapperPass
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:287
llvm::STATISTIC
STATISTIC(NumFunctions, "Total number of functions")
llvm::UndefValue::get
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Definition: Constants.cpp:1804
DisableBinopExtractShuffle
static cl::opt< bool > DisableBinopExtractShuffle("disable-binop-extract-shuffle", cl::init(false), cl::Hidden, cl::desc("Disable binop extract to shuffle transforms"))
llvm::PatternMatch::m_URem
BinaryOp_match< LHS, RHS, Instruction::URem > m_URem(const LHS &L, const RHS &R)
Definition: PatternMatch.h:1085
Align
uint64_t Align
Definition: ELFObjHandler.cpp:82
PatternMatch.h
llvm::Align
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
llvm::NVPTX::PTXLdStInstCode::Scalar
@ Scalar
Definition: NVPTX.h:122
llvm::isModSet
LLVM_NODISCARD bool isModSet(const ModRefInfo MRI)
Definition: AliasAnalysis.h:196
X
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
ScalarizationResult::isSafeWithFreeze
bool isSafeWithFreeze() const
Returns true if the index can be scalarized, but requires inserting a freeze.
Definition: VectorCombine.cpp:839
llvm::Instruction::isCommutative
bool isCommutative() const LLVM_READONLY
Return true if the instruction is commutative:
Definition: Instruction.cpp:768
llvm::PatternMatch::m_ExtractElt
TwoOps_match< Val_t, Idx_t, Instruction::ExtractElement > m_ExtractElt(const Val_t &Val, const Idx_t &Idx)
Matches ExtractElementInst.
Definition: PatternMatch.h:1502
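For illustration, the extract-with-constant-index shape that the extract/extract folds in this pass look for can be matched along these lines (the helper name and variables are illustrative):

#include "llvm/IR/PatternMatch.h"
using namespace llvm;
using namespace llvm::PatternMatch;

// Matches "extractelement <N x T> %Src, iM C" and captures Src and C.
static bool matchConstIndexExtract(Value *V, Value *&Src, uint64_t &Index) {
  return match(V, m_ExtractElt(m_Value(Src), m_ConstantInt(Index)));
}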
llvm::Type::isIntegerTy
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:190
llvm::APInt::getOneBitSet
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition: APInt.h:222
ScalarizationResult::freeze
void freeze(IRBuilder<> &Builder, Instruction &UserI)
Freeze the ToFreeze and update the use in User to use it.
Definition: VectorCombine.cpp:848
VectorUtils.h
llvm::cl::opt< bool >
llvm::ConstantExpr::getCompare
static Constant * getCompare(unsigned short pred, Constant *C1, Constant *C2, bool OnlyIfReduced=false)
Return an ICmp or FCmp comparison operator constant expression.
Definition: Constants.cpp:2447
llvm::StoreInst
An instruction for storing to memory.
Definition: Instructions.h:309
llvm::InstructionWorklist::removeOne
Instruction * removeOne()
Definition: InstructionWorklist.h:96
llvm::Constant
This is an important base class in LLVM.
Definition: Constant.h:41
Index
uint32_t Index
Definition: ELFObjHandler.cpp:83
uint64_t
llvm::TargetTransformInfoWrapperPass
Wrapper pass for TargetTransformInfo.
Definition: TargetTransformInfo.h:2474
llvm::Combiner
Definition: Combiner.h:27
llvm::InstructionWorklist::push
void push(Instruction *I)
Push the instruction onto the worklist stack.
Definition: InstructionWorklist.h:58
eraseInstruction
static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo, MemorySSAUpdater *MSSAU)
Definition: LICM.cpp:1494
llvm::AssumptionAnalysis
A function analysis which provides an AssumptionCache.
Definition: AssumptionCache.h:173
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
llvm::BasicAAWrapperPass
Legacy wrapper pass to provide the BasicAAResult object.
Definition: BasicAliasAnalysis.h:175
llvm::TargetTransformInfo::getRegisterClassForType
unsigned getRegisterClassForType(bool Vector, Type *Ty=nullptr) const
Definition: TargetTransformInfo.cpp:600
DisableVectorCombine
static cl::opt< bool > DisableVectorCombine("disable-vector-combine", cl::init(false), cl::Hidden, cl::desc("Disable all vector combine transforms"))
I
#define I(x, y, z)
Definition: MD5.cpp:58
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:441
llvm::make_early_inc_range
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:642
llvm::PatternMatch::m_And
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
Definition: PatternMatch.h:1103
llvm::is_contained
bool is_contained(R &&Range, const E &Element)
Wrapper function around std::find to detect if an element exists in a container.
Definition: STLExtras.h:1714
llvm::TargetTransformInfo::getScalarizationOverhead
InstructionCost getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract) const
Estimate the overhead of scalarizing an instruction.
Definition: TargetTransformInfo.cpp:495
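As an illustration of how a single demanded lane is expressed for this query (a sketch, not code from this pass):

#include "llvm/ADT/APInt.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/DerivedTypes.h"
using namespace llvm;

// Cost of materializing only lane Index of VecTy by insertion.
static InstructionCost insertLaneCost(const TargetTransformInfo &TTI,
                                      FixedVectorType *VecTy, unsigned Index) {
  APInt DemandedElts = APInt::getOneBitSet(VecTy->getNumElements(), Index);
  return TTI.getScalarizationOverhead(VecTy, DemandedElts,
                                      /*Insert=*/true, /*Extract=*/false);
}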
IRBuilder.h
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
std::swap
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:840
translateExtract
static ExtractElementInst * translateExtract(ExtractElementInst *ExtElt, unsigned NewIndex, IRBuilder<> &Builder)
Given an extract element instruction with constant index operand, shuffle the source vector (shift th...
Definition: VectorCombine.cpp:415
SI
StandardInstrumentations SI(Debug, VerifyEach)
llvm::DominatorTree::isReachableFromEntry
bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
Definition: Dominators.cpp:328
llvm::CmpInst::BAD_ICMP_PREDICATE
@ BAD_ICMP_PREDICATE
Definition: InstrTypes.h:754
llvm::TTI
TargetTransformInfo TTI
Definition: TargetTransformInfo.h:163
llvm::Instruction::isBinaryOp
bool isBinaryOp() const
Definition: Instruction.h:165
llvm::PatternMatch::m_Constant
class_match< Constant > m_Constant()
Match an arbitrary Constant and ignore it.
Definition: PatternMatch.h:142
llvm::SPII::Load
@ Load
Definition: SparcInstrInfo.h:32
Status
Definition: SIModeRegister.cpp:28
Builder
assume Assume Builder
Definition: AssumeBundleBuilder.cpp:650
llvm::PatternMatch::m_Value
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:76
llvm::APInt
Class for arbitrary precision integers.
Definition: APInt.h:75
llvm::Sched::Source
@ Source
Definition: TargetLowering.h:99
llvm::AssumptionCacheTracker
An immutable pass that tracks lazily created AssumptionCache objects.
Definition: AssumptionCache.h:202
llvm::ArrayRef< int >
llvm::isInstructionTriviallyDead
bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction will return.
Definition: Local.cpp:398
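A typical cleanup built on this predicate (a generic sketch, not this pass's worklist logic):

#include "llvm/IR/Instruction.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;

static void eraseIfTriviallyDead(Instruction *I) {
  // No users and no observable side effects: the instruction can go.
  if (isInstructionTriviallyDead(I))
    I->eraseFromParent();
}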
llvm::BinaryOperator
Definition: InstrTypes.h:190
llvm::min
Expected< ExpressionValue > min(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
Definition: FileCheck.cpp:357
llvm::any_of
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1656
llvm::InstructionWorklist::pushValue
void pushValue(Value *V)
Definition: InstructionWorklist.h:68
llvm::AnalysisUsage::setPreservesCFG
void setPreservesCFG()
This function should be called by the pass, iff it does not:
Definition: Pass.cpp:253
llvm::PatternMatch::m_Undef
auto m_Undef()
Match an arbitrary undef constant.
Definition: PatternMatch.h:136
llvm::AssumptionCache
A cache of @llvm.assume calls within a function.
Definition: AssumptionCache.h:42
llvm::ExtractElementInst::getVectorOperand
Value * getVectorOperand()
Definition: Instructions.h:1905
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
llvm::CFGAnalyses
Represents analyses that only rely on functions' control flow.
Definition: PassManager.h:116
llvm::PatternMatch::m_Shuffle
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
Definition: PatternMatch.h:1561
llvm::AnalysisUsage::addPreserved
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
Definition: PassAnalysisSupport.h:98
llvm::Value::replaceAllUsesWith
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:532
llvm::ms_demangle::IntrinsicFunctionKind::New
@ New
ScalarizationResult::isSafe
bool isSafe() const
Returns true if the index can be scalarized without requiring a freeze.
Definition: VectorCombine.cpp:834
llvm::initializeVectorCombineLegacyPassPass
void initializeVectorCombineLegacyPassPass(PassRegistry &)
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
llvm::computeConstantRange
ConstantRange computeConstantRange(const Value *V, bool ForSigned, bool UseInstrInfo=true, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Determine the possible constant range of an integer or vector of integer value.
Definition: ValueTracking.cpp:7096
llvm::TargetTransformInfo::getNumberOfRegisters
unsigned getNumberOfRegisters(unsigned ClassID) const
Definition: TargetTransformInfo.cpp:596
llvm::ConstantVector::get
static Constant * get(ArrayRef< Constant * > V)
Definition: Constants.cpp:1402
llvm::InstructionCost::isValid
bool isValid() const
Definition: InstructionCost.h:79
llvm::Instruction::mayReadFromMemory
bool mayReadFromMemory() const
Return true if this instruction may read memory.
Definition: Instruction.cpp:574
llvm::Value::getName
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
llvm::Value::stripAndAccumulateInBoundsConstantOffsets
const Value * stripAndAccumulateInBoundsConstantOffsets(const DataLayout &DL, APInt &Offset) const
This is a wrapper around stripAndAccumulateConstantOffsets with the in-bounds requirement set to fals...
Definition: Value.h:727
runOnFunction
static bool runOnFunction(Function &F, bool PostInlining)
Definition: EntryExitInstrumenter.cpp:69
ScalarizationResult::discard
void discard()
Reset the state of Unsafe and clear ToFreeze if set.
Definition: VectorCombine.cpp:842
llvm::widenShuffleMaskElts
bool widenShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Try to transform a shuffle mask by replacing elements with the scaled index for an equivalent mask of...
Definition: VectorUtils.cpp:448
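This is the inverse direction of narrowShuffleMaskElts above; it only succeeds when each group of Scale indices is contiguous and properly aligned, e.g.:

#include "llvm/Analysis/VectorUtils.h"
using namespace llvm;

static void exampleWidenMask() {
  SmallVector<int, 16> Wide;
  // {0, 1, 6, 7} with Scale = 2 widens to {0, 3}; {0, 2, 4, 6} would fail.
  bool Ok = widenShuffleMaskElts(/*Scale=*/2, /*Mask=*/{0, 1, 6, 7}, Wide);
  (void)Ok;
}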
llvm::commonAlignment
Align commonAlignment(Align A, Align B)
Returns the alignment that satisfies both alignments.
Definition: Alignment.h:211
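In other words it returns the smaller of the two alignments, for example:

#include "llvm/Support/Alignment.h"
using namespace llvm;

// commonAlignment(Align(16), Align(4)) yields Align(4).
static const Align ExampleCommonAlign = commonAlignment(Align(16), Align(4));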
llvm::ConstantExpr
A constant value that is initialized with an expression using other constant values.
Definition: Constants.h:971
ScalarizationResult::unsafe
static ScalarizationResult unsafe()
Definition: VectorCombine.cpp:827
ScalarizationResult
Helper class to indicate whether a vector index can be safely scalarized and if a freeze needs to be ...
Definition: VectorCombine.cpp:812
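A pseudocode-style sketch of how a caller is expected to consume the three states (only the member functions listed on this page are used; the surrounding helper and its Profitable flag are hypothetical):

static bool applyScalarization(ScalarizationResult &Res, IRBuilder<> &Builder,
                               Instruction &UserI, bool Profitable) {
  if (!Res.isSafe() && !Res.isSafeWithFreeze())
    return false;               // unsafe index: leave the IR untouched
  if (!Profitable) {
    Res.discard();              // drop any pending freeze request
    return false;
  }
  if (Res.isSafeWithFreeze())
    Res.freeze(Builder, UserI); // freeze the index before the user
  return true;
}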
ops
vector Optimize scalar vector ops
Definition: VectorCombine.cpp:1219
llvm::AMDGPU::SendMsg::Op
Op
Definition: SIDefines.h:325
llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:161
Function.h
llvm::Value::hasNUses
bool hasNUses(unsigned N) const
Return true if this Value has exactly N uses.
Definition: Value.cpp:149
ScalarizationResult::safeWithFreeze
static ScalarizationResult safeWithFreeze(Value *ToFreeze)
Definition: VectorCombine.cpp:829
ScalarizationResult::safe
static ScalarizationResult safe()
Definition: VectorCombine.cpp:828
llvm::ARCCC::Z
@ Z
Definition: ARCInfo.h:41
llvm::InstructionWorklist
InstructionWorklist - This is the worklist management logic for InstCombine and other simplification ...
Definition: InstructionWorklist.h:25
llvm::PatternMatch::m_ZeroInt
cst_pred_ty< is_zero_int > m_ZeroInt()
Match an integer 0 or a vector with all elements equal to 0.
Definition: PatternMatch.h:522
llvm::ConstantRange::contains
bool contains(const APInt &Val) const
Return true if the specified value is in the set.
Definition: ConstantRange.cpp:431
llvm::TargetTransformInfo::getVectorInstrCost
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index=-1) const
Definition: TargetTransformInfo.cpp:838
llvm::ConstantRange
This class represents a range of values.
Definition: ConstantRange.h:47
llvm::ConstantRange::urem
ConstantRange urem(const ConstantRange &Other) const
Return a new range representing the possible values resulting from an unsigned remainder operation of...
Definition: ConstantRange.cpp:1294
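Together with computeConstantRange and ConstantRange::contains listed above, this supports the bounds check needed before a variable extract/insert index can be scalarized; a hedged sketch (the helper name is illustrative, and an index of the form x urem C could be refined further via urem):

#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/ConstantRange.h"
using namespace llvm;

// True if Idx is provably smaller than NumElts.
static bool indexIsInBounds(Value *Idx, unsigned NumElts, AssumptionCache &AC,
                            Instruction *CtxI, const DominatorTree &DT) {
  unsigned BitWidth = Idx->getType()->getScalarSizeInBits();
  ConstantRange IdxRange = computeConstantRange(Idx, /*ForSigned=*/false,
                                                /*UseInstrInfo=*/true, &AC,
                                                CtxI, &DT);
  ConstantRange Valid(APInt(BitWidth, 0), APInt(BitWidth, NumElts));
  return Valid.contains(IdxRange);
}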
llvm::Instruction::BinaryOps
BinaryOps
Definition: Instruction.h:789
llvm::DominatorTreeAnalysis
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:252
llvm::Pass
Pass interface - Implemented by all 'passes'.
Definition: Pass.h:91
llvm::PreservedAnalyses::preserveSet
void preserveSet()
Mark an analysis set as preserved.
Definition: PassManager.h:191
llvm::PatternMatch::m_Specific
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
Definition: PatternMatch.h:780
Dominators.h
llvm::AAResultsWrapperPass
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
Definition: AliasAnalysis.h:1335
llvm::GlobalsAAWrapperPass
Legacy wrapper pass to provide the GlobalsAAResult object.
Definition: GlobalsModRef.h:143
llvm::max
Align max(MaybeAlign Lhs, Align Rhs)
Definition: Alignment.h:340
TargetTransformInfo.h
llvm::PatternMatch
Definition: PatternMatch.h:47
llvm::ExtractElementInst::getIndexOperand
Value * getIndexOperand()
Definition: Instructions.h:1906
llvm::InstructionWorklist::pushUsersToWorkList
void pushUsersToWorkList(Instruction &I)
When an instruction is simplified, add all users of the instruction to the work lists because they mi...
Definition: InstructionWorklist.h:106
Vectorize.h
llvm::Pass::getAnalysisUsage
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overridden by passes that need analysis information to do their job.
Definition: Pass.cpp:93
llvm::isSafeToLoadUnconditionally
bool isSafeToLoadUnconditionally(Value *V, Align Alignment, APInt &Size, const DataLayout &DL, Instruction *ScanFrom=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr)
Return true if we know that executing a load from this value cannot trap.
Definition: Loads.cpp:335
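For instance, before widening a scalar load into a load of a full vector, one can ask whether the wider access could trap (a sketch under assumed variables; the helper name is illustrative):

#include "llvm/Analysis/Loads.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
using namespace llvm;

// Can a whole VecTy be loaded from Ptr without introducing a trap?
static bool canWidenLoad(Value *Ptr, FixedVectorType *VecTy, Align Alignment,
                         const DataLayout &DL, Instruction *ScanFrom,
                         const DominatorTree *DT) {
  APInt Size(DL.getIndexTypeSizeInBits(Ptr->getType()),
             DL.getTypeStoreSize(VecTy).getFixedSize());
  return isSafeToLoadUnconditionally(Ptr, Alignment, Size, DL, ScanFrom, DT);
}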
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: InstructionSimplify.h:44
llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:298
llvm::isGuaranteedNotToBePoison
bool isGuaranteedNotToBePoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Definition: ValueTracking.cpp:5281
GEP
Hexagon Common GEP
Definition: HexagonCommonGEP.cpp:172
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
llvm::TargetTransformInfo::getArithmeticInstrCost
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, OperandValueKind Opd1Info=OK_AnyValue, OperandValueKind Opd2Info=OK_AnyValue, OperandValueProperties Opd1PropInfo=OP_None, OperandValueProperties Opd2PropInfo=OP_None, ArrayRef< const Value * > Args=ArrayRef< const Value * >(), const Instruction *CxtI=nullptr) const
This is an approximation of reciprocal throughput of a math/logic op.
Definition: TargetTransformInfo.cpp:733
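The scalarization decisions in this pass come down to cost comparisons of this sort (a simplified sketch; the real heuristics also account for use counts and insert costs):

#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Instruction.h"
using namespace llvm;

// Is "two extracts + scalar add" no more expensive than "vector add + extract"?
static bool scalarAddLooksCheaper(const TargetTransformInfo &TTI,
                                  FixedVectorType *VecTy, unsigned Index) {
  InstructionCost ExtractCost =
      TTI.getVectorInstrCost(Instruction::ExtractElement, VecTy, Index);
  InstructionCost VectorCost =
      TTI.getArithmeticInstrCost(Instruction::Add, VecTy) + ExtractCost;
  InstructionCost ScalarCost =
      TTI.getArithmeticInstrCost(Instruction::Add, VecTy->getElementType()) +
      ExtractCost + ExtractCost;
  return ScalarCost <= VectorCost;
}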
llvm::User::getOperand
Value * getOperand(unsigned i) const
Definition: User.h:169
llvm::cl::desc
Definition: CommandLine.h:412
combine
vector combine
Definition: VectorCombine.cpp:1218
llvm::PatternMatch::m_Cmp
class_match< CmpInst > m_Cmp()
Matches any compare instruction and ignore it.
Definition: PatternMatch.h:89
InitializePasses.h
computeAlignmentAfterScalarization
static Align computeAlignmentAfterScalarization(Align VectorAlignment, Type *ScalarType, Value *Idx, const DataLayout &DL)
The memory operation on a vector of ScalarType had alignment of VectorAlignment.
Definition: VectorCombine.cpp:909
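This helper is local to VectorCombine.cpp. A hedged sketch of the constant-index case only (the real helper also handles variable indices): the element at byte offset Idx * store-size(ScalarType) from a VectorAlignment-aligned base is aligned to the largest power of two dividing both.

#include "llvm/Support/Alignment.h"
#include <cstdint>
using namespace llvm;

// Known alignment of element Idx, given the vector's alignment and the
// element's store size in bytes. E.g. <4 x i32> at Align(16): lane 1 -> Align(4).
static Align elementAlignmentSketch(Align VectorAlignment,
                                    uint64_t ElemSizeInBytes, uint64_t Idx) {
  return commonAlignment(VectorAlignment, Idx * ElemSizeInBytes);
}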
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
llvm::VectorType::get
static VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
Definition: Type.cpp:670
llvm::Value::users
iterator_range< user_iterator > users()
Definition: Value.h:421
llvm::AAResults::getModRefInfo
ModRefInfo getModRefInfo(const CallBase *Call, const MemoryLocation &Loc)
getModRefInfo (for call sites) - Return information about whether a particular call site modifies or ...
Definition: AliasAnalysis.cpp:218
llvm::MemoryLocation
Representation for a specific memory location.
Definition: MemoryLocation.h:209
Other
Optional< std::vector< StOtherPiece > > Other
Definition: ELFYAML.cpp:1198
llvm::Use
A Use represents the edge between a Value definition and its users.
Definition: Use.h:44
llvm::Type::getPrimitiveSizeInBits
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition: Type.cpp:166
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:38
isMemModifiedBetween
static bool isMemModifiedBetween(BasicBlock::iterator Begin, BasicBlock::iterator End, const MemoryLocation &Loc, AAResults &AA)
Definition: VectorCombine.cpp:800
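This helper is local to VectorCombine.cpp; the underlying idea can be sketched as a linear scan that queries alias analysis for each instruction in the range (a simplified sketch that ignores this pass's scan limit):

#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/IR/BasicBlock.h"
using namespace llvm;

// Does any instruction in [Begin, End) possibly write to Loc?
static bool mayBeModifiedBetween(BasicBlock::iterator Begin,
                                 BasicBlock::iterator End,
                                 const MemoryLocation &Loc, AAResults &AA) {
  for (BasicBlock::iterator It = Begin; It != End; ++It)
    if (isModSet(AA.getModRefInfo(&*It, Loc)))
      return true;
  return false;
}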