1 //===------- VectorCombine.cpp - Optimize partial vector operations -------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass optimizes scalar/vector interactions using target cost models. The
10 // transforms implemented here may not fit in traditional loop-based or SLP
11 // vectorization passes.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "llvm/Transforms/Vectorize/VectorCombine.h"
16 #include "llvm/ADT/Statistic.h"
17 #include "llvm/Analysis/AssumptionCache.h"
18 #include "llvm/Analysis/BasicAliasAnalysis.h"
19 #include "llvm/Analysis/GlobalsModRef.h"
20 #include "llvm/Analysis/Loads.h"
21 #include "llvm/Analysis/TargetTransformInfo.h"
22 #include "llvm/Analysis/ValueTracking.h"
23 #include "llvm/Analysis/VectorUtils.h"
24 #include "llvm/IR/Dominators.h"
25 #include "llvm/IR/Function.h"
26 #include "llvm/IR/IRBuilder.h"
27 #include "llvm/IR/PatternMatch.h"
28 #include "llvm/InitializePasses.h"
29 #include "llvm/Pass.h"
30 #include "llvm/Support/CommandLine.h"
31 #include "llvm/Transforms/Utils/Local.h"
32 #include "llvm/Transforms/Vectorize.h"
33 
34 using namespace llvm;
35 using namespace llvm::PatternMatch;
36 
37 #define DEBUG_TYPE "vector-combine"
38 STATISTIC(NumVecLoad, "Number of vector loads formed");
39 STATISTIC(NumVecCmp, "Number of vector compares formed");
40 STATISTIC(NumVecBO, "Number of vector binops formed");
41 STATISTIC(NumVecCmpBO, "Number of vector compare + binop formed");
42 STATISTIC(NumShufOfBitcast, "Number of shuffles moved after bitcast");
43 STATISTIC(NumScalarBO, "Number of scalar binops formed");
44 STATISTIC(NumScalarCmp, "Number of scalar compares formed");
45 
46 static cl::opt<bool> DisableVectorCombine(
47  "disable-vector-combine", cl::init(false), cl::Hidden,
48  cl::desc("Disable all vector combine transforms"));
49 
50 static cl::opt<bool> DisableBinopExtractShuffle(
51  "disable-binop-extract-shuffle", cl::init(false), cl::Hidden,
52  cl::desc("Disable binop extract to shuffle transforms"));
53 
54 static cl::opt<unsigned> MaxInstrsToScan(
55  "vector-combine-max-scan-instrs", cl::init(30), cl::Hidden,
56  cl::desc("Max number of instructions to scan for vector combining."));
57 
58 static const unsigned InvalidIndex = std::numeric_limits<unsigned>::max();
59 
60 namespace {
61 class VectorCombine {
62 public:
63  VectorCombine(Function &F, const TargetTransformInfo &TTI,
64  const DominatorTree &DT, AAResults &AA, AssumptionCache &AC)
65  : F(F), Builder(F.getContext()), TTI(TTI), DT(DT), AA(AA), AC(AC) {}
66 
67  bool run();
68 
69 private:
70  Function &F;
71  IRBuilder<> Builder;
72  const TargetTransformInfo &TTI;
73  const DominatorTree &DT;
74  AAResults &AA;
75  AssumptionCache &AC;
76 
77  bool vectorizeLoadInsert(Instruction &I);
78  ExtractElementInst *getShuffleExtract(ExtractElementInst *Ext0,
79  ExtractElementInst *Ext1,
80  unsigned PreferredExtractIndex) const;
81  bool isExtractExtractCheap(ExtractElementInst *Ext0, ExtractElementInst *Ext1,
82  unsigned Opcode,
83  ExtractElementInst *&ConvertToShuffle,
84  unsigned PreferredExtractIndex);
85  void foldExtExtCmp(ExtractElementInst *Ext0, ExtractElementInst *Ext1,
86  Instruction &I);
87  void foldExtExtBinop(ExtractElementInst *Ext0, ExtractElementInst *Ext1,
88  Instruction &I);
89  bool foldExtractExtract(Instruction &I);
90  bool foldBitcastShuf(Instruction &I);
91  bool scalarizeBinopOrCmp(Instruction &I);
92  bool foldExtractedCmps(Instruction &I);
93  bool foldSingleElementStore(Instruction &I);
94  bool scalarizeLoadExtract(Instruction &I);
95 };
96 } // namespace
97 
98 static void replaceValue(Value &Old, Value &New) {
99  Old.replaceAllUsesWith(&New);
100  New.takeName(&Old);
101 }
102 
103 bool VectorCombine::vectorizeLoadInsert(Instruction &I) {
104  // Match insert into fixed vector of scalar value.
105  // TODO: Handle non-zero insert index.
106  auto *Ty = dyn_cast<FixedVectorType>(I.getType());
107  Value *Scalar;
108  if (!Ty || !match(&I, m_InsertElt(m_Undef(), m_Value(Scalar), m_ZeroInt())) ||
109  !Scalar->hasOneUse())
110  return false;
111 
112  // Optionally match an extract from another vector.
113  Value *X;
114  bool HasExtract = match(Scalar, m_ExtractElt(m_Value(X), m_ZeroInt()));
115  if (!HasExtract)
116  X = Scalar;
117 
118  // Match source value as load of scalar or vector.
119  // Do not vectorize scalar load (widening) if atomic/volatile or under
120  // asan/hwasan/memtag/tsan. The widened load may load data from dirty regions
121  // or create data races non-existent in the source.
122  auto *Load = dyn_cast<LoadInst>(X);
123  if (!Load || !Load->isSimple() || !Load->hasOneUse() ||
124  Load->getFunction()->hasFnAttribute(Attribute::SanitizeMemTag) ||
125  mustSuppressSpeculation(*Load))
126  return false;
127 
128  const DataLayout &DL = I.getModule()->getDataLayout();
129  Value *SrcPtr = Load->getPointerOperand()->stripPointerCasts();
130  assert(isa<PointerType>(SrcPtr->getType()) && "Expected a pointer type");
131 
132  // If original AS != Load's AS, we can't bitcast the original pointer and have
133  // to use Load's operand instead. Ideally we would want to strip pointer casts
134  // without changing AS, but there's no API to do that ATM.
135  unsigned AS = Load->getPointerAddressSpace();
136  if (AS != SrcPtr->getType()->getPointerAddressSpace())
137  SrcPtr = Load->getPointerOperand();
138 
139  // We are potentially transforming byte-sized (8-bit) memory accesses, so make
140  // sure we have all of our type-based constraints in place for this target.
141  Type *ScalarTy = Scalar->getType();
142  uint64_t ScalarSize = ScalarTy->getPrimitiveSizeInBits();
143  unsigned MinVectorSize = TTI.getMinVectorRegisterBitWidth();
144  if (!ScalarSize || !MinVectorSize || MinVectorSize % ScalarSize != 0 ||
145  ScalarSize % 8 != 0)
146  return false;
147 
148  // Check safety of replacing the scalar load with a larger vector load.
149  // We use minimal alignment (maximum flexibility) because we only care about
150  // the dereferenceable region. When calculating cost and creating a new op,
151  // we may use a larger value based on alignment attributes.
152  unsigned MinVecNumElts = MinVectorSize / ScalarSize;
153  auto *MinVecTy = VectorType::get(ScalarTy, MinVecNumElts, false);
154  unsigned OffsetEltIndex = 0;
155  Align Alignment = Load->getAlign();
156  if (!isSafeToLoadUnconditionally(SrcPtr, MinVecTy, Align(1), DL, Load, &DT)) {
157  // It is not safe to load directly from the pointer, but we can still peek
158  // through gep offsets and check if it is safe to load from a base address with
159  // updated alignment. If it is, we can shuffle the element(s) into place
160  // after loading.
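 // Illustrative (hypothetical) example: if SrcPtr is a constant-offset gep
 // such as "getelementptr inbounds i32, i32* %base, i64 2" and only %base is
 // known to be dereferenceable for the full vector width, the pointer can be
 // rebased to %base with Offset == 8 bytes, and OffsetEltIndex == 2 then
 // selects the original element via the shuffle created later.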
161  unsigned OffsetBitWidth = DL.getIndexTypeSizeInBits(SrcPtr->getType());
162  APInt Offset(OffsetBitWidth, 0);
163  SrcPtr = SrcPtr->stripAndAccumulateInBoundsConstantOffsets(DL, Offset);
164 
165  // We want to shuffle the result down from a high element of a vector, so
166  // the offset must be positive.
167  if (Offset.isNegative())
168  return false;
169 
170  // The offset must be a multiple of the scalar element to shuffle cleanly
171  // in the element's size.
172  uint64_t ScalarSizeInBytes = ScalarSize / 8;
173  if (Offset.urem(ScalarSizeInBytes) != 0)
174  return false;
175 
176  // If we load MinVecNumElts, will our target element still be loaded?
177  OffsetEltIndex = Offset.udiv(ScalarSizeInBytes).getZExtValue();
178  if (OffsetEltIndex >= MinVecNumElts)
179  return false;
180 
181  if (!isSafeToLoadUnconditionally(SrcPtr, MinVecTy, Align(1), DL, Load, &DT))
182  return false;
183 
184  // Update alignment with offset value. Note that the offset could be negated
185  // to more accurately represent "(new) SrcPtr - Offset = (old) SrcPtr", but
186  // negation does not change the result of the alignment calculation.
187  Alignment = commonAlignment(Alignment, Offset.getZExtValue());
188  }
189 
190  // Original pattern: insertelt undef, load [free casts of] PtrOp, 0
191  // Use the greater of the alignment on the load or its source pointer.
192  Alignment = std::max(SrcPtr->getPointerAlignment(DL), Alignment);
193  Type *LoadTy = Load->getType();
194  InstructionCost OldCost =
195  TTI.getMemoryOpCost(Instruction::Load, LoadTy, Alignment, AS);
196  APInt DemandedElts = APInt::getOneBitSet(MinVecNumElts, 0);
197  OldCost += TTI.getScalarizationOverhead(MinVecTy, DemandedElts,
198  /* Insert */ true, HasExtract);
199 
200  // New pattern: load VecPtr
201  InstructionCost NewCost =
202  TTI.getMemoryOpCost(Instruction::Load, MinVecTy, Alignment, AS);
203  // Optionally, we are shuffling the loaded vector element(s) into place.
204  // For the mask set everything but element 0 to undef to prevent poison from
205  // propagating from the extra loaded memory. This will also optionally
206  // shrink/grow the vector from the loaded size to the output size.
207  // We assume this operation has no cost in codegen if there was no offset.
208  // Note that we could use freeze to avoid poison problems, but then we might
209  // still need a shuffle to change the vector size.
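 // Illustrative (hypothetical) example: with a <4 x i32> minimum-width vector
 // and OffsetEltIndex == 2, the mask built below is <2, undef, undef, undef>,
 // which moves the target element into lane 0 of the result.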
210  unsigned OutputNumElts = Ty->getNumElements();
211  SmallVector<int, 16> Mask(OutputNumElts, UndefMaskElem);
212  assert(OffsetEltIndex < MinVecNumElts && "Address offset too big");
213  Mask[0] = OffsetEltIndex;
214  if (OffsetEltIndex)
215  NewCost += TTI.getShuffleCost(TTI::SK_PermuteSingleSrc, MinVecTy, Mask);
216 
217  // We can aggressively convert to the vector form because the backend can
218  // invert this transform if it does not result in a performance win.
219  if (OldCost < NewCost || !NewCost.isValid())
220  return false;
221 
222  // It is safe and potentially profitable to load a vector directly:
223  // inselt undef, load Scalar, 0 --> load VecPtr
224  IRBuilder<> Builder(Load);
225  Value *CastedPtr = Builder.CreateBitCast(SrcPtr, MinVecTy->getPointerTo(AS));
226  Value *VecLd = Builder.CreateAlignedLoad(MinVecTy, CastedPtr, Alignment);
227  VecLd = Builder.CreateShuffleVector(VecLd, Mask);
228 
229  replaceValue(I, *VecLd);
230  ++NumVecLoad;
231  return true;
232 }
233 
234 /// Determine which, if any, of the inputs should be replaced by a shuffle
235 /// followed by extract from a different index.
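/// For example (hypothetical): given extracts from lanes 0 and 3, the extract
/// that is more expensive for the target is returned so it can be rewritten as
/// a shuffle that moves its element to the other (cheaper) extract lane.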
236 ExtractElementInst *VectorCombine::getShuffleExtract(
237  ExtractElementInst *Ext0, ExtractElementInst *Ext1,
238  unsigned PreferredExtractIndex = InvalidIndex) const {
239  assert(isa<ConstantInt>(Ext0->getIndexOperand()) &&
240  isa<ConstantInt>(Ext1->getIndexOperand()) &&
241  "Expected constant extract indexes");
242 
243  unsigned Index0 = cast<ConstantInt>(Ext0->getIndexOperand())->getZExtValue();
244  unsigned Index1 = cast<ConstantInt>(Ext1->getIndexOperand())->getZExtValue();
245 
246  // If the extract indexes are identical, no shuffle is needed.
247  if (Index0 == Index1)
248  return nullptr;
249 
250  Type *VecTy = Ext0->getVectorOperand()->getType();
251  assert(VecTy == Ext1->getVectorOperand()->getType() && "Need matching types");
252  InstructionCost Cost0 =
253  TTI.getVectorInstrCost(Ext0->getOpcode(), VecTy, Index0);
254  InstructionCost Cost1 =
255  TTI.getVectorInstrCost(Ext1->getOpcode(), VecTy, Index1);
256 
257  // If both costs are invalid, no shuffle is needed.
258  if (!Cost0.isValid() && !Cost1.isValid())
259  return nullptr;
260 
261  // We are extracting from 2 different indexes, so one operand must be shuffled
262  // before performing a vector operation and/or extract. The more expensive
263  // extract will be replaced by a shuffle.
264  if (Cost0 > Cost1)
265  return Ext0;
266  if (Cost1 > Cost0)
267  return Ext1;
268 
269  // If the costs are equal and there is a preferred extract index, shuffle the
270  // opposite operand.
271  if (PreferredExtractIndex == Index0)
272  return Ext1;
273  if (PreferredExtractIndex == Index1)
274  return Ext0;
275 
276  // Otherwise, replace the extract with the higher index.
277  return Index0 > Index1 ? Ext0 : Ext1;
278 }
279 
280 /// Compare the relative costs of 2 extracts followed by scalar operation vs.
281 /// vector operation(s) followed by extract. Return true if the existing
282 /// instructions are cheaper than a vector alternative. Otherwise, return false
283 /// and if one of the extracts should be transformed to a shufflevector, set
284 /// \p ConvertToShuffle to that extract instruction.
285 bool VectorCombine::isExtractExtractCheap(ExtractElementInst *Ext0,
286  ExtractElementInst *Ext1,
287  unsigned Opcode,
288  ExtractElementInst *&ConvertToShuffle,
289  unsigned PreferredExtractIndex) {
290  assert(isa<ConstantInt>(Ext0->getOperand(1)) &&
291  isa<ConstantInt>(Ext1->getOperand(1)) &&
292  "Expected constant extract indexes");
293  Type *ScalarTy = Ext0->getType();
294  auto *VecTy = cast<VectorType>(Ext0->getOperand(0)->getType());
295  InstructionCost ScalarOpCost, VectorOpCost;
296 
297  // Get cost estimates for scalar and vector versions of the operation.
298  bool IsBinOp = Instruction::isBinaryOp(Opcode);
299  if (IsBinOp) {
300  ScalarOpCost = TTI.getArithmeticInstrCost(Opcode, ScalarTy);
301  VectorOpCost = TTI.getArithmeticInstrCost(Opcode, VecTy);
302  } else {
303  assert((Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) &&
304  "Expected a compare");
305  ScalarOpCost = TTI.getCmpSelInstrCost(Opcode, ScalarTy,
306  CmpInst::makeCmpResultType(ScalarTy));
307  VectorOpCost = TTI.getCmpSelInstrCost(Opcode, VecTy,
308  CmpInst::makeCmpResultType(VecTy));
309  }
310 
311  // Get cost estimates for the extract elements. These costs will factor into
312  // both sequences.
313  unsigned Ext0Index = cast<ConstantInt>(Ext0->getOperand(1))->getZExtValue();
314  unsigned Ext1Index = cast<ConstantInt>(Ext1->getOperand(1))->getZExtValue();
315 
316  InstructionCost Extract0Cost =
317  TTI.getVectorInstrCost(Instruction::ExtractElement, VecTy, Ext0Index);
318  InstructionCost Extract1Cost =
319  TTI.getVectorInstrCost(Instruction::ExtractElement, VecTy, Ext1Index);
320 
321  // A more expensive extract will always be replaced by a splat shuffle.
322  // For example, if Ext0 is more expensive:
323  // opcode (extelt V0, Ext0), (ext V1, Ext1) -->
324  // extelt (opcode (splat V0, Ext0), V1), Ext1
325  // TODO: Evaluate whether that always results in lowest cost. Alternatively,
326  // check the cost of creating a broadcast shuffle and shuffling both
327  // operands to element 0.
328  InstructionCost CheapExtractCost = std::min(Extract0Cost, Extract1Cost);
329 
330  // Extra uses of the extracts mean that we include those costs in the
331  // vector total because those instructions will not be eliminated.
332  InstructionCost OldCost, NewCost;
333  if (Ext0->getOperand(0) == Ext1->getOperand(0) && Ext0Index == Ext1Index) {
334  // Handle a special case. If the 2 extracts are identical, adjust the
335  // formulas to account for that. The extra use charge allows for either the
336  // CSE'd pattern or an unoptimized form with identical values:
337  // opcode (extelt V, C), (extelt V, C) --> extelt (opcode V, V), C
338  bool HasUseTax = Ext0 == Ext1 ? !Ext0->hasNUses(2)
339  : !Ext0->hasOneUse() || !Ext1->hasOneUse();
340  OldCost = CheapExtractCost + ScalarOpCost;
341  NewCost = VectorOpCost + CheapExtractCost + HasUseTax * CheapExtractCost;
342  } else {
343  // Handle the general case. Each extract is actually a different value:
344  // opcode (extelt V0, C0), (extelt V1, C1) --> extelt (opcode V0, V1), C
345  OldCost = Extract0Cost + Extract1Cost + ScalarOpCost;
346  NewCost = VectorOpCost + CheapExtractCost +
347  !Ext0->hasOneUse() * Extract0Cost +
348  !Ext1->hasOneUse() * Extract1Cost;
349  }
350 
351  ConvertToShuffle = getShuffleExtract(Ext0, Ext1, PreferredExtractIndex);
352  if (ConvertToShuffle) {
353  if (IsBinOp && DisableBinopExtractShuffle)
354  return true;
355 
356  // If we are extracting from 2 different indexes, then one operand must be
357  // shuffled before performing the vector operation. The shuffle mask is
358  // undefined except for 1 lane that is being translated to the remaining
359  // extraction lane. Therefore, it is a splat shuffle. Ex:
360  // ShufMask = { undef, undef, 0, undef }
361  // TODO: The cost model has an option for a "broadcast" shuffle
362  // (splat-from-element-0), but no option for a more general splat.
363  NewCost +=
364  TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, VecTy);
365  }
366 
367  // Aggressively form a vector op if the cost is equal because the transform
368  // may enable further optimization.
369  // Codegen can reverse this transform (scalarize) if it was not profitable.
370  return OldCost < NewCost;
371 }
372 
373 /// Create a shuffle that translates (shifts) 1 element from the input vector
374 /// to a new element location.
375 static Value *createShiftShuffle(Value *Vec, unsigned OldIndex,
376  unsigned NewIndex, IRBuilder<> &Builder) {
377  // The shuffle mask is undefined except for 1 lane that is being translated
378  // to the new element index. Example for OldIndex == 2 and NewIndex == 0:
379  // ShufMask = { 2, undef, undef, undef }
380  auto *VecTy = cast<FixedVectorType>(Vec->getType());
381  SmallVector<int, 32> ShufMask(VecTy->getNumElements(), UndefMaskElem);
382  ShufMask[NewIndex] = OldIndex;
383  return Builder.CreateShuffleVector(Vec, ShufMask, "shift");
384 }
385 
386 /// Given an extract element instruction with constant index operand, shuffle
387 /// the source vector (shift the scalar element) to a NewIndex for extraction.
388 /// Return null if the input can be constant folded, so that we are not creating
389 /// unnecessary instructions.
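/// For example (hypothetical): translating "extractelement <4 x i32> %v, i32 2"
/// to NewIndex 0 creates a shuffle of %v with mask <2, undef, undef, undef> and
/// returns an extract of lane 0 from that shuffle.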
390 static ExtractElementInst *translateExtract(ExtractElementInst *ExtElt,
391  unsigned NewIndex,
392  IRBuilder<> &Builder) {
393  // If the extract can be constant-folded, this code is unsimplified. Defer
394  // to other passes to handle that.
395  Value *X = ExtElt->getVectorOperand();
396  Value *C = ExtElt->getIndexOperand();
397  assert(isa<ConstantInt>(C) && "Expected a constant index operand");
398  if (isa<Constant>(X))
399  return nullptr;
400 
401  Value *Shuf = createShiftShuffle(X, cast<ConstantInt>(C)->getZExtValue(),
402  NewIndex, Builder);
403  return cast<ExtractElementInst>(Builder.CreateExtractElement(Shuf, NewIndex));
404 }
405 
406 /// Try to reduce extract element costs by converting scalar compares to vector
407 /// compares followed by extract.
408 /// cmp (ext0 V0, C), (ext1 V1, C)
409 void VectorCombine::foldExtExtCmp(ExtractElementInst *Ext0,
410  ExtractElementInst *Ext1, Instruction &I) {
411  assert(isa<CmpInst>(&I) && "Expected a compare");
412  assert(cast<ConstantInt>(Ext0->getIndexOperand())->getZExtValue() ==
413  cast<ConstantInt>(Ext1->getIndexOperand())->getZExtValue() &&
414  "Expected matching constant extract indexes");
415 
416  // cmp Pred (extelt V0, C), (extelt V1, C) --> extelt (cmp Pred V0, V1), C
417  ++NumVecCmp;
418  CmpInst::Predicate Pred = cast<CmpInst>(&I)->getPredicate();
419  Value *V0 = Ext0->getVectorOperand(), *V1 = Ext1->getVectorOperand();
420  Value *VecCmp = Builder.CreateCmp(Pred, V0, V1);
421  Value *NewExt = Builder.CreateExtractElement(VecCmp, Ext0->getIndexOperand());
422  replaceValue(I, *NewExt);
423 }
424 
425 /// Try to reduce extract element costs by converting scalar binops to vector
426 /// binops followed by extract.
427 /// bo (ext0 V0, C), (ext1 V1, C)
428 void VectorCombine::foldExtExtBinop(ExtractElementInst *Ext0,
429  ExtractElementInst *Ext1, Instruction &I) {
430  assert(isa<BinaryOperator>(&I) && "Expected a binary operator");
431  assert(cast<ConstantInt>(Ext0->getIndexOperand())->getZExtValue() ==
432  cast<ConstantInt>(Ext1->getIndexOperand())->getZExtValue() &&
433  "Expected matching constant extract indexes");
434 
435  // bo (extelt V0, C), (extelt V1, C) --> extelt (bo V0, V1), C
436  ++NumVecBO;
437  Value *V0 = Ext0->getVectorOperand(), *V1 = Ext1->getVectorOperand();
438  Value *VecBO =
439  Builder.CreateBinOp(cast<BinaryOperator>(&I)->getOpcode(), V0, V1);
440 
441  // All IR flags are safe to back-propagate because any potential poison
442  // created in unused vector elements is discarded by the extract.
443  if (auto *VecBOInst = dyn_cast<Instruction>(VecBO))
444  VecBOInst->copyIRFlags(&I);
445 
446  Value *NewExt = Builder.CreateExtractElement(VecBO, Ext0->getIndexOperand());
447  replaceValue(I, *NewExt);
448 }
449 
450 /// Match an instruction with extracted vector operands.
451 bool VectorCombine::foldExtractExtract(Instruction &I) {
452  // It is not safe to transform things like div, urem, etc. because we may
453  // create undefined behavior when executing those on unknown vector elements.
454  if (!isSafeToSpeculativelyExecute(&I))
455  return false;
456 
457  Instruction *I0, *I1;
458  CmpInst::Predicate Pred = CmpInst::BAD_ICMP_PREDICATE;
459  if (!match(&I, m_Cmp(Pred, m_Instruction(I0), m_Instruction(I1))) &&
460  !match(&I, m_BinOp(m_Instruction(I0), m_Instruction(I1))))
461  return false;
462 
463  Value *V0, *V1;
464  uint64_t C0, C1;
465  if (!match(I0, m_ExtractElt(m_Value(V0), m_ConstantInt(C0))) ||
466  !match(I1, m_ExtractElt(m_Value(V1), m_ConstantInt(C1))) ||
467  V0->getType() != V1->getType())
468  return false;
469 
470  // If the scalar value 'I' is going to be re-inserted into a vector, then try
471  // to create an extract to that same element. The extract/insert can be
472  // reduced to a "select shuffle".
473  // TODO: If we add a larger pattern match that starts from an insert, this
474  // probably becomes unnecessary.
475  auto *Ext0 = cast<ExtractElementInst>(I0);
476  auto *Ext1 = cast<ExtractElementInst>(I1);
477  uint64_t InsertIndex = InvalidIndex;
478  if (I.hasOneUse())
479  match(I.user_back(),
480  m_InsertElt(m_Value(), m_Value(), m_ConstantInt(InsertIndex)));
481 
482  ExtractElementInst *ExtractToChange;
483  if (isExtractExtractCheap(Ext0, Ext1, I.getOpcode(), ExtractToChange,
484  InsertIndex))
485  return false;
486 
487  if (ExtractToChange) {
488  unsigned CheapExtractIdx = ExtractToChange == Ext0 ? C1 : C0;
489  ExtractElementInst *NewExtract =
490  translateExtract(ExtractToChange, CheapExtractIdx, Builder);
491  if (!NewExtract)
492  return false;
493  if (ExtractToChange == Ext0)
494  Ext0 = NewExtract;
495  else
496  Ext1 = NewExtract;
497  }
498 
499  if (Pred != CmpInst::BAD_ICMP_PREDICATE)
500  foldExtExtCmp(Ext0, Ext1, I);
501  else
502  foldExtExtBinop(Ext0, Ext1, I);
503 
504  return true;
505 }
506 
507 /// If this is a bitcast of a shuffle, try to bitcast the source vector to the
508 /// destination type followed by shuffle. This can enable further transforms by
509 /// moving bitcasts or shuffles together.
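/// An illustrative (hypothetical) example:
///   %shuf = shufflevector <2 x i64> %v, <2 x i64> undef, <2 x i32> <i32 1, i32 0>
///   %cast = bitcast <2 x i64> %shuf to <4 x i32>
/// may become
///   %cast = bitcast <2 x i64> %v to <4 x i32>
///   %shuf = shufflevector <4 x i32> %cast, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
/// because the narrow-element mask <2, 3, 0, 1> is equivalent to the original
/// wide-element mask <1, 0>.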
510 bool VectorCombine::foldBitcastShuf(Instruction &I) {
511  Value *V;
512  ArrayRef<int> Mask;
513  if (!match(&I, m_BitCast(
514  m_OneUse(m_Shuffle(m_Value(V), m_Undef(), m_Mask(Mask))))))
515  return false;
516 
517  // 1) Do not fold bitcast shuffle for scalable type. First, shuffle cost for
518  // scalable type is unknown; Second, we cannot reason if the narrowed shuffle
519  // mask for scalable type is a splat or not.
520  // 2) Disallow non-vector casts and length-changing shuffles.
521  // TODO: We could allow any shuffle.
522  auto *DestTy = dyn_cast<FixedVectorType>(I.getType());
523  auto *SrcTy = dyn_cast<FixedVectorType>(V->getType());
524  if (!SrcTy || !DestTy || I.getOperand(0)->getType() != SrcTy)
525  return false;
526 
527  unsigned DestNumElts = DestTy->getNumElements();
528  unsigned SrcNumElts = SrcTy->getNumElements();
529  SmallVector<int, 16> NewMask;
530  if (SrcNumElts <= DestNumElts) {
531  // The bitcast is from wide to narrow/equal elements. The shuffle mask can
532  // always be expanded to the equivalent form choosing narrower elements.
533  assert(DestNumElts % SrcNumElts == 0 && "Unexpected shuffle mask");
534  unsigned ScaleFactor = DestNumElts / SrcNumElts;
535  narrowShuffleMaskElts(ScaleFactor, Mask, NewMask);
536  } else {
537  // The bitcast is from narrow elements to wide elements. The shuffle mask
538  // must choose consecutive elements to allow casting first.
539  assert(SrcNumElts % DestNumElts == 0 && "Unexpected shuffle mask");
540  unsigned ScaleFactor = SrcNumElts / DestNumElts;
541  if (!widenShuffleMaskElts(ScaleFactor, Mask, NewMask))
542  return false;
543  }
544 
545  // The new shuffle must not cost more than the old shuffle. The bitcast is
546  // moved ahead of the shuffle, so assume that it has the same cost as before.
547  InstructionCost DestCost = TTI.getShuffleCost(
548  TargetTransformInfo::SK_PermuteSingleSrc, DestTy, NewMask);
549  InstructionCost SrcCost =
550  TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, SrcTy, Mask);
551  if (DestCost > SrcCost || !DestCost.isValid())
552  return false;
553 
554  // bitcast (shuf V, MaskC) --> shuf (bitcast V), MaskC'
555  ++NumShufOfBitcast;
556  Value *CastV = Builder.CreateBitCast(V, DestTy);
557  Value *Shuf = Builder.CreateShuffleVector(CastV, NewMask);
558  replaceValue(I, *Shuf);
559  return true;
560 }
561 
562 /// Match a vector binop or compare instruction with at least one inserted
563 /// scalar operand and convert to scalar binop/cmp followed by insertelement.
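/// An illustrative (hypothetical) example:
///   %i = insertelement <4 x i32> <i32 1, i32 2, i32 3, i32 4>, i32 %x, i32 0
///   %r = add <4 x i32> %i, <i32 10, i32 20, i32 30, i32 40>
/// may become
///   %s = add i32 %x, 10
///   %r = insertelement <4 x i32> <i32 11, i32 22, i32 33, i32 44>, i32 %s, i32 0
/// where the constant vectors fold away and only one scalar add remains.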
564 bool VectorCombine::scalarizeBinopOrCmp(Instruction &I) {
565  CmpInst::Predicate Pred = CmpInst::BAD_ICMP_PREDICATE;
566  Value *Ins0, *Ins1;
567  if (!match(&I, m_BinOp(m_Value(Ins0), m_Value(Ins1))) &&
568  !match(&I, m_Cmp(Pred, m_Value(Ins0), m_Value(Ins1))))
569  return false;
570 
571  // Do not convert the vector condition of a vector select into a scalar
572  // condition. That may cause problems for codegen because of differences in
573  // boolean formats and register-file transfers.
574  // TODO: Can we account for that in the cost model?
575  bool IsCmp = Pred != CmpInst::Predicate::BAD_ICMP_PREDICATE;
576  if (IsCmp)
577  for (User *U : I.users())
578  if (match(U, m_Select(m_Specific(&I), m_Value(), m_Value())))
579  return false;
580 
581  // Match against one or both scalar values being inserted into constant
582  // vectors:
583  // vec_op VecC0, (inselt VecC1, V1, Index)
584  // vec_op (inselt VecC0, V0, Index), VecC1
585  // vec_op (inselt VecC0, V0, Index), (inselt VecC1, V1, Index)
586  // TODO: Deal with mismatched index constants and variable indexes?
587  Constant *VecC0 = nullptr, *VecC1 = nullptr;
588  Value *V0 = nullptr, *V1 = nullptr;
589  uint64_t Index0 = 0, Index1 = 0;
590  if (!match(Ins0, m_InsertElt(m_Constant(VecC0), m_Value(V0),
591  m_ConstantInt(Index0))) &&
592  !match(Ins0, m_Constant(VecC0)))
593  return false;
594  if (!match(Ins1, m_InsertElt(m_Constant(VecC1), m_Value(V1),
595  m_ConstantInt(Index1))) &&
596  !match(Ins1, m_Constant(VecC1)))
597  return false;
598 
599  bool IsConst0 = !V0;
600  bool IsConst1 = !V1;
601  if (IsConst0 && IsConst1)
602  return false;
603  if (!IsConst0 && !IsConst1 && Index0 != Index1)
604  return false;
605 
606  // Bail for single insertion if it is a load.
607  // TODO: Handle this once getVectorInstrCost can cost for load/stores.
608  auto *I0 = dyn_cast_or_null<Instruction>(V0);
609  auto *I1 = dyn_cast_or_null<Instruction>(V1);
610  if ((IsConst0 && I1 && I1->mayReadFromMemory()) ||
611  (IsConst1 && I0 && I0->mayReadFromMemory()))
612  return false;
613 
614  uint64_t Index = IsConst0 ? Index1 : Index0;
615  Type *ScalarTy = IsConst0 ? V1->getType() : V0->getType();
616  Type *VecTy = I.getType();
617  assert(VecTy->isVectorTy() &&
618  (IsConst0 || IsConst1 || V0->getType() == V1->getType()) &&
619  (ScalarTy->isIntegerTy() || ScalarTy->isFloatingPointTy() ||
620  ScalarTy->isPointerTy()) &&
621  "Unexpected types for insert element into binop or cmp");
622 
623  unsigned Opcode = I.getOpcode();
624  InstructionCost ScalarOpCost, VectorOpCost;
625  if (IsCmp) {
626  ScalarOpCost = TTI.getCmpSelInstrCost(Opcode, ScalarTy);
627  VectorOpCost = TTI.getCmpSelInstrCost(Opcode, VecTy);
628  } else {
629  ScalarOpCost = TTI.getArithmeticInstrCost(Opcode, ScalarTy);
630  VectorOpCost = TTI.getArithmeticInstrCost(Opcode, VecTy);
631  }
632 
633  // Get cost estimate for the insert element. This cost will factor into
634  // both sequences.
635  InstructionCost InsertCost =
636  TTI.getVectorInstrCost(Instruction::InsertElement, VecTy, Index);
637  InstructionCost OldCost =
638  (IsConst0 ? 0 : InsertCost) + (IsConst1 ? 0 : InsertCost) + VectorOpCost;
639  InstructionCost NewCost = ScalarOpCost + InsertCost +
640  (IsConst0 ? 0 : !Ins0->hasOneUse() * InsertCost) +
641  (IsConst1 ? 0 : !Ins1->hasOneUse() * InsertCost);
642 
643  // We want to scalarize unless the vector variant actually has lower cost.
644  if (OldCost < NewCost || !NewCost.isValid())
645  return false;
646 
647  // vec_op (inselt VecC0, V0, Index), (inselt VecC1, V1, Index) -->
648  // inselt NewVecC, (scalar_op V0, V1), Index
649  if (IsCmp)
650  ++NumScalarCmp;
651  else
652  ++NumScalarBO;
653 
654  // For constant cases, extract the scalar element, this should constant fold.
655  if (IsConst0)
656  V0 = ConstantExpr::getExtractElement(VecC0, Builder.getInt64(Index));
657  if (IsConst1)
658  V1 = ConstantExpr::getExtractElement(VecC1, Builder.getInt64(Index));
659 
660  Value *Scalar =
661  IsCmp ? Builder.CreateCmp(Pred, V0, V1)
662  : Builder.CreateBinOp((Instruction::BinaryOps)Opcode, V0, V1);
663 
664  Scalar->setName(I.getName() + ".scalar");
665 
666  // All IR flags are safe to back-propagate. There is no potential for extra
667  // poison to be created by the scalar instruction.
668  if (auto *ScalarInst = dyn_cast<Instruction>(Scalar))
669  ScalarInst->copyIRFlags(&I);
670 
671  // Fold the vector constants in the original vectors into a new base vector.
672  Constant *NewVecC = IsCmp ? ConstantExpr::getCompare(Pred, VecC0, VecC1)
673  : ConstantExpr::get(Opcode, VecC0, VecC1);
674  Value *Insert = Builder.CreateInsertElement(NewVecC, Scalar, Index);
675  replaceValue(I, *Insert);
676  return true;
677 }
678 
679 /// Try to combine a scalar binop + 2 scalar compares of extracted elements of
680 /// a vector into vector operations followed by extract. Note: The SLP pass
681 /// may miss this pattern because of implementation problems.
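/// An illustrative (hypothetical) example:
///   %e0 = extractelement <4 x i32> %x, i32 0
///   %c0 = icmp sgt i32 %e0, 42
///   %e1 = extractelement <4 x i32> %x, i32 1
///   %c1 = icmp sgt i32 %e1, 7
///   %r = and i1 %c0, %c1
/// may become a single vector icmp against <i32 42, i32 7, i32 undef, i32 undef>,
/// a shuffle, a <4 x i1> 'and', and one extractelement of the cheap lane.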
682 bool VectorCombine::foldExtractedCmps(Instruction &I) {
683  // We are looking for a scalar binop of booleans.
684  // binop i1 (cmp Pred I0, C0), (cmp Pred I1, C1)
685  if (!I.isBinaryOp() || !I.getType()->isIntegerTy(1))
686  return false;
687 
688  // The compare predicates should match, and each compare should have a
689  // constant operand.
690  // TODO: Relax the one-use constraints.
691  Value *B0 = I.getOperand(0), *B1 = I.getOperand(1);
692  Instruction *I0, *I1;
693  Constant *C0, *C1;
694  CmpInst::Predicate P0, P1;
695  if (!match(B0, m_OneUse(m_Cmp(P0, m_Instruction(I0), m_Constant(C0)))) ||
696  !match(B1, m_OneUse(m_Cmp(P1, m_Instruction(I1), m_Constant(C1)))) ||
697  P0 != P1)
698  return false;
699 
700  // The compare operands must be extracts of the same vector with constant
701  // extract indexes.
702  // TODO: Relax the one-use constraints.
703  Value *X;
704  uint64_t Index0, Index1;
705  if (!match(I0, m_OneUse(m_ExtractElt(m_Value(X), m_ConstantInt(Index0)))) ||
706  !match(I1, m_OneUse(m_ExtractElt(m_Specific(X), m_ConstantInt(Index1)))))
707  return false;
708 
709  auto *Ext0 = cast<ExtractElementInst>(I0);
710  auto *Ext1 = cast<ExtractElementInst>(I1);
711  ExtractElementInst *ConvertToShuf = getShuffleExtract(Ext0, Ext1);
712  if (!ConvertToShuf)
713  return false;
714 
715  // The original scalar pattern is:
716  // binop i1 (cmp Pred (ext X, Index0), C0), (cmp Pred (ext X, Index1), C1)
717  CmpInst::Predicate Pred = P0;
718  unsigned CmpOpcode = CmpInst::isFPPredicate(Pred) ? Instruction::FCmp
719  : Instruction::ICmp;
720  auto *VecTy = dyn_cast<FixedVectorType>(X->getType());
721  if (!VecTy)
722  return false;
723 
724  InstructionCost OldCost =
725  TTI.getVectorInstrCost(Ext0->getOpcode(), VecTy, Index0);
726  OldCost += TTI.getVectorInstrCost(Ext1->getOpcode(), VecTy, Index1);
727  OldCost += TTI.getCmpSelInstrCost(CmpOpcode, I0->getType()) * 2;
728  OldCost += TTI.getArithmeticInstrCost(I.getOpcode(), I.getType());
729 
730  // The proposed vector pattern is:
731  // vcmp = cmp Pred X, VecC
732  // ext (binop vNi1 vcmp, (shuffle vcmp, Index1)), Index0
733  int CheapIndex = ConvertToShuf == Ext0 ? Index1 : Index0;
734  int ExpensiveIndex = ConvertToShuf == Ext0 ? Index0 : Index1;
735  auto *CmpTy = cast<FixedVectorType>(CmpInst::makeCmpResultType(X->getType()));
736  InstructionCost NewCost = TTI.getCmpSelInstrCost(CmpOpcode, X->getType());
737  SmallVector<int, 32> ShufMask(VecTy->getNumElements(), UndefMaskElem);
738  ShufMask[CheapIndex] = ExpensiveIndex;
739  NewCost += TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, CmpTy,
740  ShufMask);
741  NewCost += TTI.getArithmeticInstrCost(I.getOpcode(), CmpTy);
742  NewCost += TTI.getVectorInstrCost(Ext0->getOpcode(), CmpTy, CheapIndex);
743 
744  // Aggressively form vector ops if the cost is equal because the transform
745  // may enable further optimization.
746  // Codegen can reverse this transform (scalarize) if it was not profitable.
747  if (OldCost < NewCost || !NewCost.isValid())
748  return false;
749 
750  // Create a vector constant from the 2 scalar constants.
751  SmallVector<Constant *, 32> CmpC(VecTy->getNumElements(),
752  UndefValue::get(VecTy->getElementType()));
753  CmpC[Index0] = C0;
754  CmpC[Index1] = C1;
755  Value *VCmp = Builder.CreateCmp(Pred, X, ConstantVector::get(CmpC));
756 
757  Value *Shuf = createShiftShuffle(VCmp, ExpensiveIndex, CheapIndex, Builder);
758  Value *VecLogic = Builder.CreateBinOp(cast<BinaryOperator>(I).getOpcode(),
759  VCmp, Shuf);
760  Value *NewExt = Builder.CreateExtractElement(VecLogic, CheapIndex);
761  replaceValue(I, *NewExt);
762  ++NumVecCmpBO;
763  return true;
764 }
765 
766 // Check if memory loc modified between two instrs in the same BB
767 static bool isMemModifiedBetween(BasicBlock::iterator Begin,
768  BasicBlock::iterator End,
769  const MemoryLocation &Loc, AAResults &AA) {
770  unsigned NumScanned = 0;
771  return std::any_of(Begin, End, [&](const Instruction &Instr) {
772  return isModSet(AA.getModRefInfo(&Instr, Loc)) ||
773  ++NumScanned > MaxInstrsToScan;
774  });
775 }
776 
777 /// Helper class to indicate whether a vector index can be safely scalarized and
778 /// if a freeze needs to be inserted.
779 class ScalarizationResult {
780  enum class StatusTy { Unsafe, Safe, SafeWithFreeze };
781 
782  StatusTy Status;
783  Value *ToFreeze;
784 
785  ScalarizationResult(StatusTy Status, Value *ToFreeze = nullptr)
786  : Status(Status), ToFreeze(ToFreeze) {}
787 
788 public:
789  ScalarizationResult(const ScalarizationResult &Other) = default;
790  ~ScalarizationResult() {
791  assert(!ToFreeze && "freeze() not called with ToFreeze being set");
792  }
793 
794  static ScalarizationResult unsafe() { return {StatusTy::Unsafe}; }
795  static ScalarizationResult safe() { return {StatusTy::Safe}; }
796  static ScalarizationResult safeWithFreeze(Value *ToFreeze) {
797  return {StatusTy::SafeWithFreeze, ToFreeze};
798  }
799 
800  /// Returns true if the index can be scalarized without requiring a freeze.
801  bool isSafe() const { return Status == StatusTy::Safe; }
802  /// Returns true if the index cannot be scalarized.
803  bool isUnsafe() const { return Status == StatusTy::Unsafe; }
804  /// Returns true if the index can be scalarized, but requires inserting a
805  /// freeze.
806  bool isSafeWithFreeze() const { return Status == StatusTy::SafeWithFreeze; }
807 
808  /// Freeze the ToFreeze and update the use in \p User to use it.
809  void freeze(IRBuilder<> &Builder, Instruction &UserI) {
810  assert(isSafeWithFreeze() &&
811  "should only be used when freezing is required");
812  assert(is_contained(ToFreeze->users(), &UserI) &&
813  "UserI must be a user of ToFreeze");
814  IRBuilder<>::InsertPointGuard Guard(Builder);
815  Builder.SetInsertPoint(cast<Instruction>(&UserI));
816  Value *Frozen =
817  Builder.CreateFreeze(ToFreeze, ToFreeze->getName() + ".frozen");
818  for (Use &U : make_early_inc_range((UserI.operands())))
819  if (U.get() == ToFreeze)
820  U.set(Frozen);
821 
822  ToFreeze = nullptr;
823  }
824 };
825 
826 /// Check if it is legal to scalarize a memory access to \p VecTy at index \p
827 /// Idx. \p Idx must access a valid vector element.
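/// For example (hypothetical): with a <4 x i32> vector and an index defined as
/// "%idx = and i64 %i, 3", the index is always in bounds, but %i may be poison,
/// so the result is safeWithFreeze(%i) and %i must be frozen before use.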
828 static ScalarizationResult canScalarizeAccess(FixedVectorType *VecTy,
829  Value *Idx, Instruction *CtxI,
830  AssumptionCache &AC) {
831  if (auto *C = dyn_cast<ConstantInt>(Idx)) {
832  if (C->getValue().ult(VecTy->getNumElements()))
833  return ScalarizationResult::safe();
834  return ScalarizationResult::unsafe();
835  }
836 
837  unsigned IntWidth = Idx->getType()->getScalarSizeInBits();
838  APInt Zero(IntWidth, 0);
839  APInt MaxElts(IntWidth, VecTy->getNumElements());
840  ConstantRange ValidIndices(Zero, MaxElts);
841  ConstantRange IdxRange(IntWidth, true);
842 
843  if (isGuaranteedNotToBePoison(Idx, &AC)) {
844  if (ValidIndices.contains(computeConstantRange(Idx, true, &AC, CtxI, 0)))
845  return ScalarizationResult::safe();
846  return ScalarizationResult::unsafe();
847  }
848 
849  // If the index may be poison, check if we can insert a freeze before the
850  // range of the index is restricted.
851  Value *IdxBase;
852  ConstantInt *CI;
853  if (match(Idx, m_And(m_Value(IdxBase), m_ConstantInt(CI)))) {
854  IdxRange = IdxRange.binaryAnd(CI->getValue());
855  } else if (match(Idx, m_URem(m_Value(IdxBase), m_ConstantInt(CI)))) {
856  IdxRange = IdxRange.urem(CI->getValue());
857  }
858 
859  if (ValidIndices.contains(IdxRange))
860  return ScalarizationResult::safeWithFreeze(IdxBase);
861  return ScalarizationResult::unsafe();
862 }
863 
864 /// The memory operation on a vector of \p ScalarType had alignment of
865 /// \p VectorAlignment. Compute the maximal, but conservatively correct,
866 /// alignment that will be valid for the memory operation on a single scalar
867 /// element of the same type with index \p Idx.
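/// A worked (hypothetical) example: scalarizing a <4 x i32> access that had
/// alignment 8 at constant index 1 gives commonAlignment(8, 1 * 4) = 4, at
/// index 2 it gives commonAlignment(8, 2 * 4) = 8, and an unknown index
/// conservatively gives commonAlignment(8, 4) = 4.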
868 static Align computeAlignmentAfterScalarization(Align VectorAlignment,
869  Type *ScalarType, Value *Idx,
870  const DataLayout &DL) {
871  if (auto *C = dyn_cast<ConstantInt>(Idx))
872  return commonAlignment(VectorAlignment,
873  C->getZExtValue() * DL.getTypeStoreSize(ScalarType));
874  return commonAlignment(VectorAlignment, DL.getTypeStoreSize(ScalarType));
875 }
876 
877 // Combine patterns like:
878 // %0 = load <4 x i32>, <4 x i32>* %a
879 // %1 = insertelement <4 x i32> %0, i32 %b, i32 1
880 // store <4 x i32> %1, <4 x i32>* %a
881 // to:
882 // %0 = bitcast <4 x i32>* %a to i32*
883 // %1 = getelementptr inbounds i32, i32* %0, i64 0, i64 1
884 // store i32 %b, i32* %1
885 bool VectorCombine::foldSingleElementStore(Instruction &I) {
886  StoreInst *SI = dyn_cast<StoreInst>(&I);
887  if (!SI || !SI->isSimple() ||
888  !isa<FixedVectorType>(SI->getValueOperand()->getType()))
889  return false;
890 
891  // TODO: Combine more complicated patterns (multiple insert) by referencing
892  // TargetTransformInfo.
893  Instruction *Source;
894  Value *NewElement;
895  Value *Idx;
896  if (!match(SI->getValueOperand(),
897  m_InsertElt(m_Instruction(Source), m_Value(NewElement),
898  m_Value(Idx))))
899  return false;
900 
901  if (auto *Load = dyn_cast<LoadInst>(Source)) {
902  auto VecTy = cast<FixedVectorType>(SI->getValueOperand()->getType());
903  const DataLayout &DL = I.getModule()->getDataLayout();
904  Value *SrcAddr = Load->getPointerOperand()->stripPointerCasts();
905  // Don't optimize for atomic/volatile load or store. Ensure memory is not
906  // modified between the load and the store, the vector type matches the store
907  // size, and the index is in bounds.
907  if (!Load->isSimple() || Load->getParent() != SI->getParent() ||
908  !DL.typeSizeEqualsStoreSize(Load->getType()) ||
909  SrcAddr != SI->getPointerOperand()->stripPointerCasts())
910  return false;
911 
912  auto ScalarizableIdx = canScalarizeAccess(VecTy, Idx, Load, AC);
913  if (ScalarizableIdx.isUnsafe() ||
914  isMemModifiedBetween(Load->getIterator(), SI->getIterator(),
915  MemoryLocation::get(SI), AA))
916  return false;
917 
918  if (ScalarizableIdx.isSafeWithFreeze())
919  ScalarizableIdx.freeze(Builder, *cast<Instruction>(Idx));
920  Value *GEP = Builder.CreateInBoundsGEP(
921  SI->getValueOperand()->getType(), SI->getPointerOperand(),
922  {ConstantInt::get(Idx->getType(), 0), Idx});
923  StoreInst *NSI = Builder.CreateStore(NewElement, GEP);
924  NSI->copyMetadata(*SI);
925  Align ScalarOpAlignment = computeAlignmentAfterScalarization(
926  std::max(SI->getAlign(), Load->getAlign()), NewElement->getType(), Idx,
927  DL);
928  NSI->setAlignment(ScalarOpAlignment);
929  replaceValue(I, *NSI);
930  // Erase the original store manually; replaceValue does not remove it.
931  I.eraseFromParent();
932  return true;
933  }
934 
935  return false;
936 }
937 
938 /// Try to scalarize vector loads feeding extractelement instructions.
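/// An illustrative (hypothetical) example:
///   %v = load <4 x i32>, <4 x i32>* %p
///   %e = extractelement <4 x i32> %v, i32 2
/// may become
///   %gep = getelementptr inbounds <4 x i32>, <4 x i32>* %p, i32 0, i32 2
///   %e = load i32, i32* %gep
/// when all users of the load are such extracts and the cost model favors
/// scalar loads.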
939 bool VectorCombine::scalarizeLoadExtract(Instruction &I) {
940  Value *Ptr;
941  Value *Idx;
942  if (!match(&I, m_ExtractElt(m_Load(m_Value(Ptr)), m_Value(Idx))))
943  return false;
944 
945  auto *LI = cast<LoadInst>(I.getOperand(0));
946  const DataLayout &DL = I.getModule()->getDataLayout();
947  if (LI->isVolatile() || !DL.typeSizeEqualsStoreSize(LI->getType()))
948  return false;
949 
950  auto *FixedVT = dyn_cast<FixedVectorType>(LI->getType());
951  if (!FixedVT)
952  return false;
953 
954  InstructionCost OriginalCost = TTI.getMemoryOpCost(
955  Instruction::Load, LI->getType(), Align(LI->getAlignment()),
956  LI->getPointerAddressSpace());
957  InstructionCost ScalarizedCost = 0;
958 
959  Instruction *LastCheckedInst = LI;
960  unsigned NumInstChecked = 0;
961  // Check if all users of the load are extracts with no memory modifications
962  // between the load and the extract. Compute the cost of both the original
963  // code and the scalarized version.
964  for (User *U : LI->users()) {
965  auto *UI = dyn_cast<ExtractElementInst>(U);
966  if (!UI || UI->getParent() != LI->getParent())
967  return false;
968 
969  if (!isGuaranteedNotToBePoison(UI->getOperand(1), &AC, LI, &DT))
970  return false;
971 
972  // Check if any instruction between the load and the extract may modify
973  // memory.
974  if (LastCheckedInst->comesBefore(UI)) {
975  for (Instruction &I :
976  make_range(std::next(LI->getIterator()), UI->getIterator())) {
977  // Bail out if we reached the check limit or the instruction may write
978  // to memory.
979  if (NumInstChecked == MaxInstrsToScan || I.mayWriteToMemory())
980  return false;
981  NumInstChecked++;
982  }
983  }
984 
985  if (!LastCheckedInst)
986  LastCheckedInst = UI;
987  else if (LastCheckedInst->comesBefore(UI))
988  LastCheckedInst = UI;
989 
990  auto ScalarIdx = canScalarizeAccess(FixedVT, UI->getOperand(1), &I, AC);
991  if (!ScalarIdx.isSafe()) {
992  // TODO: Freeze index if it is safe to do so.
993  return false;
994  }
995 
996  auto *Index = dyn_cast<ConstantInt>(UI->getOperand(1));
997  OriginalCost +=
998  TTI.getVectorInstrCost(Instruction::ExtractElement, LI->getType(),
999  Index ? Index->getZExtValue() : -1);
1000  ScalarizedCost +=
1001  TTI.getMemoryOpCost(Instruction::Load, FixedVT->getElementType(),
1002  Align(1), LI->getPointerAddressSpace());
1003  ScalarizedCost += TTI.getAddressComputationCost(FixedVT->getElementType());
1004  }
1005 
1006  if (ScalarizedCost >= OriginalCost)
1007  return false;
1008 
1009  // Replace extracts with narrow scalar loads.
1010  for (User *U : LI->users()) {
1011  auto *EI = cast<ExtractElementInst>(U);
1012  Builder.SetInsertPoint(EI);
1013 
1014  Value *Idx = EI->getOperand(1);
1015  Value *GEP =
1016  Builder.CreateInBoundsGEP(FixedVT, Ptr, {Builder.getInt32(0), Idx});
1017  auto *NewLoad = cast<LoadInst>(Builder.CreateLoad(
1018  FixedVT->getElementType(), GEP, EI->getName() + ".scalar"));
1019 
1020  Align ScalarOpAlignment = computeAlignmentAfterScalarization(
1021  LI->getAlign(), FixedVT->getElementType(), Idx, DL);
1022  NewLoad->setAlignment(ScalarOpAlignment);
1023 
1024  replaceValue(*EI, *NewLoad);
1025  }
1026 
1027  return true;
1028 }
1029 
1030 /// This is the entry point for all transforms. Pass manager differences are
1031 /// handled in the callers of this function.
1032 bool VectorCombine::run() {
1033  if (DisableVectorCombine)
1034  return false;
1035 
1036  // Don't attempt vectorization if the target does not support vectors.
1037  if (!TTI.getNumberOfRegisters(TTI.getRegisterClassForType(/*Vector*/ true)))
1038  return false;
1039 
1040  bool MadeChange = false;
1041  for (BasicBlock &BB : F) {
1042  // Ignore unreachable basic blocks.
1043  if (!DT.isReachableFromEntry(&BB))
1044  continue;
1045  // Use early increment range so that we can erase instructions in loop.
1046  for (Instruction &I : make_early_inc_range(BB)) {
1047  if (isa<DbgInfoIntrinsic>(I))
1048  continue;
1049  Builder.SetInsertPoint(&I);
1050  MadeChange |= vectorizeLoadInsert(I);
1051  MadeChange |= foldExtractExtract(I);
1052  MadeChange |= foldBitcastShuf(I);
1053  MadeChange |= scalarizeBinopOrCmp(I);
1054  MadeChange |= foldExtractedCmps(I);
1055  MadeChange |= scalarizeLoadExtract(I);
1056  MadeChange |= foldSingleElementStore(I);
1057  }
1058  }
1059 
1060  // We're done with transforms, so remove dead instructions.
1061  if (MadeChange)
1062  for (BasicBlock &BB : F)
1063  SimplifyInstructionsInBlock(&BB);
1064 
1065  return MadeChange;
1066 }
1067 
1068 // Pass manager boilerplate below here.
1069 
1070 namespace {
1071 class VectorCombineLegacyPass : public FunctionPass {
1072 public:
1073  static char ID;
1074  VectorCombineLegacyPass() : FunctionPass(ID) {
1075  initializeVectorCombineLegacyPassPass(*PassRegistry::getPassRegistry());
1076  }
1077 
1078  void getAnalysisUsage(AnalysisUsage &AU) const override {
1079  AU.addRequired<AssumptionCacheTracker>();
1080  AU.addRequired<DominatorTreeWrapperPass>();
1081  AU.addRequired<TargetTransformInfoWrapperPass>();
1082  AU.addRequired<AAResultsWrapperPass>();
1083  AU.setPreservesCFG();
1084  AU.addPreserved<DominatorTreeWrapperPass>();
1085  AU.addPreserved<GlobalsAAWrapperPass>();
1086  AU.addPreserved<AAResultsWrapperPass>();
1087  AU.addPreserved<BasicAAWrapperPass>();
1088  FunctionPass::getAnalysisUsage(AU);
1089  }
1090 
1091  bool runOnFunction(Function &F) override {
1092  if (skipFunction(F))
1093  return false;
1094  auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
1095  auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
1096  auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1097  auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
1098  VectorCombine Combiner(F, TTI, DT, AA, AC);
1099  return Combiner.run();
1100  }
1101 };
1102 } // namespace
1103 
1104 char VectorCombineLegacyPass::ID = 0;
1105 INITIALIZE_PASS_BEGIN(VectorCombineLegacyPass, "vector-combine",
1106  "Optimize scalar/vector ops", false,
1107  false)
1108 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
1109 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
1110 INITIALIZE_PASS_END(VectorCombineLegacyPass, "vector-combine",
1111  "Optimize scalar/vector ops", false, false)
1112 Pass *llvm::createVectorCombinePass() {
1113  return new VectorCombineLegacyPass();
1114 }
1115 
1116 PreservedAnalyses VectorCombinePass::run(Function &F,
1117  FunctionAnalysisManager &FAM) {
1118  auto &AC = FAM.getResult<AssumptionAnalysis>(F);
1119  TargetTransformInfo &TTI = FAM.getResult<TargetIRAnalysis>(F);
1120  DominatorTree &DT = FAM.getResult<DominatorTreeAnalysis>(F);
1121  AAResults &AA = FAM.getResult<AAManager>(F);
1122  VectorCombine Combiner(F, TTI, DT, AA, AC);
1123  if (!Combiner.run())
1124  return PreservedAnalyses::all();
1125  PreservedAnalyses PA;
1126  PA.preserveSet<CFGAnalyses>();
1127  return PA;
1128 }
llvm::InstructionCost
Definition: InstructionCost.h:29
llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:155
AssumptionCache.h
llvm::AAManager
A manager for alias analyses.
Definition: AliasAnalysis.h:1233
llvm::mustSuppressSpeculation
bool mustSuppressSpeculation(const LoadInst &LI)
Return true if speculation of the given load must be suppressed to avoid ordering or interfering with...
Definition: ValueTracking.cpp:4577
llvm::TargetIRAnalysis
Analysis pass providing the TargetTransformInfo.
Definition: TargetTransformInfo.h:2331
B1
llvm::MemoryLocation::get
static MemoryLocation get(const LoadInst *LI)
Return a location with information about the memory reference by the given instruction.
Definition: MemoryLocation.cpp:37
llvm
---------------------— PointerInfo ------------------------------------—
Definition: AllocatorList.h:23
llvm::ConstantExpr::getExtractElement
static Constant * getExtractElement(Constant *Vec, Constant *Idx, Type *OnlyIfReducedTy=nullptr)
Definition: Constants.cpp:2527
llvm::Value::getPointerAlignment
Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
Definition: Value.cpp:905
llvm::make_range
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
Definition: iterator_range.h:53
llvm::TargetTransformInfo::getMinVectorRegisterBitWidth
unsigned getMinVectorRegisterBitWidth() const
Definition: TargetTransformInfo.cpp:599
ScalarizationResult::~ScalarizationResult
~ScalarizationResult()
Definition: VectorCombine.cpp:790
llvm::PatternMatch::m_Mask
Definition: PatternMatch.h:1524
llvm::DataLayout
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:112
llvm::TargetTransformInfo::getShuffleCost
InstructionCost getShuffleCost(ShuffleKind Kind, VectorType *Tp, ArrayRef< int > Mask=None, int Index=0, VectorType *SubTp=nullptr) const
Definition: TargetTransformInfo.cpp:726
llvm::Value::hasOneUse
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:435
llvm::ConstantRange::binaryAnd
ConstantRange binaryAnd(const ConstantRange &Other) const
Return a new range representing the possible values resulting from a binary-and of a value in this ra...
Definition: ConstantRange.cpp:1297
Insert
Vector Rotate Left Mask Mask Insert
Definition: README_P9.txt:112
llvm::CmpInst::Predicate
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:720
llvm::User::operands
op_range operands()
Definition: User.h:242
llvm::BasicBlock::iterator
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:90
llvm::isSafeToSpeculativelyExecute
bool isSafeToSpeculativelyExecute(const Value *V, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr)
Return true if the instruction does not have any effects besides calculating the result and does not ...
Definition: ValueTracking.cpp:4589
llvm::Type::isPointerTy
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:228
llvm::AnalysisManager::getResult
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:785
Loads.h
llvm::ExtractElementInst
This instruction extracts a single (scalar) element from a VectorType value.
Definition: Instructions.h:1873
llvm::Function
Definition: Function.h:61
Pass.h
llvm::TargetTransformInfo::getMemoryOpCost
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, const Instruction *I=nullptr) const
Definition: TargetTransformInfo.cpp:827
ScalarizationResult::isUnsafe
bool isUnsafe() const
Returns true if the index cannot be scalarized.
Definition: VectorCombine.cpp:803
C1
instcombine should handle this C2 when C1
Definition: README.txt:263
llvm::PatternMatch::m_InsertElt
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
Definition: PatternMatch.h:1494
llvm::ConstantInt::getValue
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition: Constants.h:133
llvm::TargetTransformInfo::getAddressComputationCost
InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *SE=nullptr, const SCEV *Ptr=nullptr) const
Definition: TargetTransformInfo.cpp:889
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1168
Statistic.h
llvm::PatternMatch::m_Load
OneOps_match< OpTy, Instruction::Load > m_Load(const OpTy &Op)
Matches LoadInst.
Definition: PatternMatch.h:1573
llvm::CmpInst::makeCmpResultType
static Type * makeCmpResultType(Type *opnd_type)
Create a result type for fcmp/icmp.
Definition: InstrTypes.h:1031
llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition: TargetTransformInfo.h:168
llvm::Type::getPointerAddressSpace
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
Definition: DerivedTypes.h:734
llvm::IRBuilder<>
ValueTracking.h
Local.h
llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:151
FAM
FunctionAnalysisManager FAM
Definition: PassBuilderBindings.cpp:59
GlobalsModRef.h
VectorCombine.h
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:143
llvm::narrowShuffleMaskElts
void narrowShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Replace each shuffle mask index with the scaled sequential indices for an equivalent mask of narrowed...
Definition: VectorUtils.cpp:427
llvm::createVectorCombinePass
Pass * createVectorCombinePass()
Definition: VectorCombine.cpp:1112
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
llvm::StoreInst::setAlignment
void setAlignment(Align Align)
Definition: Instructions.h:357
llvm::Instruction::comesBefore
bool comesBefore(const Instruction *Other) const
Given an instruction Other in the same basic block as this instruction, return true if this instructi...
Definition: Instruction.cpp:111
llvm::PatternMatch::m_BinOp
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
Definition: PatternMatch.h:84
Offset
uint64_t Offset
Definition: ELFObjHandler.cpp:81
llvm::Instruction::copyMetadata
void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
Definition: Instruction.cpp:829
INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(VectorCombineLegacyPass, "vector-combine", "Optimize scalar/vector ops", false, false) INITIALIZE_PASS_END(VectorCombineLegacyPass
InvalidIndex
static const unsigned InvalidIndex
Definition: VectorCombine.cpp:58
llvm::PatternMatch::m_BitCast
CastClass_match< OpTy, Instruction::BitCast > m_BitCast(const OpTy &Op)
Matches BitCast.
Definition: PatternMatch.h:1603
llvm::Type::isFloatingPointTy
bool isFloatingPointTy() const
Return true if this is one of the six floating-point types.
Definition: Type.h:162
BasicAliasAnalysis.h
MaxInstrsToScan
static cl::opt< unsigned > MaxInstrsToScan("vector-combine-max-scan-instrs", cl::init(30), cl::Hidden, cl::desc("Max number of instructions to scan for vector combining."))
llvm::FixedVectorType
Class to represent fixed width SIMD vectors.
Definition: DerivedTypes.h:525
llvm::BitmaskEnumDetail::Mask
std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:80
llvm::TargetTransformInfo::SK_PermuteSingleSrc
@ SK_PermuteSingleSrc
Shuffle elements of single source vector with any shuffle mask.
Definition: TargetTransformInfo.h:870
F
#define F(x, y, z)
Definition: MD5.cpp:56
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
llvm::UndefMaskElem
constexpr int UndefMaskElem
Definition: Instructions.h:1997
llvm::PatternMatch::m_OneUse
OneUse_match< T > m_OneUse(const T &SubPattern)
Definition: PatternMatch.h:67
CommandLine.h
llvm::FixedVectorType::getNumElements
unsigned getNumElements() const
Definition: DerivedTypes.h:568
llvm::ConstantInt
This is the shared class of boolean and integer constants.
Definition: Constants.h:79
llvm::Instruction::getOpcode
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:160
llvm::SimplifyInstructionsInBlock
bool SimplifyInstructionsInBlock(BasicBlock *BB, const TargetLibraryInfo *TLI=nullptr)
Scan the specified basic block and try to simplify any instructions in it and recursively delete dead...
Definition: Local.cpp:701
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:31
llvm::PatternMatch::m_Select
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
Definition: PatternMatch.h:1472
llvm::CmpInst::isFPPredicate
bool isFPPredicate() const
Definition: InstrTypes.h:813
llvm::PatternMatch::match
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
llvm::AAResults
Definition: AliasAnalysis.h:456
llvm::User
Definition: User.h:44
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
createShiftShuffle
static Value * createShiftShuffle(Value *Vec, unsigned OldIndex, unsigned NewIndex, IRBuilder<> &Builder)
Create a shuffle that translates (shifts) 1 element from the input vector to a new element location.
Definition: VectorCombine.cpp:375
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
getOpcode
static Optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition: VPlanSLP.cpp:199
llvm::Type::isVectorTy
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:237
llvm::PatternMatch::m_Instruction
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
Definition: PatternMatch.h:748
false
Definition: StackSlotColoring.cpp:142
llvm::PatternMatch::m_ConstantInt
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
Definition: PatternMatch.h:145
llvm::Instruction
Definition: Instruction.h:45
llvm::Type::getScalarSizeInBits
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition: Type.cpp:153
llvm::DominatorTreeWrapperPass
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:287
llvm::STATISTIC
STATISTIC(NumFunctions, "Total number of functions")
llvm::UndefValue::get
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Definition: Constants.cpp:1771
DisableBinopExtractShuffle
static cl::opt< bool > DisableBinopExtractShuffle("disable-binop-extract-shuffle", cl::init(false), cl::Hidden, cl::desc("Disable binop extract to shuffle transforms"))
llvm::PatternMatch::m_URem
BinaryOp_match< LHS, RHS, Instruction::URem > m_URem(const LHS &L, const RHS &R)
Definition: PatternMatch.h:1105
Align
uint64_t Align
Definition: ELFObjHandler.cpp:83
PatternMatch.h
llvm::Align
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
llvm::isModSet
LLVM_NODISCARD bool isModSet(const ModRefInfo MRI)
Definition: AliasAnalysis.h:196
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
ScalarizationResult::isSafeWithFreeze
bool isSafeWithFreeze() const
Returns true if the index can be scalarized, but requires inserting a freeze.
Definition: VectorCombine.cpp:806
llvm::PatternMatch::m_ExtractElt
TwoOps_match< Val_t, Idx_t, Instruction::ExtractElement > m_ExtractElt(const Val_t &Val, const Idx_t &Idx)
Matches ExtractElementInst.
Definition: PatternMatch.h:1502
llvm::Type::isIntegerTy
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:201
llvm::APInt::getOneBitSet
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition: APInt.h:224
ScalarizationResult::freeze
void freeze(IRBuilder<> &Builder, Instruction &UserI)
Freeze the ToFreeze and update the use in User to use it.
Definition: VectorCombine.cpp:809
VectorUtils.h
llvm::cl::opt< bool >
llvm::ConstantExpr::getCompare
static Constant * getCompare(unsigned short pred, Constant *C1, Constant *C2, bool OnlyIfReduced=false)
Return an ICmp or FCmp comparison operator constant expression.
Definition: Constants.cpp:2373
llvm::StoreInst
An instruction for storing to memory.
Definition: Instructions.h:304
llvm::Constant
This is an important base class in LLVM.
Definition: Constant.h:41
llvm::VectorCombinePass::run
PreservedAnalyses run(Function &F, FunctionAnalysisManager &)
Definition: VectorCombine.cpp:1116
llvm::TargetTransformInfoWrapperPass
Wrapper pass for TargetTransformInfo.
Definition: TargetTransformInfo.h:2387
llvm::Combiner
Definition: Combiner.h:27
llvm::AssumptionAnalysis
A function analysis which provides an AssumptionCache.
Definition: AssumptionCache.h:169
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
llvm::BasicAAWrapperPass
Legacy wrapper pass to provide the BasicAAResult object.
Definition: BasicAliasAnalysis.h:244
llvm::TargetTransformInfo::getRegisterClassForType
unsigned getRegisterClassForType(bool Vector, Type *Ty=nullptr) const
Definition: TargetTransformInfo.cpp:585
DisableVectorCombine
static cl::opt< bool > DisableVectorCombine("disable-vector-combine", cl::init(false), cl::Hidden, cl::desc("Disable all vector combine transforms"))
replaceValue
static void replaceValue(Value &Old, Value &New)
Definition: VectorCombine.cpp:98
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
llvm::make_early_inc_range
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting iterators.
Definition: STLExtras.h:572
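This adapter is what lets the pass erase or replace the current instruction while walking a basic block. A small usage sketch, assuming the dead-instruction predicate from Local.h:

#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instruction.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;

// Sketch: the iterator is advanced before the body runs, so erasing the
// current instruction does not invalidate the traversal.
static void eraseTriviallyDead(BasicBlock &BB) {
  for (Instruction &I : make_early_inc_range(BB))
    if (isInstructionTriviallyDead(&I))
      I.eraseFromParent();
}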
llvm::PatternMatch::m_And
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
Definition: PatternMatch.h:1123
llvm::is_contained
bool is_contained(R &&Range, const E &Element)
Wrapper function around std::find to detect if an element exists in a container.
Definition: STLExtras.h:1612
llvm::TargetTransformInfo::getScalarizationOverhead
InstructionCost getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract) const
Estimate the overhead of scalarizing an instruction.
Definition: TargetTransformInfo.cpp:480
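In this pass the demanded-elements mask is usually a single lane, so the query prices inserting or extracting exactly one element. A hedged sketch of that idiom (the helper name is illustrative, not part of the pass):

#include "llvm/ADT/APInt.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/DerivedTypes.h"
using namespace llvm;

// Sketch: cost of materializing only lane `Index` of VecTy via a scalar insert.
static InstructionCost singleLaneInsertCost(const TargetTransformInfo &TTI,
                                            FixedVectorType *VecTy,
                                            unsigned Index) {
  APInt DemandedElts = APInt::getOneBitSet(VecTy->getNumElements(), Index);
  return TTI.getScalarizationOverhead(VecTy, DemandedElts,
                                      /*Insert=*/true, /*Extract=*/false);
}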
IRBuilder.h
translateExtract
static ExtractElementInst * translateExtract(ExtractElementInst *ExtElt, unsigned NewIndex, IRBuilder<> &Builder)
Given an extract element instruction with constant index operand, shuffle the source vector (shift the scalar element) to a NewIndex for extraction.
Definition: VectorCombine.cpp:390
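Conceptually, the source vector is shifted so the wanted lane lands at the preferred index, and a new extract reads from that position. A minimal sketch building on the buildShiftShuffle sketch above (assumes the original index is a ConstantInt; helper names are illustrative):

#include "llvm/IR/Constants.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

// Sketch: shuffle the source so the wanted lane sits at NewIndex, then
// extract from that new position.
static Value *extractAtNewIndex(ExtractElementInst *Ext, unsigned NewIndex,
                                IRBuilder<> &Builder) {
  unsigned OldIndex =
      cast<ConstantInt>(Ext->getIndexOperand())->getZExtValue();
  Value *Shifted =
      buildShiftShuffle(Ext->getVectorOperand(), OldIndex, NewIndex, Builder);
  return Builder.CreateExtractElement(Shifted, NewIndex);
}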
llvm::CmpInst::BAD_ICMP_PREDICATE
@ BAD_ICMP_PREDICATE
Definition: InstrTypes.h:753
llvm::TTI
TargetTransformInfo TTI
Definition: TargetTransformInfo.h:163
llvm::Instruction::isBinaryOp
bool isBinaryOp() const
Definition: Instruction.h:165
canScalarizeAccess
static ScalarizationResult canScalarizeAccess(FixedVectorType *VecTy, Value *Idx, Instruction *CtxI, AssumptionCache &AC)
Check if it is legal to scalarize a memory access to VecTy at index Idx.
Definition: VectorCombine.cpp:828
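For a variable index, the proof usually comes from value tracking: if every value the index can take is smaller than the element count, the access is in bounds. A hedged sketch of that range check (not the exact implementation; the poison/freeze handling is omitted):

#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Instruction.h"
using namespace llvm;

// Sketch: a variable index is provably in bounds if every value it can take
// is strictly less than the number of vector elements.
static bool indexProvablyInBounds(Value *Idx, FixedVectorType *VecTy,
                                  AssumptionCache &AC, Instruction *CtxI) {
  ConstantRange IdxRange =
      computeConstantRange(Idx, /*UseInstrInfo=*/true, &AC, CtxI);
  return IdxRange.getUnsignedMax().ult(VecTy->getNumElements());
}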
llvm::PatternMatch::m_Constant
class_match< Constant > m_Constant()
Match an arbitrary Constant and ignore it.
Definition: PatternMatch.h:142
llvm::computeConstantRange
ConstantRange computeConstantRange(const Value *V, bool UseInstrInfo=true, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, unsigned Depth=0)
Determine the possible constant range of an integer or vector of integer value.
Definition: ValueTracking.cpp:7005
llvm::PatternMatch::m_Value
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:76
llvm::APInt
Class for arbitrary precision integers.
Definition: APInt.h:75
llvm::AssumptionCacheTracker
An immutable pass that tracks lazily created AssumptionCache objects.
Definition: AssumptionCache.h:200
llvm::ArrayRef< int >
llvm::any_of
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1554
llvm::AnalysisUsage::setPreservesCFG
void setPreservesCFG()
This function should be called by the pass iff it does not: (1) add or remove basic blocks from the function, or (2) modify terminator instructions in any way.
Definition: Pass.cpp:253
llvm::PatternMatch::m_Undef
auto m_Undef()
Match an arbitrary undef constant.
Definition: PatternMatch.h:136
llvm::AssumptionCache
A cache of @llvm.assume calls within a function.
Definition: AssumptionCache.h:41
llvm::ExtractElementInst::getVectorOperand
Value * getVectorOperand()
Definition: Instructions.h:1902
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:256
llvm::CFGAnalyses
Represents analyses that only rely on functions' control flow.
Definition: PassManager.h:116
llvm::PatternMatch::m_Shuffle
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
Definition: PatternMatch.h:1561
llvm::AnalysisUsage::addPreserved
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
Definition: PassAnalysisSupport.h:98
llvm::Value::replaceAllUsesWith
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:520
ScalarizationResult::isSafe
bool isSafe() const
Returns true if the index can be scalarized without requiring a freeze.
Definition: VectorCombine.cpp:801
llvm::initializeVectorCombineLegacyPassPass
void initializeVectorCombineLegacyPassPass(PassRegistry &)
llvm::TargetTransformInfo::getCmpSelInstrCost
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy=nullptr, CmpInst::Predicate VecPred=CmpInst::BAD_ICMP_PREDICATE, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, const Instruction *I=nullptr) const
Definition: TargetTransformInfo.cpp:808
llvm::TargetTransformInfo::getNumberOfRegisters
unsigned getNumberOfRegisters(unsigned ClassID) const
Definition: TargetTransformInfo.cpp:581
llvm::ConstantVector::get
static Constant * get(ArrayRef< Constant * > V)
Definition: Constants.cpp:1369
llvm::InstructionCost::isValid
bool isValid() const
Definition: InstructionCost.h:79
llvm::Instruction::mayReadFromMemory
bool mayReadFromMemory() const
Return true if this instruction may read memory.
Definition: Instruction.cpp:564
llvm::Value::getName
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:297
llvm::Value::stripAndAccumulateInBoundsConstantOffsets
const Value * stripAndAccumulateInBoundsConstantOffsets(const DataLayout &DL, APInt &Offset) const
This is a wrapper around stripAndAccumulateConstantOffsets with the in-bounds requirement set to false.
Definition: Value.h:716
llvm::widenShuffleMaskElts
bool widenShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Try to transform a shuffle mask by replacing elements with the scaled index for an equivalent mask of widened elements.
Definition: VectorUtils.cpp:448
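A worked example of the widening direction: with Scale = 2, the narrow mask <2, 3, 0, 1> groups into whole wide elements and becomes <1, 0>, while a mask such as <1, 2, 0, 1> straddles wide elements and makes the function return false. A small illustrative call:

#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/VectorUtils.h"
using namespace llvm;

// Sketch: try to express a mask over N narrow elements as a mask over
// N / Scale wide elements.
static void widenMaskExample() {
  SmallVector<int, 8> WideMask;
  int NarrowMask[] = {2, 3, 0, 1};
  if (widenShuffleMaskElts(/*Scale=*/2, NarrowMask, WideMask)) {
    // WideMask is now {1, 0}: narrow lanes {2,3} form wide lane 1,
    // narrow lanes {0,1} form wide lane 0.
  }
}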
llvm::commonAlignment
Align commonAlignment(Align A, Align B)
Returns the alignment that satisfies both alignments.
Definition: Alignment.h:211
llvm::ConstantExpr
A constant value that is initialized with an expression using other constant values.
Definition: Constants.h:936
ScalarizationResult::unsafe
static ScalarizationResult unsafe()
Definition: VectorCombine.cpp:794
ScalarizationResult
Helper class to indicate whether a vector index can be safely scalarized and if a freeze needs to be inserted.
Definition: VectorCombine.cpp:779
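The caller-side pattern is: compute the result, bail out if unsafe, and insert the freeze before committing the transform when one is required. A hedged sketch of that control flow (it is only meaningful inside VectorCombine.cpp, where these helpers are defined; variable names are illustrative):

// Sketch of the caller-side pattern around ScalarizationResult.
static bool tryScalarizeIndexedAccess(FixedVectorType *VecTy, Value *Idx,
                                      Instruction &IdxUser,
                                      AssumptionCache &AC,
                                      IRBuilder<> &Builder) {
  ScalarizationResult CanScalarize =
      canScalarizeAccess(VecTy, Idx, &IdxUser, AC);
  if (!CanScalarize.isSafe() && !CanScalarize.isSafeWithFreeze())
    return false;                          // index may be out of bounds
  if (CanScalarize.isSafeWithFreeze())
    CanScalarize.freeze(Builder, IdxUser); // freeze Idx before its use
  // ... emit the scalar load/store built from Idx here ...
  return true;
}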
ops
Token from the pass description string "Optimize scalar/vector ops" in the pass registration.
Definition: VectorCombine.cpp:1111
llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:161
Function.h
llvm::Value::hasNUses
bool hasNUses(unsigned N) const
Return true if this Value has exactly N uses.
Definition: Value.cpp:149
ScalarizationResult::safeWithFreeze
static ScalarizationResult safeWithFreeze(Value *ToFreeze)
Definition: VectorCombine.cpp:796
ScalarizationResult::safe
static ScalarizationResult safe()
Definition: VectorCombine.cpp:795
llvm::PatternMatch::m_ZeroInt
cst_pred_ty< is_zero_int > m_ZeroInt()
Match an integer 0 or a vector with all elements equal to 0.
Definition: PatternMatch.h:522
llvm::ConstantRange::contains
bool contains(const APInt &Val) const
Return true if the specified value is in the set.
Definition: ConstantRange.cpp:393
llvm::TargetTransformInfo::getVectorInstrCost
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index=-1) const
Definition: TargetTransformInfo.cpp:819
llvm::ConstantRange
This class represents a range of values.
Definition: ConstantRange.h:47
llvm::ConstantRange::urem
ConstantRange urem(const ConstantRange &Other) const
Return a new range representing the possible values resulting from an unsigned remainder operation of a value in this range and a value in Other.
Definition: ConstantRange.cpp:1219
llvm::Instruction::BinaryOps
BinaryOps
Definition: Instruction.h:785
llvm::DominatorTreeAnalysis
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:252
llvm::Pass
Pass interface - Implemented by all 'passes'.
Definition: Pass.h:91
llvm::PreservedAnalyses::preserveSet
void preserveSet()
Mark an analysis set as preserved.
Definition: PassManager.h:191
llvm::PatternMatch::m_Specific
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
Definition: PatternMatch.h:802
Dominators.h
llvm::AAResultsWrapperPass
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
Definition: AliasAnalysis.h:1281
llvm::GlobalsAAWrapperPass
Legacy wrapper pass to provide the GlobalsAAResult object.
Definition: GlobalsModRef.h:143
llvm::max
Align max(MaybeAlign Lhs, Align Rhs)
Definition: Alignment.h:340
TargetTransformInfo.h
llvm::PatternMatch
Definition: PatternMatch.h:47
llvm::ExtractElementInst::getIndexOperand
Value * getIndexOperand()
Definition: Instructions.h:1903
Vectorize.h
llvm::Pass::getAnalysisUsage
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overridden by passes that need analysis information to do their job.
Definition: Pass.cpp:93
llvm::isSafeToLoadUnconditionally
bool isSafeToLoadUnconditionally(Value *V, Align Alignment, APInt &Size, const DataLayout &DL, Instruction *ScanFrom=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr)
Return true if we know that executing a load from this value cannot trap.
Definition: Loads.cpp:335
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: InstructionSimplify.h:44
llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:298
llvm::isGuaranteedNotToBePoison
bool isGuaranteedNotToBePoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Definition: ValueTracking.cpp:5248
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
llvm::TargetTransformInfo::getArithmeticInstrCost
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, OperandValueKind Opd1Info=OK_AnyValue, OperandValueKind Opd2Info=OK_AnyValue, OperandValueProperties Opd1PropInfo=OP_None, OperandValueProperties Opd2PropInfo=OP_None, ArrayRef< const Value * > Args=ArrayRef< const Value * >(), const Instruction *CxtI=nullptr) const
This is an approximation of reciprocal throughput of a math/logic op.
Definition: TargetTransformInfo.cpp:714
llvm::User::getOperand
Value * getOperand(unsigned i) const
Definition: User.h:169
llvm::cl::desc
Definition: CommandLine.h:414
combine
Token from the pass command-line name "vector-combine" in the pass registration.
Definition: VectorCombine.cpp:1110
llvm::PatternMatch::m_Cmp
class_match< CmpInst > m_Cmp()
Matches any compare instruction and ignore it.
Definition: PatternMatch.h:89
InitializePasses.h
computeAlignmentAfterScalarization
static Align computeAlignmentAfterScalarization(Align VectorAlignment, Type *ScalarType, Value *Idx, const DataLayout &DL)
The memory operation on a vector of ScalarType had alignment of VectorAlignment.
Definition: VectorCombine.cpp:868
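For a constant lane index the answer is the vector's alignment combined with the lane's byte offset, e.g. a 16-byte-aligned <4 x i32> access scalarized at lane 2 yields an 8-byte-aligned i32 access. A small sketch of that constant-index case (the helper name is illustrative):

#include "llvm/Support/Alignment.h"
using namespace llvm;

// Sketch: alignment of the scalar element at a constant lane index, given the
// alignment of the whole vector access.  commonAlignment(16, 2 * 4) == 8.
static Align scalarLaneAlignment(Align VectorAlignment, uint64_t ScalarBytes,
                                 uint64_t LaneIndex) {
  return commonAlignment(VectorAlignment, LaneIndex * ScalarBytes);
}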
llvm::Value
LLVM Value Representation.
Definition: Value.h:75
llvm::VectorType::get
static VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
Definition: Type.cpp:632
llvm::Value::users
iterator_range< user_iterator > users()
Definition: Value.h:422
llvm::AAResults::getModRefInfo
ModRefInfo getModRefInfo(const CallBase *Call, const MemoryLocation &Loc)
getModRefInfo (for call sites) - Return information about whether a particular call site modifies or reads the specified memory location.
Definition: AliasAnalysis.cpp:218
llvm::MemoryLocation
Representation for a specific memory location.
Definition: MemoryLocation.h:209
llvm::Use
A Use represents the edge between a Value definition and its users.
Definition: Use.h:44
llvm::Type::getPrimitiveSizeInBits
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition: Type.cpp:128
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:37
isMemModifiedBetween
static bool isMemModifiedBetween(BasicBlock::iterator Begin, BasicBlock::iterator End, const MemoryLocation &Loc, AAResults &AA)
Definition: VectorCombine.cpp:767
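The walk is a plain alias-analysis scan: each instruction between the two points is asked whether it may modify the queried location (the real helper also caps the scan at the vector-combine-max-scan-instrs limit). A hedged sketch of the shape of that check:

#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/IR/BasicBlock.h"
#include <algorithm>
using namespace llvm;

// Sketch: does any instruction in [Begin, End) possibly write to Loc?
static bool mayModifyBetween(BasicBlock::iterator Begin,
                             BasicBlock::iterator End,
                             const MemoryLocation &Loc, AAResults &AA) {
  return std::any_of(Begin, End, [&](const Instruction &I) {
    return isModSet(AA.getModRefInfo(&I, Loc));
  });
}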