//===- llvm/Analysis/TargetTransformInfo.cpp ------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/TargetTransformInfoImpl.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/CommandLine.h"
#include <utility>

using namespace llvm;
using namespace PatternMatch;

#define DEBUG_TYPE "tti"

static cl::opt<bool> EnableReduxCost("costmodel-reduxcost", cl::init(false),
                                     cl::Hidden,
                                     cl::desc("Recognize reduction patterns."));
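
// Note (illustrative, not in the upstream file): the reduction matching below
// is gated on this hidden flag, so the shuffle-based reduction idioms are only
// costed specially under e.g.
//   opt -cost-model -analyze -costmodel-reduxcost=true input.ll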

namespace {
/// \brief No-op implementation of the TTI interface using the utility base
/// classes.
///
/// This is used when no target specific information is available.
struct NoTTIImpl : TargetTransformInfoImplCRTPBase<NoTTIImpl> {
  explicit NoTTIImpl(const DataLayout &DL)
      : TargetTransformInfoImplCRTPBase<NoTTIImpl>(DL) {}
};
}
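
// Illustrative note (not in the upstream file): when no target hooks are
// registered, a TargetTransformInfo built from a DataLayout alone wraps this
// fallback and answers every query with the conservative defaults from the
// CRTP base:
//
//   TargetTransformInfo TTI(M.getDataLayout()); // uses NoTTIImpl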

TargetTransformInfo::TargetTransformInfo(const DataLayout &DL)
    : TTIImpl(new Model<NoTTIImpl>(NoTTIImpl(DL))) {}

TargetTransformInfo::~TargetTransformInfo() {}

TargetTransformInfo::TargetTransformInfo(TargetTransformInfo &&Arg)
    : TTIImpl(std::move(Arg.TTIImpl)) {}

TargetTransformInfo &TargetTransformInfo::operator=(TargetTransformInfo &&RHS) {
  TTIImpl = std::move(RHS.TTIImpl);
  return *this;
}

int TargetTransformInfo::getOperationCost(unsigned Opcode, Type *Ty,
                                          Type *OpTy) const {
  int Cost = TTIImpl->getOperationCost(Opcode, Ty, OpTy);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getCallCost(FunctionType *FTy, int NumArgs) const {
  int Cost = TTIImpl->getCallCost(FTy, NumArgs);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getCallCost(const Function *F,
                                     ArrayRef<const Value *> Arguments) const {
  int Cost = TTIImpl->getCallCost(F, Arguments);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

unsigned TargetTransformInfo::getInliningThresholdMultiplier() const {
  return TTIImpl->getInliningThresholdMultiplier();
}

int TargetTransformInfo::getGEPCost(Type *PointeeType, const Value *Ptr,
                                    ArrayRef<const Value *> Operands) const {
  return TTIImpl->getGEPCost(PointeeType, Ptr, Operands);
}

int TargetTransformInfo::getExtCost(const Instruction *I,
                                    const Value *Src) const {
  return TTIImpl->getExtCost(I, Src);
}

int TargetTransformInfo::getIntrinsicCost(
    Intrinsic::ID IID, Type *RetTy, ArrayRef<const Value *> Arguments) const {
  int Cost = TTIImpl->getIntrinsicCost(IID, RetTy, Arguments);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

unsigned
TargetTransformInfo::getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
                                                      unsigned &JTSize) const {
  return TTIImpl->getEstimatedNumberOfCaseClusters(SI, JTSize);
}

int TargetTransformInfo::getUserCost(const User *U,
                                     ArrayRef<const Value *> Operands) const {
  int Cost = TTIImpl->getUserCost(U, Operands);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

bool TargetTransformInfo::hasBranchDivergence() const {
  return TTIImpl->hasBranchDivergence();
}

bool TargetTransformInfo::isSourceOfDivergence(const Value *V) const {
  return TTIImpl->isSourceOfDivergence(V);
}

bool llvm::TargetTransformInfo::isAlwaysUniform(const Value *V) const {
  return TTIImpl->isAlwaysUniform(V);
}

unsigned TargetTransformInfo::getFlatAddressSpace() const {
  return TTIImpl->getFlatAddressSpace();
}

bool TargetTransformInfo::isLoweredToCall(const Function *F) const {
  return TTIImpl->isLoweredToCall(F);
}

void TargetTransformInfo::getUnrollingPreferences(
    Loop *L, ScalarEvolution &SE, UnrollingPreferences &UP) const {
  return TTIImpl->getUnrollingPreferences(L, SE, UP);
}

bool TargetTransformInfo::isLegalAddImmediate(int64_t Imm) const {
  return TTIImpl->isLegalAddImmediate(Imm);
}

bool TargetTransformInfo::isLegalICmpImmediate(int64_t Imm) const {
  return TTIImpl->isLegalICmpImmediate(Imm);
}

bool TargetTransformInfo::isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
                                                int64_t BaseOffset,
                                                bool HasBaseReg,
                                                int64_t Scale,
                                                unsigned AddrSpace,
                                                Instruction *I) const {
  return TTIImpl->isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg,
                                        Scale, AddrSpace, I);
}
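
// Illustrative example (not in the upstream file): the queried mode has the
// documented form BaseGV + BaseOffset + BaseReg + Scale*IndexReg, so an
// x86-style access [%base + 4*%idx + 16] corresponds to
//   isLegalAddressingMode(Ty, /*BaseGV=*/nullptr, /*BaseOffset=*/16,
//                         /*HasBaseReg=*/true, /*Scale=*/4);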

bool TargetTransformInfo::isLSRCostLess(LSRCost &C1, LSRCost &C2) const {
  return TTIImpl->isLSRCostLess(C1, C2);
}

bool TargetTransformInfo::isLegalMaskedStore(Type *DataType) const {
  return TTIImpl->isLegalMaskedStore(DataType);
}

bool TargetTransformInfo::isLegalMaskedLoad(Type *DataType) const {
  return TTIImpl->isLegalMaskedLoad(DataType);
}

bool TargetTransformInfo::isLegalMaskedGather(Type *DataType) const {
  return TTIImpl->isLegalMaskedGather(DataType);
}

bool TargetTransformInfo::isLegalMaskedScatter(Type *DataType) const {
  return TTIImpl->isLegalMaskedScatter(DataType);
}

bool TargetTransformInfo::hasDivRemOp(Type *DataType, bool IsSigned) const {
  return TTIImpl->hasDivRemOp(DataType, IsSigned);
}

bool TargetTransformInfo::hasVolatileVariant(Instruction *I,
                                             unsigned AddrSpace) const {
  return TTIImpl->hasVolatileVariant(I, AddrSpace);
}

bool TargetTransformInfo::prefersVectorizedAddressing() const {
  return TTIImpl->prefersVectorizedAddressing();
}

int TargetTransformInfo::getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
                                              int64_t BaseOffset,
                                              bool HasBaseReg,
                                              int64_t Scale,
                                              unsigned AddrSpace) const {
  int Cost = TTIImpl->getScalingFactorCost(Ty, BaseGV, BaseOffset, HasBaseReg,
                                           Scale, AddrSpace);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

bool TargetTransformInfo::LSRWithInstrQueries() const {
  return TTIImpl->LSRWithInstrQueries();
}

bool TargetTransformInfo::isTruncateFree(Type *Ty1, Type *Ty2) const {
  return TTIImpl->isTruncateFree(Ty1, Ty2);
}

bool TargetTransformInfo::isProfitableToHoist(Instruction *I) const {
  return TTIImpl->isProfitableToHoist(I);
}

bool TargetTransformInfo::isTypeLegal(Type *Ty) const {
  return TTIImpl->isTypeLegal(Ty);
}

unsigned TargetTransformInfo::getJumpBufAlignment() const {
  return TTIImpl->getJumpBufAlignment();
}

unsigned TargetTransformInfo::getJumpBufSize() const {
  return TTIImpl->getJumpBufSize();
}

bool TargetTransformInfo::shouldBuildLookupTables() const {
  return TTIImpl->shouldBuildLookupTables();
}
bool TargetTransformInfo::shouldBuildLookupTablesForConstant(Constant *C) const {
  return TTIImpl->shouldBuildLookupTablesForConstant(C);
}

unsigned TargetTransformInfo::
getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const {
  return TTIImpl->getScalarizationOverhead(Ty, Insert, Extract);
}

unsigned TargetTransformInfo::
getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
                                 unsigned VF) const {
  return TTIImpl->getOperandsScalarizationOverhead(Args, VF);
}

bool TargetTransformInfo::supportsEfficientVectorElementLoadStore() const {
  return TTIImpl->supportsEfficientVectorElementLoadStore();
}

bool TargetTransformInfo::enableAggressiveInterleaving(bool LoopHasReductions) const {
  return TTIImpl->enableAggressiveInterleaving(LoopHasReductions);
}

const TargetTransformInfo::MemCmpExpansionOptions *
TargetTransformInfo::enableMemCmpExpansion(bool IsZeroCmp) const {
  return TTIImpl->enableMemCmpExpansion(IsZeroCmp);
}

bool TargetTransformInfo::enableInterleavedAccessVectorization() const {
  return TTIImpl->enableInterleavedAccessVectorization();
}

bool TargetTransformInfo::isFPVectorizationPotentiallyUnsafe() const {
  return TTIImpl->isFPVectorizationPotentiallyUnsafe();
}

bool TargetTransformInfo::allowsMisalignedMemoryAccesses(LLVMContext &Context,
                                                         unsigned BitWidth,
                                                         unsigned AddressSpace,
                                                         unsigned Alignment,
                                                         bool *Fast) const {
  return TTIImpl->allowsMisalignedMemoryAccesses(Context, BitWidth, AddressSpace,
                                                 Alignment, Fast);
}

TargetTransformInfo::PopcntSupportKind
TargetTransformInfo::getPopcntSupport(unsigned IntTyWidthInBit) const {
  return TTIImpl->getPopcntSupport(IntTyWidthInBit);
}

bool TargetTransformInfo::haveFastSqrt(Type *Ty) const {
  return TTIImpl->haveFastSqrt(Ty);
}

bool TargetTransformInfo::isFCmpOrdCheaperThanFCmpZero(Type *Ty) const {
  return TTIImpl->isFCmpOrdCheaperThanFCmpZero(Ty);
}

int TargetTransformInfo::getFPOpCost(Type *Ty) const {
  int Cost = TTIImpl->getFPOpCost(Ty);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx,
                                               const APInt &Imm,
                                               Type *Ty) const {
  int Cost = TTIImpl->getIntImmCodeSizeCost(Opcode, Idx, Imm, Ty);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getIntImmCost(const APInt &Imm, Type *Ty) const {
  int Cost = TTIImpl->getIntImmCost(Imm, Ty);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getIntImmCost(unsigned Opcode, unsigned Idx,
                                       const APInt &Imm, Type *Ty) const {
  int Cost = TTIImpl->getIntImmCost(Opcode, Idx, Imm, Ty);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
                                       const APInt &Imm, Type *Ty) const {
  int Cost = TTIImpl->getIntImmCost(IID, Idx, Imm, Ty);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

unsigned TargetTransformInfo::getNumberOfRegisters(bool Vector) const {
  return TTIImpl->getNumberOfRegisters(Vector);
}

unsigned TargetTransformInfo::getRegisterBitWidth(bool Vector) const {
  return TTIImpl->getRegisterBitWidth(Vector);
}

unsigned TargetTransformInfo::getMinVectorRegisterBitWidth() const {
  return TTIImpl->getMinVectorRegisterBitWidth();
}

bool TargetTransformInfo::shouldConsiderAddressTypePromotion(
    const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const {
  return TTIImpl->shouldConsiderAddressTypePromotion(
      I, AllowPromotionWithoutCommonHeader);
}

unsigned TargetTransformInfo::getCacheLineSize() const {
  return TTIImpl->getCacheLineSize();
}

llvm::Optional<unsigned> TargetTransformInfo::getCacheSize(CacheLevel Level)
    const {
  return TTIImpl->getCacheSize(Level);
}

llvm::Optional<unsigned> TargetTransformInfo::getCacheAssociativity(
    CacheLevel Level) const {
  return TTIImpl->getCacheAssociativity(Level);
}

unsigned TargetTransformInfo::getPrefetchDistance() const {
  return TTIImpl->getPrefetchDistance();
}

unsigned TargetTransformInfo::getMinPrefetchStride() const {
  return TTIImpl->getMinPrefetchStride();
}

unsigned TargetTransformInfo::getMaxPrefetchIterationsAhead() const {
  return TTIImpl->getMaxPrefetchIterationsAhead();
}

unsigned TargetTransformInfo::getMaxInterleaveFactor(unsigned VF) const {
  return TTIImpl->getMaxInterleaveFactor(VF);
}

int TargetTransformInfo::getArithmeticInstrCost(
    unsigned Opcode, Type *Ty, OperandValueKind Opd1Info,
    OperandValueKind Opd2Info, OperandValueProperties Opd1PropInfo,
    OperandValueProperties Opd2PropInfo,
    ArrayRef<const Value *> Args) const {
  int Cost = TTIImpl->getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
                                             Opd1PropInfo, Opd2PropInfo, Args);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getShuffleCost(ShuffleKind Kind, Type *Ty, int Index,
                                        Type *SubTp) const {
  int Cost = TTIImpl->getShuffleCost(Kind, Ty, Index, SubTp);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getCastInstrCost(unsigned Opcode, Type *Dst,
                                          Type *Src, const Instruction *I) const {
  assert((I == nullptr || I->getOpcode() == Opcode) &&
         "Opcode should reflect passed instruction.");
  int Cost = TTIImpl->getCastInstrCost(Opcode, Dst, Src, I);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getExtractWithExtendCost(unsigned Opcode, Type *Dst,
                                                  VectorType *VecTy,
                                                  unsigned Index) const {
  int Cost = TTIImpl->getExtractWithExtendCost(Opcode, Dst, VecTy, Index);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getCFInstrCost(unsigned Opcode) const {
  int Cost = TTIImpl->getCFInstrCost(Opcode);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
                                            Type *CondTy, const Instruction *I) const {
  assert((I == nullptr || I->getOpcode() == Opcode) &&
         "Opcode should reflect passed instruction.");
  int Cost = TTIImpl->getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getVectorInstrCost(unsigned Opcode, Type *Val,
                                            unsigned Index) const {
  int Cost = TTIImpl->getVectorInstrCost(Opcode, Val, Index);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getMemoryOpCost(unsigned Opcode, Type *Src,
                                         unsigned Alignment,
                                         unsigned AddressSpace,
                                         const Instruction *I) const {
  assert((I == nullptr || I->getOpcode() == Opcode) &&
         "Opcode should reflect passed instruction.");
  int Cost = TTIImpl->getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, I);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
                                               unsigned Alignment,
                                               unsigned AddressSpace) const {
  int Cost =
      TTIImpl->getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
                                                Value *Ptr, bool VariableMask,
                                                unsigned Alignment) const {
  int Cost = TTIImpl->getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
                                             Alignment);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getInterleavedMemoryOpCost(
    unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
    unsigned Alignment, unsigned AddressSpace) const {
  int Cost = TTIImpl->getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
                                                 Alignment, AddressSpace);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getIntrinsicInstrCost(
    Intrinsic::ID ID, Type *RetTy, ArrayRef<Type *> Tys, FastMathFlags FMF,
    unsigned ScalarizationCostPassed) const {
  int Cost = TTIImpl->getIntrinsicInstrCost(ID, RetTy, Tys, FMF,
                                            ScalarizationCostPassed);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
    ArrayRef<Value *> Args, FastMathFlags FMF, unsigned VF) const {
  int Cost = TTIImpl->getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getCallInstrCost(Function *F, Type *RetTy,
                                          ArrayRef<Type *> Tys) const {
  int Cost = TTIImpl->getCallInstrCost(F, RetTy, Tys);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

unsigned TargetTransformInfo::getNumberOfParts(Type *Tp) const {
  return TTIImpl->getNumberOfParts(Tp);
}

int TargetTransformInfo::getAddressComputationCost(Type *Tp,
                                                   ScalarEvolution *SE,
                                                   const SCEV *Ptr) const {
  int Cost = TTIImpl->getAddressComputationCost(Tp, SE, Ptr);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getArithmeticReductionCost(unsigned Opcode, Type *Ty,
                                                    bool IsPairwiseForm) const {
  int Cost = TTIImpl->getArithmeticReductionCost(Opcode, Ty, IsPairwiseForm);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getMinMaxReductionCost(Type *Ty, Type *CondTy,
                                                bool IsPairwiseForm,
                                                bool IsUnsigned) const {
  int Cost =
      TTIImpl->getMinMaxReductionCost(Ty, CondTy, IsPairwiseForm, IsUnsigned);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

unsigned
TargetTransformInfo::getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) const {
  return TTIImpl->getCostOfKeepingLiveOverCall(Tys);
}

bool TargetTransformInfo::getTgtMemIntrinsic(IntrinsicInst *Inst,
                                             MemIntrinsicInfo &Info) const {
  return TTIImpl->getTgtMemIntrinsic(Inst, Info);
}

unsigned TargetTransformInfo::getAtomicMemIntrinsicMaxElementSize() const {
  return TTIImpl->getAtomicMemIntrinsicMaxElementSize();
}

Value *TargetTransformInfo::getOrCreateResultFromMemIntrinsic(
    IntrinsicInst *Inst, Type *ExpectedType) const {
  return TTIImpl->getOrCreateResultFromMemIntrinsic(Inst, ExpectedType);
}

Type *TargetTransformInfo::getMemcpyLoopLoweringType(LLVMContext &Context,
                                                     Value *Length,
                                                     unsigned SrcAlign,
                                                     unsigned DestAlign) const {
  return TTIImpl->getMemcpyLoopLoweringType(Context, Length, SrcAlign,
                                            DestAlign);
}

void TargetTransformInfo::getMemcpyLoopResidualLoweringType(
    SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
    unsigned RemainingBytes, unsigned SrcAlign, unsigned DestAlign) const {
  TTIImpl->getMemcpyLoopResidualLoweringType(OpsOut, Context, RemainingBytes,
                                             SrcAlign, DestAlign);
}

bool TargetTransformInfo::areInlineCompatible(const Function *Caller,
                                              const Function *Callee) const {
  return TTIImpl->areInlineCompatible(Caller, Callee);
}

unsigned TargetTransformInfo::getLoadStoreVecRegBitWidth(unsigned AS) const {
  return TTIImpl->getLoadStoreVecRegBitWidth(AS);
}

bool TargetTransformInfo::isLegalToVectorizeLoad(LoadInst *LI) const {
  return TTIImpl->isLegalToVectorizeLoad(LI);
}

bool TargetTransformInfo::isLegalToVectorizeStore(StoreInst *SI) const {
  return TTIImpl->isLegalToVectorizeStore(SI);
}

bool TargetTransformInfo::isLegalToVectorizeLoadChain(
    unsigned ChainSizeInBytes, unsigned Alignment, unsigned AddrSpace) const {
  return TTIImpl->isLegalToVectorizeLoadChain(ChainSizeInBytes, Alignment,
                                              AddrSpace);
}

bool TargetTransformInfo::isLegalToVectorizeStoreChain(
    unsigned ChainSizeInBytes, unsigned Alignment, unsigned AddrSpace) const {
  return TTIImpl->isLegalToVectorizeStoreChain(ChainSizeInBytes, Alignment,
                                               AddrSpace);
}

unsigned TargetTransformInfo::getLoadVectorFactor(unsigned VF,
                                                  unsigned LoadSize,
                                                  unsigned ChainSizeInBytes,
                                                  VectorType *VecTy) const {
  return TTIImpl->getLoadVectorFactor(VF, LoadSize, ChainSizeInBytes, VecTy);
}

unsigned TargetTransformInfo::getStoreVectorFactor(unsigned VF,
                                                   unsigned StoreSize,
                                                   unsigned ChainSizeInBytes,
                                                   VectorType *VecTy) const {
  return TTIImpl->getStoreVectorFactor(VF, StoreSize, ChainSizeInBytes, VecTy);
}

bool TargetTransformInfo::useReductionIntrinsic(unsigned Opcode,
                                                Type *Ty, ReductionFlags Flags) const {
  return TTIImpl->useReductionIntrinsic(Opcode, Ty, Flags);
}

bool TargetTransformInfo::shouldExpandReduction(const IntrinsicInst *II) const {
  return TTIImpl->shouldExpandReduction(II);
}

int TargetTransformInfo::getInstructionLatency(const Instruction *I) const {
  return TTIImpl->getInstructionLatency(I);
}

static bool isReverseVectorMask(ArrayRef<int> Mask) {
  for (unsigned i = 0, MaskSize = Mask.size(); i < MaskSize; ++i)
    if (Mask[i] >= 0 && Mask[i] != (int)(MaskSize - 1 - i))
      return false;
  return true;
}
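
// Illustrative example (not in the upstream file): on a 4-wide shuffle the
// reverse mask is <3, 2, 1, 0>; undef lanes are encoded as -1 and accepted,
// so <3, -1, 1, 0> also matches.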

static bool isSingleSourceVectorMask(ArrayRef<int> Mask) {
  bool Vec0 = false;
  bool Vec1 = false;
  for (unsigned i = 0, NumVecElts = Mask.size(); i < NumVecElts; ++i) {
    if (Mask[i] >= 0) {
      if ((unsigned)Mask[i] >= NumVecElts)
        Vec1 = true;
      else
        Vec0 = true;
    }
  }
  return !(Vec0 && Vec1);
}
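
// Illustrative example (not in the upstream file): with 4 elements per input,
// <0, 3, 2, 1> reads only the first source and <4, 7, 6, 5> only the second,
// so both are single-source; <0, 5, 2, 7> mixes both sources and is rejected.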

static bool isZeroEltBroadcastVectorMask(ArrayRef<int> Mask) {
  for (unsigned i = 0; i < Mask.size(); ++i)
    if (Mask[i] > 0)
      return false;
  return true;
}
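
// Illustrative example (not in the upstream file): <0, 0, 0, 0> broadcasts
// element 0 to every lane; only indices > 0 are rejected, so undef lanes as
// in <0, -1, 0, 0> are tolerated.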

static bool isAlternateVectorMask(ArrayRef<int> Mask) {
  bool isAlternate = true;
  unsigned MaskSize = Mask.size();

  // Example: shufflevector A, B, <0,5,2,7>
  for (unsigned i = 0; i < MaskSize && isAlternate; ++i) {
    if (Mask[i] < 0)
      continue;
    isAlternate = Mask[i] == (int)((i & 1) ? MaskSize + i : i);
  }

  if (isAlternate)
    return true;

  isAlternate = true;
  // Example: shufflevector A, B, <4,1,6,3>
  for (unsigned i = 0; i < MaskSize && isAlternate; ++i) {
    if (Mask[i] < 0)
      continue;
    isAlternate = Mask[i] == (int)((i & 1) ? i : MaskSize + i);
  }

  return isAlternate;
}

static TargetTransformInfo::OperandValueKind getOperandInfo(Value *V) {
  TargetTransformInfo::OperandValueKind OpInfo =
      TargetTransformInfo::OK_AnyValue;

  // Check for a splat of a constant or for a non-uniform vector of constants.
  if (isa<ConstantVector>(V) || isa<ConstantDataVector>(V)) {
    OpInfo = TargetTransformInfo::OK_NonUniformConstantValue;
    if (cast<Constant>(V)->getSplatValue() != nullptr)
      OpInfo = TargetTransformInfo::OK_UniformConstantValue;
  }

  // Check for a splat of a uniform value. This is not loop aware, so return
  // true only for the obviously uniform cases (argument, globalvalue)
  const Value *Splat = getSplatValue(V);
  if (Splat && (isa<Argument>(Splat) || isa<GlobalValue>(Splat)))
    OpInfo = TargetTransformInfo::OK_UniformValue;

  return OpInfo;
}
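
// Illustrative example (not in the upstream file): for
//   mul <4 x i32> %x, <i32 7, i32 7, i32 7, i32 7>
// the second operand is OK_UniformConstantValue; <i32 1, i32 2, i32 3, i32 4>
// would be OK_NonUniformConstantValue, and a plain SSA value %y stays
// OK_AnyValue.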

static bool matchPairwiseShuffleMask(ShuffleVectorInst *SI, bool IsLeft,
                                     unsigned Level) {
  // We don't need a shuffle if we just want to have element 0 in position 0 of
  // the vector.
  if (!SI && Level == 0 && IsLeft)
    return true;
  else if (!SI)
    return false;

  SmallVector<int, 32> Mask(SI->getType()->getVectorNumElements(), -1);

  // Build a mask of 0, 2, ... (left) or 1, 3, ... (right) depending on whether
  // we look at the left or right side.
  for (unsigned i = 0, e = (1 << Level), val = !IsLeft; i != e; ++i, val += 2)
    Mask[i] = val;

  SmallVector<int, 16> ActualMask = SI->getShuffleMask();
  return Mask == ActualMask;
}
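
// Illustrative example (not in the upstream file): on a 4-wide vector the
// expected masks at Level 0 (the step feeding the final extract) are
// <0, -1, -1, -1> (left) and <1, -1, -1, -1> (right); at Level 1 they become
// <0, 2, -1, -1> and <1, 3, -1, -1>.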

namespace {
/// Kind of the reduction data.
enum ReductionKind {
  RK_None,           /// Not a reduction.
  RK_Arithmetic,     /// Binary reduction data.
  RK_MinMax,         /// Min/max reduction data.
  RK_UnsignedMinMax, /// Unsigned min/max reduction data.
};
/// Contains opcode + LHS/RHS parts of the reduction operations.
struct ReductionData {
  ReductionData() = delete;
  ReductionData(ReductionKind Kind, unsigned Opcode, Value *LHS, Value *RHS)
      : Opcode(Opcode), LHS(LHS), RHS(RHS), Kind(Kind) {
    assert(Kind != RK_None && "expected binary or min/max reduction only.");
  }
  unsigned Opcode = 0;
  Value *LHS = nullptr;
  Value *RHS = nullptr;
  ReductionKind Kind = RK_None;
  bool hasSameData(ReductionData &RD) const {
    return Kind == RD.Kind && Opcode == RD.Opcode;
  }
};
} // namespace

static Optional<ReductionData> getReductionData(Instruction *I) {
  Value *L, *R;
  if (m_BinOp(m_Value(L), m_Value(R)).match(I))
    return ReductionData(RK_Arithmetic, I->getOpcode(), L, R);
  if (auto *SI = dyn_cast<SelectInst>(I)) {
    if (m_SMin(m_Value(L), m_Value(R)).match(SI) ||
        m_SMax(m_Value(L), m_Value(R)).match(SI) ||
        m_OrdFMin(m_Value(L), m_Value(R)).match(SI) ||
        m_OrdFMax(m_Value(L), m_Value(R)).match(SI) ||
        m_UnordFMin(m_Value(L), m_Value(R)).match(SI) ||
        m_UnordFMax(m_Value(L), m_Value(R)).match(SI)) {
      auto *CI = cast<CmpInst>(SI->getCondition());
      return ReductionData(RK_MinMax, CI->getOpcode(), L, R);
    }
    if (m_UMin(m_Value(L), m_Value(R)).match(SI) ||
        m_UMax(m_Value(L), m_Value(R)).match(SI)) {
      auto *CI = cast<CmpInst>(SI->getCondition());
      return ReductionData(RK_UnsignedMinMax, CI->getOpcode(), L, R);
    }
  }
  return llvm::None;
}
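
// Illustrative example (not in the upstream file): a signed-max step such as
//   %c = icmp sgt <4 x i32> %a, %b
//   %m = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %b
// is summarized as ReductionData(RK_MinMax, Instruction::ICmp, %a, %b).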

static ReductionKind matchPairwiseReductionAtLevel(Instruction *I,
                                                   unsigned Level,
                                                   unsigned NumLevels) {
  // Match one level of pairwise operations.
  // %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef,
  //       <4 x i32> <i32 0, i32 2 , i32 undef, i32 undef>
  // %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef,
  //       <4 x i32> <i32 1, i32 3, i32 undef, i32 undef>
  // %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1
  if (!I)
    return RK_None;

  assert(I->getType()->isVectorTy() && "Expecting a vector type");

  Optional<ReductionData> RD = getReductionData(I);
  if (!RD)
    return RK_None;

  ShuffleVectorInst *LS = dyn_cast<ShuffleVectorInst>(I->getOperand(0));
  if (!LS && Level)
    return RK_None;
  ShuffleVectorInst *RS = dyn_cast<ShuffleVectorInst>(I->getOperand(1));
  if (!RS && Level)
    return RK_None;

  // On level 0 we can omit one shufflevector instruction.
  if (!Level && !RS && !LS)
    return RK_None;

  // Shuffle inputs must match.
  Value *NextLevelOpL = LS ? LS->getOperand(0) : nullptr;
  Value *NextLevelOpR = RS ? RS->getOperand(0) : nullptr;
  Value *NextLevelOp = nullptr;
  if (NextLevelOpR && NextLevelOpL) {
    // If we have two shuffles their operands must match.
    if (NextLevelOpL != NextLevelOpR)
      return RK_None;

    NextLevelOp = NextLevelOpL;
  } else if (Level == 0 && (NextLevelOpR || NextLevelOpL)) {
    // On the first level we can omit the shufflevector <0, undef,...>. So the
    // input to the other shufflevector <1, undef> must match with one of the
    // inputs to the current binary operation.
    // Example:
    //  %NextLevelOpL = shufflevector %R, <1, undef ...>
    //  %BinOp        = fadd          %NextLevelOpL, %R
    if (NextLevelOpL && NextLevelOpL != RD->RHS)
      return RK_None;
    else if (NextLevelOpR && NextLevelOpR != RD->LHS)
      return RK_None;

    NextLevelOp = NextLevelOpL ? RD->RHS : RD->LHS;
  } else
    return RK_None;

  // Check that the next level's binary operation exists and matches with the
  // current one.
  if (Level + 1 != NumLevels) {
    Optional<ReductionData> NextLevelRD =
        getReductionData(cast<Instruction>(NextLevelOp));
    if (!NextLevelRD || !RD->hasSameData(*NextLevelRD))
      return RK_None;
  }

  // Shuffle mask for pairwise operation must match.
  if (matchPairwiseShuffleMask(LS, /*IsLeft=*/true, Level)) {
    if (!matchPairwiseShuffleMask(RS, /*IsLeft=*/false, Level))
      return RK_None;
  } else if (matchPairwiseShuffleMask(RS, /*IsLeft=*/true, Level)) {
    if (!matchPairwiseShuffleMask(LS, /*IsLeft=*/false, Level))
      return RK_None;
  } else {
    return RK_None;
  }

  if (++Level == NumLevels)
    return RD->Kind;

  // Match next level.
  return matchPairwiseReductionAtLevel(cast<Instruction>(NextLevelOp), Level,
                                       NumLevels);
}

static ReductionKind matchPairwiseReduction(const ExtractElementInst *ReduxRoot,
                                            unsigned &Opcode, Type *&Ty) {
  if (!EnableReduxCost)
    return RK_None;

  // Need to extract the first element.
  ConstantInt *CI = dyn_cast<ConstantInt>(ReduxRoot->getOperand(1));
  unsigned Idx = ~0u;
  if (CI)
    Idx = CI->getZExtValue();
  if (Idx != 0)
    return RK_None;

  auto *RdxStart = dyn_cast<Instruction>(ReduxRoot->getOperand(0));
  if (!RdxStart)
    return RK_None;
  Optional<ReductionData> RD = getReductionData(RdxStart);
  if (!RD)
    return RK_None;

  Type *VecTy = RdxStart->getType();
  unsigned NumVecElems = VecTy->getVectorNumElements();
  if (!isPowerOf2_32(NumVecElems))
    return RK_None;

  // We look for a sequence of shuffle,shuffle,add triples like the following
  // that builds a pairwise reduction tree.
  //
  //  (X0, X1, X2, X3)
  //   (X0 + X1, X2 + X3, undef, undef)
  //    ((X0 + X1) + (X2 + X3), undef, undef, undef)
  //
  // %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef,
  //       <4 x i32> <i32 0, i32 2 , i32 undef, i32 undef>
  // %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef,
  //       <4 x i32> <i32 1, i32 3, i32 undef, i32 undef>
  // %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1
  // %rdx.shuf.1.0 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef,
  //       <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
  // %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef,
  //       <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  // %bin.rdx8 = fadd <4 x float> %rdx.shuf.1.0, %rdx.shuf.1.1
  // %r = extractelement <4 x float> %bin.rdx8, i32 0
  if (matchPairwiseReductionAtLevel(RdxStart, 0, Log2_32(NumVecElems)) ==
      RK_None)
    return RK_None;

  Opcode = RD->Opcode;
  Ty = VecTy;

  return RD->Kind;
}

static std::pair<Value *, ShuffleVectorInst *>
getShuffleAndOtherOprd(Value *L, Value *R) {
  ShuffleVectorInst *S = nullptr;

  if ((S = dyn_cast<ShuffleVectorInst>(L)))
    return std::make_pair(R, S);

  S = dyn_cast<ShuffleVectorInst>(R);
  return std::make_pair(L, S);
}

static ReductionKind
matchVectorSplittingReduction(const ExtractElementInst *ReduxRoot,
                              unsigned &Opcode, Type *&Ty) {
  if (!EnableReduxCost)
    return RK_None;

  // Need to extract the first element.
  ConstantInt *CI = dyn_cast<ConstantInt>(ReduxRoot->getOperand(1));
  unsigned Idx = ~0u;
  if (CI)
    Idx = CI->getZExtValue();
  if (Idx != 0)
    return RK_None;

  auto *RdxStart = dyn_cast<Instruction>(ReduxRoot->getOperand(0));
  if (!RdxStart)
    return RK_None;
  Optional<ReductionData> RD = getReductionData(RdxStart);
  if (!RD)
    return RK_None;

  Type *VecTy = ReduxRoot->getOperand(0)->getType();
  unsigned NumVecElems = VecTy->getVectorNumElements();
  if (!isPowerOf2_32(NumVecElems))
    return RK_None;

  // We look for a sequence of shuffles and adds like the following matching one
  // fadd, shuffle vector pair at a time.
  //
  // %rdx.shuf = shufflevector <4 x float> %rdx, <4 x float> undef,
  //                           <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
  // %bin.rdx = fadd <4 x float> %rdx, %rdx.shuf
  // %rdx.shuf7 = shufflevector <4 x float> %bin.rdx, <4 x float> undef,
  //                          <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  // %bin.rdx8 = fadd <4 x float> %bin.rdx, %rdx.shuf7
  // %r = extractelement <4 x float> %bin.rdx8, i32 0

  unsigned MaskStart = 1;
  Instruction *RdxOp = RdxStart;
  SmallVector<int, 32> ShuffleMask(NumVecElems, 0);
  unsigned NumVecElemsRemain = NumVecElems;
  while (NumVecElemsRemain - 1) {
    // Check for the right reduction operation.
    if (!RdxOp)
      return RK_None;
    Optional<ReductionData> RDLevel = getReductionData(RdxOp);
    if (!RDLevel || !RDLevel->hasSameData(*RD))
      return RK_None;

    Value *NextRdxOp;
    ShuffleVectorInst *Shuffle;
    std::tie(NextRdxOp, Shuffle) =
        getShuffleAndOtherOprd(RDLevel->LHS, RDLevel->RHS);

    // Check that the current reduction operation and the shuffle use the same
    // value.
    if (Shuffle == nullptr)
      return RK_None;
    if (Shuffle->getOperand(0) != NextRdxOp)
      return RK_None;

    // Check that the shuffle mask matches.
    for (unsigned j = 0; j != MaskStart; ++j)
      ShuffleMask[j] = MaskStart + j;
    // Fill the rest of the mask with -1 for undef.
    std::fill(&ShuffleMask[MaskStart], ShuffleMask.end(), -1);

    SmallVector<int, 16> Mask = Shuffle->getShuffleMask();
    if (ShuffleMask != Mask)
      return RK_None;

    RdxOp = dyn_cast<Instruction>(NextRdxOp);
    NumVecElemsRemain /= 2;
    MaskStart *= 2;
  }

  Opcode = RD->Opcode;
  Ty = VecTy;
  return RD->Kind;
}

int TargetTransformInfo::getInstructionThroughput(const Instruction *I) const {
  switch (I->getOpcode()) {
  case Instruction::GetElementPtr:
    return getUserCost(I);

  case Instruction::Ret:
  case Instruction::PHI:
  case Instruction::Br: {
    return getCFInstrCost(I->getOpcode());
  }
  case Instruction::Add:
  case Instruction::FAdd:
  case Instruction::Sub:
  case Instruction::FSub:
  case Instruction::Mul:
  case Instruction::FMul:
  case Instruction::UDiv:
  case Instruction::SDiv:
  case Instruction::FDiv:
  case Instruction::URem:
  case Instruction::SRem:
  case Instruction::FRem:
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor: {
    TargetTransformInfo::OperandValueKind Op1VK =
      getOperandInfo(I->getOperand(0));
    TargetTransformInfo::OperandValueKind Op2VK =
      getOperandInfo(I->getOperand(1));
    SmallVector<const Value *, 2> Operands(I->operand_values());
    return getArithmeticInstrCost(I->getOpcode(), I->getType(), Op1VK,
                                  Op2VK, TargetTransformInfo::OP_None,
                                  TargetTransformInfo::OP_None,
                                  Operands);
  }
  case Instruction::Select: {
    const SelectInst *SI = cast<SelectInst>(I);
    Type *CondTy = SI->getCondition()->getType();
    return getCmpSelInstrCost(I->getOpcode(), I->getType(), CondTy, I);
  }
  case Instruction::ICmp:
  case Instruction::FCmp: {
    Type *ValTy = I->getOperand(0)->getType();
    return getCmpSelInstrCost(I->getOpcode(), ValTy, I->getType(), I);
  }
  case Instruction::Store: {
    const StoreInst *SI = cast<StoreInst>(I);
    Type *ValTy = SI->getValueOperand()->getType();
    return getMemoryOpCost(I->getOpcode(), ValTy,
                           SI->getAlignment(),
                           SI->getPointerAddressSpace(), I);
  }
  case Instruction::Load: {
    const LoadInst *LI = cast<LoadInst>(I);
    return getMemoryOpCost(I->getOpcode(), I->getType(),
                           LI->getAlignment(),
                           LI->getPointerAddressSpace(), I);
  }
  case Instruction::ZExt:
  case Instruction::SExt:
  case Instruction::FPToUI:
  case Instruction::FPToSI:
  case Instruction::FPExt:
  case Instruction::PtrToInt:
  case Instruction::IntToPtr:
  case Instruction::SIToFP:
  case Instruction::UIToFP:
  case Instruction::Trunc:
  case Instruction::FPTrunc:
  case Instruction::BitCast:
  case Instruction::AddrSpaceCast: {
    Type *SrcTy = I->getOperand(0)->getType();
    return getCastInstrCost(I->getOpcode(), I->getType(), SrcTy, I);
  }
  case Instruction::ExtractElement: {
    const ExtractElementInst * EEI = cast<ExtractElementInst>(I);
    ConstantInt *CI = dyn_cast<ConstantInt>(EEI->getOperand(1));
    unsigned Idx = -1;
    if (CI)
      Idx = CI->getZExtValue();

    // Try to match a reduction sequence (series of shufflevector and vector
    // adds followed by an extractelement).
    unsigned ReduxOpCode;
    Type *ReduxType;

    switch (matchVectorSplittingReduction(EEI, ReduxOpCode, ReduxType)) {
    case RK_Arithmetic:
      return getArithmeticReductionCost(ReduxOpCode, ReduxType,
                                        /*IsPairwiseForm=*/false);
    case RK_MinMax:
      return getMinMaxReductionCost(
          ReduxType, CmpInst::makeCmpResultType(ReduxType),
          /*IsPairwiseForm=*/false, /*IsUnsigned=*/false);
    case RK_UnsignedMinMax:
      return getMinMaxReductionCost(
          ReduxType, CmpInst::makeCmpResultType(ReduxType),
          /*IsPairwiseForm=*/false, /*IsUnsigned=*/true);
    case RK_None:
      break;
    }

    switch (matchPairwiseReduction(EEI, ReduxOpCode, ReduxType)) {
    case RK_Arithmetic:
      return getArithmeticReductionCost(ReduxOpCode, ReduxType,
                                        /*IsPairwiseForm=*/true);
    case RK_MinMax:
      return getMinMaxReductionCost(
          ReduxType, CmpInst::makeCmpResultType(ReduxType),
          /*IsPairwiseForm=*/true, /*IsUnsigned=*/false);
    case RK_UnsignedMinMax:
      return getMinMaxReductionCost(
          ReduxType, CmpInst::makeCmpResultType(ReduxType),
          /*IsPairwiseForm=*/true, /*IsUnsigned=*/true);
    case RK_None:
      break;
    }

    return getVectorInstrCost(I->getOpcode(),
                              EEI->getOperand(0)->getType(), Idx);
  }
  case Instruction::InsertElement: {
    const InsertElementInst * IE = cast<InsertElementInst>(I);
    ConstantInt *CI = dyn_cast<ConstantInt>(IE->getOperand(2));
    unsigned Idx = -1;
    if (CI)
      Idx = CI->getZExtValue();
    return getVectorInstrCost(I->getOpcode(),
                              IE->getType(), Idx);
  }
  case Instruction::ShuffleVector: {
    const ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(I);
    Type *VecTypOp0 = Shuffle->getOperand(0)->getType();
    unsigned NumVecElems = VecTypOp0->getVectorNumElements();
    SmallVector<int, 16> Mask = Shuffle->getShuffleMask();

    if (NumVecElems == Mask.size()) {
      if (isReverseVectorMask(Mask))
        return getShuffleCost(TargetTransformInfo::SK_Reverse, VecTypOp0,
                              0, nullptr);
      if (isAlternateVectorMask(Mask))
        return getShuffleCost(TargetTransformInfo::SK_Alternate,
                              VecTypOp0, 0, nullptr);

      if (isZeroEltBroadcastVectorMask(Mask))
        return getShuffleCost(TargetTransformInfo::SK_Broadcast,
                              VecTypOp0, 0, nullptr);

      if (isSingleSourceVectorMask(Mask))
        return getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc,
                              VecTypOp0, 0, nullptr);

      return getShuffleCost(TargetTransformInfo::SK_PermuteTwoSrc,
                            VecTypOp0, 0, nullptr);
    }

    return -1;
  }
  case Instruction::Call:
    if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
      SmallVector<Value *, 4> Args(II->arg_operands());

      FastMathFlags FMF;
      if (auto *FPMO = dyn_cast<FPMathOperator>(II))
        FMF = FPMO->getFastMathFlags();

      return getIntrinsicInstrCost(II->getIntrinsicID(), II->getType(),
                                   Args, FMF);
    }
    return -1;
  default:
    // We don't have any information on this instruction.
    return -1;
  }
}
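
// Illustrative example (not in the upstream file): for
//   %a = add <4 x i32> %x, <i32 2, i32 2, i32 2, i32 2>
// the Add case above classifies operand 1 as OK_UniformConstantValue via
// getOperandInfo and forwards to getArithmeticInstrCost(Instruction::Add,
// <4 x i32>, ...).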

TargetTransformInfo::Concept::~Concept() {}

TargetIRAnalysis::TargetIRAnalysis() : TTICallback(&getDefaultTTI) {}

TargetIRAnalysis::TargetIRAnalysis(
    std::function<Result(const Function &)> TTICallback)
    : TTICallback(std::move(TTICallback)) {}

TargetIRAnalysis::Result TargetIRAnalysis::run(const Function &F,
                                               FunctionAnalysisManager &) {
  return TTICallback(F);
}

AnalysisKey TargetIRAnalysis::Key;

TargetIRAnalysis::Result TargetIRAnalysis::getDefaultTTI(const Function &F) {
  return Result(F.getParent()->getDataLayout());
}
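
// Illustrative usage (not in the upstream file): under the new pass manager a
// transformation obtains this analysis result with
//
//   TargetTransformInfo &TTI = FAM.getResult<TargetIRAnalysis>(F);
//
// where FAM is the FunctionAnalysisManager passed into the pass.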

// Register the basic pass.
INITIALIZE_PASS(TargetTransformInfoWrapperPass, "tti",
                "Target Transform Information", false, true)
char TargetTransformInfoWrapperPass::ID = 0;

void TargetTransformInfoWrapperPass::anchor() {}

TargetTransformInfoWrapperPass::TargetTransformInfoWrapperPass()
    : ImmutablePass(ID) {
  initializeTargetTransformInfoWrapperPassPass(
      *PassRegistry::getPassRegistry());
}

TargetTransformInfoWrapperPass::TargetTransformInfoWrapperPass(
    TargetIRAnalysis TIRA)
    : ImmutablePass(ID), TIRA(std::move(TIRA)) {
  initializeTargetTransformInfoWrapperPassPass(
      *PassRegistry::getPassRegistry());
}

TargetTransformInfo &TargetTransformInfoWrapperPass::getTTI(const Function &F) {
  FunctionAnalysisManager DummyFAM;
  TTI = TIRA.run(F, DummyFAM);
  return *TTI;
}
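
// Illustrative usage (not in the upstream file): legacy pass manager clients
// declare the dependency and query per function:
//
//   AU.addRequired<TargetTransformInfoWrapperPass>();
//   ...
//   TargetTransformInfo &TTI =
//       getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);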

ImmutablePass *
llvm::createTargetTransformInfoWrapperPass(TargetIRAnalysis TIRA) {
  return new TargetTransformInfoWrapperPass(std::move(TIRA));
}