//===- llvm/Analysis/TargetTransformInfo.cpp ------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/TargetTransformInfoImpl.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include <utility>

using namespace llvm;
using namespace PatternMatch;

#define DEBUG_TYPE "tti"

static cl::opt<bool> EnableReduxCost("costmodel-reduxcost", cl::init(false),
                                     cl::Hidden,
                                     cl::desc("Recognize reduction patterns."));

namespace {
/// No-op implementation of the TTI interface using the utility base
/// classes.
///
/// This is used when no target specific information is available.
struct NoTTIImpl : TargetTransformInfoImplCRTPBase<NoTTIImpl> {
  explicit NoTTIImpl(const DataLayout &DL)
      : TargetTransformInfoImplCRTPBase<NoTTIImpl>(DL) {}
};
}

TargetTransformInfo::TargetTransformInfo(const DataLayout &DL)
    : TTIImpl(new Model<NoTTIImpl>(NoTTIImpl(DL))) {}

TargetTransformInfo::~TargetTransformInfo() {}

TargetTransformInfo::TargetTransformInfo(TargetTransformInfo &&Arg)
    : TTIImpl(std::move(Arg.TTIImpl)) {}

TargetTransformInfo &TargetTransformInfo::operator=(TargetTransformInfo &&RHS) {
  TTIImpl = std::move(RHS.TTIImpl);
  return *this;
}

int TargetTransformInfo::getOperationCost(unsigned Opcode, Type *Ty,
                                          Type *OpTy) const {
  int Cost = TTIImpl->getOperationCost(Opcode, Ty, OpTy);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getCallCost(FunctionType *FTy, int NumArgs) const {
  int Cost = TTIImpl->getCallCost(FTy, NumArgs);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getCallCost(const Function *F,
                                     ArrayRef<const Value *> Arguments) const {
  int Cost = TTIImpl->getCallCost(F, Arguments);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

unsigned TargetTransformInfo::getInliningThresholdMultiplier() const {
  return TTIImpl->getInliningThresholdMultiplier();
}

int TargetTransformInfo::getGEPCost(Type *PointeeType, const Value *Ptr,
                                    ArrayRef<const Value *> Operands) const {
  return TTIImpl->getGEPCost(PointeeType, Ptr, Operands);
}

int TargetTransformInfo::getExtCost(const Instruction *I,
                                    const Value *Src) const {
  return TTIImpl->getExtCost(I, Src);
}

int TargetTransformInfo::getIntrinsicCost(
    Intrinsic::ID IID, Type *RetTy, ArrayRef<const Value *> Arguments) const {
  int Cost = TTIImpl->getIntrinsicCost(IID, RetTy, Arguments);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

unsigned
TargetTransformInfo::getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
                                                      unsigned &JTSize) const {
  return TTIImpl->getEstimatedNumberOfCaseClusters(SI, JTSize);
}

int TargetTransformInfo::getUserCost(const User *U,
                                     ArrayRef<const Value *> Operands) const {
  int Cost = TTIImpl->getUserCost(U, Operands);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

bool TargetTransformInfo::hasBranchDivergence() const {
  return TTIImpl->hasBranchDivergence();
}

bool TargetTransformInfo::isSourceOfDivergence(const Value *V) const {
  return TTIImpl->isSourceOfDivergence(V);
}

bool TargetTransformInfo::isAlwaysUniform(const Value *V) const {
  return TTIImpl->isAlwaysUniform(V);
}

unsigned TargetTransformInfo::getFlatAddressSpace() const {
  return TTIImpl->getFlatAddressSpace();
}

bool TargetTransformInfo::isLoweredToCall(const Function *F) const {
  return TTIImpl->isLoweredToCall(F);
}

void TargetTransformInfo::getUnrollingPreferences(
    Loop *L, ScalarEvolution &SE, UnrollingPreferences &UP) const {
  return TTIImpl->getUnrollingPreferences(L, SE, UP);
}

bool TargetTransformInfo::isLegalAddImmediate(int64_t Imm) const {
  return TTIImpl->isLegalAddImmediate(Imm);
}

bool TargetTransformInfo::isLegalICmpImmediate(int64_t Imm) const {
  return TTIImpl->isLegalICmpImmediate(Imm);
}

bool TargetTransformInfo::isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
                                                int64_t BaseOffset,
                                                bool HasBaseReg,
                                                int64_t Scale,
                                                unsigned AddrSpace,
                                                Instruction *I) const {
  return TTIImpl->isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg,
                                        Scale, AddrSpace, I);
}
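
// Illustrative example (assumptions noted): passes such as
// LoopStrengthReduce use this hook to ask whether an address of the form
// BaseGV + BaseOffset + HasBaseReg*BaseReg + Scale*ScaleReg is natively
// supported. For instance, for a hypothetical element type `EltTy`:
//
//   bool OK = TTI.isLegalAddressingMode(EltTy, /*BaseGV=*/nullptr,
//                                       /*BaseOffset=*/4, /*HasBaseReg=*/true,
//                                       /*Scale=*/2);
//
// Whether reg + 4 + 2*reg is legal is entirely target dependent.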

bool TargetTransformInfo::isLSRCostLess(LSRCost &C1, LSRCost &C2) const {
  return TTIImpl->isLSRCostLess(C1, C2);
}

bool TargetTransformInfo::canMacroFuseCmp() const {
  return TTIImpl->canMacroFuseCmp();
}

bool TargetTransformInfo::shouldFavorPostInc() const {
  return TTIImpl->shouldFavorPostInc();
}

bool TargetTransformInfo::isLegalMaskedStore(Type *DataType) const {
  return TTIImpl->isLegalMaskedStore(DataType);
}

bool TargetTransformInfo::isLegalMaskedLoad(Type *DataType) const {
  return TTIImpl->isLegalMaskedLoad(DataType);
}

bool TargetTransformInfo::isLegalMaskedGather(Type *DataType) const {
  return TTIImpl->isLegalMaskedGather(DataType);
}

bool TargetTransformInfo::isLegalMaskedScatter(Type *DataType) const {
  return TTIImpl->isLegalMaskedScatter(DataType);
}

bool TargetTransformInfo::hasDivRemOp(Type *DataType, bool IsSigned) const {
  return TTIImpl->hasDivRemOp(DataType, IsSigned);
}

bool TargetTransformInfo::hasVolatileVariant(Instruction *I,
                                             unsigned AddrSpace) const {
  return TTIImpl->hasVolatileVariant(I, AddrSpace);
}

bool TargetTransformInfo::prefersVectorizedAddressing() const {
  return TTIImpl->prefersVectorizedAddressing();
}

int TargetTransformInfo::getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
                                              int64_t BaseOffset,
                                              bool HasBaseReg,
                                              int64_t Scale,
                                              unsigned AddrSpace) const {
  int Cost = TTIImpl->getScalingFactorCost(Ty, BaseGV, BaseOffset, HasBaseReg,
                                           Scale, AddrSpace);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

bool TargetTransformInfo::LSRWithInstrQueries() const {
  return TTIImpl->LSRWithInstrQueries();
}

bool TargetTransformInfo::isTruncateFree(Type *Ty1, Type *Ty2) const {
  return TTIImpl->isTruncateFree(Ty1, Ty2);
}

bool TargetTransformInfo::isProfitableToHoist(Instruction *I) const {
  return TTIImpl->isProfitableToHoist(I);
}

bool TargetTransformInfo::useAA() const { return TTIImpl->useAA(); }

bool TargetTransformInfo::isTypeLegal(Type *Ty) const {
  return TTIImpl->isTypeLegal(Ty);
}

unsigned TargetTransformInfo::getJumpBufAlignment() const {
  return TTIImpl->getJumpBufAlignment();
}

unsigned TargetTransformInfo::getJumpBufSize() const {
  return TTIImpl->getJumpBufSize();
}

bool TargetTransformInfo::shouldBuildLookupTables() const {
  return TTIImpl->shouldBuildLookupTables();
}

bool TargetTransformInfo::shouldBuildLookupTablesForConstant(Constant *C) const {
  return TTIImpl->shouldBuildLookupTablesForConstant(C);
}

bool TargetTransformInfo::useColdCCForColdCall(Function &F) const {
  return TTIImpl->useColdCCForColdCall(F);
}

unsigned TargetTransformInfo::
getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const {
  return TTIImpl->getScalarizationOverhead(Ty, Insert, Extract);
}

unsigned TargetTransformInfo::
getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
                                 unsigned VF) const {
  return TTIImpl->getOperandsScalarizationOverhead(Args, VF);
}

bool TargetTransformInfo::supportsEfficientVectorElementLoadStore() const {
  return TTIImpl->supportsEfficientVectorElementLoadStore();
}

bool TargetTransformInfo::enableAggressiveInterleaving(bool LoopHasReductions) const {
  return TTIImpl->enableAggressiveInterleaving(LoopHasReductions);
}

const TargetTransformInfo::MemCmpExpansionOptions *
TargetTransformInfo::enableMemCmpExpansion(bool IsZeroCmp) const {
  return TTIImpl->enableMemCmpExpansion(IsZeroCmp);
}

bool TargetTransformInfo::enableInterleavedAccessVectorization() const {
  return TTIImpl->enableInterleavedAccessVectorization();
}

bool TargetTransformInfo::isFPVectorizationPotentiallyUnsafe() const {
  return TTIImpl->isFPVectorizationPotentiallyUnsafe();
}

bool TargetTransformInfo::allowsMisalignedMemoryAccesses(LLVMContext &Context,
                                                         unsigned BitWidth,
                                                         unsigned AddressSpace,
                                                         unsigned Alignment,
                                                         bool *Fast) const {
  return TTIImpl->allowsMisalignedMemoryAccesses(Context, BitWidth, AddressSpace,
                                                 Alignment, Fast);
}

TargetTransformInfo::PopcntSupportKind
TargetTransformInfo::getPopcntSupport(unsigned IntTyWidthInBit) const {
  return TTIImpl->getPopcntSupport(IntTyWidthInBit);
}

bool TargetTransformInfo::haveFastSqrt(Type *Ty) const {
  return TTIImpl->haveFastSqrt(Ty);
}

bool TargetTransformInfo::isFCmpOrdCheaperThanFCmpZero(Type *Ty) const {
  return TTIImpl->isFCmpOrdCheaperThanFCmpZero(Ty);
}

int TargetTransformInfo::getFPOpCost(Type *Ty) const {
  int Cost = TTIImpl->getFPOpCost(Ty);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx,
                                               const APInt &Imm,
                                               Type *Ty) const {
  int Cost = TTIImpl->getIntImmCodeSizeCost(Opcode, Idx, Imm, Ty);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getIntImmCost(const APInt &Imm, Type *Ty) const {
  int Cost = TTIImpl->getIntImmCost(Imm, Ty);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getIntImmCost(unsigned Opcode, unsigned Idx,
                                       const APInt &Imm, Type *Ty) const {
  int Cost = TTIImpl->getIntImmCost(Opcode, Idx, Imm, Ty);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
                                       const APInt &Imm, Type *Ty) const {
  int Cost = TTIImpl->getIntImmCost(IID, Idx, Imm, Ty);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

unsigned TargetTransformInfo::getNumberOfRegisters(bool Vector) const {
  return TTIImpl->getNumberOfRegisters(Vector);
}

unsigned TargetTransformInfo::getRegisterBitWidth(bool Vector) const {
  return TTIImpl->getRegisterBitWidth(Vector);
}

unsigned TargetTransformInfo::getMinVectorRegisterBitWidth() const {
  return TTIImpl->getMinVectorRegisterBitWidth();
}

bool TargetTransformInfo::shouldMaximizeVectorBandwidth(bool OptSize) const {
  return TTIImpl->shouldMaximizeVectorBandwidth(OptSize);
}

unsigned TargetTransformInfo::getMinimumVF(unsigned ElemWidth) const {
  return TTIImpl->getMinimumVF(ElemWidth);
}

bool TargetTransformInfo::shouldConsiderAddressTypePromotion(
    const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const {
  return TTIImpl->shouldConsiderAddressTypePromotion(
      I, AllowPromotionWithoutCommonHeader);
}

unsigned TargetTransformInfo::getCacheLineSize() const {
  return TTIImpl->getCacheLineSize();
}

llvm::Optional<unsigned> TargetTransformInfo::getCacheSize(CacheLevel Level)
  const {
  return TTIImpl->getCacheSize(Level);
}

llvm::Optional<unsigned> TargetTransformInfo::getCacheAssociativity(
    CacheLevel Level) const {
  return TTIImpl->getCacheAssociativity(Level);
}

unsigned TargetTransformInfo::getPrefetchDistance() const {
  return TTIImpl->getPrefetchDistance();
}

unsigned TargetTransformInfo::getMinPrefetchStride() const {
  return TTIImpl->getMinPrefetchStride();
}

unsigned TargetTransformInfo::getMaxPrefetchIterationsAhead() const {
  return TTIImpl->getMaxPrefetchIterationsAhead();
}

unsigned TargetTransformInfo::getMaxInterleaveFactor(unsigned VF) const {
  return TTIImpl->getMaxInterleaveFactor(VF);
}

int TargetTransformInfo::getArithmeticInstrCost(
    unsigned Opcode, Type *Ty, OperandValueKind Opd1Info,
    OperandValueKind Opd2Info, OperandValueProperties Opd1PropInfo,
    OperandValueProperties Opd2PropInfo,
    ArrayRef<const Value *> Args) const {
  int Cost = TTIImpl->getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
                                             Opd1PropInfo, Opd2PropInfo, Args);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}
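
// Illustrative example (a sketch; `VecTy` is an assumed vector type): the
// vectorizers pass operand info so targets can discount easy cases, e.g. a
// multiply by a uniform power-of-2 constant, which often lowers to a shift:
//
//   int Cost = TTI.getArithmeticInstrCost(
//       Instruction::Mul, VecTy, TargetTransformInfo::OK_AnyValue,
//       TargetTransformInfo::OK_UniformConstantValue,
//       TargetTransformInfo::OP_None, TargetTransformInfo::OP_PowerOf2);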
397 
399  Type *SubTp) const {
400  int Cost = TTIImpl->getShuffleCost(Kind, Ty, Index, SubTp);
401  assert(Cost >= 0 && "TTI should not produce negative costs!");
402  return Cost;
403 }
404 
405 int TargetTransformInfo::getCastInstrCost(unsigned Opcode, Type *Dst,
406  Type *Src, const Instruction *I) const {
407  assert ((I == nullptr || I->getOpcode() == Opcode) &&
408  "Opcode should reflect passed instruction.");
409  int Cost = TTIImpl->getCastInstrCost(Opcode, Dst, Src, I);
410  assert(Cost >= 0 && "TTI should not produce negative costs!");
411  return Cost;
412 }
413 
415  VectorType *VecTy,
416  unsigned Index) const {
417  int Cost = TTIImpl->getExtractWithExtendCost(Opcode, Dst, VecTy, Index);
418  assert(Cost >= 0 && "TTI should not produce negative costs!");
419  return Cost;
420 }
421 
422 int TargetTransformInfo::getCFInstrCost(unsigned Opcode) const {
423  int Cost = TTIImpl->getCFInstrCost(Opcode);
424  assert(Cost >= 0 && "TTI should not produce negative costs!");
425  return Cost;
426 }
427 
428 int TargetTransformInfo::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
429  Type *CondTy, const Instruction *I) const {
430  assert ((I == nullptr || I->getOpcode() == Opcode) &&
431  "Opcode should reflect passed instruction.");
432  int Cost = TTIImpl->getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
433  assert(Cost >= 0 && "TTI should not produce negative costs!");
434  return Cost;
435 }
436 
438  unsigned Index) const {
439  int Cost = TTIImpl->getVectorInstrCost(Opcode, Val, Index);
440  assert(Cost >= 0 && "TTI should not produce negative costs!");
441  return Cost;
442 }
443 
444 int TargetTransformInfo::getMemoryOpCost(unsigned Opcode, Type *Src,
445  unsigned Alignment,
446  unsigned AddressSpace,
447  const Instruction *I) const {
448  assert ((I == nullptr || I->getOpcode() == Opcode) &&
449  "Opcode should reflect passed instruction.");
450  int Cost = TTIImpl->getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, I);
451  assert(Cost >= 0 && "TTI should not produce negative costs!");
452  return Cost;
453 }
454 
456  unsigned Alignment,
457  unsigned AddressSpace) const {
458  int Cost =
459  TTIImpl->getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace);
460  assert(Cost >= 0 && "TTI should not produce negative costs!");
461  return Cost;
462 }
463 
465  Value *Ptr, bool VariableMask,
466  unsigned Alignment) const {
467  int Cost = TTIImpl->getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
468  Alignment);
469  assert(Cost >= 0 && "TTI should not produce negative costs!");
470  return Cost;
471 }
472 
474  unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
475  unsigned Alignment, unsigned AddressSpace) const {
476  int Cost = TTIImpl->getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
477  Alignment, AddressSpace);
478  assert(Cost >= 0 && "TTI should not produce negative costs!");
479  return Cost;
480 }
481 
484  unsigned ScalarizationCostPassed) const {
485  int Cost = TTIImpl->getIntrinsicInstrCost(ID, RetTy, Tys, FMF,
486  ScalarizationCostPassed);
487  assert(Cost >= 0 && "TTI should not produce negative costs!");
488  return Cost;
489 }
490 
492  ArrayRef<Value *> Args, FastMathFlags FMF, unsigned VF) const {
493  int Cost = TTIImpl->getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF);
494  assert(Cost >= 0 && "TTI should not produce negative costs!");
495  return Cost;
496 }
497 
499  ArrayRef<Type *> Tys) const {
500  int Cost = TTIImpl->getCallInstrCost(F, RetTy, Tys);
501  assert(Cost >= 0 && "TTI should not produce negative costs!");
502  return Cost;
503 }
504 
506  return TTIImpl->getNumberOfParts(Tp);
507 }
508 
510  ScalarEvolution *SE,
511  const SCEV *Ptr) const {
512  int Cost = TTIImpl->getAddressComputationCost(Tp, SE, Ptr);
513  assert(Cost >= 0 && "TTI should not produce negative costs!");
514  return Cost;
515 }
516 
518  bool IsPairwiseForm) const {
519  int Cost = TTIImpl->getArithmeticReductionCost(Opcode, Ty, IsPairwiseForm);
520  assert(Cost >= 0 && "TTI should not produce negative costs!");
521  return Cost;
522 }
523 
525  bool IsPairwiseForm,
526  bool IsUnsigned) const {
527  int Cost =
528  TTIImpl->getMinMaxReductionCost(Ty, CondTy, IsPairwiseForm, IsUnsigned);
529  assert(Cost >= 0 && "TTI should not produce negative costs!");
530  return Cost;
531 }
532 
533 unsigned
535  return TTIImpl->getCostOfKeepingLiveOverCall(Tys);
536 }
537 
539  MemIntrinsicInfo &Info) const {
540  return TTIImpl->getTgtMemIntrinsic(Inst, Info);
541 }
542 
544  return TTIImpl->getAtomicMemIntrinsicMaxElementSize();
545 }
546 
548  IntrinsicInst *Inst, Type *ExpectedType) const {
549  return TTIImpl->getOrCreateResultFromMemIntrinsic(Inst, ExpectedType);
550 }
551 
553  Value *Length,
554  unsigned SrcAlign,
555  unsigned DestAlign) const {
556  return TTIImpl->getMemcpyLoopLoweringType(Context, Length, SrcAlign,
557  DestAlign);
558 }
559 
562  unsigned RemainingBytes, unsigned SrcAlign, unsigned DestAlign) const {
563  TTIImpl->getMemcpyLoopResidualLoweringType(OpsOut, Context, RemainingBytes,
564  SrcAlign, DestAlign);
565 }
566 
568  const Function *Callee) const {
569  return TTIImpl->areInlineCompatible(Caller, Callee);
570 }
571 
573  Type *Ty) const {
574  return TTIImpl->isIndexedLoadLegal(Mode, Ty);
575 }
576 
578  Type *Ty) const {
579  return TTIImpl->isIndexedStoreLegal(Mode, Ty);
580 }
581 
583  return TTIImpl->getLoadStoreVecRegBitWidth(AS);
584 }
585 
587  return TTIImpl->isLegalToVectorizeLoad(LI);
588 }
589 
591  return TTIImpl->isLegalToVectorizeStore(SI);
592 }
593 
595  unsigned ChainSizeInBytes, unsigned Alignment, unsigned AddrSpace) const {
596  return TTIImpl->isLegalToVectorizeLoadChain(ChainSizeInBytes, Alignment,
597  AddrSpace);
598 }
599 
601  unsigned ChainSizeInBytes, unsigned Alignment, unsigned AddrSpace) const {
602  return TTIImpl->isLegalToVectorizeStoreChain(ChainSizeInBytes, Alignment,
603  AddrSpace);
604 }
605 
607  unsigned LoadSize,
608  unsigned ChainSizeInBytes,
609  VectorType *VecTy) const {
610  return TTIImpl->getLoadVectorFactor(VF, LoadSize, ChainSizeInBytes, VecTy);
611 }
612 
614  unsigned StoreSize,
615  unsigned ChainSizeInBytes,
616  VectorType *VecTy) const {
617  return TTIImpl->getStoreVectorFactor(VF, StoreSize, ChainSizeInBytes, VecTy);
618 }
619 
621  Type *Ty, ReductionFlags Flags) const {
622  return TTIImpl->useReductionIntrinsic(Opcode, Ty, Flags);
623 }
624 
626  return TTIImpl->shouldExpandReduction(II);
627 }
628 
629 int TargetTransformInfo::getInstructionLatency(const Instruction *I) const {
630  return TTIImpl->getInstructionLatency(I);
631 }
632 
638 
639  if (auto *CI = dyn_cast<ConstantInt>(V)) {
640  if (CI->getValue().isPowerOf2())
643  }
644 
645  const Value *Splat = getSplatValue(V);
646 
647  // Check for a splat of a constant or for a non uniform vector of constants
648  // and check if the constant(s) are all powers of two.
649  if (isa<ConstantVector>(V) || isa<ConstantDataVector>(V)) {
651  if (Splat) {
653  if (auto *CI = dyn_cast<ConstantInt>(Splat))
654  if (CI->getValue().isPowerOf2())
656  } else if (auto *CDS = dyn_cast<ConstantDataSequential>(V)) {
658  for (unsigned I = 0, E = CDS->getNumElements(); I != E; ++I) {
659  if (auto *CI = dyn_cast<ConstantInt>(CDS->getElementAsConstant(I)))
660  if (CI->getValue().isPowerOf2())
661  continue;
663  break;
664  }
665  }
666  }
667 
668  // Check for a splat of a uniform value. This is not loop aware, so return
669  // true only for the obviously uniform cases (argument, globalvalue)
670  if (Splat && (isa<Argument>(Splat) || isa<GlobalValue>(Splat)))
672 
673  return OpInfo;
674 }
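
// For illustration (not in the original file), the classification above maps
// values roughly as follows:
//   i32 8                      -> OK_UniformConstantValue, OP_PowerOf2
//   <4 x i32> <2, 2, 2, 2>     -> OK_UniformConstantValue, OP_PowerOf2
//   <4 x i32> <1, 2, 3, 4>     -> OK_NonUniformConstantValue, OP_None
//   splat of a function arg    -> OK_UniformValue
//   anything else              -> OK_AnyValue, OP_None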

static bool matchPairwiseShuffleMask(ShuffleVectorInst *SI, bool IsLeft,
                                     unsigned Level) {
  // We don't need a shuffle if we just want to have element 0 in position 0 of
  // the vector.
  if (!SI && Level == 0 && IsLeft)
    return true;
  else if (!SI)
    return false;

  SmallVector<int, 32> Mask(SI->getType()->getVectorNumElements(), -1);

  // Build a mask of 0, 2, ... (left) or 1, 3, ... (right) depending on whether
  // we look at the left or right side.
  for (unsigned i = 0, e = (1 << Level), val = !IsLeft; i != e; ++i, val += 2)
    Mask[i] = val;

  SmallVector<int, 16> ActualMask = SI->getShuffleMask();
  return Mask == ActualMask;
}
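
// For example (illustrative): at Level 1 on a <4 x float> shuffle, the
// expected left-hand mask is <0, 2, -1, -1> and the right-hand mask is
// <1, 3, -1, -1>, where -1 denotes an undef lane.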

namespace {
/// Kind of the reduction data.
enum ReductionKind {
  RK_None,           /// Not a reduction.
  RK_Arithmetic,     /// Binary reduction data.
  RK_MinMax,         /// Min/max reduction data.
  RK_UnsignedMinMax, /// Unsigned min/max reduction data.
};
/// Contains opcode + LHS/RHS parts of the reduction operations.
struct ReductionData {
  ReductionData() = delete;
  ReductionData(ReductionKind Kind, unsigned Opcode, Value *LHS, Value *RHS)
      : Opcode(Opcode), LHS(LHS), RHS(RHS), Kind(Kind) {
    assert(Kind != RK_None && "expected binary or min/max reduction only.");
  }
  unsigned Opcode = 0;
  Value *LHS = nullptr;
  Value *RHS = nullptr;
  ReductionKind Kind = RK_None;
  bool hasSameData(ReductionData &RD) const {
    return Kind == RD.Kind && Opcode == RD.Opcode;
  }
};
} // namespace

static Optional<ReductionData> getReductionData(Instruction *I) {
  Value *L, *R;
  if (m_BinOp(m_Value(L), m_Value(R)).match(I))
    return ReductionData(RK_Arithmetic, I->getOpcode(), L, R);
  if (auto *SI = dyn_cast<SelectInst>(I)) {
    if (m_SMin(m_Value(L), m_Value(R)).match(SI) ||
        m_SMax(m_Value(L), m_Value(R)).match(SI) ||
        m_OrdFMin(m_Value(L), m_Value(R)).match(SI) ||
        m_OrdFMax(m_Value(L), m_Value(R)).match(SI) ||
        m_UnordFMin(m_Value(L), m_Value(R)).match(SI) ||
        m_UnordFMax(m_Value(L), m_Value(R)).match(SI)) {
      auto *CI = cast<CmpInst>(SI->getCondition());
      return ReductionData(RK_MinMax, CI->getOpcode(), L, R);
    }
    if (m_UMin(m_Value(L), m_Value(R)).match(SI) ||
        m_UMax(m_Value(L), m_Value(R)).match(SI)) {
      auto *CI = cast<CmpInst>(SI->getCondition());
      return ReductionData(RK_UnsignedMinMax, CI->getOpcode(), L, R);
    }
  }
  return llvm::None;
}
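
// Example (illustrative): for IR such as
//   %c = icmp slt i32 %a, %b
//   %s = select i1 %c, i32 %a, i32 %b
// the select matches m_SMin, so getReductionData returns
// ReductionData(RK_MinMax, Instruction::ICmp, %a, %b).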

static ReductionKind matchPairwiseReductionAtLevel(Instruction *I,
                                                   unsigned Level,
                                                   unsigned NumLevels) {
  // Match one level of pairwise operations.
  // %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef,
  //       <4 x i32> <i32 0, i32 2 , i32 undef, i32 undef>
  // %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef,
  //       <4 x i32> <i32 1, i32 3, i32 undef, i32 undef>
  // %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1
  if (!I)
    return RK_None;

  assert(I->getType()->isVectorTy() && "Expecting a vector type");

  Optional<ReductionData> RD = getReductionData(I);
  if (!RD)
    return RK_None;

  ShuffleVectorInst *LS = dyn_cast<ShuffleVectorInst>(RD->LHS);
  if (!LS && Level)
    return RK_None;
  ShuffleVectorInst *RS = dyn_cast<ShuffleVectorInst>(RD->RHS);
  if (!RS && Level)
    return RK_None;

  // On level 0 we can omit one shufflevector instruction.
  if (!Level && !RS && !LS)
    return RK_None;

  // Shuffle inputs must match.
  Value *NextLevelOpL = LS ? LS->getOperand(0) : nullptr;
  Value *NextLevelOpR = RS ? RS->getOperand(0) : nullptr;
  Value *NextLevelOp = nullptr;
  if (NextLevelOpR && NextLevelOpL) {
    // If we have two shuffles their operands must match.
    if (NextLevelOpL != NextLevelOpR)
      return RK_None;

    NextLevelOp = NextLevelOpL;
  } else if (Level == 0 && (NextLevelOpR || NextLevelOpL)) {
    // On the first level we can omit the shufflevector <0, undef,...>. So the
    // input to the other shufflevector <1, undef> must match with one of the
    // inputs to the current binary operation.
    // Example:
    //  %NextLevelOpL = shufflevector %R, <1, undef ...>
    //  %BinOp        = fadd          %NextLevelOpL, %R
    if (NextLevelOpL && NextLevelOpL != RD->RHS)
      return RK_None;
    else if (NextLevelOpR && NextLevelOpR != RD->LHS)
      return RK_None;

    NextLevelOp = NextLevelOpL ? RD->RHS : RD->LHS;
  } else
    return RK_None;

  // Check that the next level's binary operation exists and matches with the
  // current one.
  if (Level + 1 != NumLevels) {
    Optional<ReductionData> NextLevelRD =
        getReductionData(cast<Instruction>(NextLevelOp));
    if (!NextLevelRD || !RD->hasSameData(*NextLevelRD))
      return RK_None;
  }

  // Shuffle mask for pairwise operation must match.
  if (matchPairwiseShuffleMask(LS, /*IsLeft=*/true, Level)) {
    if (!matchPairwiseShuffleMask(RS, /*IsLeft=*/false, Level))
      return RK_None;
  } else if (matchPairwiseShuffleMask(RS, /*IsLeft=*/true, Level)) {
    if (!matchPairwiseShuffleMask(LS, /*IsLeft=*/false, Level))
      return RK_None;
  } else {
    return RK_None;
  }

  if (++Level == NumLevels)
    return RD->Kind;

  // Match next level.
  return matchPairwiseReductionAtLevel(cast<Instruction>(NextLevelOp), Level,
                                       NumLevels);
}

static ReductionKind matchPairwiseReduction(const ExtractElementInst *ReduxRoot,
                                            unsigned &Opcode, Type *&Ty) {
  if (!EnableReduxCost)
    return RK_None;

  // Need to extract the first element.
  ConstantInt *CI = dyn_cast<ConstantInt>(ReduxRoot->getOperand(1));
  unsigned Idx = ~0u;
  if (CI)
    Idx = CI->getZExtValue();
  if (Idx != 0)
    return RK_None;

  auto *RdxStart = dyn_cast<Instruction>(ReduxRoot->getOperand(0));
  if (!RdxStart)
    return RK_None;
  Optional<ReductionData> RD = getReductionData(RdxStart);
  if (!RD)
    return RK_None;

  Type *VecTy = RdxStart->getType();
  unsigned NumVecElems = VecTy->getVectorNumElements();
  if (!isPowerOf2_32(NumVecElems))
    return RK_None;

  // We look for a sequence of shuffle,shuffle,add triples like the following
  // that builds a pairwise reduction tree.
  //
  //  (X0, X1, X2, X3)
  //   (X0 + X1, X2 + X3, undef, undef)
  //    ((X0 + X1) + (X2 + X3), undef, undef, undef)
  //
  // %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef,
  //       <4 x i32> <i32 0, i32 2 , i32 undef, i32 undef>
  // %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef,
  //       <4 x i32> <i32 1, i32 3, i32 undef, i32 undef>
  // %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1
  // %rdx.shuf.1.0 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef,
  //       <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
  // %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef,
  //       <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  // %bin.rdx8 = fadd <4 x float> %rdx.shuf.1.0, %rdx.shuf.1.1
  // %r = extractelement <4 x float> %bin.rdx8, i32 0
  if (matchPairwiseReductionAtLevel(RdxStart, 0, Log2_32(NumVecElems)) ==
      RK_None)
    return RK_None;

  Opcode = RD->Opcode;
  Ty = VecTy;

  return RD->Kind;
}

static std::pair<Value *, ShuffleVectorInst *>
getShuffleAndOtherOprd(Value *L, Value *R) {
  ShuffleVectorInst *S = nullptr;

  if ((S = dyn_cast<ShuffleVectorInst>(L)))
    return std::make_pair(R, S);

  S = dyn_cast<ShuffleVectorInst>(R);
  return std::make_pair(L, S);
}

static ReductionKind
matchVectorSplittingReduction(const ExtractElementInst *ReduxRoot,
                              unsigned &Opcode, Type *&Ty) {
  if (!EnableReduxCost)
    return RK_None;

  // Need to extract the first element.
  ConstantInt *CI = dyn_cast<ConstantInt>(ReduxRoot->getOperand(1));
  unsigned Idx = ~0u;
  if (CI)
    Idx = CI->getZExtValue();
  if (Idx != 0)
    return RK_None;

  auto *RdxStart = dyn_cast<Instruction>(ReduxRoot->getOperand(0));
  if (!RdxStart)
    return RK_None;
  Optional<ReductionData> RD = getReductionData(RdxStart);
  if (!RD)
    return RK_None;

  Type *VecTy = ReduxRoot->getOperand(0)->getType();
  unsigned NumVecElems = VecTy->getVectorNumElements();
  if (!isPowerOf2_32(NumVecElems))
    return RK_None;

  // We look for a sequence of shuffles and adds like the following matching one
  // fadd, shuffle vector pair at a time.
  //
  // %rdx.shuf = shufflevector <4 x float> %rdx, <4 x float> undef,
  //                           <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
  // %bin.rdx = fadd <4 x float> %rdx, %rdx.shuf
  // %rdx.shuf7 = shufflevector <4 x float> %bin.rdx, <4 x float> undef,
  //                          <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  // %bin.rdx8 = fadd <4 x float> %bin.rdx, %rdx.shuf7
  // %r = extractelement <4 x float> %bin.rdx8, i32 0

  unsigned MaskStart = 1;
  Instruction *RdxOp = RdxStart;
  SmallVector<int, 32> ShuffleMask(NumVecElems, 0);
  unsigned NumVecElemsRemain = NumVecElems;
  while (NumVecElemsRemain - 1) {
    // Check for the right reduction operation.
    if (!RdxOp)
      return RK_None;
    Optional<ReductionData> RDLevel = getReductionData(RdxOp);
    if (!RDLevel || !RDLevel->hasSameData(*RD))
      return RK_None;

    Value *NextRdxOp;
    ShuffleVectorInst *Shuffle;
    std::tie(NextRdxOp, Shuffle) =
        getShuffleAndOtherOprd(RDLevel->LHS, RDLevel->RHS);

    // Check that the current reduction operation and the shuffle use the same
    // value.
    if (Shuffle == nullptr)
      return RK_None;
    if (Shuffle->getOperand(0) != NextRdxOp)
      return RK_None;

    // Check that the shuffle mask matches.
    for (unsigned j = 0; j != MaskStart; ++j)
      ShuffleMask[j] = MaskStart + j;
    // Fill the rest of the mask with -1 for undef.
    std::fill(&ShuffleMask[MaskStart], ShuffleMask.end(), -1);

    SmallVector<int, 16> Mask = Shuffle->getShuffleMask();
    if (ShuffleMask != Mask)
      return RK_None;

    RdxOp = dyn_cast<Instruction>(NextRdxOp);
    NumVecElemsRemain /= 2;
    MaskStart *= 2;
  }

  Opcode = RD->Opcode;
  Ty = VecTy;
  return RD->Kind;
}

int TargetTransformInfo::getInstructionThroughput(const Instruction *I) const {
  switch (I->getOpcode()) {
  case Instruction::GetElementPtr:
    return getUserCost(I);

  case Instruction::Ret:
  case Instruction::PHI:
  case Instruction::Br: {
    return getCFInstrCost(I->getOpcode());
  }
  case Instruction::Add:
  case Instruction::FAdd:
  case Instruction::Sub:
  case Instruction::FSub:
  case Instruction::Mul:
  case Instruction::FMul:
  case Instruction::UDiv:
  case Instruction::SDiv:
  case Instruction::FDiv:
  case Instruction::URem:
  case Instruction::SRem:
  case Instruction::FRem:
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor: {
    TargetTransformInfo::OperandValueKind Op1VK, Op2VK;
    TargetTransformInfo::OperandValueProperties Op1VP, Op2VP;
    Op1VK = getOperandInfo(I->getOperand(0), Op1VP);
    Op2VK = getOperandInfo(I->getOperand(1), Op2VP);
    SmallVector<const Value *, 2> Operands(I->operand_values());
    return getArithmeticInstrCost(I->getOpcode(), I->getType(), Op1VK, Op2VK,
                                  Op1VP, Op2VP, Operands);
  }
  case Instruction::Select: {
    const SelectInst *SI = cast<SelectInst>(I);
    Type *CondTy = SI->getCondition()->getType();
    return getCmpSelInstrCost(I->getOpcode(), I->getType(), CondTy, I);
  }
  case Instruction::ICmp:
  case Instruction::FCmp: {
    Type *ValTy = I->getOperand(0)->getType();
    return getCmpSelInstrCost(I->getOpcode(), ValTy, I->getType(), I);
  }
  case Instruction::Store: {
    const StoreInst *SI = cast<StoreInst>(I);
    Type *ValTy = SI->getValueOperand()->getType();
    return getMemoryOpCost(I->getOpcode(), ValTy,
                           SI->getAlignment(),
                           SI->getPointerAddressSpace(), I);
  }
  case Instruction::Load: {
    const LoadInst *LI = cast<LoadInst>(I);
    return getMemoryOpCost(I->getOpcode(), I->getType(),
                           LI->getAlignment(),
                           LI->getPointerAddressSpace(), I);
  }
  case Instruction::ZExt:
  case Instruction::SExt:
  case Instruction::FPToUI:
  case Instruction::FPToSI:
  case Instruction::FPExt:
  case Instruction::PtrToInt:
  case Instruction::IntToPtr:
  case Instruction::SIToFP:
  case Instruction::UIToFP:
  case Instruction::Trunc:
  case Instruction::FPTrunc:
  case Instruction::BitCast:
  case Instruction::AddrSpaceCast: {
    Type *SrcTy = I->getOperand(0)->getType();
    return getCastInstrCost(I->getOpcode(), I->getType(), SrcTy, I);
  }
  case Instruction::ExtractElement: {
    const ExtractElementInst *EEI = cast<ExtractElementInst>(I);
    ConstantInt *CI = dyn_cast<ConstantInt>(EEI->getOperand(1));
    unsigned Idx = -1;
    if (CI)
      Idx = CI->getZExtValue();

    // Try to match a reduction sequence (series of shufflevector and vector
    // adds followed by an extractelement).
    unsigned ReduxOpCode;
    Type *ReduxType;

    switch (matchVectorSplittingReduction(EEI, ReduxOpCode, ReduxType)) {
    case RK_Arithmetic:
      return getArithmeticReductionCost(ReduxOpCode, ReduxType,
                                        /*IsPairwiseForm=*/false);
    case RK_MinMax:
      return getMinMaxReductionCost(
          ReduxType, CmpInst::makeCmpResultType(ReduxType),
          /*IsPairwiseForm=*/false, /*IsUnsigned=*/false);
    case RK_UnsignedMinMax:
      return getMinMaxReductionCost(
          ReduxType, CmpInst::makeCmpResultType(ReduxType),
          /*IsPairwiseForm=*/false, /*IsUnsigned=*/true);
    case RK_None:
      break;
    }

    switch (matchPairwiseReduction(EEI, ReduxOpCode, ReduxType)) {
    case RK_Arithmetic:
      return getArithmeticReductionCost(ReduxOpCode, ReduxType,
                                        /*IsPairwiseForm=*/true);
    case RK_MinMax:
      return getMinMaxReductionCost(
          ReduxType, CmpInst::makeCmpResultType(ReduxType),
          /*IsPairwiseForm=*/true, /*IsUnsigned=*/false);
    case RK_UnsignedMinMax:
      return getMinMaxReductionCost(
          ReduxType, CmpInst::makeCmpResultType(ReduxType),
          /*IsPairwiseForm=*/true, /*IsUnsigned=*/true);
    case RK_None:
      break;
    }

    return getVectorInstrCost(I->getOpcode(),
                              EEI->getOperand(0)->getType(), Idx);
  }
  case Instruction::InsertElement: {
    const InsertElementInst *IE = cast<InsertElementInst>(I);
    ConstantInt *CI = dyn_cast<ConstantInt>(IE->getOperand(2));
    unsigned Idx = -1;
    if (CI)
      Idx = CI->getZExtValue();
    return getVectorInstrCost(I->getOpcode(),
                              IE->getType(), Idx);
  }
  case Instruction::ShuffleVector: {
    const ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(I);
    // TODO: Identify and add costs for insert/extract subvector, etc.
    if (Shuffle->changesLength())
      return -1;

    if (Shuffle->isIdentity())
      return 0;

    Type *Ty = Shuffle->getType();
    if (Shuffle->isReverse())
      return TTIImpl->getShuffleCost(SK_Reverse, Ty, 0, nullptr);

    if (Shuffle->isSelect())
      return TTIImpl->getShuffleCost(SK_Select, Ty, 0, nullptr);

    if (Shuffle->isTranspose())
      return TTIImpl->getShuffleCost(SK_Transpose, Ty, 0, nullptr);

    if (Shuffle->isZeroEltSplat())
      return TTIImpl->getShuffleCost(SK_Broadcast, Ty, 0, nullptr);

    if (Shuffle->isSingleSource())
      return TTIImpl->getShuffleCost(SK_PermuteSingleSrc, Ty, 0, nullptr);

    return TTIImpl->getShuffleCost(SK_PermuteTwoSrc, Ty, 0, nullptr);
  }
  case Instruction::Call:
    if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
      SmallVector<Value *, 4> Args(II->arg_operands());

      FastMathFlags FMF;
      if (auto *FPMO = dyn_cast<FPMathOperator>(II))
        FMF = FPMO->getFastMathFlags();

      return getIntrinsicInstrCost(II->getIntrinsicID(), II->getType(),
                                   Args, FMF);
    }
    return -1;
  default:
    // We don't have any information on this instruction.
    return -1;
  }
}
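
// Illustrative sketch (not part of the original file): a cost survey over a
// function, in the spirit of the CostModel printer pass, using the public
// getInstructionCost entry point that dispatches here:
//
//   for (BasicBlock &BB : F)
//     for (Instruction &Inst : BB) {
//       int Cost = TTI.getInstructionCost(
//           &Inst, TargetTransformInfo::TCK_RecipThroughput);
//       if (Cost >= 0)
//         dbgs() << "cost " << Cost << " for " << Inst << "\n";
//     }
//
// A negative result means no throughput estimate is available.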

TargetTransformInfo::Concept::~Concept() {}

TargetIRAnalysis::TargetIRAnalysis() : TTICallback(&getDefaultTTI) {}

TargetIRAnalysis::TargetIRAnalysis(
    std::function<Result(const Function &)> TTICallback)
    : TTICallback(std::move(TTICallback)) {}

TargetIRAnalysis::Result TargetIRAnalysis::run(const Function &F,
                                               FunctionAnalysisManager &) {
  return TTICallback(F);
}
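
// Usage sketch (assumptions noted): under the new pass manager, a function
// pass obtains the result of this analysis from its analysis manager:
//
//   TargetTransformInfo &TTI = FAM.getResult<TargetIRAnalysis>(F);
//
// TargetMachine::getTargetIRAnalysis() supplies a TTICallback that builds a
// target-specific TTI; otherwise getDefaultTTI below produces the no-op
// implementation.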

AnalysisKey TargetIRAnalysis::Key;

TargetIRAnalysis::Result TargetIRAnalysis::getDefaultTTI(const Function &F) {
  return Result(F.getParent()->getDataLayout());
}

// Register the basic pass.
INITIALIZE_PASS(TargetTransformInfoWrapperPass, "tti",
                "Target Transform Information", false, true)
char TargetTransformInfoWrapperPass::ID = 0;

void TargetTransformInfoWrapperPass::anchor() {}

TargetTransformInfoWrapperPass::TargetTransformInfoWrapperPass()
    : ImmutablePass(ID) {
  initializeTargetTransformInfoWrapperPassPass(
      *PassRegistry::getPassRegistry());
}

TargetTransformInfoWrapperPass::TargetTransformInfoWrapperPass(
    TargetIRAnalysis TIRA)
    : ImmutablePass(ID), TIRA(std::move(TIRA)) {
  initializeTargetTransformInfoWrapperPassPass(
      *PassRegistry::getPassRegistry());
}

TargetTransformInfo &TargetTransformInfoWrapperPass::getTTI(const Function &F) {
  FunctionAnalysisManager DummyFAM;
  TTI = TIRA.run(F, DummyFAM);
  return *TTI;
}

ImmutablePass *
llvm::createTargetTransformInfoWrapperPass(TargetIRAnalysis TIRA) {
  return new TargetTransformInfoWrapperPass(std::move(TIRA));
}
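
// Usage sketch (illustrative, legacy pass manager): a pass declares a
// dependency on the wrapper and pulls the per-function TTI out of it:
//
//   void getAnalysisUsage(AnalysisUsage &AU) const override {
//     AU.addRequired<TargetTransformInfoWrapperPass>();
//   }
//   ...
//   TargetTransformInfo &TTI =
//       getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);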