//===- llvm/Analysis/TargetTransformInfo.cpp ------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/TargetTransformInfoImpl.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include <utility>

using namespace llvm;
using namespace PatternMatch;

#define DEBUG_TYPE "tti"

static cl::opt<bool> EnableReduxCost("costmodel-reduxcost", cl::init(false),
                                     cl::Hidden,
                                     cl::desc("Recognize reduction patterns."));

namespace {
/// No-op implementation of the TTI interface using the utility base
/// classes.
///
/// This is used when no target specific information is available.
struct NoTTIImpl : TargetTransformInfoImplCRTPBase<NoTTIImpl> {
  explicit NoTTIImpl(const DataLayout &DL)
      : TargetTransformInfoImplCRTPBase<NoTTIImpl>(DL) {}
};
}

TargetTransformInfo::TargetTransformInfo(const DataLayout &DL)
    : TTIImpl(new Model<NoTTIImpl>(NoTTIImpl(DL))) {}

TargetTransformInfo::~TargetTransformInfo() {}

TargetTransformInfo::TargetTransformInfo(TargetTransformInfo &&Arg)
    : TTIImpl(std::move(Arg.TTIImpl)) {}

TargetTransformInfo &TargetTransformInfo::operator=(TargetTransformInfo &&RHS) {
  TTIImpl = std::move(RHS.TTIImpl);
  return *this;
}

int TargetTransformInfo::getOperationCost(unsigned Opcode, Type *Ty,
                                          Type *OpTy) const {
  int Cost = TTIImpl->getOperationCost(Opcode, Ty, OpTy);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getCallCost(FunctionType *FTy, int NumArgs) const {
  int Cost = TTIImpl->getCallCost(FTy, NumArgs);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getCallCost(const Function *F,
                                     ArrayRef<const Value *> Arguments) const {
  int Cost = TTIImpl->getCallCost(F, Arguments);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

unsigned TargetTransformInfo::getInliningThresholdMultiplier() const {
  return TTIImpl->getInliningThresholdMultiplier();
}

int TargetTransformInfo::getGEPCost(Type *PointeeType, const Value *Ptr,
                                    ArrayRef<const Value *> Operands) const {
  return TTIImpl->getGEPCost(PointeeType, Ptr, Operands);
}

int TargetTransformInfo::getExtCost(const Instruction *I,
                                    const Value *Src) const {
  return TTIImpl->getExtCost(I, Src);
}

int TargetTransformInfo::getIntrinsicCost(
    Intrinsic::ID IID, Type *RetTy, ArrayRef<const Value *> Arguments) const {
  int Cost = TTIImpl->getIntrinsicCost(IID, RetTy, Arguments);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

unsigned
TargetTransformInfo::getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
                                                      unsigned &JTSize) const {
  return TTIImpl->getEstimatedNumberOfCaseClusters(SI, JTSize);
}

int TargetTransformInfo::getUserCost(const User *U,
                                     ArrayRef<const Value *> Operands) const {
  int Cost = TTIImpl->getUserCost(U, Operands);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

bool TargetTransformInfo::hasBranchDivergence() const {
  return TTIImpl->hasBranchDivergence();
}

bool TargetTransformInfo::isSourceOfDivergence(const Value *V) const {
  return TTIImpl->isSourceOfDivergence(V);
}

bool TargetTransformInfo::isAlwaysUniform(const Value *V) const {
  return TTIImpl->isAlwaysUniform(V);
}

unsigned TargetTransformInfo::getFlatAddressSpace() const {
  return TTIImpl->getFlatAddressSpace();
}

bool TargetTransformInfo::isLoweredToCall(const Function *F) const {
  return TTIImpl->isLoweredToCall(F);
}

void TargetTransformInfo::getUnrollingPreferences(
    Loop *L, ScalarEvolution &SE, UnrollingPreferences &UP) const {
  return TTIImpl->getUnrollingPreferences(L, SE, UP);
}

bool TargetTransformInfo::isLegalAddImmediate(int64_t Imm) const {
  return TTIImpl->isLegalAddImmediate(Imm);
}

bool TargetTransformInfo::isLegalICmpImmediate(int64_t Imm) const {
  return TTIImpl->isLegalICmpImmediate(Imm);
}

bool TargetTransformInfo::isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
                                                int64_t BaseOffset,
                                                bool HasBaseReg,
                                                int64_t Scale,
                                                unsigned AddrSpace,
                                                Instruction *I) const {
  return TTIImpl->isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg,
                                        Scale, AddrSpace, I);
}

bool TargetTransformInfo::isLSRCostLess(LSRCost &C1, LSRCost &C2) const {
  return TTIImpl->isLSRCostLess(C1, C2);
}

bool TargetTransformInfo::canMacroFuseCmp() const {
  return TTIImpl->canMacroFuseCmp();
}

bool TargetTransformInfo::shouldFavorPostInc() const {
  return TTIImpl->shouldFavorPostInc();
}

bool TargetTransformInfo::isLegalMaskedStore(Type *DataType) const {
  return TTIImpl->isLegalMaskedStore(DataType);
}

bool TargetTransformInfo::isLegalMaskedLoad(Type *DataType) const {
  return TTIImpl->isLegalMaskedLoad(DataType);
}

bool TargetTransformInfo::isLegalMaskedGather(Type *DataType) const {
  return TTIImpl->isLegalMaskedGather(DataType);
}

bool TargetTransformInfo::isLegalMaskedScatter(Type *DataType) const {
  return TTIImpl->isLegalMaskedScatter(DataType);
}

bool TargetTransformInfo::hasDivRemOp(Type *DataType, bool IsSigned) const {
  return TTIImpl->hasDivRemOp(DataType, IsSigned);
}

bool TargetTransformInfo::hasVolatileVariant(Instruction *I,
                                             unsigned AddrSpace) const {
  return TTIImpl->hasVolatileVariant(I, AddrSpace);
}

bool TargetTransformInfo::prefersVectorizedAddressing() const {
  return TTIImpl->prefersVectorizedAddressing();
}

int TargetTransformInfo::getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
                                              int64_t BaseOffset,
                                              bool HasBaseReg,
                                              int64_t Scale,
                                              unsigned AddrSpace) const {
  int Cost = TTIImpl->getScalingFactorCost(Ty, BaseGV, BaseOffset, HasBaseReg,
                                           Scale, AddrSpace);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

bool TargetTransformInfo::LSRWithInstrQueries() const {
  return TTIImpl->LSRWithInstrQueries();
}

bool TargetTransformInfo::isTruncateFree(Type *Ty1, Type *Ty2) const {
  return TTIImpl->isTruncateFree(Ty1, Ty2);
}

bool TargetTransformInfo::isProfitableToHoist(Instruction *I) const {
  return TTIImpl->isProfitableToHoist(I);
}

bool TargetTransformInfo::useAA() const { return TTIImpl->useAA(); }

bool TargetTransformInfo::isTypeLegal(Type *Ty) const {
  return TTIImpl->isTypeLegal(Ty);
}

unsigned TargetTransformInfo::getJumpBufAlignment() const {
  return TTIImpl->getJumpBufAlignment();
}

unsigned TargetTransformInfo::getJumpBufSize() const {
  return TTIImpl->getJumpBufSize();
}

bool TargetTransformInfo::shouldBuildLookupTables() const {
  return TTIImpl->shouldBuildLookupTables();
}
bool TargetTransformInfo::shouldBuildLookupTablesForConstant(Constant *C) const {
  return TTIImpl->shouldBuildLookupTablesForConstant(C);
}

bool TargetTransformInfo::useColdCCForColdCall(Function &F) const {
  return TTIImpl->useColdCCForColdCall(F);
}

unsigned TargetTransformInfo::
getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const {
  return TTIImpl->getScalarizationOverhead(Ty, Insert, Extract);
}

unsigned TargetTransformInfo::
getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
                                 unsigned VF) const {
  return TTIImpl->getOperandsScalarizationOverhead(Args, VF);
}

bool TargetTransformInfo::supportsEfficientVectorElementLoadStore() const {
  return TTIImpl->supportsEfficientVectorElementLoadStore();
}

bool TargetTransformInfo::enableAggressiveInterleaving(bool LoopHasReductions) const {
  return TTIImpl->enableAggressiveInterleaving(LoopHasReductions);
}

const TargetTransformInfo::MemCmpExpansionOptions *
TargetTransformInfo::enableMemCmpExpansion(bool IsZeroCmp) const {
  return TTIImpl->enableMemCmpExpansion(IsZeroCmp);
}

bool TargetTransformInfo::enableInterleavedAccessVectorization() const {
  return TTIImpl->enableInterleavedAccessVectorization();
}

bool TargetTransformInfo::enableMaskedInterleavedAccessVectorization() const {
  return TTIImpl->enableMaskedInterleavedAccessVectorization();
}

bool TargetTransformInfo::isFPVectorizationPotentiallyUnsafe() const {
  return TTIImpl->isFPVectorizationPotentiallyUnsafe();
}

bool TargetTransformInfo::allowsMisalignedMemoryAccesses(LLVMContext &Context,
                                                         unsigned BitWidth,
                                                         unsigned AddressSpace,
                                                         unsigned Alignment,
                                                         bool *Fast) const {
  return TTIImpl->allowsMisalignedMemoryAccesses(Context, BitWidth, AddressSpace,
                                                 Alignment, Fast);
}

TargetTransformInfo::PopcntSupportKind
TargetTransformInfo::getPopcntSupport(unsigned IntTyWidthInBit) const {
  return TTIImpl->getPopcntSupport(IntTyWidthInBit);
}

bool TargetTransformInfo::haveFastSqrt(Type *Ty) const {
  return TTIImpl->haveFastSqrt(Ty);
}

bool TargetTransformInfo::isFCmpOrdCheaperThanFCmpZero(Type *Ty) const {
  return TTIImpl->isFCmpOrdCheaperThanFCmpZero(Ty);
}

int TargetTransformInfo::getFPOpCost(Type *Ty) const {
  int Cost = TTIImpl->getFPOpCost(Ty);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx,
                                               const APInt &Imm,
                                               Type *Ty) const {
  int Cost = TTIImpl->getIntImmCodeSizeCost(Opcode, Idx, Imm, Ty);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getIntImmCost(const APInt &Imm, Type *Ty) const {
  int Cost = TTIImpl->getIntImmCost(Imm, Ty);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getIntImmCost(unsigned Opcode, unsigned Idx,
                                       const APInt &Imm, Type *Ty) const {
  int Cost = TTIImpl->getIntImmCost(Opcode, Idx, Imm, Ty);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
                                       const APInt &Imm, Type *Ty) const {
  int Cost = TTIImpl->getIntImmCost(IID, Idx, Imm, Ty);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

unsigned TargetTransformInfo::getNumberOfRegisters(bool Vector) const {
  return TTIImpl->getNumberOfRegisters(Vector);
}

unsigned TargetTransformInfo::getRegisterBitWidth(bool Vector) const {
  return TTIImpl->getRegisterBitWidth(Vector);
}

unsigned TargetTransformInfo::getMinVectorRegisterBitWidth() const {
  return TTIImpl->getMinVectorRegisterBitWidth();
}

bool TargetTransformInfo::shouldMaximizeVectorBandwidth(bool OptSize) const {
  return TTIImpl->shouldMaximizeVectorBandwidth(OptSize);
}

unsigned TargetTransformInfo::getMinimumVF(unsigned ElemWidth) const {
  return TTIImpl->getMinimumVF(ElemWidth);
}

bool TargetTransformInfo::shouldConsiderAddressTypePromotion(
    const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const {
  return TTIImpl->shouldConsiderAddressTypePromotion(
      I, AllowPromotionWithoutCommonHeader);
}

unsigned TargetTransformInfo::getCacheLineSize() const {
  return TTIImpl->getCacheLineSize();
}

llvm::Optional<unsigned> TargetTransformInfo::getCacheSize(CacheLevel Level)
    const {
  return TTIImpl->getCacheSize(Level);
}

llvm::Optional<unsigned> TargetTransformInfo::getCacheAssociativity(
    CacheLevel Level) const {
  return TTIImpl->getCacheAssociativity(Level);
}

unsigned TargetTransformInfo::getPrefetchDistance() const {
  return TTIImpl->getPrefetchDistance();
}

unsigned TargetTransformInfo::getMinPrefetchStride() const {
  return TTIImpl->getMinPrefetchStride();
}

unsigned TargetTransformInfo::getMaxPrefetchIterationsAhead() const {
  return TTIImpl->getMaxPrefetchIterationsAhead();
}

unsigned TargetTransformInfo::getMaxInterleaveFactor(unsigned VF) const {
  return TTIImpl->getMaxInterleaveFactor(VF);
}

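// Collect properties of V used in cost analysis, e.g. whether it is a
// uniform value or a constant that is a power of two (OP_PowerOf2).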
TargetTransformInfo::OperandValueKind
TargetTransformInfo::getOperandInfo(Value *V, OperandValueProperties &OpProps) {
  OperandValueKind OpInfo = OK_AnyValue;
  OpProps = OP_None;

  if (auto *CI = dyn_cast<ConstantInt>(V)) {
    if (CI->getValue().isPowerOf2())
      OpProps = OP_PowerOf2;
    return OK_UniformConstantValue;
  }

  // A broadcast shuffle creates a uniform value.
  // TODO: Add support for non-zero index broadcasts.
  // TODO: Add support for different source vector width.
  if (auto *ShuffleInst = dyn_cast<ShuffleVectorInst>(V))
    if (ShuffleInst->isZeroEltSplat())
      OpInfo = OK_UniformValue;

  const Value *Splat = getSplatValue(V);

  // Check for a splat of a constant or for a non uniform vector of constants
  // and check if the constant(s) are all powers of two.
  if (isa<ConstantVector>(V) || isa<ConstantDataVector>(V)) {
    OpInfo = OK_NonUniformConstantValue;
    if (Splat) {
      OpInfo = OK_UniformConstantValue;
      if (auto *CI = dyn_cast<ConstantInt>(Splat))
        if (CI->getValue().isPowerOf2())
          OpProps = OP_PowerOf2;
    } else if (auto *CDS = dyn_cast<ConstantDataSequential>(V)) {
      OpProps = OP_PowerOf2;
      for (unsigned I = 0, E = CDS->getNumElements(); I != E; ++I) {
        if (auto *CI = dyn_cast<ConstantInt>(CDS->getElementAsConstant(I)))
          if (CI->getValue().isPowerOf2())
            continue;
        OpProps = OP_None;
        break;
      }
    }
  }

  // Check for a splat of a uniform value. This is not loop aware, so return
  // true only for the obviously uniform cases (argument, globalvalue)
  if (Splat && (isa<Argument>(Splat) || isa<GlobalValue>(Splat)))
    OpInfo = OK_UniformValue;

  return OpInfo;
}

int TargetTransformInfo::getArithmeticInstrCost(
    unsigned Opcode, Type *Ty, OperandValueKind Opd1Info,
    OperandValueKind Opd2Info, OperandValueProperties Opd1PropInfo,
    OperandValueProperties Opd2PropInfo,
    ArrayRef<const Value *> Args) const {
  int Cost = TTIImpl->getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
                                             Opd1PropInfo, Opd2PropInfo, Args);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getShuffleCost(ShuffleKind Kind, Type *Ty, int Index,
                                        Type *SubTp) const {
  int Cost = TTIImpl->getShuffleCost(Kind, Ty, Index, SubTp);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getCastInstrCost(unsigned Opcode, Type *Dst,
                                          Type *Src, const Instruction *I) const {
  assert((I == nullptr || I->getOpcode() == Opcode) &&
         "Opcode should reflect passed instruction.");
  int Cost = TTIImpl->getCastInstrCost(Opcode, Dst, Src, I);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getExtractWithExtendCost(unsigned Opcode, Type *Dst,
                                                  VectorType *VecTy,
                                                  unsigned Index) const {
  int Cost = TTIImpl->getExtractWithExtendCost(Opcode, Dst, VecTy, Index);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getCFInstrCost(unsigned Opcode) const {
  int Cost = TTIImpl->getCFInstrCost(Opcode);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
                                            Type *CondTy, const Instruction *I) const {
  assert((I == nullptr || I->getOpcode() == Opcode) &&
         "Opcode should reflect passed instruction.");
  int Cost = TTIImpl->getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getVectorInstrCost(unsigned Opcode, Type *Val,
                                            unsigned Index) const {
  int Cost = TTIImpl->getVectorInstrCost(Opcode, Val, Index);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getMemoryOpCost(unsigned Opcode, Type *Src,
                                         unsigned Alignment,
                                         unsigned AddressSpace,
                                         const Instruction *I) const {
  assert((I == nullptr || I->getOpcode() == Opcode) &&
         "Opcode should reflect passed instruction.");
  int Cost = TTIImpl->getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, I);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
                                               unsigned Alignment,
                                               unsigned AddressSpace) const {
  int Cost =
      TTIImpl->getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
                                                Value *Ptr, bool VariableMask,
                                                unsigned Alignment) const {
  int Cost = TTIImpl->getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
                                             Alignment);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getInterleavedMemoryOpCost(
    unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
    unsigned Alignment, unsigned AddressSpace, bool UseMaskForCond,
    bool UseMaskForGaps) const {
  int Cost = TTIImpl->getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
                                                 Alignment, AddressSpace,
                                                 UseMaskForCond,
                                                 UseMaskForGaps);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getIntrinsicInstrCost(
    Intrinsic::ID ID, Type *RetTy, ArrayRef<Type *> Tys, FastMathFlags FMF,
    unsigned ScalarizationCostPassed) const {
  int Cost = TTIImpl->getIntrinsicInstrCost(ID, RetTy, Tys, FMF,
                                            ScalarizationCostPassed);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
    ArrayRef<Value *> Args, FastMathFlags FMF, unsigned VF) const {
  int Cost = TTIImpl->getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getCallInstrCost(Function *F, Type *RetTy,
                                          ArrayRef<Type *> Tys) const {
  int Cost = TTIImpl->getCallInstrCost(F, RetTy, Tys);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

unsigned TargetTransformInfo::getNumberOfParts(Type *Tp) const {
  return TTIImpl->getNumberOfParts(Tp);
}

int TargetTransformInfo::getAddressComputationCost(Type *Tp,
                                                   ScalarEvolution *SE,
                                                   const SCEV *Ptr) const {
  int Cost = TTIImpl->getAddressComputationCost(Tp, SE, Ptr);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getArithmeticReductionCost(unsigned Opcode, Type *Ty,
                                                    bool IsPairwiseForm) const {
  int Cost = TTIImpl->getArithmeticReductionCost(Opcode, Ty, IsPairwiseForm);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getMinMaxReductionCost(Type *Ty, Type *CondTy,
                                                bool IsPairwiseForm,
                                                bool IsUnsigned) const {
  int Cost =
      TTIImpl->getMinMaxReductionCost(Ty, CondTy, IsPairwiseForm, IsUnsigned);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

unsigned
TargetTransformInfo::getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) const {
  return TTIImpl->getCostOfKeepingLiveOverCall(Tys);
}

bool TargetTransformInfo::getTgtMemIntrinsic(IntrinsicInst *Inst,
                                             MemIntrinsicInfo &Info) const {
  return TTIImpl->getTgtMemIntrinsic(Inst, Info);
}

unsigned TargetTransformInfo::getAtomicMemIntrinsicMaxElementSize() const {
  return TTIImpl->getAtomicMemIntrinsicMaxElementSize();
}

Value *TargetTransformInfo::getOrCreateResultFromMemIntrinsic(
    IntrinsicInst *Inst, Type *ExpectedType) const {
  return TTIImpl->getOrCreateResultFromMemIntrinsic(Inst, ExpectedType);
}

Type *TargetTransformInfo::getMemcpyLoopLoweringType(LLVMContext &Context,
                                                     Value *Length,
                                                     unsigned SrcAlign,
                                                     unsigned DestAlign) const {
  return TTIImpl->getMemcpyLoopLoweringType(Context, Length, SrcAlign,
                                            DestAlign);
}

void TargetTransformInfo::getMemcpyLoopResidualLoweringType(
    SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
    unsigned RemainingBytes, unsigned SrcAlign, unsigned DestAlign) const {
  TTIImpl->getMemcpyLoopResidualLoweringType(OpsOut, Context, RemainingBytes,
                                             SrcAlign, DestAlign);
}

bool TargetTransformInfo::areInlineCompatible(const Function *Caller,
                                              const Function *Callee) const {
  return TTIImpl->areInlineCompatible(Caller, Callee);
}

bool TargetTransformInfo::areFunctionArgsABICompatible(
    const Function *Caller, const Function *Callee,
    SmallPtrSetImpl<Argument *> &Args) const {
  return TTIImpl->areFunctionArgsABICompatible(Caller, Callee, Args);
}

bool TargetTransformInfo::isIndexedLoadLegal(MemIndexedMode Mode,
                                             Type *Ty) const {
  return TTIImpl->isIndexedLoadLegal(Mode, Ty);
}

bool TargetTransformInfo::isIndexedStoreLegal(MemIndexedMode Mode,
                                              Type *Ty) const {
  return TTIImpl->isIndexedStoreLegal(Mode, Ty);
}

unsigned TargetTransformInfo::getLoadStoreVecRegBitWidth(unsigned AS) const {
  return TTIImpl->getLoadStoreVecRegBitWidth(AS);
}

bool TargetTransformInfo::isLegalToVectorizeLoad(LoadInst *LI) const {
  return TTIImpl->isLegalToVectorizeLoad(LI);
}

bool TargetTransformInfo::isLegalToVectorizeStore(StoreInst *SI) const {
  return TTIImpl->isLegalToVectorizeStore(SI);
}

bool TargetTransformInfo::isLegalToVectorizeLoadChain(
    unsigned ChainSizeInBytes, unsigned Alignment, unsigned AddrSpace) const {
  return TTIImpl->isLegalToVectorizeLoadChain(ChainSizeInBytes, Alignment,
                                              AddrSpace);
}

bool TargetTransformInfo::isLegalToVectorizeStoreChain(
    unsigned ChainSizeInBytes, unsigned Alignment, unsigned AddrSpace) const {
  return TTIImpl->isLegalToVectorizeStoreChain(ChainSizeInBytes, Alignment,
                                               AddrSpace);
}

unsigned TargetTransformInfo::getLoadVectorFactor(unsigned VF,
                                                  unsigned LoadSize,
                                                  unsigned ChainSizeInBytes,
                                                  VectorType *VecTy) const {
  return TTIImpl->getLoadVectorFactor(VF, LoadSize, ChainSizeInBytes, VecTy);
}

unsigned TargetTransformInfo::getStoreVectorFactor(unsigned VF,
                                                   unsigned StoreSize,
                                                   unsigned ChainSizeInBytes,
                                                   VectorType *VecTy) const {
  return TTIImpl->getStoreVectorFactor(VF, StoreSize, ChainSizeInBytes, VecTy);
}

bool TargetTransformInfo::useReductionIntrinsic(unsigned Opcode,
                                                Type *Ty, ReductionFlags Flags) const {
  return TTIImpl->useReductionIntrinsic(Opcode, Ty, Flags);
}

bool TargetTransformInfo::shouldExpandReduction(const IntrinsicInst *II) const {
  return TTIImpl->shouldExpandReduction(II);
}

int TargetTransformInfo::getInstructionLatency(const Instruction *I) const {
  return TTIImpl->getInstructionLatency(I);
}

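// Check that SI is the shuffle expected on the left (IsLeft) or right
// operand of one level of a pairwise reduction: for the first 2^Level
// lanes the mask selects the even (0, 2, ...) or odd (1, 3, ...) elements,
// and the remaining lanes are undef.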
static bool matchPairwiseShuffleMask(ShuffleVectorInst *SI, bool IsLeft,
                                     unsigned Level) {
  // We don't need a shuffle if we just want to have element 0 in position 0 of
  // the vector.
  if (!SI && Level == 0 && IsLeft)
    return true;
  else if (!SI)
    return false;

  SmallVector<int, 32> Mask(SI->getType()->getVectorNumElements(), -1);

  // Build a mask of 0, 2, ... (left) or 1, 3, ... (right) depending on whether
  // we look at the left or right side.
  for (unsigned i = 0, e = (1 << Level), val = !IsLeft; i != e; ++i, val += 2)
    Mask[i] = val;

  SmallVector<int, 16> ActualMask = SI->getShuffleMask();
  return Mask == ActualMask;
}

namespace {
/// Kind of the reduction data.
enum ReductionKind {
  RK_None,           /// Not a reduction.
  RK_Arithmetic,     /// Binary reduction data.
  RK_MinMax,         /// Min/max reduction data.
  RK_UnsignedMinMax, /// Unsigned min/max reduction data.
};
/// Contains opcode + LHS/RHS parts of the reduction operations.
struct ReductionData {
  ReductionData() = delete;
  ReductionData(ReductionKind Kind, unsigned Opcode, Value *LHS, Value *RHS)
      : Opcode(Opcode), LHS(LHS), RHS(RHS), Kind(Kind) {
    assert(Kind != RK_None && "expected binary or min/max reduction only.");
  }
  unsigned Opcode = 0;
  Value *LHS = nullptr;
  Value *RHS = nullptr;
  ReductionKind Kind = RK_None;
  bool hasSameData(ReductionData &RD) const {
    return Kind == RD.Kind && Opcode == RD.Opcode;
  }
};
} // namespace

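// Decompose I into a reduction step: either a plain binary operator, or a
// select-based min/max idiom (signed and floating-point forms are RK_MinMax,
// unsigned forms are RK_UnsignedMinMax). Returns llvm::None otherwise.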
static Optional<ReductionData> getReductionData(Instruction *I) {
  Value *L, *R;
  if (m_BinOp(m_Value(L), m_Value(R)).match(I))
    return ReductionData(RK_Arithmetic, I->getOpcode(), L, R);
  if (auto *SI = dyn_cast<SelectInst>(I)) {
    if (m_SMin(m_Value(L), m_Value(R)).match(SI) ||
        m_SMax(m_Value(L), m_Value(R)).match(SI) ||
        m_OrdFMin(m_Value(L), m_Value(R)).match(SI) ||
        m_OrdFMax(m_Value(L), m_Value(R)).match(SI) ||
        m_UnordFMin(m_Value(L), m_Value(R)).match(SI) ||
        m_UnordFMax(m_Value(L), m_Value(R)).match(SI)) {
      auto *CI = cast<CmpInst>(SI->getCondition());
      return ReductionData(RK_MinMax, CI->getOpcode(), L, R);
    }
    if (m_UMin(m_Value(L), m_Value(R)).match(SI) ||
        m_UMax(m_Value(L), m_Value(R)).match(SI)) {
      auto *CI = cast<CmpInst>(SI->getCondition());
      return ReductionData(RK_UnsignedMinMax, CI->getOpcode(), L, R);
    }
  }
  return llvm::None;
}

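// Match one level of a pairwise reduction tree rooted at I, then recurse
// into the next level until NumLevels levels have been matched.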
static ReductionKind matchPairwiseReductionAtLevel(Instruction *I,
                                                   unsigned Level,
                                                   unsigned NumLevels) {
  // Match one level of pairwise operations.
  // %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef,
  //       <4 x i32> <i32 0, i32 2 , i32 undef, i32 undef>
  // %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef,
  //       <4 x i32> <i32 1, i32 3, i32 undef, i32 undef>
  // %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1
  if (!I)
    return RK_None;

  assert(I->getType()->isVectorTy() && "Expecting a vector type");

  Optional<ReductionData> RD = getReductionData(I);
  if (!RD)
    return RK_None;

  ShuffleVectorInst *LS = dyn_cast<ShuffleVectorInst>(I->getOperand(0));
  if (!LS && Level)
    return RK_None;
  ShuffleVectorInst *RS = dyn_cast<ShuffleVectorInst>(I->getOperand(1));
  if (!RS && Level)
    return RK_None;

  // On level 0 we can omit one shufflevector instruction.
  if (!Level && !RS && !LS)
    return RK_None;

  // Shuffle inputs must match.
  Value *NextLevelOpL = LS ? LS->getOperand(0) : nullptr;
  Value *NextLevelOpR = RS ? RS->getOperand(0) : nullptr;
  Value *NextLevelOp = nullptr;
  if (NextLevelOpR && NextLevelOpL) {
    // If we have two shuffles their operands must match.
    if (NextLevelOpL != NextLevelOpR)
      return RK_None;

    NextLevelOp = NextLevelOpL;
  } else if (Level == 0 && (NextLevelOpR || NextLevelOpL)) {
    // On the first level we can omit the shufflevector <0, undef,...>. So the
    // input to the other shufflevector <1, undef> must match with one of the
    // inputs to the current binary operation.
    // Example:
    //  %NextLevelOpL = shufflevector %R, <1, undef ...>
    //  %BinOp        = fadd          %NextLevelOpL, %R
    if (NextLevelOpL && NextLevelOpL != RD->RHS)
      return RK_None;
    else if (NextLevelOpR && NextLevelOpR != RD->LHS)
      return RK_None;

    NextLevelOp = NextLevelOpL ? RD->RHS : RD->LHS;
  } else
    return RK_None;

  // Check that the next level's binary operation exists and matches with the
  // current one.
  if (Level + 1 != NumLevels) {
    Optional<ReductionData> NextLevelRD =
        getReductionData(cast<Instruction>(NextLevelOp));
    if (!NextLevelRD || !RD->hasSameData(*NextLevelRD))
      return RK_None;
  }

  // Shuffle mask for pairwise operation must match.
  if (matchPairwiseShuffleMask(LS, /*IsLeft=*/true, Level)) {
    if (!matchPairwiseShuffleMask(RS, /*IsLeft=*/false, Level))
      return RK_None;
  } else if (matchPairwiseShuffleMask(RS, /*IsLeft=*/true, Level)) {
    if (!matchPairwiseShuffleMask(LS, /*IsLeft=*/false, Level))
      return RK_None;
  } else {
    return RK_None;
  }

  if (++Level == NumLevels)
    return RD->Kind;

  // Match next level.
  return matchPairwiseReductionAtLevel(cast<Instruction>(NextLevelOp), Level,
                                       NumLevels);
}

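// Match an extract of element 0 fed by a complete pairwise reduction tree
// over a power-of-two number of lanes; on success the reduction opcode and
// vector type are reported through Opcode and Ty.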
static ReductionKind matchPairwiseReduction(const ExtractElementInst *ReduxRoot,
                                            unsigned &Opcode, Type *&Ty) {
  if (!EnableReduxCost)
    return RK_None;

  // Need to extract the first element.
  ConstantInt *CI = dyn_cast<ConstantInt>(ReduxRoot->getOperand(1));
  unsigned Idx = ~0u;
  if (CI)
    Idx = CI->getZExtValue();
  if (Idx != 0)
    return RK_None;

  auto *RdxStart = dyn_cast<Instruction>(ReduxRoot->getOperand(0));
  if (!RdxStart)
    return RK_None;
  Optional<ReductionData> RD = getReductionData(RdxStart);
  if (!RD)
    return RK_None;

  Type *VecTy = RdxStart->getType();
  unsigned NumVecElems = VecTy->getVectorNumElements();
  if (!isPowerOf2_32(NumVecElems))
    return RK_None;

  // We look for a sequence of shuffle,shuffle,add triples like the following
  // that builds a pairwise reduction tree.
  //
  //  (X0, X1, X2, X3)
  //   (X0 + X1, X2 + X3, undef, undef)
  //    ((X0 + X1) + (X2 + X3), undef, undef, undef)
  //
  // %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef,
  //       <4 x i32> <i32 0, i32 2 , i32 undef, i32 undef>
  // %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef,
  //       <4 x i32> <i32 1, i32 3, i32 undef, i32 undef>
  // %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1
  // %rdx.shuf.1.0 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef,
  //       <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
  // %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef,
  //       <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  // %bin.rdx8 = fadd <4 x float> %rdx.shuf.1.0, %rdx.shuf.1.1
  // %r = extractelement <4 x float> %bin.rdx8, i32 0
  if (matchPairwiseReductionAtLevel(RdxStart, 0, Log2_32(NumVecElems)) ==
      RK_None)
    return RK_None;

  Opcode = RD->Opcode;
  Ty = VecTy;

  return RD->Kind;
}

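// Split the operand pair (L, R) into the shufflevector operand (if any) and
// the other operand; the shuffle slot is null when neither is a shuffle.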
static std::pair<Value *, ShuffleVectorInst *>
getShuffleAndOtherOprd(Value *L, Value *R) {
  ShuffleVectorInst *S = nullptr;

  if ((S = dyn_cast<ShuffleVectorInst>(L)))
    return std::make_pair(R, S);

  S = dyn_cast<ShuffleVectorInst>(R);
  return std::make_pair(L, S);
}

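// Match a reduction that repeatedly splits the vector in half: shuffle the
// upper half down, apply the reduction operation, and repeat until a single
// element remains.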
static ReductionKind
matchVectorSplittingReduction(const ExtractElementInst *ReduxRoot,
                              unsigned &Opcode, Type *&Ty) {
  if (!EnableReduxCost)
    return RK_None;

  // Need to extract the first element.
  ConstantInt *CI = dyn_cast<ConstantInt>(ReduxRoot->getOperand(1));
  unsigned Idx = ~0u;
  if (CI)
    Idx = CI->getZExtValue();
  if (Idx != 0)
    return RK_None;

  auto *RdxStart = dyn_cast<Instruction>(ReduxRoot->getOperand(0));
  if (!RdxStart)
    return RK_None;
  Optional<ReductionData> RD = getReductionData(RdxStart);
  if (!RD)
    return RK_None;

  Type *VecTy = ReduxRoot->getOperand(0)->getType();
  unsigned NumVecElems = VecTy->getVectorNumElements();
  if (!isPowerOf2_32(NumVecElems))
    return RK_None;

  // We look for a sequence of shuffles and adds like the following matching one
  // fadd, shuffle vector pair at a time.
  //
  // %rdx.shuf = shufflevector <4 x float> %rdx, <4 x float> undef,
  //                           <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
  // %bin.rdx = fadd <4 x float> %rdx, %rdx.shuf
  // %rdx.shuf7 = shufflevector <4 x float> %bin.rdx, <4 x float> undef,
  //                          <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  // %bin.rdx8 = fadd <4 x float> %bin.rdx, %rdx.shuf7
  // %r = extractelement <4 x float> %bin.rdx8, i32 0

  unsigned MaskStart = 1;
  Instruction *RdxOp = RdxStart;
  SmallVector<int, 32> ShuffleMask(NumVecElems, 0);
  unsigned NumVecElemsRemain = NumVecElems;
  while (NumVecElemsRemain - 1) {
    // Check for the right reduction operation.
    if (!RdxOp)
      return RK_None;
    Optional<ReductionData> RDLevel = getReductionData(RdxOp);
    if (!RDLevel || !RDLevel->hasSameData(*RD))
      return RK_None;

    Value *NextRdxOp;
    ShuffleVectorInst *Shuffle;
    std::tie(NextRdxOp, Shuffle) =
        getShuffleAndOtherOprd(RDLevel->LHS, RDLevel->RHS);

    // Check that the current reduction operation and the shuffle use the same
    // value.
    if (Shuffle == nullptr)
      return RK_None;
    if (Shuffle->getOperand(0) != NextRdxOp)
      return RK_None;

    // Check that the shuffle mask matches.
    for (unsigned j = 0; j != MaskStart; ++j)
      ShuffleMask[j] = MaskStart + j;
    // Fill the rest of the mask with -1 for undef.
    std::fill(&ShuffleMask[MaskStart], ShuffleMask.end(), -1);

    SmallVector<int, 16> Mask = Shuffle->getShuffleMask();
    if (ShuffleMask != Mask)
      return RK_None;

    RdxOp = dyn_cast<Instruction>(NextRdxOp);
    NumVecElemsRemain /= 2;
    MaskStart *= 2;
  }

  Opcode = RD->Opcode;
  Ty = VecTy;
  return RD->Kind;
}

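// Approximate the reciprocal throughput of I by dispatching to the
// opcode-specific cost hooks; returns -1 when no information is available.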
int TargetTransformInfo::getInstructionThroughput(const Instruction *I) const {
  switch (I->getOpcode()) {
  case Instruction::GetElementPtr:
    return getUserCost(I);

  case Instruction::Ret:
  case Instruction::PHI:
  case Instruction::Br: {
    return getCFInstrCost(I->getOpcode());
  }
  case Instruction::Add:
  case Instruction::FAdd:
  case Instruction::Sub:
  case Instruction::FSub:
  case Instruction::Mul:
  case Instruction::FMul:
  case Instruction::UDiv:
  case Instruction::SDiv:
  case Instruction::FDiv:
  case Instruction::URem:
  case Instruction::SRem:
  case Instruction::FRem:
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor: {
    TargetTransformInfo::OperandValueKind Op1VK, Op2VK;
    TargetTransformInfo::OperandValueProperties Op1VP, Op2VP;
    Op1VK = getOperandInfo(I->getOperand(0), Op1VP);
    Op2VK = getOperandInfo(I->getOperand(1), Op2VP);
    SmallVector<const Value *, 2> Operands(I->operand_values());
    return getArithmeticInstrCost(I->getOpcode(), I->getType(), Op1VK, Op2VK,
                                  Op1VP, Op2VP, Operands);
  }
  case Instruction::Select: {
    const SelectInst *SI = cast<SelectInst>(I);
    Type *CondTy = SI->getCondition()->getType();
    return getCmpSelInstrCost(I->getOpcode(), I->getType(), CondTy, I);
  }
  case Instruction::ICmp:
  case Instruction::FCmp: {
    Type *ValTy = I->getOperand(0)->getType();
    return getCmpSelInstrCost(I->getOpcode(), ValTy, I->getType(), I);
  }
  case Instruction::Store: {
    const StoreInst *SI = cast<StoreInst>(I);
    Type *ValTy = SI->getValueOperand()->getType();
    return getMemoryOpCost(I->getOpcode(), ValTy,
                           SI->getAlignment(),
                           SI->getPointerAddressSpace(), I);
  }
  case Instruction::Load: {
    const LoadInst *LI = cast<LoadInst>(I);
    return getMemoryOpCost(I->getOpcode(), I->getType(),
                           LI->getAlignment(),
                           LI->getPointerAddressSpace(), I);
  }
  case Instruction::ZExt:
  case Instruction::SExt:
  case Instruction::FPToUI:
  case Instruction::FPToSI:
  case Instruction::FPExt:
  case Instruction::PtrToInt:
  case Instruction::IntToPtr:
  case Instruction::SIToFP:
  case Instruction::UIToFP:
  case Instruction::Trunc:
  case Instruction::FPTrunc:
  case Instruction::BitCast:
  case Instruction::AddrSpaceCast: {
    Type *SrcTy = I->getOperand(0)->getType();
    return getCastInstrCost(I->getOpcode(), I->getType(), SrcTy, I);
  }
  case Instruction::ExtractElement: {
    const ExtractElementInst *EEI = cast<ExtractElementInst>(I);
    ConstantInt *CI = dyn_cast<ConstantInt>(EEI->getOperand(1));
    unsigned Idx = -1;
    if (CI)
      Idx = CI->getZExtValue();

    // Try to match a reduction sequence (series of shufflevector and vector
    // adds followed by an extractelement).
    unsigned ReduxOpCode;
    Type *ReduxType;

    switch (matchVectorSplittingReduction(EEI, ReduxOpCode, ReduxType)) {
    case RK_Arithmetic:
      return getArithmeticReductionCost(ReduxOpCode, ReduxType,
                                        /*IsPairwiseForm=*/false);
    case RK_MinMax:
      return getMinMaxReductionCost(
          ReduxType, CmpInst::makeCmpResultType(ReduxType),
          /*IsPairwiseForm=*/false, /*IsUnsigned=*/false);
    case RK_UnsignedMinMax:
      return getMinMaxReductionCost(
          ReduxType, CmpInst::makeCmpResultType(ReduxType),
          /*IsPairwiseForm=*/false, /*IsUnsigned=*/true);
    case RK_None:
      break;
    }

    switch (matchPairwiseReduction(EEI, ReduxOpCode, ReduxType)) {
    case RK_Arithmetic:
      return getArithmeticReductionCost(ReduxOpCode, ReduxType,
                                        /*IsPairwiseForm=*/true);
    case RK_MinMax:
      return getMinMaxReductionCost(
          ReduxType, CmpInst::makeCmpResultType(ReduxType),
          /*IsPairwiseForm=*/true, /*IsUnsigned=*/false);
    case RK_UnsignedMinMax:
      return getMinMaxReductionCost(
          ReduxType, CmpInst::makeCmpResultType(ReduxType),
          /*IsPairwiseForm=*/true, /*IsUnsigned=*/true);
    case RK_None:
      break;
    }

    return getVectorInstrCost(I->getOpcode(),
                              EEI->getOperand(0)->getType(), Idx);
  }
  case Instruction::InsertElement: {
    const InsertElementInst *IE = cast<InsertElementInst>(I);
    ConstantInt *CI = dyn_cast<ConstantInt>(IE->getOperand(2));
    unsigned Idx = -1;
    if (CI)
      Idx = CI->getZExtValue();
    return getVectorInstrCost(I->getOpcode(),
                              IE->getType(), Idx);
  }
  case Instruction::ShuffleVector: {
    const ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(I);
    Type *Ty = Shuffle->getType();
    Type *SrcTy = Shuffle->getOperand(0)->getType();

    // TODO: Identify and add costs for insert subvector, etc.
    int SubIndex;
    if (Shuffle->isExtractSubvectorMask(SubIndex))
      return TTIImpl->getShuffleCost(SK_ExtractSubvector, SrcTy, SubIndex, Ty);

    if (Shuffle->changesLength())
      return -1;

    if (Shuffle->isIdentity())
      return 0;

    if (Shuffle->isReverse())
      return TTIImpl->getShuffleCost(SK_Reverse, Ty, 0, nullptr);

    if (Shuffle->isSelect())
      return TTIImpl->getShuffleCost(SK_Select, Ty, 0, nullptr);

    if (Shuffle->isTranspose())
      return TTIImpl->getShuffleCost(SK_Transpose, Ty, 0, nullptr);

    if (Shuffle->isZeroEltSplat())
      return TTIImpl->getShuffleCost(SK_Broadcast, Ty, 0, nullptr);

    if (Shuffle->isSingleSource())
      return TTIImpl->getShuffleCost(SK_PermuteSingleSrc, Ty, 0, nullptr);

    return TTIImpl->getShuffleCost(SK_PermuteTwoSrc, Ty, 0, nullptr);
  }
  case Instruction::Call:
    if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
      SmallVector<Value *, 4> Args(II->arg_operands());

      FastMathFlags FMF;
      if (auto *FPMO = dyn_cast<FPMathOperator>(II))
        FMF = FPMO->getFastMathFlags();

      return getIntrinsicInstrCost(II->getIntrinsicID(), II->getType(),
                                   Args, FMF);
    }
    return -1;
  default:
    // We don't have any information on this instruction.
    return -1;
  }
}

TargetTransformInfo::Concept::~Concept() {}

TargetIRAnalysis::TargetIRAnalysis() : TTICallback(&getDefaultTTI) {}

TargetIRAnalysis::TargetIRAnalysis(
    std::function<Result(const Function &)> TTICallback)
    : TTICallback(std::move(TTICallback)) {}

TargetIRAnalysis::Result TargetIRAnalysis::run(const Function &F,
                                               FunctionAnalysisManager &) {
  return TTICallback(F);
}

AnalysisKey TargetIRAnalysis::Key;

TargetIRAnalysis::Result TargetIRAnalysis::getDefaultTTI(const Function &F) {
  return Result(F.getParent()->getDataLayout());
}

// Register the basic pass.
INITIALIZE_PASS(TargetTransformInfoWrapperPass, "tti",
                "Target Transform Information", false, true)
char TargetTransformInfoWrapperPass::ID = 0;

void TargetTransformInfoWrapperPass::anchor() {}

TargetTransformInfoWrapperPass::TargetTransformInfoWrapperPass()
    : ImmutablePass(ID) {
  initializeTargetTransformInfoWrapperPassPass(
      *PassRegistry::getPassRegistry());
}

TargetTransformInfoWrapperPass::TargetTransformInfoWrapperPass(
    TargetIRAnalysis TIRA)
    : ImmutablePass(ID), TIRA(std::move(TIRA)) {
  initializeTargetTransformInfoWrapperPassPass(
      *PassRegistry::getPassRegistry());
}

TargetTransformInfo &TargetTransformInfoWrapperPass::getTTI(const Function &F) {
  FunctionAnalysisManager DummyFAM;
  TTI = TIRA.run(F, DummyFAM);
  return *TTI;
}

ImmutablePass *
llvm::createTargetTransformInfoWrapperPass(TargetIRAnalysis TIRA) {
  return new TargetTransformInfoWrapperPass(std::move(TIRA));
}