//===- llvm/Analysis/TargetTransformInfo.cpp ------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/TargetTransformInfoImpl.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include <utility>

using namespace llvm;
using namespace PatternMatch;

#define DEBUG_TYPE "tti"

static cl::opt<bool> EnableReduxCost("costmodel-reduxcost", cl::init(false),
                                     cl::Hidden,
                                     cl::desc("Recognize reduction patterns."));
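// For example, cost-model tests typically enable this flag with an invocation
// along the lines of
//   opt -cost-model -analyze -costmodel-reduxcost < reduction.ll
// (the exact driver flags may vary between releases).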

namespace {
/// No-op implementation of the TTI interface using the utility base
/// classes.
///
/// This is used when no target specific information is available.
struct NoTTIImpl : TargetTransformInfoImplCRTPBase<NoTTIImpl> {
  explicit NoTTIImpl(const DataLayout &DL)
      : TargetTransformInfoImplCRTPBase<NoTTIImpl>(DL) {}
};
}

TargetTransformInfo::TargetTransformInfo(const DataLayout &DL)
    : TTIImpl(new Model<NoTTIImpl>(NoTTIImpl(DL))) {}

TargetTransformInfo::~TargetTransformInfo() {}

TargetTransformInfo::TargetTransformInfo(TargetTransformInfo &&Arg)
    : TTIImpl(std::move(Arg.TTIImpl)) {}

TargetTransformInfo &TargetTransformInfo::operator=(TargetTransformInfo &&RHS) {
  TTIImpl = std::move(RHS.TTIImpl);
  return *this;
}

int TargetTransformInfo::getOperationCost(unsigned Opcode, Type *Ty,
                                          Type *OpTy) const {
  int Cost = TTIImpl->getOperationCost(Opcode, Ty, OpTy);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getCallCost(FunctionType *FTy, int NumArgs,
                                     const User *U) const {
  int Cost = TTIImpl->getCallCost(FTy, NumArgs, U);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getCallCost(const Function *F,
                                     ArrayRef<const Value *> Arguments,
                                     const User *U) const {
  int Cost = TTIImpl->getCallCost(F, Arguments, U);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

unsigned TargetTransformInfo::getInliningThresholdMultiplier() const {
  return TTIImpl->getInliningThresholdMultiplier();
}

int TargetTransformInfo::getGEPCost(Type *PointeeType, const Value *Ptr,
                                    ArrayRef<const Value *> Operands) const {
  return TTIImpl->getGEPCost(PointeeType, Ptr, Operands);
}

int TargetTransformInfo::getExtCost(const Instruction *I,
                                    const Value *Src) const {
  return TTIImpl->getExtCost(I, Src);
}

int TargetTransformInfo::getIntrinsicCost(
    Intrinsic::ID IID, Type *RetTy, ArrayRef<const Value *> Arguments,
    const User *U) const {
  int Cost = TTIImpl->getIntrinsicCost(IID, RetTy, Arguments, U);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

unsigned
TargetTransformInfo::getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
                                                      unsigned &JTSize) const {
  return TTIImpl->getEstimatedNumberOfCaseClusters(SI, JTSize);
}

int TargetTransformInfo::getUserCost(const User *U,
                                     ArrayRef<const Value *> Operands) const {
  int Cost = TTIImpl->getUserCost(U, Operands);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

bool TargetTransformInfo::hasBranchDivergence() const {
  return TTIImpl->hasBranchDivergence();
}

bool TargetTransformInfo::isSourceOfDivergence(const Value *V) const {
  return TTIImpl->isSourceOfDivergence(V);
}

bool llvm::TargetTransformInfo::isAlwaysUniform(const Value *V) const {
  return TTIImpl->isAlwaysUniform(V);
}

unsigned TargetTransformInfo::getFlatAddressSpace() const {
  return TTIImpl->getFlatAddressSpace();
}

bool TargetTransformInfo::isLoweredToCall(const Function *F) const {
  return TTIImpl->isLoweredToCall(F);
}

void TargetTransformInfo::getUnrollingPreferences(
    Loop *L, ScalarEvolution &SE, UnrollingPreferences &UP) const {
  return TTIImpl->getUnrollingPreferences(L, SE, UP);
}

bool TargetTransformInfo::isLegalAddImmediate(int64_t Imm) const {
  return TTIImpl->isLegalAddImmediate(Imm);
}

bool TargetTransformInfo::isLegalICmpImmediate(int64_t Imm) const {
  return TTIImpl->isLegalICmpImmediate(Imm);
}

bool TargetTransformInfo::isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
                                                int64_t BaseOffset,
                                                bool HasBaseReg,
                                                int64_t Scale,
                                                unsigned AddrSpace,
                                                Instruction *I) const {
  return TTIImpl->isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg,
                                        Scale, AddrSpace, I);
}

bool TargetTransformInfo::isLSRCostLess(LSRCost &C1, LSRCost &C2) const {
  return TTIImpl->isLSRCostLess(C1, C2);
}

bool TargetTransformInfo::canMacroFuseCmp() const {
  return TTIImpl->canMacroFuseCmp();
}

bool TargetTransformInfo::shouldFavorPostInc() const {
  return TTIImpl->shouldFavorPostInc();
}

bool TargetTransformInfo::shouldFavorBackedgeIndex(const Loop *L) const {
  return TTIImpl->shouldFavorBackedgeIndex(L);
}

bool TargetTransformInfo::isLegalMaskedStore(Type *DataType) const {
  return TTIImpl->isLegalMaskedStore(DataType);
}

bool TargetTransformInfo::isLegalMaskedLoad(Type *DataType) const {
  return TTIImpl->isLegalMaskedLoad(DataType);
}

bool TargetTransformInfo::isLegalMaskedGather(Type *DataType) const {
  return TTIImpl->isLegalMaskedGather(DataType);
}

bool TargetTransformInfo::isLegalMaskedScatter(Type *DataType) const {
  return TTIImpl->isLegalMaskedScatter(DataType);
}

bool TargetTransformInfo::isLegalMaskedCompressStore(Type *DataType) const {
  return TTIImpl->isLegalMaskedCompressStore(DataType);
}

bool TargetTransformInfo::isLegalMaskedExpandLoad(Type *DataType) const {
  return TTIImpl->isLegalMaskedExpandLoad(DataType);
}

bool TargetTransformInfo::hasDivRemOp(Type *DataType, bool IsSigned) const {
  return TTIImpl->hasDivRemOp(DataType, IsSigned);
}

bool TargetTransformInfo::hasVolatileVariant(Instruction *I,
                                             unsigned AddrSpace) const {
  return TTIImpl->hasVolatileVariant(I, AddrSpace);
}

bool TargetTransformInfo::prefersVectorizedAddressing() const {
  return TTIImpl->prefersVectorizedAddressing();
}

int TargetTransformInfo::getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
                                              int64_t BaseOffset,
                                              bool HasBaseReg,
                                              int64_t Scale,
                                              unsigned AddrSpace) const {
  int Cost = TTIImpl->getScalingFactorCost(Ty, BaseGV, BaseOffset, HasBaseReg,
                                           Scale, AddrSpace);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

bool TargetTransformInfo::LSRWithInstrQueries() const {
  return TTIImpl->LSRWithInstrQueries();
}

bool TargetTransformInfo::isTruncateFree(Type *Ty1, Type *Ty2) const {
  return TTIImpl->isTruncateFree(Ty1, Ty2);
}

bool TargetTransformInfo::isProfitableToHoist(Instruction *I) const {
  return TTIImpl->isProfitableToHoist(I);
}

bool TargetTransformInfo::useAA() const { return TTIImpl->useAA(); }

bool TargetTransformInfo::isTypeLegal(Type *Ty) const {
  return TTIImpl->isTypeLegal(Ty);
}

unsigned TargetTransformInfo::getJumpBufAlignment() const {
  return TTIImpl->getJumpBufAlignment();
}

unsigned TargetTransformInfo::getJumpBufSize() const {
  return TTIImpl->getJumpBufSize();
}

bool TargetTransformInfo::shouldBuildLookupTables() const {
  return TTIImpl->shouldBuildLookupTables();
}
bool TargetTransformInfo::shouldBuildLookupTablesForConstant(Constant *C) const {
  return TTIImpl->shouldBuildLookupTablesForConstant(C);
}

bool TargetTransformInfo::useColdCCForColdCall(Function &F) const {
  return TTIImpl->useColdCCForColdCall(F);
}

unsigned TargetTransformInfo::
getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const {
  return TTIImpl->getScalarizationOverhead(Ty, Insert, Extract);
}

unsigned TargetTransformInfo::
getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
                                 unsigned VF) const {
  return TTIImpl->getOperandsScalarizationOverhead(Args, VF);
}

bool TargetTransformInfo::supportsEfficientVectorElementLoadStore() const {
  return TTIImpl->supportsEfficientVectorElementLoadStore();
}

bool TargetTransformInfo::enableAggressiveInterleaving(bool LoopHasReductions) const {
  return TTIImpl->enableAggressiveInterleaving(LoopHasReductions);
}

const TargetTransformInfo::MemCmpExpansionOptions *
TargetTransformInfo::enableMemCmpExpansion(bool IsZeroCmp) const {
  return TTIImpl->enableMemCmpExpansion(IsZeroCmp);
}

bool TargetTransformInfo::enableInterleavedAccessVectorization() const {
  return TTIImpl->enableInterleavedAccessVectorization();
}

bool TargetTransformInfo::enableMaskedInterleavedAccessVectorization() const {
  return TTIImpl->enableMaskedInterleavedAccessVectorization();
}

bool TargetTransformInfo::isFPVectorizationPotentiallyUnsafe() const {
  return TTIImpl->isFPVectorizationPotentiallyUnsafe();
}

bool TargetTransformInfo::allowsMisalignedMemoryAccesses(LLVMContext &Context,
                                                         unsigned BitWidth,
                                                         unsigned AddressSpace,
                                                         unsigned Alignment,
                                                         bool *Fast) const {
  return TTIImpl->allowsMisalignedMemoryAccesses(Context, BitWidth, AddressSpace,
                                                 Alignment, Fast);
}

TargetTransformInfo::PopcntSupportKind
TargetTransformInfo::getPopcntSupport(unsigned IntTyWidthInBit) const {
  return TTIImpl->getPopcntSupport(IntTyWidthInBit);
}

bool TargetTransformInfo::haveFastSqrt(Type *Ty) const {
  return TTIImpl->haveFastSqrt(Ty);
}

bool TargetTransformInfo::isFCmpOrdCheaperThanFCmpZero(Type *Ty) const {
  return TTIImpl->isFCmpOrdCheaperThanFCmpZero(Ty);
}

int TargetTransformInfo::getFPOpCost(Type *Ty) const {
  int Cost = TTIImpl->getFPOpCost(Ty);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx,
                                               const APInt &Imm,
                                               Type *Ty) const {
  int Cost = TTIImpl->getIntImmCodeSizeCost(Opcode, Idx, Imm, Ty);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getIntImmCost(const APInt &Imm, Type *Ty) const {
  int Cost = TTIImpl->getIntImmCost(Imm, Ty);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getIntImmCost(unsigned Opcode, unsigned Idx,
                                       const APInt &Imm, Type *Ty) const {
  int Cost = TTIImpl->getIntImmCost(Opcode, Idx, Imm, Ty);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
                                       const APInt &Imm, Type *Ty) const {
  int Cost = TTIImpl->getIntImmCost(IID, Idx, Imm, Ty);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

unsigned TargetTransformInfo::getNumberOfRegisters(bool Vector) const {
  return TTIImpl->getNumberOfRegisters(Vector);
}

unsigned TargetTransformInfo::getRegisterBitWidth(bool Vector) const {
  return TTIImpl->getRegisterBitWidth(Vector);
}

unsigned TargetTransformInfo::getMinVectorRegisterBitWidth() const {
  return TTIImpl->getMinVectorRegisterBitWidth();
}

bool TargetTransformInfo::shouldMaximizeVectorBandwidth(bool OptSize) const {
  return TTIImpl->shouldMaximizeVectorBandwidth(OptSize);
}

unsigned TargetTransformInfo::getMinimumVF(unsigned ElemWidth) const {
  return TTIImpl->getMinimumVF(ElemWidth);
}

bool TargetTransformInfo::shouldConsiderAddressTypePromotion(
    const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const {
  return TTIImpl->shouldConsiderAddressTypePromotion(
      I, AllowPromotionWithoutCommonHeader);
}

unsigned TargetTransformInfo::getCacheLineSize() const {
  return TTIImpl->getCacheLineSize();
}

llvm::Optional<unsigned> TargetTransformInfo::getCacheSize(CacheLevel Level)
    const {
  return TTIImpl->getCacheSize(Level);
}

llvm::Optional<unsigned> TargetTransformInfo::getCacheAssociativity(
    CacheLevel Level) const {
  return TTIImpl->getCacheAssociativity(Level);
}

unsigned TargetTransformInfo::getPrefetchDistance() const {
  return TTIImpl->getPrefetchDistance();
}

unsigned TargetTransformInfo::getMinPrefetchStride() const {
  return TTIImpl->getMinPrefetchStride();
}

unsigned TargetTransformInfo::getMaxPrefetchIterationsAhead() const {
  return TTIImpl->getMaxPrefetchIterationsAhead();
}

unsigned TargetTransformInfo::getMaxInterleaveFactor(unsigned VF) const {
  return TTIImpl->getMaxInterleaveFactor(VF);
}

TargetTransformInfo::OperandValueKind
TargetTransformInfo::getOperandInfo(Value *V, OperandValueProperties &OpProps) {
  OperandValueKind OpInfo = OK_AnyValue;
  OpProps = OP_None;

  if (auto *CI = dyn_cast<ConstantInt>(V)) {
    if (CI->getValue().isPowerOf2())
      OpProps = OP_PowerOf2;
    return OK_UniformConstantValue;
  }

  // A broadcast shuffle creates a uniform value.
  // TODO: Add support for non-zero index broadcasts.
  // TODO: Add support for different source vector width.
  if (auto *ShuffleInst = dyn_cast<ShuffleVectorInst>(V))
    if (ShuffleInst->isZeroEltSplat())
      OpInfo = OK_UniformValue;

  const Value *Splat = getSplatValue(V);

  // Check for a splat of a constant or for a non uniform vector of constants
  // and check if the constant(s) are all powers of two.
  if (isa<ConstantVector>(V) || isa<ConstantDataVector>(V)) {
    OpInfo = OK_NonUniformConstantValue;
    if (Splat) {
      OpInfo = OK_UniformConstantValue;
      if (auto *CI = dyn_cast<ConstantInt>(Splat))
        if (CI->getValue().isPowerOf2())
          OpProps = OP_PowerOf2;
    } else if (auto *CDS = dyn_cast<ConstantDataSequential>(V)) {
      OpProps = OP_PowerOf2;
      for (unsigned I = 0, E = CDS->getNumElements(); I != E; ++I) {
        if (auto *CI = dyn_cast<ConstantInt>(CDS->getElementAsConstant(I)))
          if (CI->getValue().isPowerOf2())
            continue;
        OpProps = OP_None;
        break;
      }
    }
  }

  // Check for a splat of a uniform value. This is not loop aware, so return
  // true only for the obviously uniform cases (argument, globalvalue)
  if (Splat && (isa<Argument>(Splat) || isa<GlobalValue>(Splat)))
    OpInfo = OK_UniformValue;

  return OpInfo;
}
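// For example, getOperandInfo classifies the splat operand of
//   mul <4 x i32> %x, <i32 4, i32 4, i32 4, i32 4>
// as OK_UniformConstantValue with OpProps == OP_PowerOf2 (a constant splat
// whose element is a power of two), while a splat of a function argument is
// classified as OK_UniformValue.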

int TargetTransformInfo::getArithmeticInstrCost(
    unsigned Opcode, Type *Ty, OperandValueKind Opd1Info,
    OperandValueKind Opd2Info, OperandValueProperties Opd1PropInfo,
    OperandValueProperties Opd2PropInfo,
    ArrayRef<const Value *> Args) const {
  int Cost = TTIImpl->getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
                                             Opd1PropInfo, Opd2PropInfo, Args);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getShuffleCost(ShuffleKind Kind, Type *Ty, int Index,
                                        Type *SubTp) const {
  int Cost = TTIImpl->getShuffleCost(Kind, Ty, Index, SubTp);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getCastInstrCost(unsigned Opcode, Type *Dst,
                                          Type *Src, const Instruction *I) const {
  assert((I == nullptr || I->getOpcode() == Opcode) &&
         "Opcode should reflect passed instruction.");
  int Cost = TTIImpl->getCastInstrCost(Opcode, Dst, Src, I);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getExtractWithExtendCost(unsigned Opcode, Type *Dst,
                                                  VectorType *VecTy,
                                                  unsigned Index) const {
  int Cost = TTIImpl->getExtractWithExtendCost(Opcode, Dst, VecTy, Index);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getCFInstrCost(unsigned Opcode) const {
  int Cost = TTIImpl->getCFInstrCost(Opcode);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
                                            Type *CondTy, const Instruction *I) const {
  assert((I == nullptr || I->getOpcode() == Opcode) &&
         "Opcode should reflect passed instruction.");
  int Cost = TTIImpl->getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getVectorInstrCost(unsigned Opcode, Type *Val,
                                            unsigned Index) const {
  int Cost = TTIImpl->getVectorInstrCost(Opcode, Val, Index);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getMemoryOpCost(unsigned Opcode, Type *Src,
                                         unsigned Alignment,
                                         unsigned AddressSpace,
                                         const Instruction *I) const {
  assert((I == nullptr || I->getOpcode() == Opcode) &&
         "Opcode should reflect passed instruction.");
  int Cost = TTIImpl->getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, I);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
                                               unsigned Alignment,
                                               unsigned AddressSpace) const {
  int Cost =
      TTIImpl->getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
                                                Value *Ptr, bool VariableMask,
                                                unsigned Alignment) const {
  int Cost = TTIImpl->getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
                                             Alignment);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getInterleavedMemoryOpCost(
    unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
    unsigned Alignment, unsigned AddressSpace, bool UseMaskForCond,
    bool UseMaskForGaps) const {
  int Cost = TTIImpl->getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
                                                 Alignment, AddressSpace,
                                                 UseMaskForCond,
                                                 UseMaskForGaps);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getIntrinsicInstrCost(
    Intrinsic::ID ID, Type *RetTy, ArrayRef<Type *> Tys, FastMathFlags FMF,
    unsigned ScalarizationCostPassed) const {
  int Cost = TTIImpl->getIntrinsicInstrCost(ID, RetTy, Tys, FMF,
                                            ScalarizationCostPassed);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
    ArrayRef<Value *> Args, FastMathFlags FMF, unsigned VF) const {
  int Cost = TTIImpl->getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getCallInstrCost(Function *F, Type *RetTy,
                                          ArrayRef<Type *> Tys) const {
  int Cost = TTIImpl->getCallInstrCost(F, RetTy, Tys);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

unsigned TargetTransformInfo::getNumberOfParts(Type *Tp) const {
  return TTIImpl->getNumberOfParts(Tp);
}

int TargetTransformInfo::getAddressComputationCost(Type *Tp,
                                                   ScalarEvolution *SE,
                                                   const SCEV *Ptr) const {
  int Cost = TTIImpl->getAddressComputationCost(Tp, SE, Ptr);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getArithmeticReductionCost(unsigned Opcode, Type *Ty,
                                                    bool IsPairwiseForm) const {
  int Cost = TTIImpl->getArithmeticReductionCost(Opcode, Ty, IsPairwiseForm);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getMinMaxReductionCost(Type *Ty, Type *CondTy,
                                                bool IsPairwiseForm,
                                                bool IsUnsigned) const {
  int Cost =
      TTIImpl->getMinMaxReductionCost(Ty, CondTy, IsPairwiseForm, IsUnsigned);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

unsigned
TargetTransformInfo::getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) const {
  return TTIImpl->getCostOfKeepingLiveOverCall(Tys);
}

bool TargetTransformInfo::getTgtMemIntrinsic(IntrinsicInst *Inst,
                                             MemIntrinsicInfo &Info) const {
  return TTIImpl->getTgtMemIntrinsic(Inst, Info);
}

unsigned TargetTransformInfo::getAtomicMemIntrinsicMaxElementSize() const {
  return TTIImpl->getAtomicMemIntrinsicMaxElementSize();
}

Value *TargetTransformInfo::getOrCreateResultFromMemIntrinsic(
    IntrinsicInst *Inst, Type *ExpectedType) const {
  return TTIImpl->getOrCreateResultFromMemIntrinsic(Inst, ExpectedType);
}

Type *TargetTransformInfo::getMemcpyLoopLoweringType(LLVMContext &Context,
                                                     Value *Length,
                                                     unsigned SrcAlign,
                                                     unsigned DestAlign) const {
  return TTIImpl->getMemcpyLoopLoweringType(Context, Length, SrcAlign,
                                            DestAlign);
}

void TargetTransformInfo::getMemcpyLoopResidualLoweringType(
    SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
    unsigned RemainingBytes, unsigned SrcAlign, unsigned DestAlign) const {
  TTIImpl->getMemcpyLoopResidualLoweringType(OpsOut, Context, RemainingBytes,
                                             SrcAlign, DestAlign);
}

bool TargetTransformInfo::areInlineCompatible(const Function *Caller,
                                              const Function *Callee) const {
  return TTIImpl->areInlineCompatible(Caller, Callee);
}

bool TargetTransformInfo::areFunctionArgsABICompatible(
    const Function *Caller, const Function *Callee,
    SmallPtrSetImpl<Argument *> &Args) const {
  return TTIImpl->areFunctionArgsABICompatible(Caller, Callee, Args);
}

bool TargetTransformInfo::isIndexedLoadLegal(MemIndexedMode Mode,
                                             Type *Ty) const {
  return TTIImpl->isIndexedLoadLegal(Mode, Ty);
}

bool TargetTransformInfo::isIndexedStoreLegal(MemIndexedMode Mode,
                                              Type *Ty) const {
  return TTIImpl->isIndexedStoreLegal(Mode, Ty);
}

unsigned TargetTransformInfo::getLoadStoreVecRegBitWidth(unsigned AS) const {
  return TTIImpl->getLoadStoreVecRegBitWidth(AS);
}

bool TargetTransformInfo::isLegalToVectorizeLoad(LoadInst *LI) const {
  return TTIImpl->isLegalToVectorizeLoad(LI);
}

bool TargetTransformInfo::isLegalToVectorizeStore(StoreInst *SI) const {
  return TTIImpl->isLegalToVectorizeStore(SI);
}

bool TargetTransformInfo::isLegalToVectorizeLoadChain(
    unsigned ChainSizeInBytes, unsigned Alignment, unsigned AddrSpace) const {
  return TTIImpl->isLegalToVectorizeLoadChain(ChainSizeInBytes, Alignment,
                                              AddrSpace);
}

bool TargetTransformInfo::isLegalToVectorizeStoreChain(
    unsigned ChainSizeInBytes, unsigned Alignment, unsigned AddrSpace) const {
  return TTIImpl->isLegalToVectorizeStoreChain(ChainSizeInBytes, Alignment,
                                               AddrSpace);
}

unsigned TargetTransformInfo::getLoadVectorFactor(unsigned VF,
                                                  unsigned LoadSize,
                                                  unsigned ChainSizeInBytes,
                                                  VectorType *VecTy) const {
  return TTIImpl->getLoadVectorFactor(VF, LoadSize, ChainSizeInBytes, VecTy);
}

unsigned TargetTransformInfo::getStoreVectorFactor(unsigned VF,
                                                   unsigned StoreSize,
                                                   unsigned ChainSizeInBytes,
                                                   VectorType *VecTy) const {
  return TTIImpl->getStoreVectorFactor(VF, StoreSize, ChainSizeInBytes, VecTy);
}

bool TargetTransformInfo::useReductionIntrinsic(unsigned Opcode,
                                                Type *Ty, ReductionFlags Flags) const {
  return TTIImpl->useReductionIntrinsic(Opcode, Ty, Flags);
}

bool TargetTransformInfo::shouldExpandReduction(const IntrinsicInst *II) const {
  return TTIImpl->shouldExpandReduction(II);
}

int TargetTransformInfo::getInstructionLatency(const Instruction *I) const {
  return TTIImpl->getInstructionLatency(I);
}

static bool matchPairwiseShuffleMask(ShuffleVectorInst *SI, bool IsLeft,
                                     unsigned Level) {
  // We don't need a shuffle if we just want to have element 0 in position 0 of
  // the vector.
  if (!SI && Level == 0 && IsLeft)
    return true;
  else if (!SI)
    return false;

  SmallVector<int, 32> Mask(SI->getType()->getVectorNumElements(), -1);

  // Build a mask of 0, 2, ... (left) or 1, 3, ... (right) depending on whether
  // we look at the left or right side.
  for (unsigned i = 0, e = (1 << Level), val = !IsLeft; i != e; ++i, val += 2)
    Mask[i] = val;

  SmallVector<int, 16> ActualMask = SI->getShuffleMask();
  return Mask == ActualMask;
}
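// For example, matchPairwiseShuffleMask on a <4 x float> shuffle expects the
// mask <0, 2, undef, undef> on the left side and <1, 3, undef, undef> on the
// right side at Level 1, and <0, undef, undef, undef>/<1, undef, undef, undef>
// at Level 0.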

namespace {
/// Kind of the reduction data.
enum ReductionKind {
  RK_None,           /// Not a reduction.
  RK_Arithmetic,     /// Binary reduction data.
  RK_MinMax,         /// Min/max reduction data.
  RK_UnsignedMinMax, /// Unsigned min/max reduction data.
};
/// Contains opcode + LHS/RHS parts of the reduction operations.
struct ReductionData {
  ReductionData() = delete;
  ReductionData(ReductionKind Kind, unsigned Opcode, Value *LHS, Value *RHS)
      : Opcode(Opcode), LHS(LHS), RHS(RHS), Kind(Kind) {
    assert(Kind != RK_None && "expected binary or min/max reduction only.");
  }
  unsigned Opcode = 0;
  Value *LHS = nullptr;
  Value *RHS = nullptr;
  ReductionKind Kind = RK_None;
  bool hasSameData(ReductionData &RD) const {
    return Kind == RD.Kind && Opcode == RD.Opcode;
  }
};
} // namespace

static Optional<ReductionData> getReductionData(Instruction *I) {
  Value *L, *R;
  if (m_BinOp(m_Value(L), m_Value(R)).match(I))
    return ReductionData(RK_Arithmetic, I->getOpcode(), L, R);
  if (auto *SI = dyn_cast<SelectInst>(I)) {
    if (m_SMin(m_Value(L), m_Value(R)).match(SI) ||
        m_SMax(m_Value(L), m_Value(R)).match(SI) ||
        m_OrdFMin(m_Value(L), m_Value(R)).match(SI) ||
        m_OrdFMax(m_Value(L), m_Value(R)).match(SI) ||
        m_UnordFMin(m_Value(L), m_Value(R)).match(SI) ||
        m_UnordFMax(m_Value(L), m_Value(R)).match(SI)) {
      auto *CI = cast<CmpInst>(SI->getCondition());
      return ReductionData(RK_MinMax, CI->getOpcode(), L, R);
    }
    if (m_UMin(m_Value(L), m_Value(R)).match(SI) ||
        m_UMax(m_Value(L), m_Value(R)).match(SI)) {
      auto *CI = cast<CmpInst>(SI->getCondition());
      return ReductionData(RK_UnsignedMinMax, CI->getOpcode(), L, R);
    }
  }
  return llvm::None;
}
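// For example, getReductionData recognizes the following IR as a signed
// min/max step and returns ReductionData(RK_MinMax, Instruction::ICmp, %a, %b):
//   %c = icmp slt <4 x i32> %a, %b
//   %m = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %b
// The unsigned forms (m_UMin/m_UMax) are classified as RK_UnsignedMinMax.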

static ReductionKind matchPairwiseReductionAtLevel(Instruction *I,
                                                   unsigned Level,
                                                   unsigned NumLevels) {
  // Match one level of pairwise operations.
  // %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef,
  //       <4 x i32> <i32 0, i32 2 , i32 undef, i32 undef>
  // %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef,
  //       <4 x i32> <i32 1, i32 3, i32 undef, i32 undef>
  // %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1
  if (!I)
    return RK_None;

  assert(I->getType()->isVectorTy() && "Expecting a vector type");

  Optional<ReductionData> RD = getReductionData(I);
  if (!RD)
    return RK_None;

  ShuffleVectorInst *LS = dyn_cast<ShuffleVectorInst>(I->getOperand(0));
  if (!LS && Level)
    return RK_None;
  ShuffleVectorInst *RS = dyn_cast<ShuffleVectorInst>(I->getOperand(1));
  if (!RS && Level)
    return RK_None;

  // On level 0 we can omit one shufflevector instruction.
  if (!Level && !RS && !LS)
    return RK_None;

  // Shuffle inputs must match.
  Value *NextLevelOpL = LS ? LS->getOperand(0) : nullptr;
  Value *NextLevelOpR = RS ? RS->getOperand(0) : nullptr;
  Value *NextLevelOp = nullptr;
  if (NextLevelOpR && NextLevelOpL) {
    // If we have two shuffles their operands must match.
    if (NextLevelOpL != NextLevelOpR)
      return RK_None;

    NextLevelOp = NextLevelOpL;
  } else if (Level == 0 && (NextLevelOpR || NextLevelOpL)) {
    // On the first level we can omit the shufflevector <0, undef,...>. So the
    // input to the other shufflevector <1, undef> must match with one of the
    // inputs to the current binary operation.
    // Example:
    //  %NextLevelOpL = shufflevector %R, <1, undef ...>
    //  %BinOp        = fadd          %NextLevelOpL, %R
    if (NextLevelOpL && NextLevelOpL != RD->RHS)
      return RK_None;
    else if (NextLevelOpR && NextLevelOpR != RD->LHS)
      return RK_None;

    NextLevelOp = NextLevelOpL ? RD->RHS : RD->LHS;
  } else
    return RK_None;

  // Check that the next level's binary operation exists and matches with the
  // current one.
  if (Level + 1 != NumLevels) {
    Optional<ReductionData> NextLevelRD =
        getReductionData(cast<Instruction>(NextLevelOp));
    if (!NextLevelRD || !RD->hasSameData(*NextLevelRD))
      return RK_None;
  }

  // Shuffle mask for pairwise operation must match.
  if (matchPairwiseShuffleMask(LS, /*IsLeft=*/true, Level)) {
    if (!matchPairwiseShuffleMask(RS, /*IsLeft=*/false, Level))
      return RK_None;
  } else if (matchPairwiseShuffleMask(RS, /*IsLeft=*/true, Level)) {
    if (!matchPairwiseShuffleMask(LS, /*IsLeft=*/false, Level))
      return RK_None;
  } else {
    return RK_None;
  }

  if (++Level == NumLevels)
    return RD->Kind;

  // Match next level.
  return matchPairwiseReductionAtLevel(cast<Instruction>(NextLevelOp), Level,
                                       NumLevels);
}
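// For a <4 x float> reduction, matchPairwiseReduction below invokes this with
// NumLevels == Log2_32(4) == 2: Level 0 matches the
// <0, undef, ...>/<1, undef, ...> shuffle pair feeding the final binary
// operation, and the recursive call at Level 1 matches the
// <0, 2, undef, undef>/<1, 3, undef, undef> pair shown in the full example in
// matchPairwiseReduction.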

static ReductionKind matchPairwiseReduction(const ExtractElementInst *ReduxRoot,
                                            unsigned &Opcode, Type *&Ty) {
  if (!EnableReduxCost)
    return RK_None;

  // Need to extract the first element.
  ConstantInt *CI = dyn_cast<ConstantInt>(ReduxRoot->getOperand(1));
  unsigned Idx = ~0u;
  if (CI)
    Idx = CI->getZExtValue();
  if (Idx != 0)
    return RK_None;

  auto *RdxStart = dyn_cast<Instruction>(ReduxRoot->getOperand(0));
  if (!RdxStart)
    return RK_None;
  Optional<ReductionData> RD = getReductionData(RdxStart);
  if (!RD)
    return RK_None;

  Type *VecTy = RdxStart->getType();
  unsigned NumVecElems = VecTy->getVectorNumElements();
  if (!isPowerOf2_32(NumVecElems))
    return RK_None;

  // We look for a sequence of shuffle,shuffle,add triples like the following
  // that builds a pairwise reduction tree.
  //
  //  (X0, X1, X2, X3)
  //   (X0 + X1, X2 + X3, undef, undef)
  //    ((X0 + X1) + (X2 + X3), undef, undef, undef)
  //
  // %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef,
  //       <4 x i32> <i32 0, i32 2 , i32 undef, i32 undef>
  // %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef,
  //       <4 x i32> <i32 1, i32 3, i32 undef, i32 undef>
  // %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1
  // %rdx.shuf.1.0 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef,
  //       <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
  // %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef,
  //       <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  // %bin.rdx8 = fadd <4 x float> %rdx.shuf.1.0, %rdx.shuf.1.1
  // %r = extractelement <4 x float> %bin.rdx8, i32 0
  if (matchPairwiseReductionAtLevel(RdxStart, 0, Log2_32(NumVecElems)) ==
      RK_None)
    return RK_None;

  Opcode = RD->Opcode;
  Ty = VecTy;

  return RD->Kind;
}

static std::pair<Value *, ShuffleVectorInst *>
getShuffleAndOtherOprd(Value *L, Value *R) {
  ShuffleVectorInst *S = nullptr;

  if ((S = dyn_cast<ShuffleVectorInst>(L)))
    return std::make_pair(R, S);

  S = dyn_cast<ShuffleVectorInst>(R);
  return std::make_pair(L, S);
}

static ReductionKind
matchVectorSplittingReduction(const ExtractElementInst *ReduxRoot,
                              unsigned &Opcode, Type *&Ty) {
  if (!EnableReduxCost)
    return RK_None;

  // Need to extract the first element.
  ConstantInt *CI = dyn_cast<ConstantInt>(ReduxRoot->getOperand(1));
  unsigned Idx = ~0u;
  if (CI)
    Idx = CI->getZExtValue();
  if (Idx != 0)
    return RK_None;

  auto *RdxStart = dyn_cast<Instruction>(ReduxRoot->getOperand(0));
  if (!RdxStart)
    return RK_None;
  Optional<ReductionData> RD = getReductionData(RdxStart);
  if (!RD)
    return RK_None;

  Type *VecTy = ReduxRoot->getOperand(0)->getType();
  unsigned NumVecElems = VecTy->getVectorNumElements();
  if (!isPowerOf2_32(NumVecElems))
    return RK_None;

  // We look for a sequence of shuffles and adds like the following, matching
  // one fadd/shufflevector pair at a time.
  //
  // %rdx.shuf = shufflevector <4 x float> %rdx, <4 x float> undef,
  //                           <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
  // %bin.rdx = fadd <4 x float> %rdx, %rdx.shuf
  // %rdx.shuf7 = shufflevector <4 x float> %bin.rdx, <4 x float> undef,
  //                          <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  // %bin.rdx8 = fadd <4 x float> %bin.rdx, %rdx.shuf7
  // %r = extractelement <4 x float> %bin.rdx8, i32 0

  unsigned MaskStart = 1;
  Instruction *RdxOp = RdxStart;
  SmallVector<int, 32> ShuffleMask(NumVecElems, 0);
  unsigned NumVecElemsRemain = NumVecElems;
  while (NumVecElemsRemain - 1) {
    // Check for the right reduction operation.
    if (!RdxOp)
      return RK_None;
    Optional<ReductionData> RDLevel = getReductionData(RdxOp);
    if (!RDLevel || !RDLevel->hasSameData(*RD))
      return RK_None;

    Value *NextRdxOp;
    ShuffleVectorInst *Shuffle;
    std::tie(NextRdxOp, Shuffle) =
        getShuffleAndOtherOprd(RDLevel->LHS, RDLevel->RHS);

    // Check that the current reduction operation and the shuffle use the same
    // value.
    if (Shuffle == nullptr)
      return RK_None;
    if (Shuffle->getOperand(0) != NextRdxOp)
      return RK_None;

    // Check that the shuffle mask matches.
    for (unsigned j = 0; j != MaskStart; ++j)
      ShuffleMask[j] = MaskStart + j;
    // Fill the rest of the mask with -1 for undef.
    std::fill(&ShuffleMask[MaskStart], ShuffleMask.end(), -1);

    SmallVector<int, 16> Mask = Shuffle->getShuffleMask();
    if (ShuffleMask != Mask)
      return RK_None;

    RdxOp = dyn_cast<Instruction>(NextRdxOp);
    NumVecElemsRemain /= 2;
    MaskStart *= 2;
  }
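  // Walking the example above from the root: the first iteration
  // (MaskStart == 1) expects the mask <1, undef, undef, undef> (%rdx.shuf7),
  // the second (MaskStart == 2) expects <2, 3, undef, undef> (%rdx.shuf), and
  // after log2(NumVecElems) iterations a single element remains.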

  Opcode = RD->Opcode;
  Ty = VecTy;
  return RD->Kind;
}

int TargetTransformInfo::getInstructionThroughput(const Instruction *I) const {
  switch (I->getOpcode()) {
  case Instruction::GetElementPtr:
    return getUserCost(I);

  case Instruction::Ret:
  case Instruction::PHI:
  case Instruction::Br: {
    return getCFInstrCost(I->getOpcode());
  }
  case Instruction::Add:
  case Instruction::FAdd:
  case Instruction::Sub:
  case Instruction::FSub:
  case Instruction::Mul:
  case Instruction::FMul:
  case Instruction::UDiv:
  case Instruction::SDiv:
  case Instruction::FDiv:
  case Instruction::URem:
  case Instruction::SRem:
  case Instruction::FRem:
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor: {
    TargetTransformInfo::OperandValueKind Op1VK, Op2VK;
    TargetTransformInfo::OperandValueProperties Op1VP, Op2VP;
    Op1VK = getOperandInfo(I->getOperand(0), Op1VP);
    Op2VK = getOperandInfo(I->getOperand(1), Op2VP);
    SmallVector<const Value *, 2> Operands(I->operand_values());
    return getArithmeticInstrCost(I->getOpcode(), I->getType(), Op1VK, Op2VK,
                                  Op1VP, Op2VP, Operands);
  }
  case Instruction::Select: {
    const SelectInst *SI = cast<SelectInst>(I);
    Type *CondTy = SI->getCondition()->getType();
    return getCmpSelInstrCost(I->getOpcode(), I->getType(), CondTy, I);
  }
  case Instruction::ICmp:
  case Instruction::FCmp: {
    Type *ValTy = I->getOperand(0)->getType();
    return getCmpSelInstrCost(I->getOpcode(), ValTy, I->getType(), I);
  }
  case Instruction::Store: {
    const StoreInst *SI = cast<StoreInst>(I);
    Type *ValTy = SI->getValueOperand()->getType();
    return getMemoryOpCost(I->getOpcode(), ValTy,
                           SI->getAlignment(),
                           SI->getPointerAddressSpace(), I);
  }
  case Instruction::Load: {
    const LoadInst *LI = cast<LoadInst>(I);
    return getMemoryOpCost(I->getOpcode(), I->getType(),
                           LI->getAlignment(),
                           LI->getPointerAddressSpace(), I);
  }
  case Instruction::ZExt:
  case Instruction::SExt:
  case Instruction::FPToUI:
  case Instruction::FPToSI:
  case Instruction::FPExt:
  case Instruction::PtrToInt:
  case Instruction::IntToPtr:
  case Instruction::SIToFP:
  case Instruction::UIToFP:
  case Instruction::Trunc:
  case Instruction::FPTrunc:
  case Instruction::BitCast:
  case Instruction::AddrSpaceCast: {
    Type *SrcTy = I->getOperand(0)->getType();
    return getCastInstrCost(I->getOpcode(), I->getType(), SrcTy, I);
  }
  case Instruction::ExtractElement: {
    const ExtractElementInst *EEI = cast<ExtractElementInst>(I);
    ConstantInt *CI = dyn_cast<ConstantInt>(EEI->getOperand(1));
    unsigned Idx = -1;
    if (CI)
      Idx = CI->getZExtValue();

    // Try to match a reduction sequence (series of shufflevector and vector
    // adds followed by an extractelement).
    unsigned ReduxOpCode;
    Type *ReduxType;

    switch (matchVectorSplittingReduction(EEI, ReduxOpCode, ReduxType)) {
    case RK_Arithmetic:
      return getArithmeticReductionCost(ReduxOpCode, ReduxType,
                                        /*IsPairwiseForm=*/false);
    case RK_MinMax:
      return getMinMaxReductionCost(
          ReduxType, CmpInst::makeCmpResultType(ReduxType),
          /*IsPairwiseForm=*/false, /*IsUnsigned=*/false);
    case RK_UnsignedMinMax:
      return getMinMaxReductionCost(
          ReduxType, CmpInst::makeCmpResultType(ReduxType),
          /*IsPairwiseForm=*/false, /*IsUnsigned=*/true);
    case RK_None:
      break;
    }

    switch (matchPairwiseReduction(EEI, ReduxOpCode, ReduxType)) {
    case RK_Arithmetic:
      return getArithmeticReductionCost(ReduxOpCode, ReduxType,
                                        /*IsPairwiseForm=*/true);
    case RK_MinMax:
      return getMinMaxReductionCost(
          ReduxType, CmpInst::makeCmpResultType(ReduxType),
          /*IsPairwiseForm=*/true, /*IsUnsigned=*/false);
    case RK_UnsignedMinMax:
      return getMinMaxReductionCost(
          ReduxType, CmpInst::makeCmpResultType(ReduxType),
          /*IsPairwiseForm=*/true, /*IsUnsigned=*/true);
    case RK_None:
      break;
    }

    return getVectorInstrCost(I->getOpcode(),
                              EEI->getOperand(0)->getType(), Idx);
  }
  case Instruction::InsertElement: {
    const InsertElementInst *IE = cast<InsertElementInst>(I);
    ConstantInt *CI = dyn_cast<ConstantInt>(IE->getOperand(2));
    unsigned Idx = -1;
    if (CI)
      Idx = CI->getZExtValue();
    return getVectorInstrCost(I->getOpcode(),
                              IE->getType(), Idx);
  }
  case Instruction::ShuffleVector: {
    const ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(I);
    Type *Ty = Shuffle->getType();
    Type *SrcTy = Shuffle->getOperand(0)->getType();

    // TODO: Identify and add costs for insert subvector, etc.
    int SubIndex;
    if (Shuffle->isExtractSubvectorMask(SubIndex))
      return TTIImpl->getShuffleCost(SK_ExtractSubvector, SrcTy, SubIndex, Ty);

    if (Shuffle->changesLength())
      return -1;

    if (Shuffle->isIdentity())
      return 0;

    if (Shuffle->isReverse())
      return TTIImpl->getShuffleCost(SK_Reverse, Ty, 0, nullptr);

    if (Shuffle->isSelect())
      return TTIImpl->getShuffleCost(SK_Select, Ty, 0, nullptr);

    if (Shuffle->isTranspose())
      return TTIImpl->getShuffleCost(SK_Transpose, Ty, 0, nullptr);

    if (Shuffle->isZeroEltSplat())
      return TTIImpl->getShuffleCost(SK_Broadcast, Ty, 0, nullptr);

    if (Shuffle->isSingleSource())
      return TTIImpl->getShuffleCost(SK_PermuteSingleSrc, Ty, 0, nullptr);

    return TTIImpl->getShuffleCost(SK_PermuteTwoSrc, Ty, 0, nullptr);
  }
  case Instruction::Call:
    if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
      SmallVector<Value *, 4> Args(II->arg_operands());

      FastMathFlags FMF;
      if (auto *FPMO = dyn_cast<FPMathOperator>(II))
        FMF = FPMO->getFastMathFlags();

      return getIntrinsicInstrCost(II->getIntrinsicID(), II->getType(),
                                   Args, FMF);
    }
    return -1;
  default:
    // We don't have any information on this instruction.
    return -1;
  }
}

TargetTransformInfo::Concept::~Concept() {}

TargetIRAnalysis::TargetIRAnalysis() : TTICallback(&getDefaultTTI) {}

TargetIRAnalysis::TargetIRAnalysis(
    std::function<Result(const Function &)> TTICallback)
    : TTICallback(std::move(TTICallback)) {}

TargetIRAnalysis::Result TargetIRAnalysis::run(const Function &F,
                                               FunctionAnalysisManager &) {
  return TTICallback(F);
}

AnalysisKey TargetIRAnalysis::Key;

TargetIRAnalysis::Result TargetIRAnalysis::getDefaultTTI(const Function &F) {
  return Result(F.getParent()->getDataLayout());
}
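// Under the new pass manager, a transform would typically obtain the result
// computed by this analysis from its analysis manager, e.g. (a sketch,
// assuming a FunctionAnalysisManager `AM` is in scope):
//   TargetTransformInfo &TTI = AM.getResult<TargetIRAnalysis>(F);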

// Register the basic pass.
INITIALIZE_PASS(TargetTransformInfoWrapperPass, "tti",
                "Target Transform Information", false, true)
char TargetTransformInfoWrapperPass::ID = 0;

void TargetTransformInfoWrapperPass::anchor() {}

TargetTransformInfoWrapperPass::TargetTransformInfoWrapperPass()
    : ImmutablePass(ID) {
  initializeTargetTransformInfoWrapperPassPass(
      *PassRegistry::getPassRegistry());
}

TargetTransformInfoWrapperPass::TargetTransformInfoWrapperPass(
    TargetIRAnalysis TIRA)
    : ImmutablePass(ID), TIRA(std::move(TIRA)) {
  initializeTargetTransformInfoWrapperPassPass(
      *PassRegistry::getPassRegistry());
}

TargetTransformInfo &TargetTransformInfoWrapperPass::getTTI(const Function &F) {
  FunctionAnalysisManager DummyFAM;
  TTI = TIRA.run(F, DummyFAM);
  return *TTI;
}

ImmutablePass *
llvm::createTargetTransformInfoWrapperPass(TargetIRAnalysis TIRA) {
  return new TargetTransformInfoWrapperPass(std::move(TIRA));
}