//===- llvm/Analysis/TargetTransformInfo.cpp ------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/TargetTransformInfoImpl.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include <utility>

using namespace llvm;
using namespace PatternMatch;

#define DEBUG_TYPE "tti"

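// Hidden flag: when set, the cost model tries to recognize shuffle-based
// reduction sequences (see matchPairwiseReduction and
// matchVectorSplittingReduction below) and cost them as a single reduction.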
static cl::opt<bool> EnableReduxCost("costmodel-reduxcost", cl::init(false),
                                     cl::Hidden,
                                     cl::desc("Recognize reduction patterns."));

namespace {
/// No-op implementation of the TTI interface using the utility base
/// classes.
///
/// This is used when no target specific information is available.
struct NoTTIImpl : TargetTransformInfoImplCRTPBase<NoTTIImpl> {
  explicit NoTTIImpl(const DataLayout &DL)
      : TargetTransformInfoImplCRTPBase<NoTTIImpl>(DL) {}
};
} // namespace

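// TargetTransformInfo is a thin pimpl: construction type-erases the given
// implementation behind the Concept/Model pair, defaulting to the no-op
// NoTTIImpl above when a target provides nothing better.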
TargetTransformInfo::TargetTransformInfo(const DataLayout &DL)
    : TTIImpl(new Model<NoTTIImpl>(NoTTIImpl(DL))) {}

TargetTransformInfo::~TargetTransformInfo() {}

TargetTransformInfo::TargetTransformInfo(TargetTransformInfo &&Arg)
    : TTIImpl(std::move(Arg.TTIImpl)) {}

TargetTransformInfo &TargetTransformInfo::operator=(TargetTransformInfo &&RHS) {
  TTIImpl = std::move(RHS.TTIImpl);
  return *this;
}

int TargetTransformInfo::getOperationCost(unsigned Opcode, Type *Ty,
                                          Type *OpTy) const {
  int Cost = TTIImpl->getOperationCost(Opcode, Ty, OpTy);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getCallCost(FunctionType *FTy, int NumArgs,
                                     const User *U) const {
  int Cost = TTIImpl->getCallCost(FTy, NumArgs, U);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getCallCost(const Function *F,
                                     ArrayRef<const Value *> Arguments,
                                     const User *U) const {
  int Cost = TTIImpl->getCallCost(F, Arguments, U);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

unsigned TargetTransformInfo::getInliningThresholdMultiplier() const {
  return TTIImpl->getInliningThresholdMultiplier();
}

int TargetTransformInfo::getGEPCost(Type *PointeeType, const Value *Ptr,
                                    ArrayRef<const Value *> Operands) const {
  return TTIImpl->getGEPCost(PointeeType, Ptr, Operands);
}

int TargetTransformInfo::getExtCost(const Instruction *I,
                                    const Value *Src) const {
  return TTIImpl->getExtCost(I, Src);
}

int TargetTransformInfo::getIntrinsicCost(
    Intrinsic::ID IID, Type *RetTy, ArrayRef<const Value *> Arguments,
    const User *U) const {
  int Cost = TTIImpl->getIntrinsicCost(IID, RetTy, Arguments, U);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

unsigned
TargetTransformInfo::getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
                                                      unsigned &JTSize) const {
  return TTIImpl->getEstimatedNumberOfCaseClusters(SI, JTSize);
}

int TargetTransformInfo::getUserCost(const User *U,
                                     ArrayRef<const Value *> Operands) const {
  int Cost = TTIImpl->getUserCost(U, Operands);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

bool TargetTransformInfo::hasBranchDivergence() const {
  return TTIImpl->hasBranchDivergence();
}

bool TargetTransformInfo::isSourceOfDivergence(const Value *V) const {
  return TTIImpl->isSourceOfDivergence(V);
}

bool TargetTransformInfo::isAlwaysUniform(const Value *V) const {
  return TTIImpl->isAlwaysUniform(V);
}

unsigned TargetTransformInfo::getFlatAddressSpace() const {
  return TTIImpl->getFlatAddressSpace();
}

bool TargetTransformInfo::isLoweredToCall(const Function *F) const {
  return TTIImpl->isLoweredToCall(F);
}

void TargetTransformInfo::getUnrollingPreferences(
    Loop *L, ScalarEvolution &SE, UnrollingPreferences &UP) const {
  return TTIImpl->getUnrollingPreferences(L, SE, UP);
}

bool TargetTransformInfo::isLegalAddImmediate(int64_t Imm) const {
  return TTIImpl->isLegalAddImmediate(Imm);
}

bool TargetTransformInfo::isLegalICmpImmediate(int64_t Imm) const {
  return TTIImpl->isLegalICmpImmediate(Imm);
}

bool TargetTransformInfo::isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
                                                int64_t BaseOffset,
                                                bool HasBaseReg,
                                                int64_t Scale,
                                                unsigned AddrSpace,
                                                Instruction *I) const {
  return TTIImpl->isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg,
                                        Scale, AddrSpace, I);
}

bool TargetTransformInfo::isLSRCostLess(LSRCost &C1, LSRCost &C2) const {
  return TTIImpl->isLSRCostLess(C1, C2);
}

bool TargetTransformInfo::canMacroFuseCmp() const {
  return TTIImpl->canMacroFuseCmp();
}

bool TargetTransformInfo::shouldFavorPostInc() const {
  return TTIImpl->shouldFavorPostInc();
}

bool TargetTransformInfo::shouldFavorBackedgeIndex(const Loop *L) const {
  return TTIImpl->shouldFavorBackedgeIndex(L);
}

bool TargetTransformInfo::isLegalMaskedStore(Type *DataType) const {
  return TTIImpl->isLegalMaskedStore(DataType);
}

bool TargetTransformInfo::isLegalMaskedLoad(Type *DataType) const {
  return TTIImpl->isLegalMaskedLoad(DataType);
}

bool TargetTransformInfo::isLegalMaskedGather(Type *DataType) const {
  return TTIImpl->isLegalMaskedGather(DataType);
}

bool TargetTransformInfo::isLegalMaskedScatter(Type *DataType) const {
  return TTIImpl->isLegalMaskedScatter(DataType);
}

bool TargetTransformInfo::isLegalMaskedCompressStore(Type *DataType) const {
  return TTIImpl->isLegalMaskedCompressStore(DataType);
}

bool TargetTransformInfo::isLegalMaskedExpandLoad(Type *DataType) const {
  return TTIImpl->isLegalMaskedExpandLoad(DataType);
}

bool TargetTransformInfo::hasDivRemOp(Type *DataType, bool IsSigned) const {
  return TTIImpl->hasDivRemOp(DataType, IsSigned);
}

bool TargetTransformInfo::hasVolatileVariant(Instruction *I,
                                             unsigned AddrSpace) const {
  return TTIImpl->hasVolatileVariant(I, AddrSpace);
}

bool TargetTransformInfo::prefersVectorizedAddressing() const {
  return TTIImpl->prefersVectorizedAddressing();
}

int TargetTransformInfo::getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
                                              int64_t BaseOffset,
                                              bool HasBaseReg,
                                              int64_t Scale,
                                              unsigned AddrSpace) const {
  int Cost = TTIImpl->getScalingFactorCost(Ty, BaseGV, BaseOffset, HasBaseReg,
                                           Scale, AddrSpace);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

bool TargetTransformInfo::LSRWithInstrQueries() const {
  return TTIImpl->LSRWithInstrQueries();
}

bool TargetTransformInfo::isTruncateFree(Type *Ty1, Type *Ty2) const {
  return TTIImpl->isTruncateFree(Ty1, Ty2);
}

bool TargetTransformInfo::isProfitableToHoist(Instruction *I) const {
  return TTIImpl->isProfitableToHoist(I);
}

bool TargetTransformInfo::useAA() const { return TTIImpl->useAA(); }

bool TargetTransformInfo::isTypeLegal(Type *Ty) const {
  return TTIImpl->isTypeLegal(Ty);
}

unsigned TargetTransformInfo::getJumpBufAlignment() const {
  return TTIImpl->getJumpBufAlignment();
}

unsigned TargetTransformInfo::getJumpBufSize() const {
  return TTIImpl->getJumpBufSize();
}

bool TargetTransformInfo::shouldBuildLookupTables() const {
  return TTIImpl->shouldBuildLookupTables();
}
bool TargetTransformInfo::shouldBuildLookupTablesForConstant(Constant *C) const {
  return TTIImpl->shouldBuildLookupTablesForConstant(C);
}

bool TargetTransformInfo::useColdCCForColdCall(Function &F) const {
  return TTIImpl->useColdCCForColdCall(F);
}

unsigned TargetTransformInfo::
getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const {
  return TTIImpl->getScalarizationOverhead(Ty, Insert, Extract);
}

unsigned TargetTransformInfo::
getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
                                 unsigned VF) const {
  return TTIImpl->getOperandsScalarizationOverhead(Args, VF);
}

bool TargetTransformInfo::supportsEfficientVectorElementLoadStore() const {
  return TTIImpl->supportsEfficientVectorElementLoadStore();
}

bool TargetTransformInfo::enableAggressiveInterleaving(bool LoopHasReductions) const {
  return TTIImpl->enableAggressiveInterleaving(LoopHasReductions);
}

const TargetTransformInfo::MemCmpExpansionOptions *
TargetTransformInfo::enableMemCmpExpansion(bool IsZeroCmp) const {
  return TTIImpl->enableMemCmpExpansion(IsZeroCmp);
}

bool TargetTransformInfo::enableInterleavedAccessVectorization() const {
  return TTIImpl->enableInterleavedAccessVectorization();
}

bool TargetTransformInfo::enableMaskedInterleavedAccessVectorization() const {
  return TTIImpl->enableMaskedInterleavedAccessVectorization();
}

bool TargetTransformInfo::isFPVectorizationPotentiallyUnsafe() const {
  return TTIImpl->isFPVectorizationPotentiallyUnsafe();
}

bool TargetTransformInfo::allowsMisalignedMemoryAccesses(LLVMContext &Context,
                                                         unsigned BitWidth,
                                                         unsigned AddressSpace,
                                                         unsigned Alignment,
                                                         bool *Fast) const {
  return TTIImpl->allowsMisalignedMemoryAccesses(Context, BitWidth, AddressSpace,
                                                 Alignment, Fast);
}

TargetTransformInfo::PopcntSupportKind
TargetTransformInfo::getPopcntSupport(unsigned IntTyWidthInBit) const {
  return TTIImpl->getPopcntSupport(IntTyWidthInBit);
}

bool TargetTransformInfo::haveFastSqrt(Type *Ty) const {
  return TTIImpl->haveFastSqrt(Ty);
}

bool TargetTransformInfo::isFCmpOrdCheaperThanFCmpZero(Type *Ty) const {
  return TTIImpl->isFCmpOrdCheaperThanFCmpZero(Ty);
}

int TargetTransformInfo::getFPOpCost(Type *Ty) const {
  int Cost = TTIImpl->getFPOpCost(Ty);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx,
                                               const APInt &Imm,
                                               Type *Ty) const {
  int Cost = TTIImpl->getIntImmCodeSizeCost(Opcode, Idx, Imm, Ty);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getIntImmCost(const APInt &Imm, Type *Ty) const {
  int Cost = TTIImpl->getIntImmCost(Imm, Ty);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getIntImmCost(unsigned Opcode, unsigned Idx,
                                       const APInt &Imm, Type *Ty) const {
  int Cost = TTIImpl->getIntImmCost(Opcode, Idx, Imm, Ty);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
                                       const APInt &Imm, Type *Ty) const {
  int Cost = TTIImpl->getIntImmCost(IID, Idx, Imm, Ty);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

unsigned TargetTransformInfo::getNumberOfRegisters(bool Vector) const {
  return TTIImpl->getNumberOfRegisters(Vector);
}

unsigned TargetTransformInfo::getRegisterBitWidth(bool Vector) const {
  return TTIImpl->getRegisterBitWidth(Vector);
}

unsigned TargetTransformInfo::getMinVectorRegisterBitWidth() const {
  return TTIImpl->getMinVectorRegisterBitWidth();
}

bool TargetTransformInfo::shouldMaximizeVectorBandwidth(bool OptSize) const {
  return TTIImpl->shouldMaximizeVectorBandwidth(OptSize);
}

unsigned TargetTransformInfo::getMinimumVF(unsigned ElemWidth) const {
  return TTIImpl->getMinimumVF(ElemWidth);
}

bool TargetTransformInfo::shouldConsiderAddressTypePromotion(
    const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const {
  return TTIImpl->shouldConsiderAddressTypePromotion(
      I, AllowPromotionWithoutCommonHeader);
}

unsigned TargetTransformInfo::getCacheLineSize() const {
  return TTIImpl->getCacheLineSize();
}

llvm::Optional<unsigned> TargetTransformInfo::getCacheSize(CacheLevel Level)
    const {
  return TTIImpl->getCacheSize(Level);
}

llvm::Optional<unsigned> TargetTransformInfo::getCacheAssociativity(
    CacheLevel Level) const {
  return TTIImpl->getCacheAssociativity(Level);
}

unsigned TargetTransformInfo::getPrefetchDistance() const {
  return TTIImpl->getPrefetchDistance();
}

unsigned TargetTransformInfo::getMinPrefetchStride() const {
  return TTIImpl->getMinPrefetchStride();
}

unsigned TargetTransformInfo::getMaxPrefetchIterationsAhead() const {
  return TTIImpl->getMaxPrefetchIterationsAhead();
}

unsigned TargetTransformInfo::getMaxInterleaveFactor(unsigned VF) const {
  return TTIImpl->getMaxInterleaveFactor(VF);
}

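// Collect properties of V used in cost analysis, e.g. whether it is a
// uniform or constant operand and whether its value(s) are powers of two.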
TargetTransformInfo::OperandValueKind
TargetTransformInfo::getOperandInfo(Value *V, OperandValueProperties &OpProps) {
  OperandValueKind OpInfo = OK_AnyValue;
  OpProps = OP_None;

  if (auto *CI = dyn_cast<ConstantInt>(V)) {
    if (CI->getValue().isPowerOf2())
      OpProps = OP_PowerOf2;
    return OK_UniformConstantValue;
  }

  // A broadcast shuffle creates a uniform value.
  // TODO: Add support for non-zero index broadcasts.
  // TODO: Add support for different source vector width.
  if (auto *ShuffleInst = dyn_cast<ShuffleVectorInst>(V))
    if (ShuffleInst->isZeroEltSplat())
      OpInfo = OK_UniformValue;

  const Value *Splat = getSplatValue(V);

  // Check for a splat of a constant or for a non uniform vector of constants
  // and check if the constant(s) are all powers of two.
  if (isa<ConstantVector>(V) || isa<ConstantDataVector>(V)) {
    OpInfo = OK_NonUniformConstantValue;
    if (Splat) {
      OpInfo = OK_UniformConstantValue;
      if (auto *CI = dyn_cast<ConstantInt>(Splat))
        if (CI->getValue().isPowerOf2())
          OpProps = OP_PowerOf2;
    } else if (auto *CDS = dyn_cast<ConstantDataSequential>(V)) {
      OpProps = OP_PowerOf2;
      for (unsigned I = 0, E = CDS->getNumElements(); I != E; ++I) {
        if (auto *CI = dyn_cast<ConstantInt>(CDS->getElementAsConstant(I)))
          if (CI->getValue().isPowerOf2())
            continue;
        OpProps = OP_None;
        break;
      }
    }
  }

  // Check for a splat of a uniform value. This is not loop aware, so return
  // true only for the obviously uniform cases (argument, globalvalue)
  if (Splat && (isa<Argument>(Splat) || isa<GlobalValue>(Splat)))
    OpInfo = OK_UniformValue;

  return OpInfo;
}

int TargetTransformInfo::getArithmeticInstrCost(
    unsigned Opcode, Type *Ty, OperandValueKind Opd1Info,
    OperandValueKind Opd2Info, OperandValueProperties Opd1PropInfo,
    OperandValueProperties Opd2PropInfo,
    ArrayRef<const Value *> Args) const {
  int Cost = TTIImpl->getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
                                             Opd1PropInfo, Opd2PropInfo, Args);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getShuffleCost(ShuffleKind Kind, Type *Ty, int Index,
                                        Type *SubTp) const {
  int Cost = TTIImpl->getShuffleCost(Kind, Ty, Index, SubTp);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getCastInstrCost(unsigned Opcode, Type *Dst,
                                          Type *Src, const Instruction *I) const {
  assert((I == nullptr || I->getOpcode() == Opcode) &&
         "Opcode should reflect passed instruction.");
  int Cost = TTIImpl->getCastInstrCost(Opcode, Dst, Src, I);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getExtractWithExtendCost(unsigned Opcode, Type *Dst,
                                                  VectorType *VecTy,
                                                  unsigned Index) const {
  int Cost = TTIImpl->getExtractWithExtendCost(Opcode, Dst, VecTy, Index);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getCFInstrCost(unsigned Opcode) const {
  int Cost = TTIImpl->getCFInstrCost(Opcode);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
                                            Type *CondTy, const Instruction *I) const {
  assert((I == nullptr || I->getOpcode() == Opcode) &&
         "Opcode should reflect passed instruction.");
  int Cost = TTIImpl->getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getVectorInstrCost(unsigned Opcode, Type *Val,
                                            unsigned Index) const {
  int Cost = TTIImpl->getVectorInstrCost(Opcode, Val, Index);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getMemoryOpCost(unsigned Opcode, Type *Src,
                                         unsigned Alignment,
                                         unsigned AddressSpace,
                                         const Instruction *I) const {
  assert((I == nullptr || I->getOpcode() == Opcode) &&
         "Opcode should reflect passed instruction.");
  int Cost = TTIImpl->getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, I);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
                                               unsigned Alignment,
                                               unsigned AddressSpace) const {
  int Cost =
      TTIImpl->getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
                                                Value *Ptr, bool VariableMask,
                                                unsigned Alignment) const {
  int Cost = TTIImpl->getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
                                             Alignment);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getInterleavedMemoryOpCost(
    unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
    unsigned Alignment, unsigned AddressSpace, bool UseMaskForCond,
    bool UseMaskForGaps) const {
  int Cost = TTIImpl->getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
                                                 Alignment, AddressSpace,
                                                 UseMaskForCond,
                                                 UseMaskForGaps);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getIntrinsicInstrCost(
    Intrinsic::ID ID, Type *RetTy, ArrayRef<Type *> Tys, FastMathFlags FMF,
    unsigned ScalarizationCostPassed) const {
  int Cost = TTIImpl->getIntrinsicInstrCost(ID, RetTy, Tys, FMF,
                                            ScalarizationCostPassed);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
    ArrayRef<Value *> Args, FastMathFlags FMF, unsigned VF) const {
  int Cost = TTIImpl->getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getCallInstrCost(Function *F, Type *RetTy,
                                          ArrayRef<Type *> Tys) const {
  int Cost = TTIImpl->getCallInstrCost(F, RetTy, Tys);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

unsigned TargetTransformInfo::getNumberOfParts(Type *Tp) const {
  return TTIImpl->getNumberOfParts(Tp);
}

int TargetTransformInfo::getAddressComputationCost(Type *Tp,
                                                   ScalarEvolution *SE,
                                                   const SCEV *Ptr) const {
  int Cost = TTIImpl->getAddressComputationCost(Tp, SE, Ptr);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getMemcpyCost(const Instruction *I) const {
  int Cost = TTIImpl->getMemcpyCost(I);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getArithmeticReductionCost(unsigned Opcode, Type *Ty,
                                                    bool IsPairwiseForm) const {
  int Cost = TTIImpl->getArithmeticReductionCost(Opcode, Ty, IsPairwiseForm);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getMinMaxReductionCost(Type *Ty, Type *CondTy,
                                                bool IsPairwiseForm,
                                                bool IsUnsigned) const {
  int Cost =
      TTIImpl->getMinMaxReductionCost(Ty, CondTy, IsPairwiseForm, IsUnsigned);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

unsigned
TargetTransformInfo::getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) const {
  return TTIImpl->getCostOfKeepingLiveOverCall(Tys);
}

bool TargetTransformInfo::getTgtMemIntrinsic(IntrinsicInst *Inst,
                                             MemIntrinsicInfo &Info) const {
  return TTIImpl->getTgtMemIntrinsic(Inst, Info);
}

unsigned TargetTransformInfo::getAtomicMemIntrinsicMaxElementSize() const {
  return TTIImpl->getAtomicMemIntrinsicMaxElementSize();
}

Value *TargetTransformInfo::getOrCreateResultFromMemIntrinsic(
    IntrinsicInst *Inst, Type *ExpectedType) const {
  return TTIImpl->getOrCreateResultFromMemIntrinsic(Inst, ExpectedType);
}

Type *TargetTransformInfo::getMemcpyLoopLoweringType(LLVMContext &Context,
                                                     Value *Length,
                                                     unsigned SrcAlign,
                                                     unsigned DestAlign) const {
  return TTIImpl->getMemcpyLoopLoweringType(Context, Length, SrcAlign,
                                            DestAlign);
}

void TargetTransformInfo::getMemcpyLoopResidualLoweringType(
    SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
    unsigned RemainingBytes, unsigned SrcAlign, unsigned DestAlign) const {
  TTIImpl->getMemcpyLoopResidualLoweringType(OpsOut, Context, RemainingBytes,
                                             SrcAlign, DestAlign);
}

bool TargetTransformInfo::areInlineCompatible(const Function *Caller,
                                              const Function *Callee) const {
  return TTIImpl->areInlineCompatible(Caller, Callee);
}

bool TargetTransformInfo::areFunctionArgsABICompatible(
    const Function *Caller, const Function *Callee,
    SmallPtrSetImpl<Argument *> &Args) const {
  return TTIImpl->areFunctionArgsABICompatible(Caller, Callee, Args);
}

bool TargetTransformInfo::isIndexedLoadLegal(MemIndexedMode Mode,
                                             Type *Ty) const {
  return TTIImpl->isIndexedLoadLegal(Mode, Ty);
}

bool TargetTransformInfo::isIndexedStoreLegal(MemIndexedMode Mode,
                                              Type *Ty) const {
  return TTIImpl->isIndexedStoreLegal(Mode, Ty);
}

unsigned TargetTransformInfo::getLoadStoreVecRegBitWidth(unsigned AS) const {
  return TTIImpl->getLoadStoreVecRegBitWidth(AS);
}

bool TargetTransformInfo::isLegalToVectorizeLoad(LoadInst *LI) const {
  return TTIImpl->isLegalToVectorizeLoad(LI);
}

bool TargetTransformInfo::isLegalToVectorizeStore(StoreInst *SI) const {
  return TTIImpl->isLegalToVectorizeStore(SI);
}

bool TargetTransformInfo::isLegalToVectorizeLoadChain(
    unsigned ChainSizeInBytes, unsigned Alignment, unsigned AddrSpace) const {
  return TTIImpl->isLegalToVectorizeLoadChain(ChainSizeInBytes, Alignment,
                                              AddrSpace);
}

bool TargetTransformInfo::isLegalToVectorizeStoreChain(
    unsigned ChainSizeInBytes, unsigned Alignment, unsigned AddrSpace) const {
  return TTIImpl->isLegalToVectorizeStoreChain(ChainSizeInBytes, Alignment,
                                               AddrSpace);
}

unsigned TargetTransformInfo::getLoadVectorFactor(unsigned VF,
                                                  unsigned LoadSize,
                                                  unsigned ChainSizeInBytes,
                                                  VectorType *VecTy) const {
  return TTIImpl->getLoadVectorFactor(VF, LoadSize, ChainSizeInBytes, VecTy);
}

unsigned TargetTransformInfo::getStoreVectorFactor(unsigned VF,
                                                   unsigned StoreSize,
                                                   unsigned ChainSizeInBytes,
                                                   VectorType *VecTy) const {
  return TTIImpl->getStoreVectorFactor(VF, StoreSize, ChainSizeInBytes, VecTy);
}

bool TargetTransformInfo::useReductionIntrinsic(unsigned Opcode,
                                                Type *Ty, ReductionFlags Flags) const {
  return TTIImpl->useReductionIntrinsic(Opcode, Ty, Flags);
}

bool TargetTransformInfo::shouldExpandReduction(const IntrinsicInst *II) const {
  return TTIImpl->shouldExpandReduction(II);
}

int TargetTransformInfo::getInstructionLatency(const Instruction *I) const {
  return TTIImpl->getInstructionLatency(I);
}

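// Check that SI selects the even (IsLeft) or odd (!IsLeft) elements of its
// source at the given reduction level, e.g. at Level 1 the left mask is
// <0, 2, undef, ...> and the right mask is <1, 3, undef, ...>.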
static bool matchPairwiseShuffleMask(ShuffleVectorInst *SI, bool IsLeft,
                                     unsigned Level) {
  // We don't need a shuffle if we just want to have element 0 in position 0 of
  // the vector.
  if (!SI && Level == 0 && IsLeft)
    return true;
  else if (!SI)
    return false;

  SmallVector<int, 32> Mask(SI->getType()->getVectorNumElements(), -1);

  // Build a mask of 0, 2, ... (left) or 1, 3, ... (right) depending on whether
  // we look at the left or right side.
  for (unsigned i = 0, e = (1 << Level), val = !IsLeft; i != e; ++i, val += 2)
    Mask[i] = val;

  SmallVector<int, 16> ActualMask = SI->getShuffleMask();
  return Mask == ActualMask;
}

namespace {
/// Kind of the reduction data.
enum ReductionKind {
  RK_None,           /// Not a reduction.
  RK_Arithmetic,     /// Binary reduction data.
  RK_MinMax,         /// Min/max reduction data.
  RK_UnsignedMinMax, /// Unsigned min/max reduction data.
};
/// Contains opcode + LHS/RHS parts of the reduction operations.
struct ReductionData {
  ReductionData() = delete;
  ReductionData(ReductionKind Kind, unsigned Opcode, Value *LHS, Value *RHS)
      : Opcode(Opcode), LHS(LHS), RHS(RHS), Kind(Kind) {
    assert(Kind != RK_None && "expected binary or min/max reduction only.");
  }
  unsigned Opcode = 0;
  Value *LHS = nullptr;
  Value *RHS = nullptr;
  ReductionKind Kind = RK_None;
  bool hasSameData(ReductionData &RD) const {
    return Kind == RD.Kind && Opcode == RD.Opcode;
  }
};
} // namespace

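// Classify I as a single reduction step: a binary operator (RK_Arithmetic)
// or a compare+select min/max idiom (RK_MinMax / RK_UnsignedMinMax),
// capturing its opcode and operands.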
static Optional<ReductionData> getReductionData(Instruction *I) {
  Value *L, *R;
  if (m_BinOp(m_Value(L), m_Value(R)).match(I))
    return ReductionData(RK_Arithmetic, I->getOpcode(), L, R);
  if (auto *SI = dyn_cast<SelectInst>(I)) {
    if (m_SMin(m_Value(L), m_Value(R)).match(SI) ||
        m_SMax(m_Value(L), m_Value(R)).match(SI) ||
        m_OrdFMin(m_Value(L), m_Value(R)).match(SI) ||
        m_OrdFMax(m_Value(L), m_Value(R)).match(SI) ||
        m_UnordFMin(m_Value(L), m_Value(R)).match(SI) ||
        m_UnordFMax(m_Value(L), m_Value(R)).match(SI)) {
      auto *CI = cast<CmpInst>(SI->getCondition());
      return ReductionData(RK_MinMax, CI->getOpcode(), L, R);
    }
    if (m_UMin(m_Value(L), m_Value(R)).match(SI) ||
        m_UMax(m_Value(L), m_Value(R)).match(SI)) {
      auto *CI = cast<CmpInst>(SI->getCondition());
      return ReductionData(RK_UnsignedMinMax, CI->getOpcode(), L, R);
    }
  }
  return llvm::None;
}

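// Recursively match one level of a pairwise reduction tree: the two shuffles
// feeding the binary operation at this level, then the next level's operation
// on the common shuffle source.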
static ReductionKind matchPairwiseReductionAtLevel(Instruction *I,
                                                   unsigned Level,
                                                   unsigned NumLevels) {
  // Match one level of pairwise operations.
  // %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef,
  //       <4 x i32> <i32 0, i32 2 , i32 undef, i32 undef>
  // %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef,
  //       <4 x i32> <i32 1, i32 3, i32 undef, i32 undef>
  // %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1
  if (!I)
    return RK_None;

  assert(I->getType()->isVectorTy() && "Expecting a vector type");

  Optional<ReductionData> RD = getReductionData(I);
  if (!RD)
    return RK_None;

  ShuffleVectorInst *LS = dyn_cast<ShuffleVectorInst>(RD->LHS);
  if (!LS && Level)
    return RK_None;
  ShuffleVectorInst *RS = dyn_cast<ShuffleVectorInst>(RD->RHS);
  if (!RS && Level)
    return RK_None;

  // On level 0 we can omit one shufflevector instruction.
  if (!Level && !RS && !LS)
    return RK_None;

  // Shuffle inputs must match.
  Value *NextLevelOpL = LS ? LS->getOperand(0) : nullptr;
  Value *NextLevelOpR = RS ? RS->getOperand(0) : nullptr;
  Value *NextLevelOp = nullptr;
  if (NextLevelOpR && NextLevelOpL) {
    // If we have two shuffles their operands must match.
    if (NextLevelOpL != NextLevelOpR)
      return RK_None;

    NextLevelOp = NextLevelOpL;
  } else if (Level == 0 && (NextLevelOpR || NextLevelOpL)) {
    // On the first level we can omit the shufflevector <0, undef,...>. So the
    // input to the other shufflevector <1, undef> must match with one of the
    // inputs to the current binary operation.
    // Example:
    //  %NextLevelOpL = shufflevector %R, <1, undef ...>
    //  %BinOp        = fadd          %NextLevelOpL, %R
    if (NextLevelOpL && NextLevelOpL != RD->RHS)
      return RK_None;
    else if (NextLevelOpR && NextLevelOpR != RD->LHS)
      return RK_None;

    NextLevelOp = NextLevelOpL ? RD->RHS : RD->LHS;
  } else
    return RK_None;

  // Check that the next levels binary operation exists and matches with the
  // current one.
  if (Level + 1 != NumLevels) {
    Optional<ReductionData> NextLevelRD =
        getReductionData(cast<Instruction>(NextLevelOp));
    if (!NextLevelRD || !RD->hasSameData(*NextLevelRD))
      return RK_None;
  }

  // Shuffle mask for pairwise operation must match.
  if (matchPairwiseShuffleMask(LS, /*IsLeft=*/true, Level)) {
    if (!matchPairwiseShuffleMask(RS, /*IsLeft=*/false, Level))
      return RK_None;
  } else if (matchPairwiseShuffleMask(RS, /*IsLeft=*/true, Level)) {
    if (!matchPairwiseShuffleMask(LS, /*IsLeft=*/false, Level))
      return RK_None;
  } else {
    return RK_None;
  }

  if (++Level == NumLevels)
    return RD->Kind;

  // Match next level.
  return matchPairwiseReductionAtLevel(cast<Instruction>(NextLevelOp), Level,
                                       NumLevels);
}

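// Match a complete pairwise reduction tree rooted at an extract of lane 0,
// returning the reduction kind and, on success, the reduced opcode and vector
// type through Opcode and Ty.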
static ReductionKind matchPairwiseReduction(const ExtractElementInst *ReduxRoot,
                                            unsigned &Opcode, Type *&Ty) {
  if (!EnableReduxCost)
    return RK_None;

  // Need to extract the first element.
  ConstantInt *CI = dyn_cast<ConstantInt>(ReduxRoot->getOperand(1));
  unsigned Idx = ~0u;
  if (CI)
    Idx = CI->getZExtValue();
  if (Idx != 0)
    return RK_None;

  auto *RdxStart = dyn_cast<Instruction>(ReduxRoot->getOperand(0));
  if (!RdxStart)
    return RK_None;
  Optional<ReductionData> RD = getReductionData(RdxStart);
  if (!RD)
    return RK_None;

  Type *VecTy = RdxStart->getType();
  unsigned NumVecElems = VecTy->getVectorNumElements();
  if (!isPowerOf2_32(NumVecElems))
    return RK_None;

  // We look for a sequence of shuffle,shuffle,add triples like the following
  // that builds a pairwise reduction tree.
  //
  //  (X0, X1, X2, X3)
  //   (X0 + X1, X2 + X3, undef, undef)
  //    ((X0 + X1) + (X2 + X3), undef, undef, undef)
  //
  // %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef,
  //       <4 x i32> <i32 0, i32 2 , i32 undef, i32 undef>
  // %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef,
  //       <4 x i32> <i32 1, i32 3, i32 undef, i32 undef>
  // %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1
  // %rdx.shuf.1.0 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef,
  //       <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
  // %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef,
  //       <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  // %bin.rdx8 = fadd <4 x float> %rdx.shuf.1.0, %rdx.shuf.1.1
  // %r = extractelement <4 x float> %bin.rdx8, i32 0
  if (matchPairwiseReductionAtLevel(RdxStart, 0, Log2_32(NumVecElems)) ==
      RK_None)
    return RK_None;

  Opcode = RD->Opcode;
  Ty = VecTy;

  return RD->Kind;
}

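// Of the two operands of a reduction step, return the shufflevector (if any)
// paired with the remaining operand.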
static std::pair<Value *, ShuffleVectorInst *>
getShuffleAndOtherOprd(Value *L, Value *R) {
  ShuffleVectorInst *S = nullptr;

  if ((S = dyn_cast<ShuffleVectorInst>(L)))
    return std::make_pair(R, S);

  S = dyn_cast<ShuffleVectorInst>(R);
  return std::make_pair(L, S);
}

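// Match the splitting reduction idiom: the vector is repeatedly halved with a
// shuffle and combined with the reduction operation until one lane remains.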
static ReductionKind
matchVectorSplittingReduction(const ExtractElementInst *ReduxRoot,
                              unsigned &Opcode, Type *&Ty) {
  if (!EnableReduxCost)
    return RK_None;

  // Need to extract the first element.
  ConstantInt *CI = dyn_cast<ConstantInt>(ReduxRoot->getOperand(1));
  unsigned Idx = ~0u;
  if (CI)
    Idx = CI->getZExtValue();
  if (Idx != 0)
    return RK_None;

  auto *RdxStart = dyn_cast<Instruction>(ReduxRoot->getOperand(0));
  if (!RdxStart)
    return RK_None;
  Optional<ReductionData> RD = getReductionData(RdxStart);
  if (!RD)
    return RK_None;

  Type *VecTy = ReduxRoot->getOperand(0)->getType();
  unsigned NumVecElems = VecTy->getVectorNumElements();
  if (!isPowerOf2_32(NumVecElems))
    return RK_None;

  // We look for a sequence of shuffles and adds like the following matching one
  // fadd, shuffle vector pair at a time.
  //
  // %rdx.shuf = shufflevector <4 x float> %rdx, <4 x float> undef,
  //                           <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
  // %bin.rdx = fadd <4 x float> %rdx, %rdx.shuf
  // %rdx.shuf7 = shufflevector <4 x float> %bin.rdx, <4 x float> undef,
  //                          <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  // %bin.rdx8 = fadd <4 x float> %bin.rdx, %rdx.shuf7
  // %r = extractelement <4 x float> %bin.rdx8, i32 0

  unsigned MaskStart = 1;
  Instruction *RdxOp = RdxStart;
  SmallVector<int, 32> ShuffleMask(NumVecElems, 0);
  unsigned NumVecElemsRemain = NumVecElems;
  while (NumVecElemsRemain - 1) {
    // Check for the right reduction operation.
    if (!RdxOp)
      return RK_None;
    Optional<ReductionData> RDLevel = getReductionData(RdxOp);
    if (!RDLevel || !RDLevel->hasSameData(*RD))
      return RK_None;

    Value *NextRdxOp;
    ShuffleVectorInst *Shuffle;
    std::tie(NextRdxOp, Shuffle) =
        getShuffleAndOtherOprd(RDLevel->LHS, RDLevel->RHS);

    // Check the current reduction operation and the shuffle use the same value.
    if (Shuffle == nullptr)
      return RK_None;
    if (Shuffle->getOperand(0) != NextRdxOp)
      return RK_None;

    // Check that shuffle masks matches.
    for (unsigned j = 0; j != MaskStart; ++j)
      ShuffleMask[j] = MaskStart + j;
    // Fill the rest of the mask with -1 for undef.
    std::fill(&ShuffleMask[MaskStart], ShuffleMask.end(), -1);

    SmallVector<int, 16> Mask = Shuffle->getShuffleMask();
    if (ShuffleMask != Mask)
      return RK_None;

    RdxOp = dyn_cast<Instruction>(NextRdxOp);
    NumVecElemsRemain /= 2;
    MaskStart *= 2;
  }

  Opcode = RD->Opcode;
  Ty = VecTy;
  return RD->Kind;
}

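// Approximate the reciprocal throughput of I by dispatching to the
// specialized cost hooks above; returns -1 when nothing is known.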
int TargetTransformInfo::getInstructionThroughput(const Instruction *I) const {
  switch (I->getOpcode()) {
  case Instruction::GetElementPtr:
    return getUserCost(I);

  case Instruction::Ret:
  case Instruction::PHI:
  case Instruction::Br: {
    return getCFInstrCost(I->getOpcode());
  }
  case Instruction::Add:
  case Instruction::FAdd:
  case Instruction::Sub:
  case Instruction::FSub:
  case Instruction::Mul:
  case Instruction::FMul:
  case Instruction::UDiv:
  case Instruction::SDiv:
  case Instruction::FDiv:
  case Instruction::URem:
  case Instruction::SRem:
  case Instruction::FRem:
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor: {
    TargetTransformInfo::OperandValueKind Op1VK, Op2VK;
    TargetTransformInfo::OperandValueProperties Op1VP, Op2VP;
    Op1VK = getOperandInfo(I->getOperand(0), Op1VP);
    Op2VK = getOperandInfo(I->getOperand(1), Op2VP);
    SmallVector<const Value *, 2> Operands(I->operand_values());
    return getArithmeticInstrCost(I->getOpcode(), I->getType(), Op1VK, Op2VK,
                                  Op1VP, Op2VP, Operands);
  }
  case Instruction::Select: {
    const SelectInst *SI = cast<SelectInst>(I);
    Type *CondTy = SI->getCondition()->getType();
    return getCmpSelInstrCost(I->getOpcode(), I->getType(), CondTy, I);
  }
  case Instruction::ICmp:
  case Instruction::FCmp: {
    Type *ValTy = I->getOperand(0)->getType();
    return getCmpSelInstrCost(I->getOpcode(), ValTy, I->getType(), I);
  }
  case Instruction::Store: {
    const StoreInst *SI = cast<StoreInst>(I);
    Type *ValTy = SI->getValueOperand()->getType();
    return getMemoryOpCost(I->getOpcode(), ValTy,
                           SI->getAlignment(),
                           SI->getPointerAddressSpace(), I);
  }
  case Instruction::Load: {
    const LoadInst *LI = cast<LoadInst>(I);
    return getMemoryOpCost(I->getOpcode(), I->getType(),
                           LI->getAlignment(),
                           LI->getPointerAddressSpace(), I);
  }
  case Instruction::ZExt:
  case Instruction::SExt:
  case Instruction::FPToUI:
  case Instruction::FPToSI:
  case Instruction::FPExt:
  case Instruction::PtrToInt:
  case Instruction::IntToPtr:
  case Instruction::SIToFP:
  case Instruction::UIToFP:
  case Instruction::Trunc:
  case Instruction::FPTrunc:
  case Instruction::BitCast:
  case Instruction::AddrSpaceCast: {
    Type *SrcTy = I->getOperand(0)->getType();
    return getCastInstrCost(I->getOpcode(), I->getType(), SrcTy, I);
  }
  case Instruction::ExtractElement: {
    const ExtractElementInst *EEI = cast<ExtractElementInst>(I);
    ConstantInt *CI = dyn_cast<ConstantInt>(EEI->getOperand(1));
    unsigned Idx = -1;
    if (CI)
      Idx = CI->getZExtValue();

    // Try to match a reduction sequence (series of shufflevector and vector
    // adds followed by a extractelement).
    unsigned ReduxOpCode;
    Type *ReduxType;

    switch (matchVectorSplittingReduction(EEI, ReduxOpCode, ReduxType)) {
    case RK_Arithmetic:
      return getArithmeticReductionCost(ReduxOpCode, ReduxType,
                                        /*IsPairwiseForm=*/false);
    case RK_MinMax:
      return getMinMaxReductionCost(
          ReduxType, CmpInst::makeCmpResultType(ReduxType),
          /*IsPairwiseForm=*/false, /*IsUnsigned=*/false);
    case RK_UnsignedMinMax:
      return getMinMaxReductionCost(
          ReduxType, CmpInst::makeCmpResultType(ReduxType),
          /*IsPairwiseForm=*/false, /*IsUnsigned=*/true);
    case RK_None:
      break;
    }

    switch (matchPairwiseReduction(EEI, ReduxOpCode, ReduxType)) {
    case RK_Arithmetic:
      return getArithmeticReductionCost(ReduxOpCode, ReduxType,
                                        /*IsPairwiseForm=*/true);
    case RK_MinMax:
      return getMinMaxReductionCost(
          ReduxType, CmpInst::makeCmpResultType(ReduxType),
          /*IsPairwiseForm=*/true, /*IsUnsigned=*/false);
    case RK_UnsignedMinMax:
      return getMinMaxReductionCost(
          ReduxType, CmpInst::makeCmpResultType(ReduxType),
          /*IsPairwiseForm=*/true, /*IsUnsigned=*/true);
    case RK_None:
      break;
    }

    return getVectorInstrCost(I->getOpcode(),
                              EEI->getOperand(0)->getType(), Idx);
  }
  case Instruction::InsertElement: {
    const InsertElementInst *IE = cast<InsertElementInst>(I);
    ConstantInt *CI = dyn_cast<ConstantInt>(IE->getOperand(2));
    unsigned Idx = -1;
    if (CI)
      Idx = CI->getZExtValue();
    return getVectorInstrCost(I->getOpcode(),
                              IE->getType(), Idx);
  }
  case Instruction::ShuffleVector: {
    const ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(I);
    Type *Ty = Shuffle->getType();
    Type *SrcTy = Shuffle->getOperand(0)->getType();

    // TODO: Identify and add costs for insert subvector, etc.
    int SubIndex;
    if (Shuffle->isExtractSubvectorMask(SubIndex))
      return TTIImpl->getShuffleCost(SK_ExtractSubvector, SrcTy, SubIndex, Ty);

    if (Shuffle->changesLength())
      return -1;

    if (Shuffle->isIdentity())
      return 0;

    if (Shuffle->isReverse())
      return TTIImpl->getShuffleCost(SK_Reverse, Ty, 0, nullptr);

    if (Shuffle->isSelect())
      return TTIImpl->getShuffleCost(SK_Select, Ty, 0, nullptr);

    if (Shuffle->isTranspose())
      return TTIImpl->getShuffleCost(SK_Transpose, Ty, 0, nullptr);

    if (Shuffle->isZeroEltSplat())
      return TTIImpl->getShuffleCost(SK_Broadcast, Ty, 0, nullptr);

    if (Shuffle->isSingleSource())
      return TTIImpl->getShuffleCost(SK_PermuteSingleSrc, Ty, 0, nullptr);

    return TTIImpl->getShuffleCost(SK_PermuteTwoSrc, Ty, 0, nullptr);
  }
  case Instruction::Call:
    if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
      SmallVector<Value *, 4> Args(II->arg_operands());

      FastMathFlags FMF;
      if (auto *FPMO = dyn_cast<FPMathOperator>(II))
        FMF = FPMO->getFastMathFlags();

      return getIntrinsicInstrCost(II->getIntrinsicID(), II->getType(),
                                   Args, FMF);
    }
    return -1;
  default:
    // We don't have any information on this instruction.
    return -1;
  }
}

TargetTransformInfo::Concept::~Concept() {}

TargetIRAnalysis::TargetIRAnalysis() : TTICallback(&getDefaultTTI) {}

TargetIRAnalysis::TargetIRAnalysis(
    std::function<Result(const Function &)> TTICallback)
    : TTICallback(std::move(TTICallback)) {}

TargetIRAnalysis::Result TargetIRAnalysis::run(const Function &F,
                                               FunctionAnalysisManager &) {
  return TTICallback(F);
}

AnalysisKey TargetIRAnalysis::Key;

TargetIRAnalysis::Result TargetIRAnalysis::getDefaultTTI(const Function &F) {
  return Result(F.getParent()->getDataLayout());
}

// Register the basic pass.
INITIALIZE_PASS(TargetTransformInfoWrapperPass, "tti",
                "Target Transform Information", false, true)
char TargetTransformInfoWrapperPass::ID = 0;

void TargetTransformInfoWrapperPass::anchor() {}

TargetTransformInfoWrapperPass::TargetTransformInfoWrapperPass()
    : ImmutablePass(ID) {
  initializeTargetTransformInfoWrapperPassPass(
      *PassRegistry::getPassRegistry());
}

TargetTransformInfoWrapperPass::TargetTransformInfoWrapperPass(
    TargetIRAnalysis TIRA)
    : ImmutablePass(ID), TIRA(std::move(TIRA)) {
  initializeTargetTransformInfoWrapperPassPass(
      *PassRegistry::getPassRegistry());
}

TargetTransformInfo &TargetTransformInfoWrapperPass::getTTI(const Function &F) {
  FunctionAnalysisManager DummyFAM;
  TTI = TIRA.run(F, DummyFAM);
  return *TTI;
}

ImmutablePass *
llvm::createTargetTransformInfoWrapperPass(TargetIRAnalysis TIRA) {
  return new TargetTransformInfoWrapperPass(std::move(TIRA));
}