//===- llvm/Analysis/TargetTransformInfo.cpp ------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/TargetTransformInfoImpl.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/CFG.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/LoopIterator.h"
#include <utility>

using namespace llvm;
using namespace PatternMatch;

#define DEBUG_TYPE "tti"

static cl::opt<bool> EnableReduxCost("costmodel-reduxcost", cl::init(false),
                                     cl::Hidden,
                                     cl::desc("Recognize reduction patterns."));
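// Usage sketch (an assumption, not part of this file): the flag above can be
// enabled on the opt command line, e.g. `opt -costmodel-reduxcost ...`, so the
// cost model recognizes the reduction patterns matched later in this file.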
namespace {
/// No-op implementation of the TTI interface using the utility base
/// classes.
///
/// This is used when no target specific information is available.
struct NoTTIImpl : TargetTransformInfoImplCRTPBase<NoTTIImpl> {
  explicit NoTTIImpl(const DataLayout &DL)
      : TargetTransformInfoImplCRTPBase<NoTTIImpl>(DL) {}
};
} // namespace

bool HardwareLoopInfo::canAnalyze(LoopInfo &LI) {
  // If the loop has irreducible control flow, it cannot be converted to a
  // hardware loop.
  LoopBlocksRPO RPOT(L);
  RPOT.perform(&LI);
  if (containsIrreducibleCFG<const BasicBlock *>(RPOT, LI))
    return false;
  return true;
}

bool HardwareLoopInfo::isHardwareLoopCandidate(ScalarEvolution &SE,
                                               LoopInfo &LI, DominatorTree &DT,
                                               bool ForceNestedLoop,
                                               bool ForceHardwareLoopPHI) {
  SmallVector<BasicBlock *, 4> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);

  for (SmallVectorImpl<BasicBlock *>::iterator I = ExitingBlocks.begin(),
                                               IE = ExitingBlocks.end();
       I != IE; ++I) {
    BasicBlock *BB = *I;

    // If we pass the updated counter back through a phi, we need to know
    // which latch the updated value will be coming from.
    if (!L->isLoopLatch(BB)) {
      if (ForceHardwareLoopPHI || CounterInReg)
        continue;
    }

    const SCEV *EC = SE.getExitCount(L, BB);
    if (isa<SCEVCouldNotCompute>(EC))
      continue;
    if (const SCEVConstant *ConstEC = dyn_cast<SCEVConstant>(EC)) {
      if (ConstEC->getValue()->isZero())
        continue;
    } else if (!SE.isLoopInvariant(EC, L))
      continue;

    if (SE.getTypeSizeInBits(EC->getType()) > CountType->getBitWidth())
      continue;

    // If this exiting block is contained in a nested loop, it is not eligible
    // for insertion of the branch-and-decrement since the inner loop would
    // end up messing up the value in the CTR.
    if (!IsNestingLegal && LI.getLoopFor(BB) != L && !ForceNestedLoop)
      continue;

    // We now have a loop-invariant count of loop iterations (which is not the
    // constant zero) for which we know that this loop will not exit via this
    // exiting block.

    // We need to make sure that this block will run on every loop iteration.
    // For this to be true, we must dominate all blocks with backedges. Such
    // blocks are in-loop predecessors to the header block.
    bool NotAlways = false;
    for (pred_iterator PI = pred_begin(L->getHeader()),
                       PIE = pred_end(L->getHeader());
         PI != PIE; ++PI) {
      if (!L->contains(*PI))
        continue;

      if (!DT.dominates(*I, *PI)) {
        NotAlways = true;
        break;
      }
    }

    if (NotAlways)
      continue;

    // Make sure this block ends with a conditional branch.
    Instruction *TI = BB->getTerminator();
    if (!TI)
      continue;

    if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
      if (!BI->isConditional())
        continue;

      ExitBranch = BI;
    } else
      continue;

    // Note that this block may not be the loop latch block, even if the loop
    // has a latch block.
    ExitBlock = *I;
    ExitCount = EC;
    break;
  }

  if (!ExitBlock)
    return false;
  return true;
}
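// Illustrative note (an assumption, not from the original source): a simple
// counted loop of the form
//   for (i = 0; i != n; ++i) body();
// typically qualifies: its single exiting block is the latch, the exit count
// n is loop invariant, and the block ends in a conditional branch, so the
// checks above succeed as long as n fits in CountType.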

TargetTransformInfo::TargetTransformInfo(const DataLayout &DL)
    : TTIImpl(new Model<NoTTIImpl>(NoTTIImpl(DL))) {}

TargetTransformInfo::~TargetTransformInfo() {}

TargetTransformInfo::TargetTransformInfo(TargetTransformInfo &&Arg)
    : TTIImpl(std::move(Arg.TTIImpl)) {}

TargetTransformInfo &TargetTransformInfo::operator=(TargetTransformInfo &&RHS) {
  TTIImpl = std::move(RHS.TTIImpl);
  return *this;
}
152 
154  Type *OpTy) const {
155  int Cost = TTIImpl->getOperationCost(Opcode, Ty, OpTy);
156  assert(Cost >= 0 && "TTI should not produce negative costs!");
157  return Cost;
158 }
159 
161  const User *U) const {
162  int Cost = TTIImpl->getCallCost(FTy, NumArgs, U);
163  assert(Cost >= 0 && "TTI should not produce negative costs!");
164  return Cost;
165 }
166 
169  const User *U) const {
170  int Cost = TTIImpl->getCallCost(F, Arguments, U);
171  assert(Cost >= 0 && "TTI should not produce negative costs!");
172  return Cost;
173 }
174 
176  return TTIImpl->getInliningThresholdMultiplier();
177 }
178 
180  return TTIImpl->getInlinerVectorBonusPercent();
181 }
182 
183 int TargetTransformInfo::getGEPCost(Type *PointeeType, const Value *Ptr,
185  return TTIImpl->getGEPCost(PointeeType, Ptr, Operands);
186 }
187 
189  const Value *Src) const {
190  return TTIImpl->getExtCost(I, Src);
191 }
192 
195  const User *U) const {
196  int Cost = TTIImpl->getIntrinsicCost(IID, RetTy, Arguments, U);
197  assert(Cost >= 0 && "TTI should not produce negative costs!");
198  return Cost;
199 }
200 
201 unsigned
203  unsigned &JTSize) const {
204  return TTIImpl->getEstimatedNumberOfCaseClusters(SI, JTSize);
205 }
206 
209  int Cost = TTIImpl->getUserCost(U, Operands);
210  assert(Cost >= 0 && "TTI should not produce negative costs!");
211  return Cost;
212 }
213 
215  return TTIImpl->hasBranchDivergence();
216 }
217 
219  return TTIImpl->isSourceOfDivergence(V);
220 }
221 
223  return TTIImpl->isAlwaysUniform(V);
224 }
225 
227  return TTIImpl->getFlatAddressSpace();
228 }
229 
231  SmallVectorImpl<int> &OpIndexes, Intrinsic::ID IID) const {
232  return TTIImpl->collectFlatAddressOperands(OpIndexes, IID);
233 }
234 
236  IntrinsicInst *II, Value *OldV, Value *NewV) const {
237  return TTIImpl->rewriteIntrinsicWithAddressSpace(II, OldV, NewV);
238 }
239 
241  return TTIImpl->isLoweredToCall(F);
242 }
243 
245  Loop *L, ScalarEvolution &SE, AssumptionCache &AC,
246  TargetLibraryInfo *LibInfo, HardwareLoopInfo &HWLoopInfo) const {
247  return TTIImpl->isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo);
248 }
249 
251  Loop *L, ScalarEvolution &SE, UnrollingPreferences &UP) const {
252  return TTIImpl->getUnrollingPreferences(L, SE, UP);
253 }
254 
256  return TTIImpl->isLegalAddImmediate(Imm);
257 }
258 
260  return TTIImpl->isLegalICmpImmediate(Imm);
261 }
262 
264  int64_t BaseOffset,
265  bool HasBaseReg,
266  int64_t Scale,
267  unsigned AddrSpace,
268  Instruction *I) const {
269  return TTIImpl->isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg,
270  Scale, AddrSpace, I);
271 }
272 
274  return TTIImpl->isLSRCostLess(C1, C2);
275 }
276 
278  return TTIImpl->canMacroFuseCmp();
279 }
280 
282  ScalarEvolution *SE, LoopInfo *LI,
284  TargetLibraryInfo *LibInfo) const {
285  return TTIImpl->canSaveCmp(L, BI, SE, LI, DT, AC, LibInfo);
286 }
287 
289  return TTIImpl->shouldFavorPostInc();
290 }
291 
293  return TTIImpl->shouldFavorBackedgeIndex(L);
294 }
295 
297  return TTIImpl->isLegalMaskedStore(DataType);
298 }
299 
301  return TTIImpl->isLegalMaskedLoad(DataType);
302 }
303 
305  llvm::Align Alignment) const {
306  return TTIImpl->isLegalNTStore(DataType, Alignment);
307 }
308 
310  llvm::Align Alignment) const {
311  return TTIImpl->isLegalNTLoad(DataType, Alignment);
312 }
313 
315  return TTIImpl->isLegalMaskedGather(DataType);
316 }
317 
319  return TTIImpl->isLegalMaskedScatter(DataType);
320 }
321 
323  return TTIImpl->isLegalMaskedCompressStore(DataType);
324 }
325 
327  return TTIImpl->isLegalMaskedExpandLoad(DataType);
328 }
329 
330 bool TargetTransformInfo::hasDivRemOp(Type *DataType, bool IsSigned) const {
331  return TTIImpl->hasDivRemOp(DataType, IsSigned);
332 }
333 
335  unsigned AddrSpace) const {
336  return TTIImpl->hasVolatileVariant(I, AddrSpace);
337 }
338 
340  return TTIImpl->prefersVectorizedAddressing();
341 }
342 
344  int64_t BaseOffset,
345  bool HasBaseReg,
346  int64_t Scale,
347  unsigned AddrSpace) const {
348  int Cost = TTIImpl->getScalingFactorCost(Ty, BaseGV, BaseOffset, HasBaseReg,
349  Scale, AddrSpace);
350  assert(Cost >= 0 && "TTI should not produce negative costs!");
351  return Cost;
352 }
353 
355  return TTIImpl->LSRWithInstrQueries();
356 }
357 
359  return TTIImpl->isTruncateFree(Ty1, Ty2);
360 }
361 
363  return TTIImpl->isProfitableToHoist(I);
364 }
365 
366 bool TargetTransformInfo::useAA() const { return TTIImpl->useAA(); }
367 
369  return TTIImpl->isTypeLegal(Ty);
370 }
371 
373  return TTIImpl->shouldBuildLookupTables();
374 }
376  return TTIImpl->shouldBuildLookupTablesForConstant(C);
377 }
378 
380  return TTIImpl->useColdCCForColdCall(F);
381 }
382 
383 unsigned TargetTransformInfo::
384 getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const {
385  return TTIImpl->getScalarizationOverhead(Ty, Insert, Extract);
386 }
387 
388 unsigned TargetTransformInfo::
390  unsigned VF) const {
391  return TTIImpl->getOperandsScalarizationOverhead(Args, VF);
392 }
393 
395  return TTIImpl->supportsEfficientVectorElementLoadStore();
396 }
397 
398 bool TargetTransformInfo::enableAggressiveInterleaving(bool LoopHasReductions) const {
399  return TTIImpl->enableAggressiveInterleaving(LoopHasReductions);
400 }
401 
403 TargetTransformInfo::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
404  return TTIImpl->enableMemCmpExpansion(OptSize, IsZeroCmp);
405 }
406 
408  return TTIImpl->enableInterleavedAccessVectorization();
409 }
410 
412  return TTIImpl->enableMaskedInterleavedAccessVectorization();
413 }
414 
416  return TTIImpl->isFPVectorizationPotentiallyUnsafe();
417 }
418 
420  unsigned BitWidth,
421  unsigned AddressSpace,
422  unsigned Alignment,
423  bool *Fast) const {
424  return TTIImpl->allowsMisalignedMemoryAccesses(Context, BitWidth, AddressSpace,
425  Alignment, Fast);
426 }
427 
429 TargetTransformInfo::getPopcntSupport(unsigned IntTyWidthInBit) const {
430  return TTIImpl->getPopcntSupport(IntTyWidthInBit);
431 }
432 
434  return TTIImpl->haveFastSqrt(Ty);
435 }
436 
438  return TTIImpl->isFCmpOrdCheaperThanFCmpZero(Ty);
439 }
440 
442  int Cost = TTIImpl->getFPOpCost(Ty);
443  assert(Cost >= 0 && "TTI should not produce negative costs!");
444  return Cost;
445 }
446 
447 int TargetTransformInfo::getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx,
448  const APInt &Imm,
449  Type *Ty) const {
450  int Cost = TTIImpl->getIntImmCodeSizeCost(Opcode, Idx, Imm, Ty);
451  assert(Cost >= 0 && "TTI should not produce negative costs!");
452  return Cost;
453 }
454 
455 int TargetTransformInfo::getIntImmCost(const APInt &Imm, Type *Ty) const {
456  int Cost = TTIImpl->getIntImmCost(Imm, Ty);
457  assert(Cost >= 0 && "TTI should not produce negative costs!");
458  return Cost;
459 }
460 
461 int TargetTransformInfo::getIntImmCost(unsigned Opcode, unsigned Idx,
462  const APInt &Imm, Type *Ty) const {
463  int Cost = TTIImpl->getIntImmCost(Opcode, Idx, Imm, Ty);
464  assert(Cost >= 0 && "TTI should not produce negative costs!");
465  return Cost;
466 }
467 
469  const APInt &Imm, Type *Ty) const {
470  int Cost = TTIImpl->getIntImmCost(IID, Idx, Imm, Ty);
471  assert(Cost >= 0 && "TTI should not produce negative costs!");
472  return Cost;
473 }
474 
476  return TTIImpl->getNumberOfRegisters(Vector);
477 }
478 
480  return TTIImpl->getRegisterBitWidth(Vector);
481 }
482 
484  return TTIImpl->getMinVectorRegisterBitWidth();
485 }
486 
488  return TTIImpl->shouldMaximizeVectorBandwidth(OptSize);
489 }
490 
491 unsigned TargetTransformInfo::getMinimumVF(unsigned ElemWidth) const {
492  return TTIImpl->getMinimumVF(ElemWidth);
493 }
494 
496  const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const {
497  return TTIImpl->shouldConsiderAddressTypePromotion(
498  I, AllowPromotionWithoutCommonHeader);
499 }
500 
502  return TTIImpl->getCacheLineSize();
503 }
504 
506  const {
507  return TTIImpl->getCacheSize(Level);
508 }
509 
511  CacheLevel Level) const {
512  return TTIImpl->getCacheAssociativity(Level);
513 }
514 
516  return TTIImpl->getPrefetchDistance();
517 }
518 
520  return TTIImpl->getMinPrefetchStride();
521 }
522 
524  return TTIImpl->getMaxPrefetchIterationsAhead();
525 }
526 
527 unsigned TargetTransformInfo::getMaxInterleaveFactor(unsigned VF) const {
528  return TTIImpl->getMaxInterleaveFactor(VF);
529 }

TargetTransformInfo::OperandValueKind
TargetTransformInfo::getOperandInfo(Value *V, OperandValueProperties &OpProps) {
  OperandValueKind OpInfo = OK_AnyValue;
  OpProps = OP_None;

  if (auto *CI = dyn_cast<ConstantInt>(V)) {
    if (CI->getValue().isPowerOf2())
      OpProps = OP_PowerOf2;
    return OK_UniformConstantValue;
  }

  // A broadcast shuffle creates a uniform value.
  // TODO: Add support for non-zero index broadcasts.
  // TODO: Add support for different source vector width.
  if (auto *ShuffleInst = dyn_cast<ShuffleVectorInst>(V))
    if (ShuffleInst->isZeroEltSplat())
      OpInfo = OK_UniformValue;

  const Value *Splat = getSplatValue(V);

  // Check for a splat of a constant or for a non-uniform vector of constants,
  // and check whether the constant(s) are all powers of two.
  if (isa<ConstantVector>(V) || isa<ConstantDataVector>(V)) {
    OpInfo = OK_NonUniformConstantValue;
    if (Splat) {
      OpInfo = OK_UniformConstantValue;
      if (auto *CI = dyn_cast<ConstantInt>(Splat))
        if (CI->getValue().isPowerOf2())
          OpProps = OP_PowerOf2;
    } else if (auto *CDS = dyn_cast<ConstantDataSequential>(V)) {
      OpProps = OP_PowerOf2;
      for (unsigned I = 0, E = CDS->getNumElements(); I != E; ++I) {
        if (auto *CI = dyn_cast<ConstantInt>(CDS->getElementAsConstant(I)))
          if (CI->getValue().isPowerOf2())
            continue;
        OpProps = OP_None;
        break;
      }
    }
  }

  // Check for a splat of a uniform value. This is not loop aware, so return
  // true only for the obviously uniform cases (argument, global value).
  if (Splat && (isa<Argument>(Splat) || isa<GlobalValue>(Splat)))
    OpInfo = OK_UniformValue;

  return OpInfo;
}
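// Illustrative example (an assumption, not from the original source): for the
// classification above, a constant splat such as
//   <4 x i32> <i32 8, i32 8, i32 8, i32 8>
// is OK_UniformConstantValue with OP_PowerOf2, while a zero-element splat
// shuffle of a function argument is OK_UniformValue with OP_None.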
579 
581  unsigned Opcode, Type *Ty, OperandValueKind Opd1Info,
582  OperandValueKind Opd2Info, OperandValueProperties Opd1PropInfo,
583  OperandValueProperties Opd2PropInfo,
585  int Cost = TTIImpl->getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
586  Opd1PropInfo, Opd2PropInfo, Args);
587  assert(Cost >= 0 && "TTI should not produce negative costs!");
588  return Cost;
589 }
590 
592  Type *SubTp) const {
593  int Cost = TTIImpl->getShuffleCost(Kind, Ty, Index, SubTp);
594  assert(Cost >= 0 && "TTI should not produce negative costs!");
595  return Cost;
596 }
597 
598 int TargetTransformInfo::getCastInstrCost(unsigned Opcode, Type *Dst,
599  Type *Src, const Instruction *I) const {
600  assert ((I == nullptr || I->getOpcode() == Opcode) &&
601  "Opcode should reflect passed instruction.");
602  int Cost = TTIImpl->getCastInstrCost(Opcode, Dst, Src, I);
603  assert(Cost >= 0 && "TTI should not produce negative costs!");
604  return Cost;
605 }
606 
608  VectorType *VecTy,
609  unsigned Index) const {
610  int Cost = TTIImpl->getExtractWithExtendCost(Opcode, Dst, VecTy, Index);
611  assert(Cost >= 0 && "TTI should not produce negative costs!");
612  return Cost;
613 }
614 
615 int TargetTransformInfo::getCFInstrCost(unsigned Opcode) const {
616  int Cost = TTIImpl->getCFInstrCost(Opcode);
617  assert(Cost >= 0 && "TTI should not produce negative costs!");
618  return Cost;
619 }
620 
621 int TargetTransformInfo::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
622  Type *CondTy, const Instruction *I) const {
623  assert ((I == nullptr || I->getOpcode() == Opcode) &&
624  "Opcode should reflect passed instruction.");
625  int Cost = TTIImpl->getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
626  assert(Cost >= 0 && "TTI should not produce negative costs!");
627  return Cost;
628 }
629 
631  unsigned Index) const {
632  int Cost = TTIImpl->getVectorInstrCost(Opcode, Val, Index);
633  assert(Cost >= 0 && "TTI should not produce negative costs!");
634  return Cost;
635 }
636 
637 int TargetTransformInfo::getMemoryOpCost(unsigned Opcode, Type *Src,
638  unsigned Alignment,
639  unsigned AddressSpace,
640  const Instruction *I) const {
641  assert ((I == nullptr || I->getOpcode() == Opcode) &&
642  "Opcode should reflect passed instruction.");
643  int Cost = TTIImpl->getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, I);
644  assert(Cost >= 0 && "TTI should not produce negative costs!");
645  return Cost;
646 }
647 
649  unsigned Alignment,
650  unsigned AddressSpace) const {
651  int Cost =
652  TTIImpl->getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace);
653  assert(Cost >= 0 && "TTI should not produce negative costs!");
654  return Cost;
655 }
656 
658  Value *Ptr, bool VariableMask,
659  unsigned Alignment) const {
660  int Cost = TTIImpl->getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
661  Alignment);
662  assert(Cost >= 0 && "TTI should not produce negative costs!");
663  return Cost;
664 }
665 
667  unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
668  unsigned Alignment, unsigned AddressSpace, bool UseMaskForCond,
669  bool UseMaskForGaps) const {
670  int Cost = TTIImpl->getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
671  Alignment, AddressSpace,
672  UseMaskForCond,
673  UseMaskForGaps);
674  assert(Cost >= 0 && "TTI should not produce negative costs!");
675  return Cost;
676 }
677 
680  unsigned ScalarizationCostPassed) const {
681  int Cost = TTIImpl->getIntrinsicInstrCost(ID, RetTy, Tys, FMF,
682  ScalarizationCostPassed);
683  assert(Cost >= 0 && "TTI should not produce negative costs!");
684  return Cost;
685 }
686 
688  ArrayRef<Value *> Args, FastMathFlags FMF, unsigned VF) const {
689  int Cost = TTIImpl->getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF);
690  assert(Cost >= 0 && "TTI should not produce negative costs!");
691  return Cost;
692 }
693 
695  ArrayRef<Type *> Tys) const {
696  int Cost = TTIImpl->getCallInstrCost(F, RetTy, Tys);
697  assert(Cost >= 0 && "TTI should not produce negative costs!");
698  return Cost;
699 }
700 
702  return TTIImpl->getNumberOfParts(Tp);
703 }
704 
706  ScalarEvolution *SE,
707  const SCEV *Ptr) const {
708  int Cost = TTIImpl->getAddressComputationCost(Tp, SE, Ptr);
709  assert(Cost >= 0 && "TTI should not produce negative costs!");
710  return Cost;
711 }
712 
714  int Cost = TTIImpl->getMemcpyCost(I);
715  assert(Cost >= 0 && "TTI should not produce negative costs!");
716  return Cost;
717 }
718 
720  bool IsPairwiseForm) const {
721  int Cost = TTIImpl->getArithmeticReductionCost(Opcode, Ty, IsPairwiseForm);
722  assert(Cost >= 0 && "TTI should not produce negative costs!");
723  return Cost;
724 }
725 
727  bool IsPairwiseForm,
728  bool IsUnsigned) const {
729  int Cost =
730  TTIImpl->getMinMaxReductionCost(Ty, CondTy, IsPairwiseForm, IsUnsigned);
731  assert(Cost >= 0 && "TTI should not produce negative costs!");
732  return Cost;
733 }
734 
735 unsigned
737  return TTIImpl->getCostOfKeepingLiveOverCall(Tys);
738 }
739 
741  MemIntrinsicInfo &Info) const {
742  return TTIImpl->getTgtMemIntrinsic(Inst, Info);
743 }
744 
746  return TTIImpl->getAtomicMemIntrinsicMaxElementSize();
747 }
748 
750  IntrinsicInst *Inst, Type *ExpectedType) const {
751  return TTIImpl->getOrCreateResultFromMemIntrinsic(Inst, ExpectedType);
752 }
753 
755  Value *Length,
756  unsigned SrcAlign,
757  unsigned DestAlign) const {
758  return TTIImpl->getMemcpyLoopLoweringType(Context, Length, SrcAlign,
759  DestAlign);
760 }
761 
764  unsigned RemainingBytes, unsigned SrcAlign, unsigned DestAlign) const {
765  TTIImpl->getMemcpyLoopResidualLoweringType(OpsOut, Context, RemainingBytes,
766  SrcAlign, DestAlign);
767 }
768 
770  const Function *Callee) const {
771  return TTIImpl->areInlineCompatible(Caller, Callee);
772 }
773 
775  const Function *Caller, const Function *Callee,
777  return TTIImpl->areFunctionArgsABICompatible(Caller, Callee, Args);
778 }
779 
781  Type *Ty) const {
782  return TTIImpl->isIndexedLoadLegal(Mode, Ty);
783 }
784 
786  Type *Ty) const {
787  return TTIImpl->isIndexedStoreLegal(Mode, Ty);
788 }
789 
791  return TTIImpl->getLoadStoreVecRegBitWidth(AS);
792 }
793 
795  return TTIImpl->isLegalToVectorizeLoad(LI);
796 }
797 
799  return TTIImpl->isLegalToVectorizeStore(SI);
800 }
801 
803  unsigned ChainSizeInBytes, unsigned Alignment, unsigned AddrSpace) const {
804  return TTIImpl->isLegalToVectorizeLoadChain(ChainSizeInBytes, Alignment,
805  AddrSpace);
806 }
807 
809  unsigned ChainSizeInBytes, unsigned Alignment, unsigned AddrSpace) const {
810  return TTIImpl->isLegalToVectorizeStoreChain(ChainSizeInBytes, Alignment,
811  AddrSpace);
812 }
813 
815  unsigned LoadSize,
816  unsigned ChainSizeInBytes,
817  VectorType *VecTy) const {
818  return TTIImpl->getLoadVectorFactor(VF, LoadSize, ChainSizeInBytes, VecTy);
819 }
820 
822  unsigned StoreSize,
823  unsigned ChainSizeInBytes,
824  VectorType *VecTy) const {
825  return TTIImpl->getStoreVectorFactor(VF, StoreSize, ChainSizeInBytes, VecTy);
826 }
827 
829  Type *Ty, ReductionFlags Flags) const {
830  return TTIImpl->useReductionIntrinsic(Opcode, Ty, Flags);
831 }
832 
834  return TTIImpl->shouldExpandReduction(II);
835 }
836 
838  return TTIImpl->getGISelRematGlobalCost();
839 }
840 
841 int TargetTransformInfo::getInstructionLatency(const Instruction *I) const {
842  return TTIImpl->getInstructionLatency(I);
843 }

static bool matchPairwiseShuffleMask(ShuffleVectorInst *SI, bool IsLeft,
                                     unsigned Level) {
  // We don't need a shuffle if we just want to have element 0 in position 0 of
  // the vector.
  if (!SI && Level == 0 && IsLeft)
    return true;
  else if (!SI)
    return false;

  SmallVector<int, 32> Mask(SI->getType()->getNumElements(), -1);

  // Build a mask of 0, 2, ... (left) or 1, 3, ... (right) depending on whether
  // we look at the left or right side.
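  // For example (illustrative, not from the original source), with Level == 1
  // this loop produces the mask <0, 2> when IsLeft is true and <1, 3> when
  // IsLeft is false; the remaining lanes stay -1 (undef).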
  for (unsigned i = 0, e = (1 << Level), val = !IsLeft; i != e; ++i, val += 2)
    Mask[i] = val;

  SmallVector<int, 16> ActualMask = SI->getShuffleMask();
  return Mask == ActualMask;
}

namespace {
/// Kind of the reduction data.
enum ReductionKind {
  RK_None,           /// Not a reduction.
  RK_Arithmetic,     /// Binary reduction data.
  RK_MinMax,         /// Min/max reduction data.
  RK_UnsignedMinMax, /// Unsigned min/max reduction data.
};
/// Contains opcode + LHS/RHS parts of the reduction operations.
struct ReductionData {
  ReductionData() = delete;
  ReductionData(ReductionKind Kind, unsigned Opcode, Value *LHS, Value *RHS)
      : Opcode(Opcode), LHS(LHS), RHS(RHS), Kind(Kind) {
    assert(Kind != RK_None && "expected binary or min/max reduction only.");
  }
  unsigned Opcode = 0;
  Value *LHS = nullptr;
  Value *RHS = nullptr;
  ReductionKind Kind = RK_None;
  bool hasSameData(ReductionData &RD) const {
    return Kind == RD.Kind && Opcode == RD.Opcode;
  }
};
} // namespace

static Optional<ReductionData> getReductionData(Instruction *I) {
  Value *L, *R;
  if (m_BinOp(m_Value(L), m_Value(R)).match(I))
    return ReductionData(RK_Arithmetic, I->getOpcode(), L, R);
  if (auto *SI = dyn_cast<SelectInst>(I)) {
    if (m_SMin(m_Value(L), m_Value(R)).match(SI) ||
        m_SMax(m_Value(L), m_Value(R)).match(SI) ||
        m_OrdFMin(m_Value(L), m_Value(R)).match(SI) ||
        m_OrdFMax(m_Value(L), m_Value(R)).match(SI) ||
        m_UnordFMin(m_Value(L), m_Value(R)).match(SI) ||
        m_UnordFMax(m_Value(L), m_Value(R)).match(SI)) {
      auto *CI = cast<CmpInst>(SI->getCondition());
      return ReductionData(RK_MinMax, CI->getOpcode(), L, R);
    }
    if (m_UMin(m_Value(L), m_Value(R)).match(SI) ||
        m_UMax(m_Value(L), m_Value(R)).match(SI)) {
      auto *CI = cast<CmpInst>(SI->getCondition());
      return ReductionData(RK_UnsignedMinMax, CI->getOpcode(), L, R);
    }
  }
  return llvm::None;
}
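// Illustrative example (an assumption, not from the original source): given
//   %c = fcmp olt <4 x float> %a, %b
//   %m = select <4 x i1> %c, <4 x float> %a, <4 x float> %b
// the select is recognized as {RK_MinMax, FCmp, %a, %b}, while a plain
//   %s = fadd <4 x float> %a, %b
// is recognized as {RK_Arithmetic, FAdd, %a, %b}.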

static ReductionKind matchPairwiseReductionAtLevel(Instruction *I,
                                                   unsigned Level,
                                                   unsigned NumLevels) {
  // Match one level of pairwise operations.
  // %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef,
  //       <4 x i32> <i32 0, i32 2 , i32 undef, i32 undef>
  // %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef,
  //       <4 x i32> <i32 1, i32 3, i32 undef, i32 undef>
  // %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1
  if (!I)
    return RK_None;

  assert(I->getType()->isVectorTy() && "Expecting a vector type");

  Optional<ReductionData> RD = getReductionData(I);
  if (!RD)
    return RK_None;

  ShuffleVectorInst *LS = dyn_cast<ShuffleVectorInst>(RD->LHS);
  if (!LS && Level)
    return RK_None;
  ShuffleVectorInst *RS = dyn_cast<ShuffleVectorInst>(RD->RHS);
  if (!RS && Level)
    return RK_None;

  // On level 0 we can omit one shufflevector instruction.
  if (!Level && !RS && !LS)
    return RK_None;

  // Shuffle inputs must match.
  Value *NextLevelOpL = LS ? LS->getOperand(0) : nullptr;
  Value *NextLevelOpR = RS ? RS->getOperand(0) : nullptr;
  Value *NextLevelOp = nullptr;
  if (NextLevelOpR && NextLevelOpL) {
    // If we have two shuffles their operands must match.
    if (NextLevelOpL != NextLevelOpR)
      return RK_None;

    NextLevelOp = NextLevelOpL;
  } else if (Level == 0 && (NextLevelOpR || NextLevelOpL)) {
    // On the first level we can omit the shufflevector <0, undef,...>. So the
    // input to the other shufflevector <1, undef> must match with one of the
    // inputs to the current binary operation.
    // Example:
    //  %NextLevelOpL = shufflevector %R, <1, undef ...>
    //  %BinOp        = fadd          %NextLevelOpL, %R
    if (NextLevelOpL && NextLevelOpL != RD->RHS)
      return RK_None;
    else if (NextLevelOpR && NextLevelOpR != RD->LHS)
      return RK_None;

    NextLevelOp = NextLevelOpL ? RD->RHS : RD->LHS;
  } else
    return RK_None;

  // Check that the next level's binary operation exists and matches with the
  // current one.
  if (Level + 1 != NumLevels) {
    Optional<ReductionData> NextLevelRD =
        getReductionData(cast<Instruction>(NextLevelOp));
    if (!NextLevelRD || !RD->hasSameData(*NextLevelRD))
      return RK_None;
  }

  // Shuffle mask for pairwise operation must match.
  if (matchPairwiseShuffleMask(LS, /*IsLeft=*/true, Level)) {
    if (!matchPairwiseShuffleMask(RS, /*IsLeft=*/false, Level))
      return RK_None;
  } else if (matchPairwiseShuffleMask(RS, /*IsLeft=*/true, Level)) {
    if (!matchPairwiseShuffleMask(LS, /*IsLeft=*/false, Level))
      return RK_None;
  } else {
    return RK_None;
  }

  if (++Level == NumLevels)
    return RD->Kind;

  // Match next level.
  return matchPairwiseReductionAtLevel(cast<Instruction>(NextLevelOp), Level,
                                       NumLevels);
}

static ReductionKind matchPairwiseReduction(const ExtractElementInst *ReduxRoot,
                                            unsigned &Opcode, Type *&Ty) {
  if (!EnableReduxCost)
    return RK_None;

  // Need to extract the first element.
  ConstantInt *CI = dyn_cast<ConstantInt>(ReduxRoot->getOperand(1));
  unsigned Idx = ~0u;
  if (CI)
    Idx = CI->getZExtValue();
  if (Idx != 0)
    return RK_None;

  auto *RdxStart = dyn_cast<Instruction>(ReduxRoot->getOperand(0));
  if (!RdxStart)
    return RK_None;
  Optional<ReductionData> RD = getReductionData(RdxStart);
  if (!RD)
    return RK_None;

  Type *VecTy = RdxStart->getType();
  unsigned NumVecElems = VecTy->getVectorNumElements();
  if (!isPowerOf2_32(NumVecElems))
    return RK_None;

  // We look for a sequence of shuffle,shuffle,add triples like the following
  // that builds a pairwise reduction tree.
  //
  // (X0, X1, X2, X3)
  // (X0 + X1, X2 + X3, undef, undef)
  // ((X0 + X1) + (X2 + X3), undef, undef, undef)
  //
  // %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef,
  //       <4 x i32> <i32 0, i32 2 , i32 undef, i32 undef>
  // %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef,
  //       <4 x i32> <i32 1, i32 3, i32 undef, i32 undef>
  // %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1
  // %rdx.shuf.1.0 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef,
  //       <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
  // %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef,
  //       <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  // %bin.rdx8 = fadd <4 x float> %rdx.shuf.1.0, %rdx.shuf.1.1
  // %r = extractelement <4 x float> %bin.rdx8, i32 0
  if (matchPairwiseReductionAtLevel(RdxStart, 0, Log2_32(NumVecElems)) ==
      RK_None)
    return RK_None;

  Opcode = RD->Opcode;
  Ty = VecTy;

  return RD->Kind;
}

static std::pair<Value *, ShuffleVectorInst *>
getShuffleAndOtherOprd(Value *L, Value *R) {
  ShuffleVectorInst *S = nullptr;

  if ((S = dyn_cast<ShuffleVectorInst>(L)))
    return std::make_pair(R, S);

  S = dyn_cast<ShuffleVectorInst>(R);
  return std::make_pair(L, S);
}

static ReductionKind
matchVectorSplittingReduction(const ExtractElementInst *ReduxRoot,
                              unsigned &Opcode, Type *&Ty) {
  if (!EnableReduxCost)
    return RK_None;

  // Need to extract the first element.
  ConstantInt *CI = dyn_cast<ConstantInt>(ReduxRoot->getOperand(1));
  unsigned Idx = ~0u;
  if (CI)
    Idx = CI->getZExtValue();
  if (Idx != 0)
    return RK_None;

  auto *RdxStart = dyn_cast<Instruction>(ReduxRoot->getOperand(0));
  if (!RdxStart)
    return RK_None;
  Optional<ReductionData> RD = getReductionData(RdxStart);
  if (!RD)
    return RK_None;

  Type *VecTy = ReduxRoot->getOperand(0)->getType();
  unsigned NumVecElems = VecTy->getVectorNumElements();
  if (!isPowerOf2_32(NumVecElems))
    return RK_None;

  // We look for a sequence of shuffles and adds like the following, matching
  // one fadd/shufflevector pair at a time.
  //
  // %rdx.shuf = shufflevector <4 x float> %rdx, <4 x float> undef,
  //       <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
  // %bin.rdx = fadd <4 x float> %rdx, %rdx.shuf
  // %rdx.shuf7 = shufflevector <4 x float> %bin.rdx, <4 x float> undef,
  //       <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  // %bin.rdx8 = fadd <4 x float> %bin.rdx, %rdx.shuf7
  // %r = extractelement <4 x float> %bin.rdx8, i32 0

  unsigned MaskStart = 1;
  Instruction *RdxOp = RdxStart;
  SmallVector<int, 32> ShuffleMask(NumVecElems, 0);
  unsigned NumVecElemsRemain = NumVecElems;
  while (NumVecElemsRemain - 1) {
    // Check for the right reduction operation.
    if (!RdxOp)
      return RK_None;
    Optional<ReductionData> RDLevel = getReductionData(RdxOp);
    if (!RDLevel || !RDLevel->hasSameData(*RD))
      return RK_None;

    Value *NextRdxOp;
    ShuffleVectorInst *Shuffle;
    std::tie(NextRdxOp, Shuffle) =
        getShuffleAndOtherOprd(RDLevel->LHS, RDLevel->RHS);

    // Check that the current reduction operation and the shuffle use the same
    // value.
    if (Shuffle == nullptr)
      return RK_None;
    if (Shuffle->getOperand(0) != NextRdxOp)
      return RK_None;

    // Check that the shuffle mask matches.
    for (unsigned j = 0; j != MaskStart; ++j)
      ShuffleMask[j] = MaskStart + j;
    // Fill the rest of the mask with -1 for undef.
    std::fill(&ShuffleMask[MaskStart], ShuffleMask.end(), -1);

    SmallVector<int, 16> Mask = Shuffle->getShuffleMask();
    if (ShuffleMask != Mask)
      return RK_None;

    RdxOp = dyn_cast<Instruction>(NextRdxOp);
    NumVecElemsRemain /= 2;
    MaskStart *= 2;
  }

  Opcode = RD->Opcode;
  Ty = VecTy;
  return RD->Kind;
}

int TargetTransformInfo::getInstructionThroughput(const Instruction *I) const {
  switch (I->getOpcode()) {
  case Instruction::GetElementPtr:
    return getUserCost(I);

  case Instruction::Ret:
  case Instruction::PHI:
  case Instruction::Br: {
    return getCFInstrCost(I->getOpcode());
  }
  case Instruction::Add:
  case Instruction::FAdd:
  case Instruction::Sub:
  case Instruction::FSub:
  case Instruction::Mul:
  case Instruction::FMul:
  case Instruction::UDiv:
  case Instruction::SDiv:
  case Instruction::FDiv:
  case Instruction::URem:
  case Instruction::SRem:
  case Instruction::FRem:
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor: {
    TargetTransformInfo::OperandValueKind Op1VK, Op2VK;
    TargetTransformInfo::OperandValueProperties Op1VP, Op2VP;
    Op1VK = getOperandInfo(I->getOperand(0), Op1VP);
    Op2VK = getOperandInfo(I->getOperand(1), Op2VP);
    SmallVector<const Value *, 2> Operands(I->operand_values());
    return getArithmeticInstrCost(I->getOpcode(), I->getType(), Op1VK, Op2VK,
                                  Op1VP, Op2VP, Operands);
  }
  case Instruction::FNeg: {
    TargetTransformInfo::OperandValueKind Op1VK, Op2VK;
    TargetTransformInfo::OperandValueProperties Op1VP, Op2VP;
    Op1VK = getOperandInfo(I->getOperand(0), Op1VP);
    Op2VK = OK_AnyValue;
    Op2VP = OP_None;
    SmallVector<const Value *, 2> Operands(I->operand_values());
    return getArithmeticInstrCost(I->getOpcode(), I->getType(), Op1VK, Op2VK,
                                  Op1VP, Op2VP, Operands);
  }
  case Instruction::Select: {
    const SelectInst *SI = cast<SelectInst>(I);
    Type *CondTy = SI->getCondition()->getType();
    return getCmpSelInstrCost(I->getOpcode(), I->getType(), CondTy, I);
  }
  case Instruction::ICmp:
  case Instruction::FCmp: {
    Type *ValTy = I->getOperand(0)->getType();
    return getCmpSelInstrCost(I->getOpcode(), ValTy, I->getType(), I);
  }
  case Instruction::Store: {
    const StoreInst *SI = cast<StoreInst>(I);
    Type *ValTy = SI->getValueOperand()->getType();
    return getMemoryOpCost(I->getOpcode(), ValTy,
                           SI->getAlignment(),
                           SI->getPointerAddressSpace(), I);
  }
  case Instruction::Load: {
    const LoadInst *LI = cast<LoadInst>(I);
    return getMemoryOpCost(I->getOpcode(), I->getType(),
                           LI->getAlignment(),
                           LI->getPointerAddressSpace(), I);
  }
  case Instruction::ZExt:
  case Instruction::SExt:
  case Instruction::FPToUI:
  case Instruction::FPToSI:
  case Instruction::FPExt:
  case Instruction::PtrToInt:
  case Instruction::IntToPtr:
  case Instruction::SIToFP:
  case Instruction::UIToFP:
  case Instruction::Trunc:
  case Instruction::FPTrunc:
  case Instruction::BitCast:
  case Instruction::AddrSpaceCast: {
    Type *SrcTy = I->getOperand(0)->getType();
    return getCastInstrCost(I->getOpcode(), I->getType(), SrcTy, I);
  }
  case Instruction::ExtractElement: {
    const ExtractElementInst *EEI = cast<ExtractElementInst>(I);
    ConstantInt *CI = dyn_cast<ConstantInt>(EEI->getOperand(1));
    unsigned Idx = -1;
    if (CI)
      Idx = CI->getZExtValue();

    // Try to match a reduction sequence (series of shufflevector and vector
    // adds followed by an extractelement).
    unsigned ReduxOpCode;
    Type *ReduxType;

    switch (matchVectorSplittingReduction(EEI, ReduxOpCode, ReduxType)) {
    case RK_Arithmetic:
      return getArithmeticReductionCost(ReduxOpCode, ReduxType,
                                        /*IsPairwiseForm=*/false);
    case RK_MinMax:
      return getMinMaxReductionCost(
          ReduxType, CmpInst::makeCmpResultType(ReduxType),
          /*IsPairwiseForm=*/false, /*IsUnsigned=*/false);
    case RK_UnsignedMinMax:
      return getMinMaxReductionCost(
          ReduxType, CmpInst::makeCmpResultType(ReduxType),
          /*IsPairwiseForm=*/false, /*IsUnsigned=*/true);
    case RK_None:
      break;
    }

    switch (matchPairwiseReduction(EEI, ReduxOpCode, ReduxType)) {
    case RK_Arithmetic:
      return getArithmeticReductionCost(ReduxOpCode, ReduxType,
                                        /*IsPairwiseForm=*/true);
    case RK_MinMax:
      return getMinMaxReductionCost(
          ReduxType, CmpInst::makeCmpResultType(ReduxType),
          /*IsPairwiseForm=*/true, /*IsUnsigned=*/false);
    case RK_UnsignedMinMax:
      return getMinMaxReductionCost(
          ReduxType, CmpInst::makeCmpResultType(ReduxType),
          /*IsPairwiseForm=*/true, /*IsUnsigned=*/true);
    case RK_None:
      break;
    }

    return getVectorInstrCost(I->getOpcode(),
                              EEI->getOperand(0)->getType(), Idx);
  }
  case Instruction::InsertElement: {
    const InsertElementInst *IE = cast<InsertElementInst>(I);
    ConstantInt *CI = dyn_cast<ConstantInt>(IE->getOperand(2));
    unsigned Idx = -1;
    if (CI)
      Idx = CI->getZExtValue();
    return getVectorInstrCost(I->getOpcode(),
                              IE->getType(), Idx);
  }
  case Instruction::ExtractValue:
    return 0; // Model all ExtractValue nodes as free.
  case Instruction::ShuffleVector: {
    const ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(I);
    Type *Ty = Shuffle->getType();
    Type *SrcTy = Shuffle->getOperand(0)->getType();

    // TODO: Identify and add costs for insert subvector, etc.
    int SubIndex;
    if (Shuffle->isExtractSubvectorMask(SubIndex))
      return TTIImpl->getShuffleCost(SK_ExtractSubvector, SrcTy, SubIndex, Ty);

    if (Shuffle->changesLength())
      return -1;

    if (Shuffle->isIdentity())
      return 0;

    if (Shuffle->isReverse())
      return TTIImpl->getShuffleCost(SK_Reverse, Ty, 0, nullptr);

    if (Shuffle->isSelect())
      return TTIImpl->getShuffleCost(SK_Select, Ty, 0, nullptr);

    if (Shuffle->isTranspose())
      return TTIImpl->getShuffleCost(SK_Transpose, Ty, 0, nullptr);

    if (Shuffle->isZeroEltSplat())
      return TTIImpl->getShuffleCost(SK_Broadcast, Ty, 0, nullptr);

    if (Shuffle->isSingleSource())
      return TTIImpl->getShuffleCost(SK_PermuteSingleSrc, Ty, 0, nullptr);

    return TTIImpl->getShuffleCost(SK_PermuteTwoSrc, Ty, 0, nullptr);
  }
  case Instruction::Call:
    if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
      SmallVector<Value *, 4> Args(II->arg_operands());

      FastMathFlags FMF;
      if (auto *FPMO = dyn_cast<FPMathOperator>(II))
        FMF = FPMO->getFastMathFlags();

      return getIntrinsicInstrCost(II->getIntrinsicID(), II->getType(),
                                   Args, FMF);
    }
    return -1;
  default:
    // We don't have any information on this instruction.
    return -1;
  }
}

TargetTransformInfo::Concept::~Concept() {}

TargetIRAnalysis::TargetIRAnalysis() : TTICallback(&getDefaultTTI) {}

TargetIRAnalysis::TargetIRAnalysis(
    std::function<Result(const Function &)> TTICallback)
    : TTICallback(std::move(TTICallback)) {}

TargetIRAnalysis::Result TargetIRAnalysis::run(const Function &F,
                                               FunctionAnalysisManager &) {
  return TTICallback(F);
}

AnalysisKey TargetIRAnalysis::Key;

TargetIRAnalysis::Result TargetIRAnalysis::getDefaultTTI(const Function &F) {
  return Result(F.getParent()->getDataLayout());
}

// Register the basic pass.
INITIALIZE_PASS(TargetTransformInfoWrapperPass, "tti",
                "Target Transform Information", false, true)
char TargetTransformInfoWrapperPass::ID = 0;

void TargetTransformInfoWrapperPass::anchor() {}

TargetTransformInfoWrapperPass::TargetTransformInfoWrapperPass()
    : ImmutablePass(ID) {
  initializeTargetTransformInfoWrapperPassPass(
      *PassRegistry::getPassRegistry());
}

TargetTransformInfoWrapperPass::TargetTransformInfoWrapperPass(
    TargetIRAnalysis TIRA)
    : ImmutablePass(ID), TIRA(std::move(TIRA)) {
  initializeTargetTransformInfoWrapperPassPass(
      *PassRegistry::getPassRegistry());
}

TargetTransformInfo &TargetTransformInfoWrapperPass::getTTI(const Function &F) {
  FunctionAnalysisManager DummyFAM;
  TTI = TIRA.run(F, DummyFAM);
  return *TTI;
}

ImmutablePass *
llvm::createTargetTransformInfoWrapperPass(TargetIRAnalysis TIRA) {
  return new TargetTransformInfoWrapperPass(std::move(TIRA));
}
MaxMin_match< ICmpInst, LHS, RHS, umin_pred_ty > m_UMin(const LHS &L, const RHS &R)
uint64_t CallInst * C
Value * getValueOperand()
Definition: Instructions.h:409
bool collectFlatAddressOperands(SmallVectorImpl< int > &OpIndexes, Intrinsic::ID IID) const
Return any intrinsic address operand indexes which may be rewritten if they use a flat address space ...
bool isIndexedStoreLegal(enum MemIndexedMode Mode, Type *Ty) const
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:111
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:70
static bool matchPairwiseShuffleMask(ShuffleVectorInst *SI, bool IsLeft, unsigned Level)
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
LLVMContext & Context
static ReductionKind matchPairwiseReduction(const ExtractElementInst *ReduxRoot, unsigned &Opcode, Type *&Ty)
TargetTransformInfo & operator=(TargetTransformInfo &&RHS)
SI Whole Quad Mode
static Type * makeCmpResultType(Type *opnd_type)
Create a result type for fcmp/icmp.
Definition: InstrTypes.h:975
This class represents lattice values for constants.
Definition: AllocatorList.h:23
int getCallCost(FunctionType *FTy, int NumArgs=-1, const User *U=nullptr) const
Estimate the cost of a function call when lowered.
bool isLegalAddImmediate(int64_t Imm) const
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
bool isFPVectorizationPotentiallyUnsafe() const
Indicate that it is potentially unsafe to automatically vectorize floating-point operations because t...
bool shouldConsiderAddressTypePromotion(const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const
const Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
bool supportsEfficientVectorElementLoadStore() const
If target has efficient vector element load/store instructions, it can return true here so that inser...
static ReductionKind matchPairwiseReductionAtLevel(Instruction *I, unsigned Level, unsigned NumLevels)
bool useColdCCForColdCall(Function &F) const
Return true if the input function which is cold at all call sites, should use coldcc calling conventi...
The main scalar evolution driver.
TargetTransformInfoWrapperPass()
We must provide a default constructor for the pass but it should never be used.
int getGEPCost(Type *PointeeType, const Value *Ptr, ArrayRef< const Value *> Operands) const
Estimate the cost of a GEP operation when lowered.
MemIndexedMode
The type of load/store indexing.
unsigned getNumberOfRegisters(bool Vector) const
ImmutablePass * createTargetTransformInfoWrapperPass(TargetIRAnalysis TIRA)
Create an analysis pass wrapper around a TTI object.
bool isReverse() const
Return true if this shuffle swaps the order of elements from exactly one source vector.
A cache of @llvm.assume calls within a function.
Analysis pass providing the TargetTransformInfo.
Type * getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length, unsigned SrcAlign, unsigned DestAlign) const
This instruction constructs a fixed permutation of two input vectors.
uint64_t getTypeSizeInBits(Type *Ty) const
Return the size in bits of the specified type, for which isSCEVable must return true.
bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
int getCallInstrCost(Function *F, Type *RetTy, ArrayRef< Type *> Tys) const
int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, unsigned Alignment, unsigned AddressSpace, bool UseMaskForCond=false, bool UseMaskForGaps=false) const
F(f)
An instruction for reading from memory.
Definition: Instructions.h:167
MaxMin_match< FCmpInst, LHS, RHS, ufmax_pred_ty > m_UnordFMax(const LHS &L, const RHS &R)
Match an &#39;unordered&#39; floating point maximum function.
bool canMacroFuseCmp() const
Return true if the target can fuse a compare and branch.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.cpp:137
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:229
unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize, unsigned ChainSizeInBytes, VectorType *VecTy) const
bool enableAggressiveInterleaving(bool LoopHasReductions) const
Don&#39;t restrict interleaved unrolling to small loops.
llvm::Optional< unsigned > getCacheAssociativity(CacheLevel Level) const
bool isSingleSource() const
Return true if this shuffle chooses elements from exactly one source vector without changing the leng...
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:343
This file provides helpers for the implementation of a TargetTransformInfo-conforming class...
PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const
Return hardware support for population count.
unsigned getMaxInterleaveFactor(unsigned VF) const
CRTP base class for use as a mix-in that aids implementing a TargetTransformInfo-compatible class...
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:47
bool enableMaskedInterleavedAccessVectorization() const
Enable matching of interleaved access groups that contain predicated accesses or gaps and therefore v...
bool isLegalICmpImmediate(int64_t Imm) const
Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructi...
Definition: BitVector.h:937
int getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy=nullptr) const
Estimate the cost of a specific operation when lowered.
bool hasBranchDivergence() const
Return true if branch divergence exists.
This class represents the LLVM &#39;select&#39; instruction.
const DataLayout & getDataLayout() const
Get the data layout for the module&#39;s target platform.
Definition: Module.cpp:369
unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI, unsigned &JTSize) const
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
Definition: LoopInfo.h:928
bool shouldBuildLookupTablesForConstant(Constant *C) const
Return true if switches should be turned into lookup tables containing this constant value for the ta...
bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace=0, Instruction *I=nullptr) const
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, const Instruction *I=nullptr) const
static AnalysisKey * ID()
Returns an opaque, unique ID for this analysis type.
Definition: PassManager.h:405
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:41
bool shouldBuildLookupTables() const
Return true if switches should be turned into lookup tables for the target.
MaxMin_match< FCmpInst, LHS, RHS, ufmin_pred_ty > m_UnordFMin(const LHS &L, const RHS &R)
Match an &#39;unordered&#39; floating point minimum function.
mir Rename Register Operands
bool isLegalMaskedScatter(Type *DataType) const
Return true if the target supports masked scatter.
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
Definition: Instructions.h:418
Class to represent function types.
Definition: DerivedTypes.h:103
unsigned getMinVectorRegisterBitWidth() const
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:245
unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const
bool isTruncateFree(Type *Ty1, Type *Ty2) const
Return true if it&#39;s free to truncate a value of type Ty1 to type Ty2.
MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const
PopcntSupportKind
Flags indicating the kind of support for population count.
bool isLegalNTLoad(Type *DataType, llvm::Align Alignment) const
Return true if the target supports nontemporal load.
bool hasDivRemOp(Type *DataType, bool IsSigned) const
Return true if the target has a unified operation to calculate division and remainder.
MaxMin_match< FCmpInst, LHS, RHS, ofmin_pred_ty > m_OrdFMin(const LHS &L, const RHS &R)
Match an &#39;ordered&#39; floating point minimum function.
MaxMin_match< ICmpInst, LHS, RHS, smin_pred_ty > m_SMin(const LHS &L, const RHS &R)
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: APInt.h:32
Selects elements from the corresponding lane of either source operand.
int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, ArrayRef< Value *> Args, FastMathFlags FMF, unsigned VF=1) const
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:125
An instruction for storing to memory.
Definition: Instructions.h:320
void initializeTargetTransformInfoWrapperPassPass(PassRegistry &)
bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) const
Return true if the given instruction (assumed to be a memory access instruction) has a volatile varia...
int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index=-1) const
bool isSelect() const
Return true if this shuffle chooses elements from its source vectors without lane crossings and all o...
bool isLegalToVectorizeLoad(LoadInst *LI) const
Reverse the order of the vector.
VectorType * getType() const
Overload to return most specific vector type.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree...
Definition: Dominators.h:144
int getAddressComputationCost(Type *Ty, ScalarEvolution *SE=nullptr, const SCEV *Ptr=nullptr) const
Value * getOperand(unsigned i) const
Definition: User.h:169
Analysis containing CSE Info
Definition: CSEInfo.cpp:20
int getExtCost(const Instruction *I, const Value *Src) const
Estimate the cost of a EXT operation when lowered.
static bool isExtractSubvectorMask(ArrayRef< int > Mask, int NumSrcElts, int &Index)
Return true if this shuffle mask is an extract subvector mask.
bool isTypeLegal(Type *Ty) const
Return true if this type is legal.
bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, unsigned Alignment, unsigned AddrSpace) const
bool rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV, Value *NewV) const
Rewrite intrinsic call II such that OldV will be replaced with NewV, which has a different address sp...
ExtractSubvector Index indicates start offset.
static cl::opt< bool > EnableReduxCost("costmodel-reduxcost", cl::init(false), cl::Hidden, cl::desc("Recognize reduction patterns."))
unsigned getMaxPrefetchIterationsAhead() const
bool isIndexedLoadLegal(enum MemIndexedMode Mode, Type *Ty) const
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:432
This instruction inserts a single (scalar) element into a VectorType value.
int getGatherScatterOpCost(unsigned Opcode, Type *DataTy, Value *Ptr, bool VariableMask, unsigned Alignment) const
Returns options for expansion of memcmp. IsZeroCmp is.
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:148
Wrapper pass for TargetTransformInfo.
MaxMin_match< ICmpInst, LHS, RHS, umax_pred_ty > m_UMax(const LHS &L, const RHS &R)
bool isLegalToVectorizeStore(StoreInst *SI) const
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:428
unsigned getRegisterBitWidth(bool Vector) const
LLVM Basic Block Representation.
Definition: BasicBlock.h:57
Flags describing the kind of vector reduction.
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:45
bool isAlwaysUniform(const Value *V) const
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:64
Conditional or Unconditional Branch instruction.
unsigned getNumberOfParts(Type *Tp) const
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This is an important base class in LLVM.
Definition: Constant.h:41
bool isIdentity() const
Return true if this shuffle chooses elements from exactly one source vector without lane crossings an...
static cl::opt< bool > ForceNestedLoop("force-nested-hardware-loop", cl::Hidden, cl::init(false), cl::desc("Force allowance of nested hardware loops"))
AMDGPU Lower Kernel Arguments
Interval::pred_iterator pred_begin(Interval *I)
pred_begin/pred_end - define methods so that Intervals may be used just like BasicBlocks can with the...
Definition: Interval.h:112
int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace=0) const
Return the cost of the scaling factor used in the addressing mode represented by AM for this target...
TargetIRAnalysis()
Default construct a target IR analysis.
int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy=nullptr, const Instruction *I=nullptr) const
Fast - This calling convention attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:42
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
Interval::pred_iterator pred_end(Interval *I)
Definition: Interval.h:115
bool isLegalMaskedStore(Type *DataType) const
Return true if the target supports masked load.
Merge elements from two source vectors into one with any shuffle mask.
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
Definition: PatternMatch.h:73
bool isLegalNTStore(Type *DataType, llvm::Align Alignment) const
Return true if the target supports nontemporal store.
static cl::opt< bool > ForceHardwareLoopPHI("force-hardware-loop-phi", cl::Hidden, cl::init(false), cl::desc("Force hardware loop counter to be updated through a phi"))
Attributes of a target dependent hardware loop.
const Value * getCondition() const
bool haveFastSqrt(Type *Ty) const
Return true if the hardware has a fast square-root instruction.
bool areFunctionArgsABICompatible(const Function *Caller, const Function *Callee, SmallPtrSetImpl< Argument *> &Args) const
static std::pair< Value *, ShuffleVectorInst * > getShuffleAndOtherOprd(Value *L, Value *R)
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:40
bool isLegalMaskedCompressStore(Type *DataType) const
Return true if the target supports masked compress store.
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:33
OperandValueProperties
Additional properties of an operand&#39;s values.
bool isProfitableToHoist(Instruction *I) const
Return true if it is profitable to hoist instruction in the then/else to before if.
ReductionKind
Kind of the reduction data.
int getFPOpCost(Type *Ty) const
Return the expected cost of supporting the floating point operation of the specified type...
This is the shared class of boolean and integer constants.
Definition: Constants.h:83
ImmutablePass class - This class is used to provide information that does not need to be run...
Definition: Pass.h:255
Type * getType() const
Return the LLVM type of this SCEV expression.
bool isLSRCostLess(TargetTransformInfo::LSRCost &C1, TargetTransformInfo::LSRCost &C2) const
Return true if LSR cost of C1 is lower than C1.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
int getArithmeticReductionCost(unsigned Opcode, Type *Ty, bool IsPairwiseForm) const
Calculate the cost of performing a vector reduction.
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:837
bool dominates(const Instruction *Def, const Use &U) const
Return true if Def dominates a use in User.
Definition: Dominators.cpp:248
Module.h This file contains the declarations for the Module class.
unsigned getCostOfKeepingLiveOverCall(ArrayRef< Type *> Tys) const
static void getShuffleMask(const Constant *Mask, SmallVectorImpl< int > &Result)
Convert the input shuffle mask operand to a vector of integers.
int getIntImmCost(const APInt &Imm, Type *Ty) const
Return the expected cost of materializing for the given integer immediate of the specified type...
Provides information about what library functions are available for the current target.
bool isLegalMaskedGather(Type *DataType) const
Return true if the target supports masked gather.
AddressSpace
Definition: NVPTXBaseInfo.h:21
int getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, unsigned AddressSpace) const
bool changesLength() const
Return true if this shuffle returns a vector with a different number of elements than its source vect...
bool isSourceOfDivergence(const Value *V) const
Returns whether V is a source of divergence.
bool LSRWithInstrQueries() const
Return true if the loop strength reduce pass should make Instruction* based TTI queries to isLegalAdd...
static ReductionKind matchVectorSplittingReduction(const ExtractElementInst *ReduxRoot, unsigned &Opcode, Type *&Ty)
bool canAnalyze(LoopInfo &LI)
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:538
unsigned getVectorNumElements() const
Definition: DerivedTypes.h:535
bool isZeroEltSplat() const
Return true if all elements of this shuffle are the same value as the first element of exactly one so...
Class to represent vector types.
Definition: DerivedTypes.h:427
Class for arbitrary precision integers.
Definition: APInt.h:69
int getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy, unsigned Index=-1) const
TargetTransformInfo(T Impl)
Construct a TTI object using a type implementing the Concept API below.
bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE, AssumptionCache &AC, TargetLibraryInfo *LibInfo, HardwareLoopInfo &HWLoopInfo) const
Query the target whether it would be profitable to convert the given loop into a hardware loop...
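A hedged sketch of how a pass might combine this query with HardwareLoopInfo::isHardwareLoopCandidate (listed further down in this index); L, SE, AC, LibInfo, LI, DT and TTI are assumed to come from the caller's analyses:

HardwareLoopInfo HWLoopInfo(L);
if (TTI.isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo) &&
    HWLoopInfo.isHardwareLoopCandidate(SE, LI, DT))
  ; // the target considers L a legal and profitable hardware loop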
Result run(const Function &F, FunctionAnalysisManager &)
bool shouldExpandReduction(const IntrinsicInst *II) const
int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm, Type *Ty) const
Return the expected cost for the given integer when optimising for size.
int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, ArrayRef< Type *> ParamTys, const User *U=nullptr) const
Estimate the cost of an intrinsic when lowered.
bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth, unsigned AddressSpace=0, unsigned Alignment=1, bool *Fast=nullptr) const
Determine if the target supports unaligned memory accesses.
bool isTranspose() const
Return true if this shuffle transposes the elements of its inputs without changing the length of the ...
int getMemcpyCost(const Instruction *I) const
bool isLegalMaskedLoad(Type *DataType) const
Return true if the target supports masked load.
unsigned getGISelRematGlobalCost() const
unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize, unsigned ChainSizeInBytes, VectorType *VecTy) const
unsigned getAtomicMemIntrinsicMaxElementSize() const
unsigned getAlignment() const
Return the alignment of the access that is being performed.
Definition: Instructions.h:240
This class represents an analyzed expression in the program.
int getUserCost(const User *U, ArrayRef< const Value *> Operands) const
Estimate the cost of a given IR user when lowered.
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:509
Parameters that control the generic loop unrolling transformation.
Value * getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, Type *ExpectedType) const
bool areInlineCompatible(const Function *Caller, const Function *Callee) const
#define I(x, y, z)
Definition: MD5.cpp:58
MaxMin_match< ICmpInst, LHS, RHS, smax_pred_ty > m_SMax(const LHS &L, const RHS &R)
iterator_range< value_op_iterator > operand_values()
Definition: User.h:261
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:332
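The usual idiom, shown as a small sketch (I is an assumed Instruction reference):

if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
  // II is non-null only when I is a call to an intrinsic function;
  // unlike cast<>, dyn_cast<> returns null instead of asserting on failure.
}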
This instruction extracts a single (scalar) element from a VectorType value.
int getShuffleCost(ShuffleKind Kind, Type *Tp, int Index=0, Type *SubTp=nullptr) const
int getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind Opd1Info=OK_AnyValue, OperandValueKind Opd2Info=OK_AnyValue, OperandValueProperties Opd1PropInfo=OP_None, OperandValueProperties Opd2PropInfo=OP_None, ArrayRef< const Value *> Args=ArrayRef< const Value *>()) const
This is an approximation of reciprocal throughput of a math/logic op.
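A minimal, hedged example of querying this hook from a pass that already holds a TargetTransformInfo reference (TTI) and a vector type (VecTy); both names are assumptions, and the remaining arguments keep their defaults:

int MulCost = TTI.getArithmeticInstrCost(Instruction::Mul, VecTy);
int DivCost = TTI.getArithmeticInstrCost(
    Instruction::UDiv, VecTy, TargetTransformInfo::OK_AnyValue,
    TargetTransformInfo::OK_UniformConstantValue); // divisor is a uniform constant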
unsigned getAlignment() const
Return the alignment of the access that is being performed.
Definition: Instructions.h:365
Wrapper class to LoopBlocksDFS that provides a standard begin()/end() interface for the DFS reverse p...
Definition: LoopIterator.h:172
bool isLegalMaskedExpandLoad(Type *DataType) const
Return true if the target supports masked expand load.
bool shouldMaximizeVectorBandwidth(bool OptSize) const
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
Definition: Instructions.h:290
unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const
Multiway switch.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
TargetTransformInfo Result
MaxMin_match< FCmpInst, LHS, RHS, ofmax_pred_ty > m_OrdFMax(const LHS &L, const RHS &R)
Match an 'ordered' floating point maximum function.
bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:575
LLVM Value Representation.
Definition: Value.h:73
unsigned getInliningThresholdMultiplier() const
bool useReductionIntrinsic(unsigned Opcode, Type *Ty, ReductionFlags Flags) const
bool isLoweredToCall(const Function *F) const
Test whether calls to a function lower to actual program function calls.
bool enableInterleavedAccessVectorization() const
Enable matching of interleaved access groups.
std::underlying_type< E >::type Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:80
Broadcast element 0 to all other elements.
static Optional< ReductionData > getReductionData(Instruction *I)
void getMemcpyLoopResidualLoweringType(SmallVectorImpl< Type *> &OpsOut, LLVMContext &Context, unsigned RemainingBytes, unsigned SrcAlign, unsigned DestAlign) const
int getCFInstrCost(unsigned Opcode) const
bool shouldFavorBackedgeIndex(const Loop *L) const
Return true if LSR should make efforts to generate indexed addressing modes that operate across loop ...
bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC, TargetLibraryInfo *LibInfo) const
Return true if the target can save a compare for loop count, for example hardware loop saves a compar...
bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) const
Return true if it is faster to check if a floating-point value is NaN (or not-NaN) versus a compariso...
bool prefersVectorizedAddressing() const
Return true if the target doesn't mind addresses in vectors.
unsigned getOperandsScalarizationOverhead(ArrayRef< const Value *> Args, unsigned VF) const
void getUnrollingPreferences(Loop *L, ScalarEvolution &, UnrollingPreferences &UP) const
Get target-customized preferences for the generic loop unrolling transformation.
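A hedged sketch of the caller side, assuming L, SE and TTI are obtained from the pass's analyses:

TargetTransformInfo::UnrollingPreferences UP;
TTI.getUnrollingPreferences(L, SE, UP);
// The target has now adjusted fields such as UP.Threshold or UP.Partial;
// callers typically seed UP with their own defaults before this call.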
Convenience struct for specifying and reasoning about fast-math flags.
Definition: Operator.h:159
OperandValueKind
Additional information about an operand&#39;s possible values.
A container for analyses that lazily runs them and caches their results.
unsigned getMinimumVF(unsigned ElemWidth) const
bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, unsigned Alignment, unsigned AddrSpace) const
const SCEV * getExitCount(const Loop *L, BasicBlock *ExitingBlock)
Return the number of times the backedge executes before the given exit would be taken; if not exactly...
This pass exposes codegen information to IR-level passes.
TargetTransformInfo & getTTI(const Function &F)
CacheLevel
The possible cache levels.
void perform(LoopInfo *LI)
Traverse the loop blocks and store the DFS result.
Definition: LoopIterator.h:180
bool isHardwareLoopCandidate(ScalarEvolution &SE, LoopInfo &LI, DominatorTree &DT, bool ForceNestedLoop=false, bool ForceHardwareLoopPHI=false)
int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, unsigned AddressSpace, const Instruction *I=nullptr) const
VectorType * getType() const
Overload to return most specific vector type.
unsigned getFlatAddressSpace() const
Returns the address space ID for a target&#39;s &#39;flat&#39; address space.
Information about a load/store intrinsic defined by the target.
static OperandValueKind getOperandInfo(Value *V, OperandValueProperties &OpProps)
Collect properties of V used in cost analysis, e.g. OP_PowerOf2.
A special type used by analysis passes to provide an address that identifies that particular analysis...
Definition: PassManager.h:70
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
int getMinMaxReductionCost(Type *Ty, Type *CondTy, bool IsPairwiseForm, bool IsUnsigned) const
llvm::Optional< unsigned > getCacheSize(CacheLevel Level) const
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:43
This class represents a constant integer value.
ShuffleKind
The various kinds of shuffle patterns for vector queries.
Shuffle elements of a single source vector with any shuffle mask.