//===- llvm/Analysis/TargetTransformInfo.cpp ------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/TargetTransformInfoImpl.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include <utility>

using namespace llvm;
using namespace PatternMatch;

#define DEBUG_TYPE "tti"

static cl::opt<bool> UseWideMemcpyLoopLowering(
    "use-wide-memcpy-loop-lowering", cl::init(false),
    cl::desc("Enables the new wide memcpy loop lowering in Transforms/Utils."),
    cl::Hidden);

static cl::opt<bool> EnableReduxCost("costmodel-reduxcost", cl::init(false),
                                     cl::Hidden,
                                     cl::desc("Recognize reduction patterns."));

namespace {
/// \brief No-op implementation of the TTI interface using the utility base
/// classes.
///
/// This is used when no target specific information is available.
struct NoTTIImpl : TargetTransformInfoImplCRTPBase<NoTTIImpl> {
  explicit NoTTIImpl(const DataLayout &DL)
      : TargetTransformInfoImplCRTPBase<NoTTIImpl>(DL) {}
};
}

TargetTransformInfo::TargetTransformInfo(const DataLayout &DL)
    : TTIImpl(new Model<NoTTIImpl>(NoTTIImpl(DL))) {}

TargetTransformInfo::~TargetTransformInfo() {}

TargetTransformInfo::TargetTransformInfo(TargetTransformInfo &&Arg)
    : TTIImpl(std::move(Arg.TTIImpl)) {}

TargetTransformInfo &TargetTransformInfo::operator=(TargetTransformInfo &&RHS) {
  TTIImpl = std::move(RHS.TTIImpl);
  return *this;
}

int TargetTransformInfo::getOperationCost(unsigned Opcode, Type *Ty,
                                          Type *OpTy) const {
  int Cost = TTIImpl->getOperationCost(Opcode, Ty, OpTy);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getCallCost(FunctionType *FTy, int NumArgs) const {
  int Cost = TTIImpl->getCallCost(FTy, NumArgs);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getCallCost(const Function *F,
                                     ArrayRef<const Value *> Arguments) const {
  int Cost = TTIImpl->getCallCost(F, Arguments);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

unsigned TargetTransformInfo::getInliningThresholdMultiplier() const {
  return TTIImpl->getInliningThresholdMultiplier();
}

int TargetTransformInfo::getGEPCost(Type *PointeeType, const Value *Ptr,
                                    ArrayRef<const Value *> Operands) const {
  return TTIImpl->getGEPCost(PointeeType, Ptr, Operands);
}

int TargetTransformInfo::getExtCost(const Instruction *I,
                                    const Value *Src) const {
  return TTIImpl->getExtCost(I, Src);
}

int TargetTransformInfo::getIntrinsicCost(
    Intrinsic::ID IID, Type *RetTy, ArrayRef<const Value *> Arguments) const {
  int Cost = TTIImpl->getIntrinsicCost(IID, RetTy, Arguments);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

unsigned
TargetTransformInfo::getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
                                                      unsigned &JTSize) const {
  return TTIImpl->getEstimatedNumberOfCaseClusters(SI, JTSize);
}

int TargetTransformInfo::getUserCost(const User *U,
                                     ArrayRef<const Value *> Operands) const {
  int Cost = TTIImpl->getUserCost(U, Operands);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

bool TargetTransformInfo::hasBranchDivergence() const {
  return TTIImpl->hasBranchDivergence();
}

bool TargetTransformInfo::isSourceOfDivergence(const Value *V) const {
  return TTIImpl->isSourceOfDivergence(V);
}

bool TargetTransformInfo::isAlwaysUniform(const Value *V) const {
  return TTIImpl->isAlwaysUniform(V);
}

unsigned TargetTransformInfo::getFlatAddressSpace() const {
  return TTIImpl->getFlatAddressSpace();
}

bool TargetTransformInfo::isLoweredToCall(const Function *F) const {
  return TTIImpl->isLoweredToCall(F);
}

void TargetTransformInfo::getUnrollingPreferences(
    Loop *L, ScalarEvolution &SE, UnrollingPreferences &UP) const {
  return TTIImpl->getUnrollingPreferences(L, SE, UP);
}

bool TargetTransformInfo::isLegalAddImmediate(int64_t Imm) const {
  return TTIImpl->isLegalAddImmediate(Imm);
}

bool TargetTransformInfo::isLegalICmpImmediate(int64_t Imm) const {
  return TTIImpl->isLegalICmpImmediate(Imm);
}

bool TargetTransformInfo::isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
                                                int64_t BaseOffset,
                                                bool HasBaseReg,
                                                int64_t Scale,
                                                unsigned AddrSpace,
                                                Instruction *I) const {
  return TTIImpl->isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg,
                                        Scale, AddrSpace, I);
}

bool TargetTransformInfo::isLSRCostLess(LSRCost &C1, LSRCost &C2) const {
  return TTIImpl->isLSRCostLess(C1, C2);
}

bool TargetTransformInfo::isLegalMaskedStore(Type *DataType) const {
  return TTIImpl->isLegalMaskedStore(DataType);
}

bool TargetTransformInfo::isLegalMaskedLoad(Type *DataType) const {
  return TTIImpl->isLegalMaskedLoad(DataType);
}

bool TargetTransformInfo::isLegalMaskedGather(Type *DataType) const {
  return TTIImpl->isLegalMaskedGather(DataType);
}

bool TargetTransformInfo::isLegalMaskedScatter(Type *DataType) const {
  return TTIImpl->isLegalMaskedScatter(DataType);
}

bool TargetTransformInfo::hasDivRemOp(Type *DataType, bool IsSigned) const {
  return TTIImpl->hasDivRemOp(DataType, IsSigned);
}

bool TargetTransformInfo::prefersVectorizedAddressing() const {
  return TTIImpl->prefersVectorizedAddressing();
}

int TargetTransformInfo::getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
                                              int64_t BaseOffset,
                                              bool HasBaseReg,
                                              int64_t Scale,
                                              unsigned AddrSpace) const {
  int Cost = TTIImpl->getScalingFactorCost(Ty, BaseGV, BaseOffset, HasBaseReg,
                                           Scale, AddrSpace);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

bool TargetTransformInfo::LSRWithInstrQueries() const {
  return TTIImpl->LSRWithInstrQueries();
}

bool TargetTransformInfo::isTruncateFree(Type *Ty1, Type *Ty2) const {
  return TTIImpl->isTruncateFree(Ty1, Ty2);
}

bool TargetTransformInfo::isProfitableToHoist(Instruction *I) const {
  return TTIImpl->isProfitableToHoist(I);
}

bool TargetTransformInfo::isTypeLegal(Type *Ty) const {
  return TTIImpl->isTypeLegal(Ty);
}

unsigned TargetTransformInfo::getJumpBufAlignment() const {
  return TTIImpl->getJumpBufAlignment();
}

unsigned TargetTransformInfo::getJumpBufSize() const {
  return TTIImpl->getJumpBufSize();
}

bool TargetTransformInfo::shouldBuildLookupTables() const {
  return TTIImpl->shouldBuildLookupTables();
}

bool TargetTransformInfo::shouldBuildLookupTablesForConstant(Constant *C) const {
  return TTIImpl->shouldBuildLookupTablesForConstant(C);
}

unsigned TargetTransformInfo::
getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const {
  return TTIImpl->getScalarizationOverhead(Ty, Insert, Extract);
}

unsigned TargetTransformInfo::
getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
                                 unsigned VF) const {
  return TTIImpl->getOperandsScalarizationOverhead(Args, VF);
}

bool TargetTransformInfo::supportsEfficientVectorElementLoadStore() const {
  return TTIImpl->supportsEfficientVectorElementLoadStore();
}

bool TargetTransformInfo::enableAggressiveInterleaving(bool LoopHasReductions) const {
  return TTIImpl->enableAggressiveInterleaving(LoopHasReductions);
}

bool TargetTransformInfo::enableMemCmpExpansion(unsigned &MaxLoadSize) const {
  return TTIImpl->enableMemCmpExpansion(MaxLoadSize);
}

bool TargetTransformInfo::enableInterleavedAccessVectorization() const {
  return TTIImpl->enableInterleavedAccessVectorization();
}

bool TargetTransformInfo::isFPVectorizationPotentiallyUnsafe() const {
  return TTIImpl->isFPVectorizationPotentiallyUnsafe();
}

bool TargetTransformInfo::allowsMisalignedMemoryAccesses(LLVMContext &Context,
                                                         unsigned BitWidth,
                                                         unsigned AddressSpace,
                                                         unsigned Alignment,
                                                         bool *Fast) const {
  return TTIImpl->allowsMisalignedMemoryAccesses(Context, BitWidth, AddressSpace,
                                                 Alignment, Fast);
}

TargetTransformInfo::PopcntSupportKind
TargetTransformInfo::getPopcntSupport(unsigned IntTyWidthInBit) const {
  return TTIImpl->getPopcntSupport(IntTyWidthInBit);
}

bool TargetTransformInfo::haveFastSqrt(Type *Ty) const {
  return TTIImpl->haveFastSqrt(Ty);
}

int TargetTransformInfo::getFPOpCost(Type *Ty) const {
  int Cost = TTIImpl->getFPOpCost(Ty);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx,
                                               const APInt &Imm,
                                               Type *Ty) const {
  int Cost = TTIImpl->getIntImmCodeSizeCost(Opcode, Idx, Imm, Ty);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getIntImmCost(const APInt &Imm, Type *Ty) const {
  int Cost = TTIImpl->getIntImmCost(Imm, Ty);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getIntImmCost(unsigned Opcode, unsigned Idx,
                                       const APInt &Imm, Type *Ty) const {
  int Cost = TTIImpl->getIntImmCost(Opcode, Idx, Imm, Ty);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
                                       const APInt &Imm, Type *Ty) const {
  int Cost = TTIImpl->getIntImmCost(IID, Idx, Imm, Ty);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

unsigned TargetTransformInfo::getNumberOfRegisters(bool Vector) const {
  return TTIImpl->getNumberOfRegisters(Vector);
}

unsigned TargetTransformInfo::getRegisterBitWidth(bool Vector) const {
  return TTIImpl->getRegisterBitWidth(Vector);
}

unsigned TargetTransformInfo::getMinVectorRegisterBitWidth() const {
  return TTIImpl->getMinVectorRegisterBitWidth();
}

bool TargetTransformInfo::shouldConsiderAddressTypePromotion(
    const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const {
  return TTIImpl->shouldConsiderAddressTypePromotion(
      I, AllowPromotionWithoutCommonHeader);
}

unsigned TargetTransformInfo::getCacheLineSize() const {
  return TTIImpl->getCacheLineSize();
}

llvm::Optional<unsigned> TargetTransformInfo::getCacheSize(CacheLevel Level)
  const {
  return TTIImpl->getCacheSize(Level);
}

llvm::Optional<unsigned> TargetTransformInfo::getCacheAssociativity(
    CacheLevel Level) const {
  return TTIImpl->getCacheAssociativity(Level);
}

unsigned TargetTransformInfo::getPrefetchDistance() const {
  return TTIImpl->getPrefetchDistance();
}

unsigned TargetTransformInfo::getMinPrefetchStride() const {
  return TTIImpl->getMinPrefetchStride();
}

unsigned TargetTransformInfo::getMaxPrefetchIterationsAhead() const {
  return TTIImpl->getMaxPrefetchIterationsAhead();
}

unsigned TargetTransformInfo::getMaxInterleaveFactor(unsigned VF) const {
  return TTIImpl->getMaxInterleaveFactor(VF);
}

int TargetTransformInfo::getArithmeticInstrCost(
    unsigned Opcode, Type *Ty, OperandValueKind Opd1Info,
    OperandValueKind Opd2Info, OperandValueProperties Opd1PropInfo,
    OperandValueProperties Opd2PropInfo,
    ArrayRef<const Value *> Args) const {
  int Cost = TTIImpl->getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
                                             Opd1PropInfo, Opd2PropInfo, Args);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getShuffleCost(ShuffleKind Kind, Type *Ty, int Index,
                                        Type *SubTp) const {
  int Cost = TTIImpl->getShuffleCost(Kind, Ty, Index, SubTp);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getCastInstrCost(unsigned Opcode, Type *Dst,
                                          Type *Src, const Instruction *I) const {
  assert((I == nullptr || I->getOpcode() == Opcode) &&
         "Opcode should reflect passed instruction.");
  int Cost = TTIImpl->getCastInstrCost(Opcode, Dst, Src, I);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getExtractWithExtendCost(unsigned Opcode, Type *Dst,
                                                  VectorType *VecTy,
                                                  unsigned Index) const {
  int Cost = TTIImpl->getExtractWithExtendCost(Opcode, Dst, VecTy, Index);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getCFInstrCost(unsigned Opcode) const {
  int Cost = TTIImpl->getCFInstrCost(Opcode);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
                                            Type *CondTy, const Instruction *I) const {
  assert((I == nullptr || I->getOpcode() == Opcode) &&
         "Opcode should reflect passed instruction.");
  int Cost = TTIImpl->getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getVectorInstrCost(unsigned Opcode, Type *Val,
                                            unsigned Index) const {
  int Cost = TTIImpl->getVectorInstrCost(Opcode, Val, Index);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getMemoryOpCost(unsigned Opcode, Type *Src,
                                         unsigned Alignment,
                                         unsigned AddressSpace,
                                         const Instruction *I) const {
  assert((I == nullptr || I->getOpcode() == Opcode) &&
         "Opcode should reflect passed instruction.");
  int Cost = TTIImpl->getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, I);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
                                               unsigned Alignment,
                                               unsigned AddressSpace) const {
  int Cost =
      TTIImpl->getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
                                                Value *Ptr, bool VariableMask,
                                                unsigned Alignment) const {
  int Cost = TTIImpl->getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
                                             Alignment);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getInterleavedMemoryOpCost(
    unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
    unsigned Alignment, unsigned AddressSpace) const {
  int Cost = TTIImpl->getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
                                                 Alignment, AddressSpace);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getIntrinsicInstrCost(
    Intrinsic::ID ID, Type *RetTy, ArrayRef<Type *> Tys, FastMathFlags FMF,
    unsigned ScalarizationCostPassed) const {
  int Cost = TTIImpl->getIntrinsicInstrCost(ID, RetTy, Tys, FMF,
                                            ScalarizationCostPassed);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
    ArrayRef<Value *> Args, FastMathFlags FMF, unsigned VF) const {
  int Cost = TTIImpl->getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getCallInstrCost(Function *F, Type *RetTy,
                                          ArrayRef<Type *> Tys) const {
  int Cost = TTIImpl->getCallInstrCost(F, RetTy, Tys);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

unsigned TargetTransformInfo::getNumberOfParts(Type *Tp) const {
  return TTIImpl->getNumberOfParts(Tp);
}

int TargetTransformInfo::getAddressComputationCost(Type *Tp,
                                                   ScalarEvolution *SE,
                                                   const SCEV *Ptr) const {
  int Cost = TTIImpl->getAddressComputationCost(Tp, SE, Ptr);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getArithmeticReductionCost(unsigned Opcode, Type *Ty,
                                                    bool IsPairwiseForm) const {
  int Cost = TTIImpl->getArithmeticReductionCost(Opcode, Ty, IsPairwiseForm);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getMinMaxReductionCost(Type *Ty, Type *CondTy,
                                                bool IsPairwiseForm,
                                                bool IsUnsigned) const {
  int Cost =
      TTIImpl->getMinMaxReductionCost(Ty, CondTy, IsPairwiseForm, IsUnsigned);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

unsigned
TargetTransformInfo::getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) const {
  return TTIImpl->getCostOfKeepingLiveOverCall(Tys);
}

bool TargetTransformInfo::getTgtMemIntrinsic(IntrinsicInst *Inst,
                                             MemIntrinsicInfo &Info) const {
  return TTIImpl->getTgtMemIntrinsic(Inst, Info);
}

unsigned TargetTransformInfo::getAtomicMemIntrinsicMaxElementSize() const {
  return TTIImpl->getAtomicMemIntrinsicMaxElementSize();
}

Value *TargetTransformInfo::getOrCreateResultFromMemIntrinsic(
    IntrinsicInst *Inst, Type *ExpectedType) const {
  return TTIImpl->getOrCreateResultFromMemIntrinsic(Inst, ExpectedType);
}

Type *TargetTransformInfo::getMemcpyLoopLoweringType(LLVMContext &Context,
                                                     Value *Length,
                                                     unsigned SrcAlign,
                                                     unsigned DestAlign) const {
  return TTIImpl->getMemcpyLoopLoweringType(Context, Length, SrcAlign,
                                            DestAlign);
}

void TargetTransformInfo::getMemcpyLoopResidualLoweringType(
    SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
    unsigned RemainingBytes, unsigned SrcAlign, unsigned DestAlign) const {
  TTIImpl->getMemcpyLoopResidualLoweringType(OpsOut, Context, RemainingBytes,
                                             SrcAlign, DestAlign);
}

bool TargetTransformInfo::useWideIRMemcpyLoopLowering() const {
  return UseWideMemcpyLoopLowering;
}

bool TargetTransformInfo::areInlineCompatible(const Function *Caller,
                                              const Function *Callee) const {
  return TTIImpl->areInlineCompatible(Caller, Callee);
}

unsigned TargetTransformInfo::getLoadStoreVecRegBitWidth(unsigned AS) const {
  return TTIImpl->getLoadStoreVecRegBitWidth(AS);
}

bool TargetTransformInfo::isLegalToVectorizeLoad(LoadInst *LI) const {
  return TTIImpl->isLegalToVectorizeLoad(LI);
}

bool TargetTransformInfo::isLegalToVectorizeStore(StoreInst *SI) const {
  return TTIImpl->isLegalToVectorizeStore(SI);
}

bool TargetTransformInfo::isLegalToVectorizeLoadChain(
    unsigned ChainSizeInBytes, unsigned Alignment, unsigned AddrSpace) const {
  return TTIImpl->isLegalToVectorizeLoadChain(ChainSizeInBytes, Alignment,
                                              AddrSpace);
}

bool TargetTransformInfo::isLegalToVectorizeStoreChain(
    unsigned ChainSizeInBytes, unsigned Alignment, unsigned AddrSpace) const {
  return TTIImpl->isLegalToVectorizeStoreChain(ChainSizeInBytes, Alignment,
                                               AddrSpace);
}

unsigned TargetTransformInfo::getLoadVectorFactor(unsigned VF,
                                                  unsigned LoadSize,
                                                  unsigned ChainSizeInBytes,
                                                  VectorType *VecTy) const {
  return TTIImpl->getLoadVectorFactor(VF, LoadSize, ChainSizeInBytes, VecTy);
}

unsigned TargetTransformInfo::getStoreVectorFactor(unsigned VF,
                                                   unsigned StoreSize,
                                                   unsigned ChainSizeInBytes,
                                                   VectorType *VecTy) const {
  return TTIImpl->getStoreVectorFactor(VF, StoreSize, ChainSizeInBytes, VecTy);
}

bool TargetTransformInfo::useReductionIntrinsic(unsigned Opcode,
                                                Type *Ty, ReductionFlags Flags) const {
  return TTIImpl->useReductionIntrinsic(Opcode, Ty, Flags);
}

bool TargetTransformInfo::shouldExpandReduction(const IntrinsicInst *II) const {
  return TTIImpl->shouldExpandReduction(II);
}

int TargetTransformInfo::getInstructionLatency(const Instruction *I) const {
  return TTIImpl->getInstructionLatency(I);
}

static bool isReverseVectorMask(ArrayRef<int> Mask) {
  for (unsigned i = 0, MaskSize = Mask.size(); i < MaskSize; ++i)
    if (Mask[i] >= 0 && Mask[i] != (int)(MaskSize - 1 - i))
      return false;
  return true;
}
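
// Worked example (illustrative, derived from the check above): for a 4-wide
// shuffle, <3, 2, 1, 0> is a reverse mask, and so is <3, -1, 1, 0> since
// undef (-1) lanes are skipped; <3, 2, 0, 1> is not.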

static bool isSingleSourceVectorMask(ArrayRef<int> Mask) {
  bool Vec0 = false;
  bool Vec1 = false;
  for (unsigned i = 0, NumVecElts = Mask.size(); i < NumVecElts; ++i) {
    if (Mask[i] >= 0) {
      if ((unsigned)Mask[i] >= NumVecElts)
        Vec1 = true;
      else
        Vec0 = true;
    }
  }
  return !(Vec0 && Vec1);
}
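
// Worked example (illustrative): with two 4-wide inputs, <0, 2, -1, 1> reads
// only the first vector and <5, 4, 7, 6> only the second, so both are
// single-source; <0, 5, 2, 7> mixes both sources and is not.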

static bool isZeroEltBroadcastVectorMask(ArrayRef<int> Mask) {
  for (unsigned i = 0; i < Mask.size(); ++i)
    if (Mask[i] > 0)
      return false;
  return true;
}
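
// Worked example (illustrative): <0, 0, 0, 0> and <0, -1, 0, 0> broadcast
// element 0 of the first source; <0, 1, 0, 0> does not.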

static bool isAlternateVectorMask(ArrayRef<int> Mask) {
  bool isAlternate = true;
  unsigned MaskSize = Mask.size();

  // Example: shufflevector A, B, <0,5,2,7>
  for (unsigned i = 0; i < MaskSize && isAlternate; ++i) {
    if (Mask[i] < 0)
      continue;
    isAlternate = Mask[i] == (int)((i & 1) ? MaskSize + i : i);
  }

  if (isAlternate)
    return true;

  isAlternate = true;
  // Example: shufflevector A, B, <4,1,6,3>
  for (unsigned i = 0; i < MaskSize && isAlternate; ++i) {
    if (Mask[i] < 0)
      continue;
    isAlternate = Mask[i] == (int)((i & 1) ? i : MaskSize + i);
  }

  return isAlternate;
}
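
// Worked example (illustrative): undef (-1) lanes act as wildcards here, so
// <0, -1, 2, 7> still matches the first (even-from-A, odd-from-B) pattern
// above.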

static TargetTransformInfo::OperandValueKind getOperandInfo(Value *V) {
  TargetTransformInfo::OperandValueKind OpInfo =
      TargetTransformInfo::OK_AnyValue;

  // Check for a splat of a constant or for a non-uniform vector of constants.
  if (isa<ConstantVector>(V) || isa<ConstantDataVector>(V)) {
    OpInfo = TargetTransformInfo::OK_NonUniformConstantValue;
    if (cast<Constant>(V)->getSplatValue() != nullptr)
      OpInfo = TargetTransformInfo::OK_UniformConstantValue;
  }

  // Check for a splat of a uniform value. This is not loop aware, so classify
  // only the obviously uniform cases (argument, global value).
  const Value *Splat = getSplatValue(V);
  if (Splat && (isa<Argument>(Splat) || isa<GlobalValue>(Splat)))
    OpInfo = TargetTransformInfo::OK_UniformValue;

  return OpInfo;
}
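
// Worked examples (illustrative):
//   <4 x i32> <i32 2, i32 2, i32 2, i32 2> --> OK_UniformConstantValue
//   <4 x i32> <i32 0, i32 1, i32 2, i32 3> --> OK_NonUniformConstantValue
//   a shufflevector splat of a function argument --> OK_UniformValue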

static bool matchPairwiseShuffleMask(ShuffleVectorInst *SI, bool IsLeft,
                                     unsigned Level) {
  // We don't need a shuffle if we just want to have element 0 in position 0 of
  // the vector.
  if (!SI && Level == 0 && IsLeft)
    return true;
  else if (!SI)
    return false;

  SmallVector<int, 32> Mask(SI->getType()->getVectorNumElements(), -1);

  // Build a mask of 0, 2, ... (left) or 1, 3, ... (right) depending on whether
  // we look at the left or right side.
  for (unsigned i = 0, e = (1 << Level), val = !IsLeft; i != e; ++i, val += 2)
    Mask[i] = val;

  SmallVector<int, 16> ActualMask = SI->getShuffleMask();
  return Mask == ActualMask;
}
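
// Worked example (illustrative): at Level 1 on an 8-wide shuffle, the
// expected left mask is <0, 2, -1, -1, -1, -1, -1, -1> and the right mask is
// <1, 3, -1, -1, -1, -1, -1, -1>; lanes past the first (1 << Level) elements
// must be undef for the comparison against ActualMask to succeed.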

namespace {
/// Kind of the reduction data.
enum ReductionKind {
  RK_None,           /// Not a reduction.
  RK_Arithmetic,     /// Binary reduction data.
  RK_MinMax,         /// Min/max reduction data.
  RK_UnsignedMinMax, /// Unsigned min/max reduction data.
};
/// Contains opcode + LHS/RHS parts of the reduction operations.
struct ReductionData {
  ReductionData() = delete;
  ReductionData(ReductionKind Kind, unsigned Opcode, Value *LHS, Value *RHS)
      : Opcode(Opcode), LHS(LHS), RHS(RHS), Kind(Kind) {
    assert(Kind != RK_None && "expected binary or min/max reduction only.");
  }
  unsigned Opcode = 0;
  Value *LHS = nullptr;
  Value *RHS = nullptr;
  ReductionKind Kind = RK_None;
  bool hasSameData(ReductionData &RD) const {
    return Kind == RD.Kind && Opcode == RD.Opcode;
  }
};
} // namespace

static Optional<ReductionData> getReductionData(Instruction *I) {
  Value *L, *R;
  if (m_BinOp(m_Value(L), m_Value(R)).match(I))
    return ReductionData(RK_Arithmetic, I->getOpcode(), L, R);
  if (auto *SI = dyn_cast<SelectInst>(I)) {
    if (m_SMin(m_Value(L), m_Value(R)).match(SI) ||
        m_SMax(m_Value(L), m_Value(R)).match(SI) ||
        m_OrdFMin(m_Value(L), m_Value(R)).match(SI) ||
        m_OrdFMax(m_Value(L), m_Value(R)).match(SI) ||
        m_UnordFMin(m_Value(L), m_Value(R)).match(SI) ||
        m_UnordFMax(m_Value(L), m_Value(R)).match(SI)) {
      auto *CI = cast<CmpInst>(SI->getCondition());
      return ReductionData(RK_MinMax, CI->getOpcode(), L, R);
    }
    if (m_UMin(m_Value(L), m_Value(R)).match(SI) ||
        m_UMax(m_Value(L), m_Value(R)).match(SI)) {
      auto *CI = cast<CmpInst>(SI->getCondition());
      return ReductionData(RK_UnsignedMinMax, CI->getOpcode(), L, R);
    }
  }
  return llvm::None;
}
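
// Worked example (illustrative): for
//   %s = select i1 %c, i32 %a, i32 %b   with   %c = icmp slt i32 %a, %b
// the m_SMin pattern matches and the result is
// ReductionData(RK_MinMax, Instruction::ICmp, %a, %b); note that the stored
// opcode is that of the feeding compare, not of the select.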

static ReductionKind matchPairwiseReductionAtLevel(Instruction *I,
                                                   unsigned Level,
                                                   unsigned NumLevels) {
  // Match one level of pairwise operations.
  // %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef,
  //       <4 x i32> <i32 0, i32 2 , i32 undef, i32 undef>
  // %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef,
  //       <4 x i32> <i32 1, i32 3, i32 undef, i32 undef>
  // %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1
  if (!I)
    return RK_None;

  assert(I->getType()->isVectorTy() && "Expecting a vector type");

  Optional<ReductionData> RD = getReductionData(I);
  if (!RD)
    return RK_None;

  ShuffleVectorInst *LS = dyn_cast<ShuffleVectorInst>(I->getOperand(0));
  if (!LS && Level)
    return RK_None;
  ShuffleVectorInst *RS = dyn_cast<ShuffleVectorInst>(I->getOperand(1));
  if (!RS && Level)
    return RK_None;

  // On level 0 we can omit one shufflevector instruction.
  if (!Level && !RS && !LS)
    return RK_None;

  // Shuffle inputs must match.
  Value *NextLevelOpL = LS ? LS->getOperand(0) : nullptr;
  Value *NextLevelOpR = RS ? RS->getOperand(0) : nullptr;
  Value *NextLevelOp = nullptr;
  if (NextLevelOpR && NextLevelOpL) {
    // If we have two shuffles their operands must match.
    if (NextLevelOpL != NextLevelOpR)
      return RK_None;

    NextLevelOp = NextLevelOpL;
  } else if (Level == 0 && (NextLevelOpR || NextLevelOpL)) {
    // On the first level we can omit the shufflevector <0, undef,...>. So the
    // input to the other shufflevector <1, undef> must match with one of the
    // inputs to the current binary operation.
    // Example:
    //   %NextLevelOpL = shufflevector %R, <1, undef ...>
    //   %BinOp        = fadd          %NextLevelOpL, %R
    if (NextLevelOpL && NextLevelOpL != RD->RHS)
      return RK_None;
    else if (NextLevelOpR && NextLevelOpR != RD->LHS)
      return RK_None;

    NextLevelOp = NextLevelOpL ? RD->RHS : RD->LHS;
  } else
    return RK_None;

  // Check that the next level's binary operation exists and matches with the
  // current one.
  if (Level + 1 != NumLevels) {
    Optional<ReductionData> NextLevelRD =
        getReductionData(cast<Instruction>(NextLevelOp));
    if (!NextLevelRD || !RD->hasSameData(*NextLevelRD))
      return RK_None;
  }

  // Shuffle mask for pairwise operation must match.
  if (matchPairwiseShuffleMask(LS, /*IsLeft=*/true, Level)) {
    if (!matchPairwiseShuffleMask(RS, /*IsLeft=*/false, Level))
      return RK_None;
  } else if (matchPairwiseShuffleMask(RS, /*IsLeft=*/true, Level)) {
    if (!matchPairwiseShuffleMask(LS, /*IsLeft=*/false, Level))
      return RK_None;
  } else {
    return RK_None;
  }

  if (++Level == NumLevels)
    return RD->Kind;

  // Match next level.
  return matchPairwiseReductionAtLevel(cast<Instruction>(NextLevelOp), Level,
                                       NumLevels);
}

static ReductionKind matchPairwiseReduction(const ExtractElementInst *ReduxRoot,
                                            unsigned &Opcode, Type *&Ty) {
  if (!EnableReduxCost)
    return RK_None;

  // Need to extract the first element.
  ConstantInt *CI = dyn_cast<ConstantInt>(ReduxRoot->getOperand(1));
  unsigned Idx = ~0u;
  if (CI)
    Idx = CI->getZExtValue();
  if (Idx != 0)
    return RK_None;

  auto *RdxStart = dyn_cast<Instruction>(ReduxRoot->getOperand(0));
  if (!RdxStart)
    return RK_None;
  Optional<ReductionData> RD = getReductionData(RdxStart);
  if (!RD)
    return RK_None;

  Type *VecTy = RdxStart->getType();
  unsigned NumVecElems = VecTy->getVectorNumElements();
  if (!isPowerOf2_32(NumVecElems))
    return RK_None;

  // We look for a sequence of shuffle,shuffle,add triples like the following
  // that builds a pairwise reduction tree.
  //
  //  (X0, X1, X2, X3)
  //   (X0 + X1, X2 + X3, undef, undef)
  //    ((X0 + X1) + (X2 + X3), undef, undef, undef)
  //
  // %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef,
  //       <4 x i32> <i32 0, i32 2 , i32 undef, i32 undef>
  // %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef,
  //       <4 x i32> <i32 1, i32 3, i32 undef, i32 undef>
  // %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1
  // %rdx.shuf.1.0 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef,
  //       <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
  // %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef,
  //       <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  // %bin.rdx8 = fadd <4 x float> %rdx.shuf.1.0, %rdx.shuf.1.1
  // %r = extractelement <4 x float> %bin.rdx8, i32 0
  if (matchPairwiseReductionAtLevel(RdxStart, 0, Log2_32(NumVecElems)) ==
      RK_None)
    return RK_None;

  Opcode = RD->Opcode;
  Ty = VecTy;

  return RD->Kind;
}

static std::pair<Value *, ShuffleVectorInst *>
getShuffleAndOtherOprd(Value *L, Value *R) {
  ShuffleVectorInst *S = nullptr;

  if ((S = dyn_cast<ShuffleVectorInst>(L)))
    return std::make_pair(R, S);

  S = dyn_cast<ShuffleVectorInst>(R);
  return std::make_pair(L, S);
}
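
// Worked example (illustrative): getShuffleAndOtherOprd(%x, %shuf) yields
// {%x, %shuf}; when neither operand is a shufflevector the second element of
// the pair is nullptr, which callers must check.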

static ReductionKind
matchVectorSplittingReduction(const ExtractElementInst *ReduxRoot,
                              unsigned &Opcode, Type *&Ty) {
  if (!EnableReduxCost)
    return RK_None;

  // Need to extract the first element.
  ConstantInt *CI = dyn_cast<ConstantInt>(ReduxRoot->getOperand(1));
  unsigned Idx = ~0u;
  if (CI)
    Idx = CI->getZExtValue();
  if (Idx != 0)
    return RK_None;

  auto *RdxStart = dyn_cast<Instruction>(ReduxRoot->getOperand(0));
  if (!RdxStart)
    return RK_None;
  Optional<ReductionData> RD = getReductionData(RdxStart);
  if (!RD)
    return RK_None;

  Type *VecTy = ReduxRoot->getOperand(0)->getType();
  unsigned NumVecElems = VecTy->getVectorNumElements();
  if (!isPowerOf2_32(NumVecElems))
    return RK_None;

  // We look for a sequence of shuffles and adds like the following matching one
  // fadd, shuffle vector pair at a time.
  //
  // %rdx.shuf = shufflevector <4 x float> %rdx, <4 x float> undef,
  //                           <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
  // %bin.rdx = fadd <4 x float> %rdx, %rdx.shuf
  // %rdx.shuf7 = shufflevector <4 x float> %bin.rdx, <4 x float> undef,
  //                          <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  // %bin.rdx8 = fadd <4 x float> %bin.rdx, %rdx.shuf7
  // %r = extractelement <4 x float> %bin.rdx8, i32 0

  unsigned MaskStart = 1;
  Instruction *RdxOp = RdxStart;
  SmallVector<int, 32> ShuffleMask(NumVecElems, 0);
  unsigned NumVecElemsRemain = NumVecElems;
  while (NumVecElemsRemain - 1) {
    // Check for the right reduction operation.
    if (!RdxOp)
      return RK_None;
    Optional<ReductionData> RDLevel = getReductionData(RdxOp);
    if (!RDLevel || !RDLevel->hasSameData(*RD))
      return RK_None;

    Value *NextRdxOp;
    ShuffleVectorInst *Shuffle;
    std::tie(NextRdxOp, Shuffle) =
        getShuffleAndOtherOprd(RDLevel->LHS, RDLevel->RHS);

    // Check that the current reduction operation and the shuffle use the same
    // value.
    if (Shuffle == nullptr)
      return RK_None;
    if (Shuffle->getOperand(0) != NextRdxOp)
      return RK_None;

    // Check that the shuffle mask matches.
    for (unsigned j = 0; j != MaskStart; ++j)
      ShuffleMask[j] = MaskStart + j;
    // Fill the rest of the mask with -1 for undef.
    std::fill(&ShuffleMask[MaskStart], ShuffleMask.end(), -1);

    SmallVector<int, 16> Mask = Shuffle->getShuffleMask();
    if (ShuffleMask != Mask)
      return RK_None;

    RdxOp = dyn_cast<Instruction>(NextRdxOp);
    NumVecElemsRemain /= 2;
    MaskStart *= 2;
  }

  Opcode = RD->Opcode;
  Ty = VecTy;
  return RD->Kind;
}
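
// Worked walk-through (illustrative): for the 4-wide example above, matching
// starts at the final fadd (%bin.rdx8) and works back toward %rdx, so the
// expected shuffle masks are <1, -1, -1, -1> first (MaskStart == 1) and then
// <2, 3, -1, -1> (MaskStart == 2), halving NumVecElemsRemain at each step.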

int TargetTransformInfo::getInstructionThroughput(const Instruction *I) const {
  switch (I->getOpcode()) {
  case Instruction::GetElementPtr:
    return getUserCost(I);

  case Instruction::Ret:
  case Instruction::PHI:
  case Instruction::Br: {
    return getCFInstrCost(I->getOpcode());
  }
  case Instruction::Add:
  case Instruction::FAdd:
  case Instruction::Sub:
  case Instruction::FSub:
  case Instruction::Mul:
  case Instruction::FMul:
  case Instruction::UDiv:
  case Instruction::SDiv:
  case Instruction::FDiv:
  case Instruction::URem:
  case Instruction::SRem:
  case Instruction::FRem:
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor: {
    TargetTransformInfo::OperandValueKind Op1VK =
        getOperandInfo(I->getOperand(0));
    TargetTransformInfo::OperandValueKind Op2VK =
        getOperandInfo(I->getOperand(1));
    SmallVector<const Value *, 2> Operands(I->operand_values());
    return getArithmeticInstrCost(I->getOpcode(), I->getType(), Op1VK,
                                  Op2VK, TargetTransformInfo::OP_None,
                                  TargetTransformInfo::OP_None,
                                  Operands);
  }
  case Instruction::Select: {
    const SelectInst *SI = cast<SelectInst>(I);
    Type *CondTy = SI->getCondition()->getType();
    return getCmpSelInstrCost(I->getOpcode(), I->getType(), CondTy, I);
  }
  case Instruction::ICmp:
  case Instruction::FCmp: {
    Type *ValTy = I->getOperand(0)->getType();
    return getCmpSelInstrCost(I->getOpcode(), ValTy, I->getType(), I);
  }
  case Instruction::Store: {
    const StoreInst *SI = cast<StoreInst>(I);
    Type *ValTy = SI->getValueOperand()->getType();
    return getMemoryOpCost(I->getOpcode(), ValTy,
                           SI->getAlignment(),
                           SI->getPointerAddressSpace(), I);
  }
  case Instruction::Load: {
    const LoadInst *LI = cast<LoadInst>(I);
    return getMemoryOpCost(I->getOpcode(), I->getType(),
                           LI->getAlignment(),
                           LI->getPointerAddressSpace(), I);
  }
  case Instruction::ZExt:
  case Instruction::SExt:
  case Instruction::FPToUI:
  case Instruction::FPToSI:
  case Instruction::FPExt:
  case Instruction::PtrToInt:
  case Instruction::IntToPtr:
  case Instruction::SIToFP:
  case Instruction::UIToFP:
  case Instruction::Trunc:
  case Instruction::FPTrunc:
  case Instruction::BitCast:
  case Instruction::AddrSpaceCast: {
    Type *SrcTy = I->getOperand(0)->getType();
    return getCastInstrCost(I->getOpcode(), I->getType(), SrcTy, I);
  }
  case Instruction::ExtractElement: {
    const ExtractElementInst *EEI = cast<ExtractElementInst>(I);
    ConstantInt *CI = dyn_cast<ConstantInt>(EEI->getOperand(1));
    unsigned Idx = -1;
    if (CI)
      Idx = CI->getZExtValue();

    // Try to match a reduction sequence (series of shufflevector and vector
    // adds followed by an extractelement).
    unsigned ReduxOpCode;
    Type *ReduxType;

    switch (matchVectorSplittingReduction(EEI, ReduxOpCode, ReduxType)) {
    case RK_Arithmetic:
      return getArithmeticReductionCost(ReduxOpCode, ReduxType,
                                        /*IsPairwiseForm=*/false);
    case RK_MinMax:
      return getMinMaxReductionCost(
          ReduxType, CmpInst::makeCmpResultType(ReduxType),
          /*IsPairwiseForm=*/false, /*IsUnsigned=*/false);
    case RK_UnsignedMinMax:
      return getMinMaxReductionCost(
          ReduxType, CmpInst::makeCmpResultType(ReduxType),
          /*IsPairwiseForm=*/false, /*IsUnsigned=*/true);
    case RK_None:
      break;
    }

    switch (matchPairwiseReduction(EEI, ReduxOpCode, ReduxType)) {
    case RK_Arithmetic:
      return getArithmeticReductionCost(ReduxOpCode, ReduxType,
                                        /*IsPairwiseForm=*/true);
    case RK_MinMax:
      return getMinMaxReductionCost(
          ReduxType, CmpInst::makeCmpResultType(ReduxType),
          /*IsPairwiseForm=*/true, /*IsUnsigned=*/false);
    case RK_UnsignedMinMax:
      return getMinMaxReductionCost(
          ReduxType, CmpInst::makeCmpResultType(ReduxType),
          /*IsPairwiseForm=*/true, /*IsUnsigned=*/true);
    case RK_None:
      break;
    }

    return getVectorInstrCost(I->getOpcode(),
                              EEI->getOperand(0)->getType(), Idx);
  }
  case Instruction::InsertElement: {
    const InsertElementInst *IE = cast<InsertElementInst>(I);
    ConstantInt *CI = dyn_cast<ConstantInt>(IE->getOperand(2));
    unsigned Idx = -1;
    if (CI)
      Idx = CI->getZExtValue();
    return getVectorInstrCost(I->getOpcode(),
                              IE->getType(), Idx);
  }
  case Instruction::ShuffleVector: {
    const ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(I);
    Type *VecTypOp0 = Shuffle->getOperand(0)->getType();
    unsigned NumVecElems = VecTypOp0->getVectorNumElements();
    SmallVector<int, 16> Mask = Shuffle->getShuffleMask();

    if (NumVecElems == Mask.size()) {
      if (isReverseVectorMask(Mask))
        return getShuffleCost(TargetTransformInfo::SK_Reverse, VecTypOp0,
                              0, nullptr);
      if (isAlternateVectorMask(Mask))
        return getShuffleCost(TargetTransformInfo::SK_Alternate,
                              VecTypOp0, 0, nullptr);

      if (isZeroEltBroadcastVectorMask(Mask))
        return getShuffleCost(TargetTransformInfo::SK_Broadcast,
                              VecTypOp0, 0, nullptr);

      if (isSingleSourceVectorMask(Mask))
        return getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc,
                              VecTypOp0, 0, nullptr);

      return getShuffleCost(TargetTransformInfo::SK_PermuteTwoSrc,
                            VecTypOp0, 0, nullptr);
    }

    return -1;
  }
  case Instruction::Call:
    if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
      SmallVector<Value *, 4> Args(II->arg_operands());

      FastMathFlags FMF;
      if (auto *FPMO = dyn_cast<FPMathOperator>(II))
        FMF = FPMO->getFastMathFlags();

      return getIntrinsicInstrCost(II->getIntrinsicID(), II->getType(),
                                   Args, FMF);
    }
    return -1;
  default:
    // We don't have any information on this instruction.
    return -1;
  }
}

TargetTransformInfo::Concept::~Concept() {}

TargetIRAnalysis::TargetIRAnalysis() : TTICallback(&getDefaultTTI) {}

TargetIRAnalysis::TargetIRAnalysis(
    std::function<Result(const Function &)> TTICallback)
    : TTICallback(std::move(TTICallback)) {}

TargetIRAnalysis::Result TargetIRAnalysis::run(const Function &F,
                                               FunctionAnalysisManager &) {
  return TTICallback(F);
}

AnalysisKey TargetIRAnalysis::Key;

TargetIRAnalysis::Result TargetIRAnalysis::getDefaultTTI(const Function &F) {
  return Result(F.getParent()->getDataLayout());
}

// Register the basic pass.
INITIALIZE_PASS(TargetTransformInfoWrapperPass, "tti",
                "Target Transform Information", false, true)
char TargetTransformInfoWrapperPass::ID = 0;

void TargetTransformInfoWrapperPass::anchor() {}

TargetTransformInfoWrapperPass::TargetTransformInfoWrapperPass()
    : ImmutablePass(ID) {
  initializeTargetTransformInfoWrapperPassPass(
      *PassRegistry::getPassRegistry());
}

TargetTransformInfoWrapperPass::TargetTransformInfoWrapperPass(
    TargetIRAnalysis TIRA)
    : ImmutablePass(ID), TIRA(std::move(TIRA)) {
  initializeTargetTransformInfoWrapperPassPass(
      *PassRegistry::getPassRegistry());
}

TargetTransformInfo &TargetTransformInfoWrapperPass::getTTI(const Function &F) {
  FunctionAnalysisManager DummyFAM;
  TTI = TIRA.run(F, DummyFAM);
  return *TTI;
}

ImmutablePass *
llvm::createTargetTransformInfoWrapperPass(TargetIRAnalysis TIRA) {
  return new TargetTransformInfoWrapperPass(std::move(TIRA));
}