LLVM  8.0.0svn
AArch64TargetTransformInfo.cpp
Go to the documentation of this file.
1 //===-- AArch64TargetTransformInfo.cpp - AArch64 specific TTI -------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
12 #include "llvm/Analysis/LoopInfo.h"
15 #include "llvm/CodeGen/CostTable.h"
17 #include "llvm/IR/IntrinsicInst.h"
18 #include "llvm/Support/Debug.h"
19 #include <algorithm>
20 using namespace llvm;
21 
// Debug type string used by LLVM_DEBUG() output from this file.
22 #define DEBUG_TYPE "aarch64tti"
23 
// When true (the default), getFalkorUnrollingPreferences() caps the unroller's
// MaxCount so unrolling does not create too many strided loads for the Falkor
// hardware prefetcher.
24 static cl::opt<bool> EnableFalkorHWPFUnrollFix("enable-falkor-hwpf-unroll-fix",
25  cl::init(true), cl::Hidden);
26 
// Feature-compatibility test for inlining: the callee may be inlined only if
// its subtarget feature bits are a subset of the caller's.
// NOTE(review): the opening signature line (original line 27, presumably
// `bool AArch64TTIImpl::areInlineCompatible(const Function *Caller,`) is
// missing from this scraped excerpt -- confirm against the upstream source.
28  const Function *Callee) const {
29  const TargetMachine &TM = getTLI()->getTargetMachine();
30 
31  const FeatureBitset &CallerBits =
32  TM.getSubtargetImpl(*Caller)->getFeatureBits();
33  const FeatureBitset &CalleeBits =
34  TM.getSubtargetImpl(*Callee)->getFeatureBits();
35 
36  // Inline a callee if its target-features are a subset of the callers
37  // target-features.
38  return (CallerBits & CalleeBits) == CalleeBits;
39 }
40 
41 /// Calculate the cost of materializing a 64-bit value. This helper
42 /// method might only calculate a fraction of a larger immediate. Therefore it
43 /// is valid to return a cost of ZERO.
// NOTE(review): the signature line (original line 44) is missing from this
// scraped excerpt; the body reads a signed 64-bit value `Val`.
45  // Check if the immediate can be encoded within an instruction.
46  if (Val == 0 || AArch64_AM::isLogicalImmediate(Val, 64))
47  return 0;
48 
// Negative values are bit-inverted so the leading-zero count below measures
// the significant (non-sign) bits either way.
49  if (Val < 0)
50  Val = ~Val;
51 
52  // Calculate how many moves we will need to materialize this constant.
// One move per 16-bit chunk up to the highest significant bit (presumably a
// MOVZ/MOVK sequence -- confirm against the AArch64 backend).
53  unsigned LZ = countLeadingZeros((uint64_t)Val);
54  return (64 - LZ + 15) / 16;
55 }
56 
57 /// Calculate the cost of materializing the given constant.
// NOTE(review): the signature line (original line 58) is missing from this
// scraped excerpt; the body reads an APInt `Imm` and an integer Type `Ty`.
59  assert(Ty->isIntegerTy());
60 
61  unsigned BitSize = Ty->getPrimitiveSizeInBits();
// A zero bit-width has no cost model; return the maximum value as a sentinel.
62  if (BitSize == 0)
63  return ~0U;
64 
65  // Sign-extend all constants to a multiple of 64-bit.
66  APInt ImmVal = Imm;
67  if (BitSize & 0x3f)
68  ImmVal = Imm.sext((BitSize + 63) & ~0x3fU);
69 
70  // Split the constant into 64-bit chunks and calculate the cost for each
71  // chunk.
72  int Cost = 0;
73  for (unsigned ShiftVal = 0; ShiftVal < BitSize; ShiftVal += 64) {
74  APInt Tmp = ImmVal.ashr(ShiftVal).sextOrTrunc(64);
75  int64_t Val = Tmp.getSExtValue();
76  Cost += getIntImmCost(Val);
77  }
78  // We need at least one instruction to materialize the constant.
79  return std::max(1, Cost);
80 }
81 
82 int AArch64TTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx,
83  const APInt &Imm, Type *Ty) {
84  assert(Ty->isIntegerTy());
85 
86  unsigned BitSize = Ty->getPrimitiveSizeInBits();
87  // There is no cost model for constants with a bit size of 0. Return TCC_Free
88  // here, so that constant hoisting will ignore this constant.
89  if (BitSize == 0)
90  return TTI::TCC_Free;
91 
92  unsigned ImmIdx = ~0U;
93  switch (Opcode) {
94  default:
95  return TTI::TCC_Free;
96  case Instruction::GetElementPtr:
97  // Always hoist the base address of a GetElementPtr.
98  if (Idx == 0)
99  return 2 * TTI::TCC_Basic;
100  return TTI::TCC_Free;
101  case Instruction::Store:
102  ImmIdx = 0;
103  break;
104  case Instruction::Add:
105  case Instruction::Sub:
106  case Instruction::Mul:
107  case Instruction::UDiv:
108  case Instruction::SDiv:
109  case Instruction::URem:
110  case Instruction::SRem:
111  case Instruction::And:
112  case Instruction::Or:
113  case Instruction::Xor:
114  case Instruction::ICmp:
115  ImmIdx = 1;
116  break;
117  // Always return TCC_Free for the shift value of a shift instruction.
118  case Instruction::Shl:
119  case Instruction::LShr:
120  case Instruction::AShr:
121  if (Idx == 1)
122  return TTI::TCC_Free;
123  break;
124  case Instruction::Trunc:
125  case Instruction::ZExt:
126  case Instruction::SExt:
127  case Instruction::IntToPtr:
128  case Instruction::PtrToInt:
129  case Instruction::BitCast:
130  case Instruction::PHI:
131  case Instruction::Call:
132  case Instruction::Select:
133  case Instruction::Ret:
134  case Instruction::Load:
135  break;
136  }
137 
138  if (Idx == ImmIdx) {
139  int NumConstants = (BitSize + 63) / 64;
140  int Cost = AArch64TTIImpl::getIntImmCost(Imm, Ty);
141  return (Cost <= NumConstants * TTI::TCC_Basic)
142  ? static_cast<int>(TTI::TCC_Free)
143  : Cost;
144  }
145  return AArch64TTIImpl::getIntImmCost(Imm, Ty);
146 }
147 
// Cost of an integer immediate as operand Idx of intrinsic IID. Overflow
// intrinsics are handled like binary ops (cheap constants are free);
// stackmap/patchpoint operands below the live-value range, or those fitting
// in 64 bits, are free.
// NOTE(review): the opening signature line (original line 148) is missing from
// this scraped excerpt.
149  const APInt &Imm, Type *Ty) {
150  assert(Ty->isIntegerTy());
151 
152  unsigned BitSize = Ty->getPrimitiveSizeInBits();
153  // There is no cost model for constants with a bit size of 0. Return TCC_Free
154  // here, so that constant hoisting will ignore this constant.
155  if (BitSize == 0)
156  return TTI::TCC_Free;
157 
158  switch (IID) {
159  default:
160  return TTI::TCC_Free;
161  case Intrinsic::sadd_with_overflow:
162  case Intrinsic::uadd_with_overflow:
163  case Intrinsic::ssub_with_overflow:
164  case Intrinsic::usub_with_overflow:
165  case Intrinsic::smul_with_overflow:
166  case Intrinsic::umul_with_overflow:
167  if (Idx == 1) {
168  int NumConstants = (BitSize + 63) / 64;
169  int Cost = AArch64TTIImpl::getIntImmCost(Imm, Ty);
170  return (Cost <= NumConstants * TTI::TCC_Basic)
171  ? static_cast<int>(TTI::TCC_Free)
172  : Cost;
173  }
174  break;
175  case Intrinsic::experimental_stackmap:
176  if ((Idx < 2) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
177  return TTI::TCC_Free;
178  break;
179  case Intrinsic::experimental_patchpoint_void:
180  case Intrinsic::experimental_patchpoint_i64:
181  if ((Idx < 4) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
182  return TTI::TCC_Free;
183  break;
184  }
185  return AArch64TTIImpl::getIntImmCost(Imm, Ty);
186 }
187 
// Reports hardware popcount support: fast for 32- and 64-bit widths, software
// fallback otherwise.
// NOTE(review): the signature lines (original lines 188-189) are missing from
// this scraped excerpt.
190  assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
191  if (TyWidth == 32 || TyWidth == 64)
192  return TTI::PSK_FastHardware;
193  // TODO: AArch64TargetLowering::LowerCTPOP() supports 128bit popcount.
194  return TTI::PSK_Software;
195 }
196 
// Returns true when the (Opcode, Args) operation producing DstTy can be
// emitted as an AArch64 widening instruction (long/wide add or sub variants),
// i.e. the operand extends are folded into the instruction.
// NOTE(review): the second signature line (original line 198, presumably
// `ArrayRef<const Value *> Args) {`) is missing from this scraped excerpt.
197 bool AArch64TTIImpl::isWideningInstruction(Type *DstTy, unsigned Opcode,
199 
200  // A helper that returns a vector type from the given type. The number of
201  // elements in type Ty determine the vector width.
202  auto toVectorTy = [&](Type *ArgTy) {
203  return VectorType::get(ArgTy->getScalarType(),
204  DstTy->getVectorNumElements());
205  };
206 
207  // Exit early if DstTy is not a vector type whose elements are at least
208  // 16-bits wide.
209  if (!DstTy->isVectorTy() || DstTy->getScalarSizeInBits() < 16)
210  return false;
211 
212  // Determine if the operation has a widening variant. We consider both the
213  // "long" (e.g., usubl) and "wide" (e.g., usubw) versions of the
214  // instructions.
215  //
216  // TODO: Add additional widening operations (e.g., mul, shl, etc.) once we
217  // verify that their extending operands are eliminated during code
218  // generation.
219  switch (Opcode) {
220  case Instruction::Add: // UADDL(2), SADDL(2), UADDW(2), SADDW(2).
221  case Instruction::Sub: // USUBL(2), SSUBL(2), USUBW(2), SSUBW(2).
222  break;
223  default:
224  return false;
225  }
226 
227  // To be a widening instruction (either the "wide" or "long" versions), the
228  // second operand must be a sign- or zero extend having a single user. We
229  // only consider extends having a single user because they may otherwise not
230  // be eliminated.
231  if (Args.size() != 2 ||
232  (!isa<SExtInst>(Args[1]) && !isa<ZExtInst>(Args[1])) ||
233  !Args[1]->hasOneUse())
234  return false;
235  auto *Extend = cast<CastInst>(Args[1]);
236 
237  // Legalize the destination type and ensure it can be used in a widening
238  // operation.
239  auto DstTyL = TLI->getTypeLegalizationCost(DL, DstTy);
240  unsigned DstElTySize = DstTyL.second.getScalarSizeInBits();
241  if (!DstTyL.second.isVector() || DstElTySize != DstTy->getScalarSizeInBits())
242  return false;
243 
244  // Legalize the source type and ensure it can be used in a widening
245  // operation.
246  Type *SrcTy = toVectorTy(Extend->getSrcTy());
247  auto SrcTyL = TLI->getTypeLegalizationCost(DL, SrcTy);
248  unsigned SrcElTySize = SrcTyL.second.getScalarSizeInBits();
249  if (!SrcTyL.second.isVector() || SrcElTySize != SrcTy->getScalarSizeInBits())
250  return false;
251 
252  // Get the total number of vector elements in the legalized types.
253  unsigned NumDstEls = DstTyL.first * DstTyL.second.getVectorNumElements();
254  unsigned NumSrcEls = SrcTyL.first * SrcTyL.second.getVectorNumElements();
255 
256  // Return true if the legalized types have the same number of vector elements
257  // and the destination element type size is twice that of the source type.
258  return NumDstEls == NumSrcEls && 2 * SrcElTySize == DstElTySize;
259 }
260 
// Cost of a cast instruction. Casts folded into widening instructions are
// free; otherwise the cost comes from a conversion table keyed on (ISD,
// DstVT, SrcVT), falling back to the base implementation.
// NOTE(review): the actual rows of ConversionTbl (original lines ~294-387)
// were dropped by the doxygen scrape -- only the section comments remain. Do
// NOT treat the table below as complete; restore it from the upstream source.
261 int AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
262  const Instruction *I) {
263  int ISD = TLI->InstructionOpcodeToISD(Opcode);
264  assert(ISD && "Invalid opcode");
265 
266  // If the cast is observable, and it is used by a widening instruction (e.g.,
267  // uaddl, saddw, etc.), it may be free.
268  if (I && I->hasOneUse()) {
269  auto *SingleUser = cast<Instruction>(*I->user_begin());
270  SmallVector<const Value *, 4> Operands(SingleUser->operand_values());
271  if (isWideningInstruction(Dst, SingleUser->getOpcode(), Operands)) {
272  // If the cast is the second operand, it is free. We will generate either
273  // a "wide" or "long" version of the widening instruction.
274  if (I == SingleUser->getOperand(1))
275  return 0;
276  // If the cast is not the second operand, it will be free if it looks the
277  // same as the second operand. In this case, we will generate a "long"
278  // version of the widening instruction.
279  if (auto *Cast = dyn_cast<CastInst>(SingleUser->getOperand(1)))
280  if (I->getOpcode() == unsigned(Cast->getOpcode()) &&
281  cast<CastInst>(I)->getSrcTy() == Cast->getSrcTy())
282  return 0;
283  }
284  }
285 
286  EVT SrcTy = TLI->getValueType(DL, Src);
287  EVT DstTy = TLI->getValueType(DL, Dst);
288 
289  if (!SrcTy.isSimple() || !DstTy.isSimple())
290  return BaseT::getCastInstrCost(Opcode, Dst, Src);
291 
292  static const TypeConversionCostTblEntry
293  ConversionTbl[] = {
298 
299  // The number of shll instructions for the extension.
316 
317  // LowerVectorINT_TO_FP:
324 
325  // Complex: to v2f32
332 
333  // Complex: to v4f32
338 
339  // Complex: to v8f32
344 
345  // Complex: to v16f32
348 
349  // Complex: to v2f64
356 
357 
358  // LowerVectorFP_TO_INT
365 
366  // Complex, from v2f32: legal type is v2i32 (no cost) or v2i64 (1 ext).
373 
374  // Complex, from v4f32: legal type is v4i16, 1 narrowing => ~2
379 
380  // Complex, from v2f64: legal type is v2i32, 1 narrowing => ~2.
387  };
388 
389  if (const auto *Entry = ConvertCostTableLookup(ConversionTbl, ISD,
390  DstTy.getSimpleVT(),
391  SrcTy.getSimpleVT()))
392  return Entry->Cost;
393 
394  return BaseT::getCastInstrCost(Opcode, Dst, Src);
395 }
396 
// Cost of an extract-element followed by a sign/zero extend of the element.
// smov performs a sign extension for free; umov zero-extends for free unless
// extending a sub-32-bit element all the way to i64.
// NOTE(review): the opening signature line (original line 397) is missing from
// this scraped excerpt.
398  VectorType *VecTy,
399  unsigned Index) {
400 
401  // Make sure we were given a valid extend opcode.
402  assert((Opcode == Instruction::SExt || Opcode == Instruction::ZExt) &&
403  "Invalid opcode");
404 
405  // We are extending an element we extract from a vector, so the source type
406  // of the extend is the element type of the vector.
407  auto *Src = VecTy->getElementType();
408 
409  // Sign- and zero-extends are for integer types only.
410  assert(isa<IntegerType>(Dst) && isa<IntegerType>(Src) && "Invalid type");
411 
412  // Get the cost for the extract. We compute the cost (if any) for the extend
413  // below.
414  auto Cost = getVectorInstrCost(Instruction::ExtractElement, VecTy, Index);
415 
416  // Legalize the types.
417  auto VecLT = TLI->getTypeLegalizationCost(DL, VecTy);
418  auto DstVT = TLI->getValueType(DL, Dst);
419  auto SrcVT = TLI->getValueType(DL, Src);
420 
421  // If the resulting type is still a vector and the destination type is legal,
422  // we may get the extension for free. If not, get the default cost for the
423  // extend.
424  if (!VecLT.second.isVector() || !TLI->isTypeLegal(DstVT))
425  return Cost + getCastInstrCost(Opcode, Dst, Src);
426 
427  // The destination type should be larger than the element type. If not, get
428  // the default cost for the extend.
429  if (DstVT.getSizeInBits() < SrcVT.getSizeInBits())
430  return Cost + getCastInstrCost(Opcode, Dst, Src);
431 
432  switch (Opcode) {
433  default:
434  llvm_unreachable("Opcode should be either SExt or ZExt");
435 
436  // For sign-extends, we only need a smov, which performs the extension
437  // automatically.
438  case Instruction::SExt:
439  return Cost;
440 
441  // For zero-extends, the extend is performed automatically by a umov unless
442  // the destination type is i64 and the element type is i8 or i16.
443  case Instruction::ZExt:
444  if (DstVT.getSizeInBits() != 64u || SrcVT.getSizeInBits() == 32u)
445  return Cost;
446  }
447 
448  // If we are unable to perform the extend for free, get the default cost.
449  return Cost + getCastInstrCost(Opcode, Dst, Src);
450 }
451 
452 int AArch64TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
453  unsigned Index) {
454  assert(Val->isVectorTy() && "This must be a vector type");
455 
456  if (Index != -1U) {
457  // Legalize the type.
458  std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Val);
459 
460  // This type is legalized to a scalar type.
461  if (!LT.second.isVector())
462  return 0;
463 
464  // The type may be split. Normalize the index to the new type.
465  unsigned Width = LT.second.getVectorNumElements();
466  Index = Index % Width;
467 
468  // The element at index zero is already inside the vector.
469  if (Index == 0)
470  return 0;
471  }
472 
473  // All other insert/extracts cost this much.
474  return ST->getVectorInsertExtractBaseCost();
475 }
476 
// Cost of an arithmetic instruction, folding widening overhead into the
// widening instruction itself and special-casing divisions.
// NOTE(review): this block is heavily gutted by the doxygen scrape -- the
// signature lines (original 477, 480), the SDIV operand-kind guard (501), the
// UDIV constant guard (523), and the trailing OP_None argument lines of the
// nested getArithmeticInstrCost calls (508-540) are missing. Restore from the
// upstream source before editing the logic.
478  unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info,
479  TTI::OperandValueKind Opd2Info, TTI::OperandValueProperties Opd1PropInfo,
481  // Legalize the type.
482  std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
483 
484  // If the instruction is a widening instruction (e.g., uaddl, saddw, etc.),
485  // add in the widening overhead specified by the sub-target. Since the
486  // extends feeding widening instructions are performed automatically, they
487  // aren't present in the generated code and have a zero cost. By adding a
488  // widening overhead here, we attach the total cost of the combined operation
489  // to the widening instruction.
490  int Cost = 0;
491  if (isWideningInstruction(Ty, Opcode, Args))
492  Cost += ST->getWideningBaseCost();
493 
494  int ISD = TLI->InstructionOpcodeToISD(Opcode);
495 
496  switch (ISD) {
497  default:
498  return Cost + BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
499  Opd1PropInfo, Opd2PropInfo);
500  case ISD::SDIV:
502  Opd2PropInfo == TargetTransformInfo::OP_PowerOf2) {
503  // On AArch64, scalar signed division by constants power-of-two are
504  // normally expanded to the sequence ADD + CMP + SELECT + SRA.
505  // The OperandValue properties may not be the same as that of the previous
506  // operation; conservatively assume OP_None.
507  Cost += getArithmeticInstrCost(Instruction::Add, Ty, Opd1Info, Opd2Info,
510  Cost += getArithmeticInstrCost(Instruction::Sub, Ty, Opd1Info, Opd2Info,
513  Cost += getArithmeticInstrCost(Instruction::Select, Ty, Opd1Info, Opd2Info,
516  Cost += getArithmeticInstrCost(Instruction::AShr, Ty, Opd1Info, Opd2Info,
519  return Cost;
520  }
522  case ISD::UDIV:
524  auto VT = TLI->getValueType(DL, Ty);
525  if (TLI->isOperationLegalOrCustom(ISD::MULHU, VT)) {
526  // Vector signed division by constant are expanded to the
527  // sequence MULHS + ADD/SUB + SRA + SRL + ADD, and unsigned division
528  // to MULHS + SUB + SRL + ADD + SRL.
529  int MulCost = getArithmeticInstrCost(Instruction::Mul, Ty, Opd1Info,
530  Opd2Info,
533  int AddCost = getArithmeticInstrCost(Instruction::Add, Ty, Opd1Info,
534  Opd2Info,
537  int ShrCost = getArithmeticInstrCost(Instruction::AShr, Ty, Opd1Info,
538  Opd2Info,
541  return MulCost * 2 + AddCost * 2 + ShrCost * 2 + 1;
542  }
543  }
544 
545  Cost += BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
546  Opd1PropInfo, Opd2PropInfo);
547  if (Ty->isVectorTy()) {
548  // On AArch64, vector divisions are not supported natively and are
549  // expanded into scalar divisions of each pair of elements.
550  Cost += getArithmeticInstrCost(Instruction::ExtractElement, Ty, Opd1Info,
551  Opd2Info, Opd1PropInfo, Opd2PropInfo);
552  Cost += getArithmeticInstrCost(Instruction::InsertElement, Ty, Opd1Info,
553  Opd2Info, Opd1PropInfo, Opd2PropInfo);
554  // TODO: if one of the arguments is scalar, then it's not necessary to
555  // double the cost of handling the vector elements.
556  Cost += Cost;
557  }
558  return Cost;
559 
560  case ISD::ADD:
561  case ISD::MUL:
562  case ISD::XOR:
563  case ISD::OR:
564  case ISD::AND:
565  // These nodes are marked as 'custom' for combining purposes only.
566  // We know that they are legal. See LowerAdd in ISelLowering.
567  return (Cost + 1) * LT.first;
568  }
569 }
570 
// Cost of computing an address. Non-consecutive vector accesses are penalized
// because the address arithmetic cannot be folded into the addressing mode.
// NOTE(review): the opening signature line (original line 571) is missing from
// this scraped excerpt.
572  const SCEV *Ptr) {
573  // Address computations in vectorized code with non-consecutive addresses will
574  // likely result in more instructions compared to scalar code where the
575  // computation can more often be merged into the index mode. The resulting
576  // extra micro-ops can significantly decrease throughput.
577  unsigned NumVectorInstToHideOverhead = 10;
578  int MaxMergeDistance = 64;
579 
580  if (Ty->isVectorTy() && SE &&
581  !BaseT::isConstantStridedAccessLessThan(SE, Ptr, MaxMergeDistance + 1))
582  return NumVectorInstToHideOverhead;
583 
584  // In many cases the address computation is not merged into the instruction
585  // addressing mode.
586  return 1;
587 }
588 
// Cost of a compare or select. Wide vector selects that do not map to the
// register width are heavily penalized via a per-type table.
// NOTE(review): the first rows of VectorSelectTbl (original lines 600-602,
// the v16i8/v8i16/v4i32-style entries) are missing from this scraped excerpt.
589 int AArch64TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
590  Type *CondTy, const Instruction *I) {
591 
592  int ISD = TLI->InstructionOpcodeToISD(Opcode);
593  // We don't lower some vector selects well that are wider than the register
594  // width.
595  if (ValTy->isVectorTy() && ISD == ISD::SELECT) {
596  // We would need this many instructions to hide the scalarization happening.
597  const int AmortizationCost = 20;
598  static const TypeConversionCostTblEntry
599  VectorSelectTbl[] = {
603  { ISD::SELECT, MVT::v4i1, MVT::v4i64, 4 * AmortizationCost },
604  { ISD::SELECT, MVT::v8i1, MVT::v8i64, 8 * AmortizationCost },
605  { ISD::SELECT, MVT::v16i1, MVT::v16i64, 16 * AmortizationCost }
606  };
607 
608  EVT SelCondTy = TLI->getValueType(DL, CondTy);
609  EVT SelValTy = TLI->getValueType(DL, ValTy);
610  if (SelCondTy.isSimple() && SelValTy.isSimple()) {
611  if (const auto *Entry = ConvertCostTableLookup(VectorSelectTbl, ISD,
612  SelCondTy.getSimpleVT(),
613  SelValTy.getSimpleVT()))
614  return Entry->Cost;
615  }
616  }
617  return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
618 }
619 
620 int AArch64TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Ty,
621  unsigned Alignment, unsigned AddressSpace,
622  const Instruction *I) {
623  auto LT = TLI->getTypeLegalizationCost(DL, Ty);
624 
625  if (ST->isMisaligned128StoreSlow() && Opcode == Instruction::Store &&
626  LT.second.is128BitVector() && Alignment < 16) {
627  // Unaligned stores are extremely inefficient. We don't split all
628  // unaligned 128-bit stores because the negative impact that has shown in
629  // practice on inlined block copy code.
630  // We make such stores expensive so that we will only vectorize if there
631  // are 6 other instructions getting vectorized.
632  const int AmortizationCost = 6;
633 
634  return LT.first * 2 * AmortizationCost;
635  }
636 
637  if (Ty->isVectorTy() && Ty->getVectorElementType()->isIntegerTy(8)) {
638  unsigned ProfitableNumElements;
639  if (Opcode == Instruction::Store)
640  // We use a custom trunc store lowering so v.4b should be profitable.
641  ProfitableNumElements = 4;
642  else
643  // We scalarize the loads because there is not v.4b register and we
644  // have to promote the elements to v.2.
645  ProfitableNumElements = 8;
646 
647  if (Ty->getVectorNumElements() < ProfitableNumElements) {
648  unsigned NumVecElts = Ty->getVectorNumElements();
649  unsigned NumVectorizableInstsToAmortize = NumVecElts * 2;
650  // We generate 2 instructions per vector element.
651  return NumVectorizableInstsToAmortize * NumVecElts * 2;
652  }
653  }
654 
655  return LT.first;
656 }
657 
// Cost of an interleaved (ldN/stN) memory operation. Legal sub-vector types
// with a supported interleave factor cost one unit per ldN/stN emitted.
// NOTE(review): the opening signature lines (original lines 657-658) are
// missing from this scraped excerpt.
659  unsigned Factor,
660  ArrayRef<unsigned> Indices,
661  unsigned Alignment,
662  unsigned AddressSpace,
663  bool UseMaskForCond,
664  bool UseMaskForGaps) {
665  assert(Factor >= 2 && "Invalid interleave factor");
666  assert(isa<VectorType>(VecTy) && "Expect a vector type");
667 
668  if (!UseMaskForCond && !UseMaskForGaps &&
669  Factor <= TLI->getMaxSupportedInterleaveFactor()) {
670  unsigned NumElts = VecTy->getVectorNumElements();
671  auto *SubVecTy = VectorType::get(VecTy->getScalarType(), NumElts / Factor);
672 
673  // ldN/stN only support legal vector types of size 64 or 128 in bits.
674  // Accesses having vector types that are a multiple of 128 bits can be
675  // matched to more than one ldN/stN instruction.
676  if (NumElts % Factor == 0 &&
677  TLI->isLegalInterleavedAccessType(SubVecTy, DL))
678  return Factor * TLI->getNumInterleavedAccesses(SubVecTy, DL);
679  }
680 
681  return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
682  Alignment, AddressSpace,
683  UseMaskForCond, UseMaskForGaps);
684 }
685 
// Sums spill/reload cost for 128-bit vector values that must stay live across
// a call.
// NOTE(review): the signature line (original line 686) and the load-cost
// addend continuing line 692 (original line 693, presumably
// `getMemoryOpCost(Instruction::Load, I, 128, 0);`) are missing from this
// scraped excerpt.
687  int Cost = 0;
688  for (auto *I : Tys) {
689  if (!I->isVectorTy())
690  continue;
691  if (I->getScalarSizeInBits() * I->getVectorNumElements() == 128)
692  Cost += getMemoryOpCost(Instruction::Store, I, 128, 0) +
694  }
695  return Cost;
696 }
697 
// Forwards the subtarget's maximum interleave factor.
// NOTE(review): the signature line (original line 698) is missing from this
// scraped excerpt.
699  return ST->getMaxInterleaveFactor();
700 }
701 
702 // For Falkor, we want to avoid having too many strided loads in a loop since
703 // that can exhaust the HW prefetcher resources. We adjust the unroller
704 // MaxCount preference below to attempt to ensure unrolling doesn't create too
705 // many strided loads.
706 static void
// NOTE(review): the parameter lines (original lines 707-708, presumably
// `getFalkorUnrollingPreferences(Loop *L, ScalarEvolution &SE,`
// `TargetTransformInfo::UnrollingPreferences &UP) {`) are missing from this
// scraped excerpt.
709  enum { MaxStridedLoads = 7 };
710  auto countStridedLoads = [](Loop *L, ScalarEvolution &SE) {
711  int StridedLoads = 0;
712  // FIXME? We could make this more precise by looking at the CFG and
713  // e.g. not counting loads in each side of an if-then-else diamond.
714  for (const auto BB : L->blocks()) {
715  for (auto &I : *BB) {
716  LoadInst *LMemI = dyn_cast<LoadInst>(&I);
717  if (!LMemI)
718  continue;
719 
720  Value *PtrValue = LMemI->getPointerOperand();
721  if (L->isLoopInvariant(PtrValue))
722  continue;
723 
// A load is "strided" if its address is a loop-varying affine recurrence.
724  const SCEV *LSCEV = SE.getSCEV(PtrValue);
725  const SCEVAddRecExpr *LSCEVAddRec = dyn_cast<SCEVAddRecExpr>(LSCEV);
726  if (!LSCEVAddRec || !LSCEVAddRec->isAffine())
727  continue;
728 
729  // FIXME? We could take pairing of unrolled load copies into account
730  // by looking at the AddRec, but we would probably have to limit this
731  // to loops with no stores or other memory optimization barriers.
732  ++StridedLoads;
733  // We've seen enough strided loads that seeing more won't make a
734  // difference.
735  if (StridedLoads > MaxStridedLoads / 2)
736  return StridedLoads;
737  }
738  }
739  return StridedLoads;
740  };
741 
742  int StridedLoads = countStridedLoads(L, SE);
743  LLVM_DEBUG(dbgs() << "falkor-hwpf: detected " << StridedLoads
744  << " strided loads\n");
745  // Pick the largest power of 2 unroll count that won't result in too many
746  // strided loads.
747  if (StridedLoads) {
748  UP.MaxCount = 1 << Log2_32(MaxStridedLoads / StridedLoads);
749  LLVM_DEBUG(dbgs() << "falkor-hwpf: setting unroll MaxCount to "
750  << UP.MaxCount << '\n');
751  }
752 }
753 
// Tunes loop-unrolling preferences for AArch64.
// NOTE(review): this block is heavily gutted by the doxygen scrape -- the
// signature (original lines 754-755), the statement under line 756, the
// statement(s) under line 765 (orig 766), and the Falkor dispatch (orig
// 767-770, which calls getFalkorUnrollingPreferences when
// EnableFalkorHWPFUnrollFix is set) are missing. Restore from upstream before
// editing.
756  // Enable partial unrolling and runtime unrolling.
758 
759  // For inner loop, it is more likely to be a hot one, and the runtime check
760  // can be promoted out from LICM pass, so the overhead is less, let's try
761  // a larger threshold to unroll more loops.
762  if (L->getLoopDepth() > 1)
763  UP.PartialThreshold *= 2;
764 
765  // Disable partial & runtime unrolling on -Os.
767 
771 }
772 
// Reconstructs the result of a NEON ldN/stN intrinsic as a value of
// ExpectedType, or returns nullptr if the shapes do not match. For stN the
// stored operands are packed back into a struct; for ldN the instruction
// itself is returned when its type already matches.
// NOTE(review): the opening signature line (original line 773) is missing from
// this scraped excerpt.
774  Type *ExpectedType) {
775  switch (Inst->getIntrinsicID()) {
776  default:
777  return nullptr;
778  case Intrinsic::aarch64_neon_st2:
779  case Intrinsic::aarch64_neon_st3:
780  case Intrinsic::aarch64_neon_st4: {
781  // Create a struct type
782  StructType *ST = dyn_cast<StructType>(ExpectedType);
783  if (!ST)
784  return nullptr;
// The last argument is the pointer; everything before it is stored data.
785  unsigned NumElts = Inst->getNumArgOperands() - 1;
786  if (ST->getNumElements() != NumElts)
787  return nullptr;
788  for (unsigned i = 0, e = NumElts; i != e; ++i) {
789  if (Inst->getArgOperand(i)->getType() != ST->getElementType(i))
790  return nullptr;
791  }
792  Value *Res = UndefValue::get(ExpectedType);
793  IRBuilder<> Builder(Inst);
794  for (unsigned i = 0, e = NumElts; i != e; ++i) {
795  Value *L = Inst->getArgOperand(i);
796  Res = Builder.CreateInsertValue(Res, L, i);
797  }
798  return Res;
799  }
800  case Intrinsic::aarch64_neon_ld2:
801  case Intrinsic::aarch64_neon_ld3:
802  case Intrinsic::aarch64_neon_ld4:
803  if (Inst->getType() == ExpectedType)
804  return Inst;
805  return nullptr;
806  }
807 }
808 
810  MemIntrinsicInfo &Info) {
811  switch (Inst->getIntrinsicID()) {
812  default:
813  break;
814  case Intrinsic::aarch64_neon_ld2:
815  case Intrinsic::aarch64_neon_ld3:
816  case Intrinsic::aarch64_neon_ld4:
817  Info.ReadMem = true;
818  Info.WriteMem = false;
819  Info.PtrVal = Inst->getArgOperand(0);
820  break;
821  case Intrinsic::aarch64_neon_st2:
822  case Intrinsic::aarch64_neon_st3:
823  case Intrinsic::aarch64_neon_st4:
824  Info.ReadMem = false;
825  Info.WriteMem = true;
826  Info.PtrVal = Inst->getArgOperand(Inst->getNumArgOperands() - 1);
827  break;
828  }
829 
830  switch (Inst->getIntrinsicID()) {
831  default:
832  return false;
833  case Intrinsic::aarch64_neon_ld2:
834  case Intrinsic::aarch64_neon_st2:
835  Info.MatchingId = VECTOR_LDST_TWO_ELEMENTS;
836  break;
837  case Intrinsic::aarch64_neon_ld3:
838  case Intrinsic::aarch64_neon_st3:
839  Info.MatchingId = VECTOR_LDST_THREE_ELEMENTS;
840  break;
841  case Intrinsic::aarch64_neon_ld4:
842  case Intrinsic::aarch64_neon_st4:
843  Info.MatchingId = VECTOR_LDST_FOUR_ELEMENTS;
844  break;
845  }
846  return true;
847 }
848 
849 /// See if \p I should be considered for address type promotion. We check if \p
850 /// I is a sext with right type and used in memory accesses. If it used in a
851 /// "complex" getelementptr, we allow it to be promoted without finding other
852 /// sext instructions that sign extended the same initial value. A getelementptr
853 /// is considered as "complex" if it has more than 2 operands.
// NOTE(review): the signature line (original line 854) and the initializer
// continuation of ConsideredSExtType (original line 861, presumably
// `Type::getInt64Ty(I.getContext());`) are missing from this scraped excerpt.
856  bool Considerable = false;
857  AllowPromotionWithoutCommonHeader = false;
858  if (!isa<SExtInst>(&I))
859  return false;
860  Type *ConsideredSExtType =
862  if (I.getType() != ConsideredSExtType)
863  return false;
864  // See if the sext is the one with the right type and used in at least one
865  // GetElementPtrInst.
866  for (const User *U : I.users()) {
867  if (const GetElementPtrInst *GEPInst = dyn_cast<GetElementPtrInst>(U)) {
868  Considerable = true;
869  // A getelementptr is considered as "complex" if it has more than 2
870  // operands. We will promote a SExt used in such complex GEP as we
871  // expect some computation to be merged if they are done on 64 bits.
872  if (GEPInst->getNumOperands() > 2) {
873  AllowPromotionWithoutCommonHeader = true;
874  break;
875  }
876  }
877  }
878  return Considerable;
879 }
880 
// Forwards the subtarget's cache line size.
// NOTE(review): the signature line (original line 881) is missing from this
// scraped excerpt.
882  return ST->getCacheLineSize();
883 }
884 
// Forwards the subtarget's prefetch distance.
// NOTE(review): the signature line (original line 885) is missing from this
// scraped excerpt.
886  return ST->getPrefetchDistance();
887 }
888 
// Forwards the subtarget's minimum prefetch stride.
// NOTE(review): the signature line (original line 889) is missing from this
// scraped excerpt.
890  return ST->getMinPrefetchStride();
891 }
892 
// Forwards the subtarget's maximum prefetch iterations ahead.
// NOTE(review): the signature line (original line 893) is missing from this
// scraped excerpt.
894  return ST->getMaxPrefetchIterationsAhead();
895 }
896 
897 bool AArch64TTIImpl::useReductionIntrinsic(unsigned Opcode, Type *Ty,
898  TTI::ReductionFlags Flags) const {
899  assert(isa<VectorType>(Ty) && "Expected Ty to be a vector type");
900  unsigned ScalarBits = Ty->getScalarSizeInBits();
901  switch (Opcode) {
902  case Instruction::FAdd:
903  case Instruction::FMul:
904  case Instruction::And:
905  case Instruction::Or:
906  case Instruction::Xor:
907  case Instruction::Mul:
908  return false;
909  case Instruction::Add:
910  return ScalarBits * Ty->getVectorNumElements() >= 128;
911  case Instruction::ICmp:
912  return (ScalarBits < 64) &&
913  (ScalarBits * Ty->getVectorNumElements() >= 128);
914  case Instruction::FCmp:
915  return Flags.NoNaN;
916  default:
917  llvm_unreachable("Unhandled reduction opcode");
918  }
919  return false;
920 }
921 
// Cost of an arithmetic vector reduction. Non-pairwise integer adds map to a
// single 'addv'-style instruction via the table; everything else defers to
// the base implementation.
// NOTE(review): the opening signature line (original line 922) is missing from
// this scraped excerpt.
923  bool IsPairwiseForm) {
924 
925  if (IsPairwiseForm)
926  return BaseT::getArithmeticReductionCost(Opcode, ValTy, IsPairwiseForm);
927 
928  std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
929  MVT MTy = LT.second;
930  int ISD = TLI->InstructionOpcodeToISD(Opcode);
931  assert(ISD && "Invalid opcode");
932 
933  // Horizontal adds can use the 'addv' instruction. We model the cost of these
934  // instructions as normal vector adds. This is the only arithmetic vector
935  // reduction operation for which we have an instruction.
936  static const CostTblEntry CostTblNoPairwise[]{
937  {ISD::ADD, MVT::v8i8, 1},
938  {ISD::ADD, MVT::v16i8, 1},
939  {ISD::ADD, MVT::v4i16, 1},
940  {ISD::ADD, MVT::v8i16, 1},
941  {ISD::ADD, MVT::v4i32, 1},
942  };
943 
944  if (const auto *Entry = CostTableLookup(CostTblNoPairwise, ISD, MTy))
945  return LT.first * Entry->Cost;
946 
947  return BaseT::getArithmeticReductionCost(Opcode, ValTy, IsPairwiseForm);
948 }
949 
// Cost of a vector shuffle. Broadcast/transpose/select/single-source-permute
// kinds are priced from a per-(kind, legal type) table; anything not covered
// falls back to the base implementation.
// NOTE(review): the opening signature line (original line 950) and the
// Broadcast (orig 956-965) and Transpose (orig 968-977) rows of ShuffleTbl are
// missing from this scraped excerpt -- the table below is NOT complete.
951  Type *SubTp) {
952  if (Kind == TTI::SK_Broadcast || Kind == TTI::SK_Transpose ||
953  Kind == TTI::SK_Select || Kind == TTI::SK_PermuteSingleSrc) {
954  static const CostTblEntry ShuffleTbl[] = {
955  // Broadcast shuffle kinds can be performed with 'dup'.
966  // Transpose shuffle kinds can be performed with 'trn1/trn2' and
967  // 'zip1/zip2' instructions.
978  // Select shuffle kinds.
979  // TODO: handle vXi8/vXi16.
980  { TTI::SK_Select, MVT::v2i32, 1 }, // mov.
981  { TTI::SK_Select, MVT::v4i32, 2 }, // rev+trn (or similar).
982  { TTI::SK_Select, MVT::v2i64, 1 }, // mov.
983  { TTI::SK_Select, MVT::v2f32, 1 }, // mov.
984  { TTI::SK_Select, MVT::v4f32, 2 }, // rev+trn (or similar).
985  { TTI::SK_Select, MVT::v2f64, 1 }, // mov.
986  // PermuteSingleSrc shuffle kinds.
987  // TODO: handle vXi8/vXi16.
988  { TTI::SK_PermuteSingleSrc, MVT::v2i32, 1 }, // mov.
989  { TTI::SK_PermuteSingleSrc, MVT::v4i32, 3 }, // perfectshuffle worst case.
990  { TTI::SK_PermuteSingleSrc, MVT::v2i64, 1 }, // mov.
991  { TTI::SK_PermuteSingleSrc, MVT::v2f32, 1 }, // mov.
992  { TTI::SK_PermuteSingleSrc, MVT::v4f32, 3 }, // perfectshuffle worst case.
993  { TTI::SK_PermuteSingleSrc, MVT::v2f64, 1 }, // mov.
994  };
995  std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
996  if (const auto *Entry = CostTableLookup(ShuffleTbl, Kind, LT.second))
997  return LT.first * Entry->Cost;
998  }
999 
1000  return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
1001 }
Type * getVectorElementType() const
Definition: Type.h:371
static cl::opt< bool > EnableFalkorHWPFUnrollFix("enable-falkor-hwpf-unroll-fix", cl::init(true), cl::Hidden)
unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info=TTI::OK_AnyValue, TTI::OperandValueKind Opd2Info=TTI::OK_AnyValue, TTI::OperandValueProperties Opd1PropInfo=TTI::OP_None, TTI::OperandValueProperties Opd2PropInfo=TTI::OP_None, ArrayRef< const Value * > Args=ArrayRef< const Value * >())
Definition: BasicTTIImpl.h:568
bool isConstantStridedAccessLessThan(ScalarEvolution *SE, const SCEV *Ptr, int64_t MergeDistance)
APInt sext(unsigned width) const
Sign extend to a new width.
Definition: APInt.cpp:834
GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2)
bool isMisaligned128StoreSlow() const
This class represents lattice values for constants.
Definition: AllocatorList.h:24
Type * getElementType(unsigned N) const
Definition: DerivedTypes.h:314
Cost tables and simple lookup functions.
#define LLVM_FALLTHROUGH
Definition: Compiler.h:86
TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth)
unsigned getLoopDepth() const
Return the nesting level of this loop.
Definition: LoopInfo.h:92
unsigned getNumElements() const
Random access to the elements.
Definition: DerivedTypes.h:313
The main scalar evolution driver.
unsigned PartialOptSizeThreshold
The cost threshold for the unrolled loop when optimizing for size, like OptSizeThreshold, but used for partial/runtime unrolling (set to UINT_MAX to disable).
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:253
unsigned PartialThreshold
The cost threshold for the unrolled loop, like Threshold, but used for partial/runtime unrolling (set...
Type Conversion Cost Table.
Definition: CostTable.h:45
An instruction for reading from memory.
Definition: Instructions.h:168
static IntegerType * getInt64Ty(LLVMContext &C)
Definition: Type.cpp:177
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:230
unsigned getMaxInterleaveFactor(unsigned VF)
bool isOperationLegalOrCustom(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
static void getFalkorUnrollingPreferences(Loop *L, ScalarEvolution &SE, TargetTransformInfo::UnrollingPreferences &UP)
std::size_t countLeadingZeros(T Val, ZeroBehavior ZB=ZB_Width)
Count number of 0&#39;s from the most significant bit to the least stopping at the first 1...
Definition: MathExtras.h:189
int getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info=TTI::OK_AnyValue, TTI::OperandValueKind Opd2Info=TTI::OK_AnyValue, TTI::OperandValueProperties Opd1PropInfo=TTI::OP_None, TTI::OperandValueProperties Opd2PropInfo=TTI::OP_None, ArrayRef< const Value *> Args=ArrayRef< const Value *>())
Cost Table Entry.
Definition: CostTable.h:25
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1509
int getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy, unsigned Index)
int getAddressComputationCost(Type *Ty, ScalarEvolution *SE, const SCEV *Ptr)
Value * getArgOperand(unsigned i) const
Definition: InstrTypes.h:1079
unsigned getCacheLineSize() const
const FeatureBitset & getFeatureBits() const
Class to represent struct types.
Definition: DerivedTypes.h:201
int getIntImmCost(const APInt &Imm, Type *Ty)
Calculate the cost of materializing the given constant.
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:197
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:743
static bool isLogicalImmediate(uint64_t imm, unsigned regSize)
isLogicalImmediate - Return true if the immediate is valid for a logical immediate instruction of the...
unsigned getArithmeticReductionCost(unsigned Opcode, Type *Ty, bool IsPairwise)
Try to calculate arithmetic and shuffle op costs for reduction operations.
const TypeConversionCostTblEntry * ConvertCostTableLookup(ArrayRef< TypeConversionCostTblEntry > Tbl, int ISD, MVT Dst, MVT Src)
Find in type conversion cost table, TypeTy must be comparable to CompareTy by ==. ...
Definition: CostTable.h:55
bool isLegalInterleavedAccessType(VectorType *VecTy, const DataLayout &DL) const
Returns true if VecTy is a legal interleaved access type.
unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, const Instruction *I)
Definition: BasicTTIImpl.h:772
int64_t getSExtValue() const
Get sign extended value.
Definition: APInt.h:1575
unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, const Instruction *I=nullptr)
Definition: BasicTTIImpl.h:634
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:245
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition: ISDOpcodes.h:471
This node represents a polynomial recurrence on the trip count of the specified loop.
PopcntSupportKind
Flags indicating the kind of support for population count.
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:201
APInt sextOrTrunc(unsigned width) const
Sign extend or truncate to width.
Definition: APInt.cpp:884
unsigned getPrefetchDistance() const
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: APInt.h:33
Selects elements from the corresponding lane of either source operand.
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:126
unsigned getWideningBaseCost() const
Value * getOperand(unsigned i) const
Definition: User.h:170
bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info)
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:517
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return &#39;this&#39;.
Definition: Type.h:304
int getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp)
int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, unsigned AddressSpace, const Instruction *I=nullptr)
an instruction for type-safe pointer arithmetic to access elements of arrays and structs ...
Definition: Instructions.h:843
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP)
Definition: BasicTTIImpl.h:424
bool useReductionIntrinsic(unsigned Opcode, Type *Ty, TTI::ReductionFlags Flags) const
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:419
Value * getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, Type *ExpectedType)
* if(!EatIfPresent(lltok::kw_thread_local)) return false
ParseOptionalThreadLocal := /*empty.
Container class for subtarget features.
int getIntImmCost(int64_t Val)
Calculate the cost of materializing a 64-bit value.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:429
Machine Value Type.
Flags describing the kind of vector reduction.
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:46
int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index)
unsigned getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, unsigned Alignment, unsigned AddressSpace, bool UseMaskForCond=false, bool UseMaskForGaps=false)
Definition: BasicTTIImpl.h:850
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:149
bool shouldConsiderAddressTypePromotion(const Instruction &I, bool &AllowPromotionWithoutCommonHeader)
See if I should be considered for address type promotion.
Expected to fold away in lowering.
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values...
Value * getPointerOperand()
Definition: Instructions.h:274
This file provides a helper that implements much of the TTI interface in terms of the target-independ...
int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, const Instruction *I=nullptr)
int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, unsigned Alignment, unsigned AddressSpace, bool UseMaskForCond=false, bool UseMaskForGaps=false)
This file a TargetTransformInfo::Concept conforming object specific to the AArch64 target machine...
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function. ...
Definition: Function.cpp:194
Extended Value Type.
Definition: ValueTypes.h:34
static UndefValue * get(Type *T)
Static factory methods - Return an &#39;undef&#39; object of the specified type.
Definition: Constants.cpp:1411
const TargetMachine & getTargetMachine() const
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
Definition: LoopInfo.cpp:58
OperandValueProperties
Additional properties of an operand&#39;s values.
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
Definition: IntrinsicInst.h:51
unsigned getMinPrefetchStride() const
unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp)
Definition: BasicTTIImpl.h:615
APInt ashr(unsigned ShiftAmt) const
Arithmetic right-shift function.
Definition: APInt.h:947
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type...
Definition: Type.cpp:130
bool areInlineCompatible(const Function *Caller, const Function *Callee) const
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:847
int getCostOfKeepingLiveOverCall(ArrayRef< Type *> Tys)
AddressSpace
Definition: NVPTXBaseInfo.h:22
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP)
virtual const TargetSubtargetInfo * getSubtargetImpl(const Function &) const
Virtual method implemented by subclasses that returns a reference to that target&#39;s TargetSubtargetInf...
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:133
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:539
unsigned getVectorNumElements() const
Definition: DerivedTypes.h:462
Class to represent vector types.
Definition: DerivedTypes.h:393
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
Class for arbitrary precision integers.
Definition: APInt.h:70
unsigned getMaxInterleaveFactor() const
int getArithmeticReductionCost(unsigned Opcode, Type *Ty, bool IsPairwiseForm)
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:413
iterator_range< user_iterator > users()
Definition: Value.h:400
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:461
int InstructionOpcodeToISD(unsigned Opcode) const
Get the ISD node that corresponds to the Instruction class opcode.
const CostTblEntry * CostTableLookup(ArrayRef< CostTblEntry > Tbl, int ISD, MVT Ty)
Find in cost table, TypeTy must be comparable to CompareTy by ==.
Definition: CostTable.h:32
unsigned getNumArgOperands() const
Definition: InstrTypes.h:1077
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:380
This class represents an analyzed expression in the program.
unsigned getNumInterleavedAccesses(VectorType *VecTy, const DataLayout &DL) const
Returns the number of interleaved accesses that will be generated when lowering accesses of the given...
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:459
Parameters that control the generic loop unrolling transformation.
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:107
#define I(x, y, z)
Definition: MD5.cpp:58
ARMProcFamilyEnum getProcFamily() const
Returns ARM processor family.
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:323
const unsigned Kind
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
user_iterator user_begin()
Definition: Value.h:376
The cost of a typical &#39;add&#39; instruction.
unsigned getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition: Type.cpp:115
unsigned getMaxPrefetchIterationsAhead() const
LLVM Value Representation.
Definition: Value.h:73
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
static VectorType * get(Type *ElementType, unsigned NumElements)
This static method is the primary way to construct an VectorType.
Definition: Type.cpp:606
Broadcast element 0 to all other elements.
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:59
Type * getElementType() const
Definition: DerivedTypes.h:360
Value * PtrVal
This is the pointer that the intrinsic is loading from or storing to.
int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, const Instruction *I=nullptr)
bool hasOneUse() const
Return true if there is exactly one user of this value.
Definition: Value.h:413
OperandValueKind
Additional information about an operand&#39;s possible values.
This pass exposes codegen information to IR-level passes.
Conversion operators.
Definition: ISDOpcodes.h:458
Value * CreateInsertValue(Value *Agg, Value *Val, ArrayRef< unsigned > Idxs, const Twine &Name="")
Definition: IRBuilder.h:1983
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:467
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:126
#define LLVM_DEBUG(X)
Definition: Debug.h:123
iterator_range< block_iterator > blocks() const
Definition: LoopInfo.h:156
Information about a load/store intrinsic defined by the target.
bool NoNaN
If op is an fp min/max, whether NaNs may be present.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
std::pair< int, MVT > getTypeLegalizationCost(const DataLayout &DL, Type *Ty) const
Estimate the cost of type-legalization and the legalized type.
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:44
This file describes how to lower LLVM code to machine code.
const BasicBlock * getParent() const
Definition: Instruction.h:67
unsigned getVectorInsertExtractBaseCost() const
ShuffleKind
The various kinds of shuffle patterns for vector queries.
Shuffle elements of single source vector with any shuffle mask.
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition: ISDOpcodes.h:373