LLVM  9.0.0svn
AArch64TargetTransformInfo.cpp
1 //===-- AArch64TargetTransformInfo.cpp - AArch64 specific TTI -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "AArch64TargetTransformInfo.h"
10 #include "MCTargetDesc/AArch64AddressingModes.h"
11 #include "llvm/Analysis/LoopInfo.h"
12 #include "llvm/Analysis/TargetTransformInfo.h"
13 #include "llvm/CodeGen/BasicTTIImpl.h"
14 #include "llvm/CodeGen/CostTable.h"
15 #include "llvm/CodeGen/TargetLowering.h"
16 #include "llvm/IR/IntrinsicInst.h"
17 #include "llvm/Support/Debug.h"
18 #include <algorithm>
19 using namespace llvm;
20 
21 #define DEBUG_TYPE "aarch64tti"
22 
23 static cl::opt<bool> EnableFalkorHWPFUnrollFix("enable-falkor-hwpf-unroll-fix",
24  cl::init(true), cl::Hidden);
25 
26 bool AArch64TTIImpl::areInlineCompatible(const Function *Caller,
27                                          const Function *Callee) const {
28  const TargetMachine &TM = getTLI()->getTargetMachine();
29 
30  const FeatureBitset &CallerBits =
31  TM.getSubtargetImpl(*Caller)->getFeatureBits();
32  const FeatureBitset &CalleeBits =
33  TM.getSubtargetImpl(*Callee)->getFeatureBits();
34 
35  // Inline a callee if its target-features are a subset of the caller's
36  // target-features.
37  return (CallerBits & CalleeBits) == CalleeBits;
38 }
39 
40 /// Calculate the cost of materializing a 64-bit value. This helper
41 /// method might only calculate a fraction of a larger immediate. Therefore it
42 /// is valid to return a cost of ZERO.
43 int AArch64TTIImpl::getIntImmCost(int64_t Val) {
44  // Check if the immediate can be encoded within an instruction.
45  if (Val == 0 || AArch64_AM::isLogicalImmediate(Val, 64))
46  return 0;
47 
48  if (Val < 0)
49  Val = ~Val;
50 
51  // Calculate how many moves we will need to materialize this constant.
52  unsigned LZ = countLeadingZeros((uint64_t)Val);
53  return (64 - LZ + 15) / 16;
54 }
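// For example, Val = 0xABCD has 48 leading zeros, so the estimate is
// (64 - 48 + 15) / 16 = 1 move (a single MOVZ), while 0x123456789ABCDEF0 has
// 3 leading zeros and needs (64 - 3 + 15) / 16 = 4 moves (MOVZ plus MOVKs).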
55 
56 /// Calculate the cost of materializing the given constant.
57 int AArch64TTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
58  assert(Ty->isIntegerTy());
59 
60  unsigned BitSize = Ty->getPrimitiveSizeInBits();
61  if (BitSize == 0)
62  return ~0U;
63 
64  // Sign-extend all constants to a multiple of 64 bits.
65  APInt ImmVal = Imm;
66  if (BitSize & 0x3f)
67  ImmVal = Imm.sext((BitSize + 63) & ~0x3fU);
68 
69  // Split the constant into 64-bit chunks and calculate the cost for each
70  // chunk.
71  int Cost = 0;
72  for (unsigned ShiftVal = 0; ShiftVal < BitSize; ShiftVal += 64) {
73  APInt Tmp = ImmVal.ashr(ShiftVal).sextOrTrunc(64);
74  int64_t Val = Tmp.getSExtValue();
75  Cost += getIntImmCost(Val);
76  }
77  // We need at least one instruction to materialize the constant.
78  return std::max(1, Cost);
79 }
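// For example, an i128 constant is sign-extended, split into two 64-bit
// chunks, and the per-chunk materialization costs are summed; std::max then
// guarantees a total cost of at least one instruction.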
80 
81 int AArch64TTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx,
82  const APInt &Imm, Type *Ty) {
83  assert(Ty->isIntegerTy());
84 
85  unsigned BitSize = Ty->getPrimitiveSizeInBits();
86  // There is no cost model for constants with a bit size of 0. Return TCC_Free
87  // here, so that constant hoisting will ignore this constant.
88  if (BitSize == 0)
89  return TTI::TCC_Free;
90 
91  unsigned ImmIdx = ~0U;
92  switch (Opcode) {
93  default:
94  return TTI::TCC_Free;
95  case Instruction::GetElementPtr:
96  // Always hoist the base address of a GetElementPtr.
97  if (Idx == 0)
98  return 2 * TTI::TCC_Basic;
99  return TTI::TCC_Free;
100  case Instruction::Store:
101  ImmIdx = 0;
102  break;
103  case Instruction::Add:
104  case Instruction::Sub:
105  case Instruction::Mul:
106  case Instruction::UDiv:
107  case Instruction::SDiv:
108  case Instruction::URem:
109  case Instruction::SRem:
110  case Instruction::And:
111  case Instruction::Or:
112  case Instruction::Xor:
113  case Instruction::ICmp:
114  ImmIdx = 1;
115  break;
116  // Always return TCC_Free for the shift value of a shift instruction.
117  case Instruction::Shl:
118  case Instruction::LShr:
119  case Instruction::AShr:
120  if (Idx == 1)
121  return TTI::TCC_Free;
122  break;
123  case Instruction::Trunc:
124  case Instruction::ZExt:
125  case Instruction::SExt:
126  case Instruction::IntToPtr:
127  case Instruction::PtrToInt:
128  case Instruction::BitCast:
129  case Instruction::PHI:
130  case Instruction::Call:
131  case Instruction::Select:
132  case Instruction::Ret:
133  case Instruction::Load:
134  break;
135  }
136 
137  if (Idx == ImmIdx) {
138  int NumConstants = (BitSize + 63) / 64;
139  int Cost = AArch64TTIImpl::getIntImmCost(Imm, Ty);
140  return (Cost <= NumConstants * TTI::TCC_Basic)
141  ? static_cast<int>(TTI::TCC_Free)
142  : Cost;
143  }
144  return AArch64TTIImpl::getIntImmCost(Imm, Ty);
145 }
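// For example, the constant 7 used as the second operand of an i64 'add' needs
// at most one move, which does not exceed NumConstants * TCC_Basic, so it is
// reported as TCC_Free and left for the instruction itself to encode.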
146 
147 int AArch64TTIImpl::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
148                                   const APInt &Imm, Type *Ty) {
149  assert(Ty->isIntegerTy());
150 
151  unsigned BitSize = Ty->getPrimitiveSizeInBits();
152  // There is no cost model for constants with a bit size of 0. Return TCC_Free
153  // here, so that constant hoisting will ignore this constant.
154  if (BitSize == 0)
155  return TTI::TCC_Free;
156 
157  switch (IID) {
158  default:
159  return TTI::TCC_Free;
160  case Intrinsic::sadd_with_overflow:
161  case Intrinsic::uadd_with_overflow:
162  case Intrinsic::ssub_with_overflow:
163  case Intrinsic::usub_with_overflow:
164  case Intrinsic::smul_with_overflow:
165  case Intrinsic::umul_with_overflow:
166  if (Idx == 1) {
167  int NumConstants = (BitSize + 63) / 64;
168  int Cost = AArch64TTIImpl::getIntImmCost(Imm, Ty);
169  return (Cost <= NumConstants * TTI::TCC_Basic)
170  ? static_cast<int>(TTI::TCC_Free)
171  : Cost;
172  }
173  break;
174  case Intrinsic::experimental_stackmap:
175  if ((Idx < 2) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
176  return TTI::TCC_Free;
177  break;
178  case Intrinsic::experimental_patchpoint_void:
179  case Intrinsic::experimental_patchpoint_i64:
180  if ((Idx < 4) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
181  return TTI::TCC_Free;
182  break;
183  }
184  return AArch64TTIImpl::getIntImmCost(Imm, Ty);
185 }
186 
187 TargetTransformInfo::PopcntSupportKind
188 AArch64TTIImpl::getPopcntSupport(unsigned TyWidth) {
189  assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
190  if (TyWidth == 32 || TyWidth == 64)
191  return TTI::PSK_FastHardware;
192  // TODO: AArch64TargetLowering::LowerCTPOP() supports 128bit popcount.
193  return TTI::PSK_Software;
194 }
195 
196 bool AArch64TTIImpl::isWideningInstruction(Type *DstTy, unsigned Opcode,
197                                            ArrayRef<const Value *> Args) {
198 
199  // A helper that returns a vector type from the given type. The number of
200  // elements in the destination type DstTy determines the vector width.
201  auto toVectorTy = [&](Type *ArgTy) {
202  return VectorType::get(ArgTy->getScalarType(),
203  DstTy->getVectorNumElements());
204  };
205 
206  // Exit early if DstTy is not a vector type whose elements are at least
207  // 16 bits wide.
208  if (!DstTy->isVectorTy() || DstTy->getScalarSizeInBits() < 16)
209  return false;
210 
211  // Determine if the operation has a widening variant. We consider both the
212  // "long" (e.g., usubl) and "wide" (e.g., usubw) versions of the
213  // instructions.
214  //
215  // TODO: Add additional widening operations (e.g., mul, shl, etc.) once we
216  // verify that their extending operands are eliminated during code
217  // generation.
218  switch (Opcode) {
219  case Instruction::Add: // UADDL(2), SADDL(2), UADDW(2), SADDW(2).
220  case Instruction::Sub: // USUBL(2), SSUBL(2), USUBW(2), SSUBW(2).
221  break;
222  default:
223  return false;
224  }
225 
226  // To be a widening instruction (either the "wide" or "long" version), the
227  // second operand must be a sign- or zero-extend with a single user. We
228  // only consider extends having a single user because they may otherwise not
229  // be eliminated.
230  if (Args.size() != 2 ||
231  (!isa<SExtInst>(Args[1]) && !isa<ZExtInst>(Args[1])) ||
232  !Args[1]->hasOneUse())
233  return false;
234  auto *Extend = cast<CastInst>(Args[1]);
235 
236  // Legalize the destination type and ensure it can be used in a widening
237  // operation.
238  auto DstTyL = TLI->getTypeLegalizationCost(DL, DstTy);
239  unsigned DstElTySize = DstTyL.second.getScalarSizeInBits();
240  if (!DstTyL.second.isVector() || DstElTySize != DstTy->getScalarSizeInBits())
241  return false;
242 
243  // Legalize the source type and ensure it can be used in a widening
244  // operation.
245  Type *SrcTy = toVectorTy(Extend->getSrcTy());
246  auto SrcTyL = TLI->getTypeLegalizationCost(DL, SrcTy);
247  unsigned SrcElTySize = SrcTyL.second.getScalarSizeInBits();
248  if (!SrcTyL.second.isVector() || SrcElTySize != SrcTy->getScalarSizeInBits())
249  return false;
250 
251  // Get the total number of vector elements in the legalized types.
252  unsigned NumDstEls = DstTyL.first * DstTyL.second.getVectorNumElements();
253  unsigned NumSrcEls = SrcTyL.first * SrcTyL.second.getVectorNumElements();
254 
255  // Return true if the legalized types have the same number of vector elements
256  // and the destination element type size is twice that of the source type.
257  return NumDstEls == NumSrcEls && 2 * SrcElTySize == DstElTySize;
258 }
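// For example, 'add <8 x i16> %x, (zext <8 x i8> %y to <8 x i16>)' can be
// selected as a single UADDW, and an add of two single-use sign-extends from
// <8 x i8> can be selected as SADDL, so both count as widening instructions.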
259 
260 int AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
261  const Instruction *I) {
262  int ISD = TLI->InstructionOpcodeToISD(Opcode);
263  assert(ISD && "Invalid opcode");
264 
265  // If the cast is observable, and it is used by a widening instruction (e.g.,
266  // uaddl, saddw, etc.), it may be free.
267  if (I && I->hasOneUse()) {
268  auto *SingleUser = cast<Instruction>(*I->user_begin());
269  SmallVector<const Value *, 4> Operands(SingleUser->operand_values());
270  if (isWideningInstruction(Dst, SingleUser->getOpcode(), Operands)) {
271  // If the cast is the second operand, it is free. We will generate either
272  // a "wide" or "long" version of the widening instruction.
273  if (I == SingleUser->getOperand(1))
274  return 0;
275  // If the cast is not the second operand, it will be free if it looks the
276  // same as the second operand. In this case, we will generate a "long"
277  // version of the widening instruction.
278  if (auto *Cast = dyn_cast<CastInst>(SingleUser->getOperand(1)))
279  if (I->getOpcode() == unsigned(Cast->getOpcode()) &&
280  cast<CastInst>(I)->getSrcTy() == Cast->getSrcTy())
281  return 0;
282  }
283  }
284 
285  EVT SrcTy = TLI->getValueType(DL, Src);
286  EVT DstTy = TLI->getValueType(DL, Dst);
287 
288  if (!SrcTy.isSimple() || !DstTy.isSimple())
289  return BaseT::getCastInstrCost(Opcode, Dst, Src);
290 
291  static const TypeConversionCostTblEntry
292  ConversionTbl[] = {
297 
298  // The number of shll instructions for the extension.
315 
316  // LowerVectorINT_TO_FP:
323 
324  // Complex: to v2f32
331 
332  // Complex: to v4f32
337 
338  // Complex: to v8f32
343 
344  // Complex: to v16f32
347 
348  // Complex: to v2f64
355 
356 
357  // LowerVectorFP_TO_INT
364 
365  // Complex, from v2f32: legal type is v2i32 (no cost) or v2i64 (1 ext).
372 
373  // Complex, from v4f32: legal type is v4i16, 1 narrowing => ~2
378 
379  // Complex, from v2f64: legal type is v2i32, 1 narrowing => ~2.
386  };
387 
388  if (const auto *Entry = ConvertCostTableLookup(ConversionTbl, ISD,
389  DstTy.getSimpleVT(),
390  SrcTy.getSimpleVT()))
391  return Entry->Cost;
392 
393  return BaseT::getCastInstrCost(Opcode, Dst, Src);
394 }
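// For example, a 'zext <8 x i8> %y to <8 x i16>' whose only user is an add
// that isWideningInstruction recognizes is reported as free (cost 0); other
// casts fall back to the conversion table or the base implementation.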
395 
396 int AArch64TTIImpl::getExtractWithExtendCost(unsigned Opcode, Type *Dst,
397                                              VectorType *VecTy,
398  unsigned Index) {
399 
400  // Make sure we were given a valid extend opcode.
401  assert((Opcode == Instruction::SExt || Opcode == Instruction::ZExt) &&
402  "Invalid opcode");
403 
404  // We are extending an element we extract from a vector, so the source type
405  // of the extend is the element type of the vector.
406  auto *Src = VecTy->getElementType();
407 
408  // Sign- and zero-extends are for integer types only.
409  assert(isa<IntegerType>(Dst) && isa<IntegerType>(Src) && "Invalid type");
410 
411  // Get the cost for the extract. We compute the cost (if any) for the extend
412  // below.
413  auto Cost = getVectorInstrCost(Instruction::ExtractElement, VecTy, Index);
414 
415  // Legalize the types.
416  auto VecLT = TLI->getTypeLegalizationCost(DL, VecTy);
417  auto DstVT = TLI->getValueType(DL, Dst);
418  auto SrcVT = TLI->getValueType(DL, Src);
419 
420  // If the resulting type is still a vector and the destination type is legal,
421  // we may get the extension for free. If not, get the default cost for the
422  // extend.
423  if (!VecLT.second.isVector() || !TLI->isTypeLegal(DstVT))
424  return Cost + getCastInstrCost(Opcode, Dst, Src);
425 
426  // The destination type should be larger than the element type. If not, get
427  // the default cost for the extend.
428  if (DstVT.getSizeInBits() < SrcVT.getSizeInBits())
429  return Cost + getCastInstrCost(Opcode, Dst, Src);
430 
431  switch (Opcode) {
432  default:
433  llvm_unreachable("Opcode should be either SExt or ZExt");
434 
435  // For sign-extends, we only need a smov, which performs the extension
436  // automatically.
437  case Instruction::SExt:
438  return Cost;
439 
440  // For zero-extends, the extend is performed automatically by a umov unless
441  // the destination type is i64 and the element type is i8 or i16.
442  case Instruction::ZExt:
443  if (DstVT.getSizeInBits() != 64u || SrcVT.getSizeInBits() == 32u)
444  return Cost;
445  }
446 
447  // If we are unable to perform the extend for free, get the default cost.
448  return Cost + getCastInstrCost(Opcode, Dst, Src);
449 }
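// For example, sign-extending an extracted i16 lane of <8 x i16> to i32 is a
// single SMOV, so only the extract cost is charged. Zero-extending an i8 lane
// to i64 is charged an additional extend on top of the UMOV.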
450 
451 int AArch64TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
452  unsigned Index) {
453  assert(Val->isVectorTy() && "This must be a vector type");
454 
455  if (Index != -1U) {
456  // Legalize the type.
457  std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Val);
458 
459  // This type is legalized to a scalar type.
460  if (!LT.second.isVector())
461  return 0;
462 
463  // The type may be split. Normalize the index to the new type.
464  unsigned Width = LT.second.getVectorNumElements();
465  Index = Index % Width;
466 
467  // The element at index zero is already inside the vector.
468  if (Index == 0)
469  return 0;
470  }
471 
472  // All other insert/extracts cost this much.
473  return ST->getVectorInsertExtractBaseCost();
474 }
475 
476 int AArch64TTIImpl::getArithmeticInstrCost(
477     unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info,
478     TTI::OperandValueKind Opd2Info, TTI::OperandValueProperties Opd1PropInfo,
479     TTI::OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args) {
480  // Legalize the type.
481  std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
482 
483  // If the instruction is a widening instruction (e.g., uaddl, saddw, etc.),
484  // add in the widening overhead specified by the sub-target. Since the
485  // extends feeding widening instructions are performed automatically, they
486  // aren't present in the generated code and have a zero cost. By adding a
487  // widening overhead here, we attach the total cost of the combined operation
488  // to the widening instruction.
489  int Cost = 0;
490  if (isWideningInstruction(Ty, Opcode, Args))
491  Cost += ST->getWideningBaseCost();
492 
493  int ISD = TLI->InstructionOpcodeToISD(Opcode);
494 
495  switch (ISD) {
496  default:
497  return Cost + BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
498  Opd1PropInfo, Opd2PropInfo);
499  case ISD::SDIV:
500  if (Opd2Info == TargetTransformInfo::OK_UniformConstantValue &&
501      Opd2PropInfo == TargetTransformInfo::OP_PowerOf2) {
502  // On AArch64, scalar signed division by a power-of-two constant is
503  // normally expanded to the sequence ADD + CMP + SELECT + SRA.
504  // The OperandValue properties may not be the same as those of the
505  // previous operation; conservatively assume OP_None.
506  Cost += getArithmeticInstrCost(Instruction::Add, Ty, Opd1Info, Opd2Info,
507                                 TargetTransformInfo::OP_None,
508                                 TargetTransformInfo::OP_None);
509  Cost += getArithmeticInstrCost(Instruction::Sub, Ty, Opd1Info, Opd2Info,
510                                 TargetTransformInfo::OP_None,
511                                 TargetTransformInfo::OP_None);
512  Cost += getArithmeticInstrCost(Instruction::Select, Ty, Opd1Info, Opd2Info,
513                                 TargetTransformInfo::OP_None,
514                                 TargetTransformInfo::OP_None);
515  Cost += getArithmeticInstrCost(Instruction::AShr, Ty, Opd1Info, Opd2Info,
516                                 TargetTransformInfo::OP_None,
517                                 TargetTransformInfo::OP_None);
518  return Cost;
519  }
520  LLVM_FALLTHROUGH;
521  case ISD::UDIV:
522  if (Opd2Info == TargetTransformInfo::OK_UniformConstantValue) {
523  auto VT = TLI->getValueType(DL, Ty);
524  if (TLI->isOperationLegalOrCustom(ISD::MULHU, VT)) {
525  // Vector signed division by a constant is expanded to the
526  // sequence MULHS + ADD/SUB + SRA + SRL + ADD, and unsigned division
527  // to MULHS + SUB + SRL + ADD + SRL.
528  int MulCost = getArithmeticInstrCost(Instruction::Mul, Ty, Opd1Info,
529                                       Opd2Info,
530                                       TargetTransformInfo::OP_None,
531                                       TargetTransformInfo::OP_None);
532  int AddCost = getArithmeticInstrCost(Instruction::Add, Ty, Opd1Info,
533                                       Opd2Info,
534                                       TargetTransformInfo::OP_None,
535                                       TargetTransformInfo::OP_None);
536  int ShrCost = getArithmeticInstrCost(Instruction::AShr, Ty, Opd1Info,
537                                       Opd2Info,
538                                       TargetTransformInfo::OP_None,
539                                       TargetTransformInfo::OP_None);
540  return MulCost * 2 + AddCost * 2 + ShrCost * 2 + 1;
541  }
542  }
543 
544  Cost += BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
545  Opd1PropInfo, Opd2PropInfo);
546  if (Ty->isVectorTy()) {
547  // On AArch64, vector divisions are not supported natively and are
548  // expanded into scalar divisions of each pair of elements.
549  Cost += getArithmeticInstrCost(Instruction::ExtractElement, Ty, Opd1Info,
550  Opd2Info, Opd1PropInfo, Opd2PropInfo);
551  Cost += getArithmeticInstrCost(Instruction::InsertElement, Ty, Opd1Info,
552  Opd2Info, Opd1PropInfo, Opd2PropInfo);
553  // TODO: if one of the arguments is scalar, then it's not necessary to
554  // double the cost of handling the vector elements.
555  Cost += Cost;
556  }
557  return Cost;
558 
559  case ISD::ADD:
560  case ISD::MUL:
561  case ISD::XOR:
562  case ISD::OR:
563  case ISD::AND:
564  // These nodes are marked as 'custom' for combining purposes only.
565  // We know that they are legal. See LowerAdd in ISelLowering.
566  return (Cost + 1) * LT.first;
567  }
568 }
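// For example, 'sdiv i32 %x, 4' (a uniform power-of-two divisor) is costed as
// the ADD + SUB + SELECT + ASHR sequence above, while a plain 'add <4 x i32>'
// is legal and comes out as (0 + 1) * 1 = 1.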
569 
570 int AArch64TTIImpl::getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
571                                               const SCEV *Ptr) {
572  // Address computations in vectorized code with non-consecutive addresses will
573  // likely result in more instructions compared to scalar code where the
574  // computation can more often be merged into the index mode. The resulting
575  // extra micro-ops can significantly decrease throughput.
576  unsigned NumVectorInstToHideOverhead = 10;
577  int MaxMergeDistance = 64;
578 
579  if (Ty->isVectorTy() && SE &&
580  !BaseT::isConstantStridedAccessLessThan(SE, Ptr, MaxMergeDistance + 1))
581  return NumVectorInstToHideOverhead;
582 
583  // In many cases the address computation is not merged into the instruction
584  // addressing mode.
585  return 1;
586 }
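// For example, address computation for a vector access whose pointer is not a
// constant stride of at most 64 bytes is charged 10 to account for the extra
// address-generation micro-ops; ordinary accesses are charged 1.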
587 
588 int AArch64TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
589  Type *CondTy, const Instruction *I) {
590 
591  int ISD = TLI->InstructionOpcodeToISD(Opcode);
592  // We don't lower some vector selects well when they are wider than the
593  // register width.
594  if (ValTy->isVectorTy() && ISD == ISD::SELECT) {
595  // We would need this many instructions to hide the scalarization happening.
596  const int AmortizationCost = 20;
597  static const TypeConversionCostTblEntry
598  VectorSelectTbl[] = {
602  { ISD::SELECT, MVT::v4i1, MVT::v4i64, 4 * AmortizationCost },
603  { ISD::SELECT, MVT::v8i1, MVT::v8i64, 8 * AmortizationCost },
604  { ISD::SELECT, MVT::v16i1, MVT::v16i64, 16 * AmortizationCost }
605  };
606 
607  EVT SelCondTy = TLI->getValueType(DL, CondTy);
608  EVT SelValTy = TLI->getValueType(DL, ValTy);
609  if (SelCondTy.isSimple() && SelValTy.isSimple()) {
610  if (const auto *Entry = ConvertCostTableLookup(VectorSelectTbl, ISD,
611  SelCondTy.getSimpleVT(),
612  SelValTy.getSimpleVT()))
613  return Entry->Cost;
614  }
615  }
616  return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
617 }
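// For example, 'select <4 x i1> %c, <4 x i64> %a, <4 x i64> %b' hits the table
// above and is charged 4 * AmortizationCost = 80 to reflect how poorly it
// scalarizes.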
618 
619 int AArch64TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Ty,
620  unsigned Alignment, unsigned AddressSpace,
621  const Instruction *I) {
622  auto LT = TLI->getTypeLegalizationCost(DL, Ty);
623 
624  if (ST->isMisaligned128StoreSlow() && Opcode == Instruction::Store &&
625  LT.second.is128BitVector() && Alignment < 16) {
626  // Unaligned stores are extremely inefficient. We don't split all
627  // unaligned 128-bit stores because of the negative impact that has been
628  // observed in practice on inlined block copy code.
629  // We make such stores expensive so that we will only vectorize if there
630  // are 6 other instructions getting vectorized.
631  const int AmortizationCost = 6;
632 
633  return LT.first * 2 * AmortizationCost;
634  }
635 
636  if (Ty->isVectorTy() && Ty->getVectorElementType()->isIntegerTy(8)) {
637  unsigned ProfitableNumElements;
638  if (Opcode == Instruction::Store)
639  // We use a custom trunc store lowering so v.4b should be profitable.
640  ProfitableNumElements = 4;
641  else
642  // We scalarize the loads because there is no v.4b register and we
643  // have to promote the elements to v.2.
644  ProfitableNumElements = 8;
645 
646  if (Ty->getVectorNumElements() < ProfitableNumElements) {
647  unsigned NumVecElts = Ty->getVectorNumElements();
648  unsigned NumVectorizableInstsToAmortize = NumVecElts * 2;
649  // We generate 2 instructions per vector element.
650  return NumVectorizableInstsToAmortize * NumVecElts * 2;
651  }
652  }
653 
654  return LT.first;
655 }
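// For example, a 4-byte-aligned 'store <4 x i32>' on a subtarget with slow
// misaligned 128-bit stores is charged 1 * 2 * 6 = 12, and a 'load <4 x i8>'
// is scalarized and charged (4 * 2) * 4 * 2 = 64.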
656 
657 int AArch64TTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
658                                                unsigned Factor,
659  ArrayRef<unsigned> Indices,
660  unsigned Alignment,
661  unsigned AddressSpace,
662  bool UseMaskForCond,
663  bool UseMaskForGaps) {
664  assert(Factor >= 2 && "Invalid interleave factor");
665  assert(isa<VectorType>(VecTy) && "Expect a vector type");
666 
667  if (!UseMaskForCond && !UseMaskForGaps &&
668  Factor <= TLI->getMaxSupportedInterleaveFactor()) {
669  unsigned NumElts = VecTy->getVectorNumElements();
670  auto *SubVecTy = VectorType::get(VecTy->getScalarType(), NumElts / Factor);
671 
672  // ldN/stN only support legal vector types of size 64 or 128 in bits.
673  // Accesses having vector types that are a multiple of 128 bits can be
674  // matched to more than one ldN/stN instruction.
675  if (NumElts % Factor == 0 &&
676  TLI->isLegalInterleavedAccessType(SubVecTy, DL))
677  return Factor * TLI->getNumInterleavedAccesses(SubVecTy, DL);
678  }
679 
680  return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
681  Alignment, AddressSpace,
682  UseMaskForCond, UseMaskForGaps);
683 }
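// For example, an ld2 (Factor == 2) load of <8 x i16> uses the sub-vector type
// <4 x i16>, which is a legal 64-bit interleaved access type, so the cost is
// 2 * 1 = 2.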
684 
685 int AArch64TTIImpl::getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) {
686  int Cost = 0;
687  for (auto *I : Tys) {
688  if (!I->isVectorTy())
689  continue;
690  if (I->getScalarSizeInBits() * I->getVectorNumElements() == 128)
691  Cost += getMemoryOpCost(Instruction::Store, I, 128, 0) +
692          getMemoryOpCost(Instruction::Load, I, 128, 0);
693  }
694  return Cost;
695 }
696 
697 unsigned AArch64TTIImpl::getMaxInterleaveFactor(unsigned VF) {
698  return ST->getMaxInterleaveFactor();
699 }
700 
701 // For Falkor, we want to avoid having too many strided loads in a loop since
702 // that can exhaust the HW prefetcher resources. We adjust the unroller
703 // MaxCount preference below to attempt to ensure unrolling doesn't create too
704 // many strided loads.
705 static void
706 getFalkorUnrollingPreferences(Loop *L, ScalarEvolution &SE,
707                               TargetTransformInfo::UnrollingPreferences &UP) {
708  enum { MaxStridedLoads = 7 };
709  auto countStridedLoads = [](Loop *L, ScalarEvolution &SE) {
710  int StridedLoads = 0;
711  // FIXME? We could make this more precise by looking at the CFG and
712  // e.g. not counting loads in each side of an if-then-else diamond.
713  for (const auto BB : L->blocks()) {
714  for (auto &I : *BB) {
715  LoadInst *LMemI = dyn_cast<LoadInst>(&I);
716  if (!LMemI)
717  continue;
718 
719  Value *PtrValue = LMemI->getPointerOperand();
720  if (L->isLoopInvariant(PtrValue))
721  continue;
722 
723  const SCEV *LSCEV = SE.getSCEV(PtrValue);
724  const SCEVAddRecExpr *LSCEVAddRec = dyn_cast<SCEVAddRecExpr>(LSCEV);
725  if (!LSCEVAddRec || !LSCEVAddRec->isAffine())
726  continue;
727 
728  // FIXME? We could take pairing of unrolled load copies into account
729  // by looking at the AddRec, but we would probably have to limit this
730  // to loops with no stores or other memory optimization barriers.
731  ++StridedLoads;
732  // We've seen enough strided loads that seeing more won't make a
733  // difference.
734  if (StridedLoads > MaxStridedLoads / 2)
735  return StridedLoads;
736  }
737  }
738  return StridedLoads;
739  };
740 
741  int StridedLoads = countStridedLoads(L, SE);
742  LLVM_DEBUG(dbgs() << "falkor-hwpf: detected " << StridedLoads
743  << " strided loads\n");
744  // Pick the largest power of 2 unroll count that won't result in too many
745  // strided loads.
746  if (StridedLoads) {
747  UP.MaxCount = 1 << Log2_32(MaxStridedLoads / StridedLoads);
748  LLVM_DEBUG(dbgs() << "falkor-hwpf: setting unroll MaxCount to "
749  << UP.MaxCount << '\n');
750  }
751 }
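// For example, if 3 strided loads are detected, MaxCount becomes
// 1 << Log2_32(7 / 3) = 2, so the unroller will not unroll by more than 2.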
752 
753 void AArch64TTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
754                                              TTI::UnrollingPreferences &UP) {
755  // Enable partial unrolling and runtime unrolling.
756  BaseT::getUnrollingPreferences(L, SE, UP);
757 
758  // An inner loop is more likely to be hot, and the runtime check can be
759  // hoisted out by the LICM pass, so the overhead is lower; try a larger
760  // threshold to unroll more loops.
761  if (L->getLoopDepth() > 1)
762  UP.PartialThreshold *= 2;
763 
764  // Disable partial & runtime unrolling on -Os.
765  UP.PartialOptSizeThreshold = 0;
766 
767  if (ST->getProcFamily() == AArch64Subtarget::Falkor &&
768      EnableFalkorHWPFUnrollFix)
769  getFalkorUnrollingPreferences(L, SE, UP);
770 }
771 
772 Value *AArch64TTIImpl::getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
773                                                          Type *ExpectedType) {
774  switch (Inst->getIntrinsicID()) {
775  default:
776  return nullptr;
777  case Intrinsic::aarch64_neon_st2:
778  case Intrinsic::aarch64_neon_st3:
779  case Intrinsic::aarch64_neon_st4: {
780  // Create a struct type
781  StructType *ST = dyn_cast<StructType>(ExpectedType);
782  if (!ST)
783  return nullptr;
784  unsigned NumElts = Inst->getNumArgOperands() - 1;
785  if (ST->getNumElements() != NumElts)
786  return nullptr;
787  for (unsigned i = 0, e = NumElts; i != e; ++i) {
788  if (Inst->getArgOperand(i)->getType() != ST->getElementType(i))
789  return nullptr;
790  }
791  Value *Res = UndefValue::get(ExpectedType);
792  IRBuilder<> Builder(Inst);
793  for (unsigned i = 0, e = NumElts; i != e; ++i) {
794  Value *L = Inst->getArgOperand(i);
795  Res = Builder.CreateInsertValue(Res, L, i);
796  }
797  return Res;
798  }
799  case Intrinsic::aarch64_neon_ld2:
800  case Intrinsic::aarch64_neon_ld3:
801  case Intrinsic::aarch64_neon_ld4:
802  if (Inst->getType() == ExpectedType)
803  return Inst;
804  return nullptr;
805  }
806 }
807 
808 bool AArch64TTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst,
809                                         MemIntrinsicInfo &Info) {
810  switch (Inst->getIntrinsicID()) {
811  default:
812  break;
813  case Intrinsic::aarch64_neon_ld2:
814  case Intrinsic::aarch64_neon_ld3:
815  case Intrinsic::aarch64_neon_ld4:
816  Info.ReadMem = true;
817  Info.WriteMem = false;
818  Info.PtrVal = Inst->getArgOperand(0);
819  break;
820  case Intrinsic::aarch64_neon_st2:
821  case Intrinsic::aarch64_neon_st3:
822  case Intrinsic::aarch64_neon_st4:
823  Info.ReadMem = false;
824  Info.WriteMem = true;
825  Info.PtrVal = Inst->getArgOperand(Inst->getNumArgOperands() - 1);
826  break;
827  }
828 
829  switch (Inst->getIntrinsicID()) {
830  default:
831  return false;
832  case Intrinsic::aarch64_neon_ld2:
833  case Intrinsic::aarch64_neon_st2:
834  Info.MatchingId = VECTOR_LDST_TWO_ELEMENTS;
835  break;
836  case Intrinsic::aarch64_neon_ld3:
837  case Intrinsic::aarch64_neon_st3:
838  Info.MatchingId = VECTOR_LDST_THREE_ELEMENTS;
839  break;
840  case Intrinsic::aarch64_neon_ld4:
841  case Intrinsic::aarch64_neon_st4:
842  Info.MatchingId = VECTOR_LDST_FOUR_ELEMENTS;
843  break;
844  }
845  return true;
846 }
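// For example, a call to llvm.aarch64.neon.ld3 is reported as a read through
// its single pointer argument with MatchingId VECTOR_LDST_THREE_ELEMENTS,
// which lets memory-optimizing passes pair it with a matching st3.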
847 
848 /// See if \p I should be considered for address type promotion. We check if
849 /// \p I is a sext with the right type that is used in memory accesses. If it is
850 /// used in a "complex" getelementptr, we allow it to be promoted without
851 /// finding other sext instructions that sign-extended the same initial value.
852 /// A getelementptr is considered "complex" if it has more than 2 operands.
853 bool AArch64TTIImpl::shouldConsiderAddressTypePromotion(
854     const Instruction &I, bool &AllowPromotionWithoutCommonHeader) {
855  bool Considerable = false;
856  AllowPromotionWithoutCommonHeader = false;
857  if (!isa<SExtInst>(&I))
858  return false;
859  Type *ConsideredSExtType =
860      Type::getInt64Ty(I.getParent()->getParent()->getContext());
861  if (I.getType() != ConsideredSExtType)
862  return false;
863  // See if the sext is the one with the right type and used in at least one
864  // GetElementPtrInst.
865  for (const User *U : I.users()) {
866  if (const GetElementPtrInst *GEPInst = dyn_cast<GetElementPtrInst>(U)) {
867  Considerable = true;
868  // A getelementptr is considered "complex" if it has more than 2
869  // operands. We will promote a SExt used in such a complex GEP, as we
870  // expect some computation to be merged if it is done on 64 bits.
871  if (GEPInst->getNumOperands() > 2) {
872  AllowPromotionWithoutCommonHeader = true;
873  break;
874  }
875  }
876  }
877  return Considerable;
878 }
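// For example, a 'sext i32 %i to i64' whose user is a getelementptr with more
// than two operands is considered for promotion even without finding other
// sexts of the same value (AllowPromotionWithoutCommonHeader is set).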
879 
880 unsigned AArch64TTIImpl::getCacheLineSize() {
881  return ST->getCacheLineSize();
882 }
883 
884 unsigned AArch64TTIImpl::getPrefetchDistance() {
885  return ST->getPrefetchDistance();
886 }
887 
888 unsigned AArch64TTIImpl::getMinPrefetchStride() {
889  return ST->getMinPrefetchStride();
890 }
891 
892 unsigned AArch64TTIImpl::getMaxPrefetchIterationsAhead() {
893  return ST->getMaxPrefetchIterationsAhead();
894 }
895 
896 bool AArch64TTIImpl::useReductionIntrinsic(unsigned Opcode, Type *Ty,
897  TTI::ReductionFlags Flags) const {
898  assert(isa<VectorType>(Ty) && "Expected Ty to be a vector type");
899  unsigned ScalarBits = Ty->getScalarSizeInBits();
900  switch (Opcode) {
901  case Instruction::FAdd:
902  case Instruction::FMul:
903  case Instruction::And:
904  case Instruction::Or:
905  case Instruction::Xor:
906  case Instruction::Mul:
907  return false;
908  case Instruction::Add:
909  return ScalarBits * Ty->getVectorNumElements() >= 128;
910  case Instruction::ICmp:
911  return (ScalarBits < 64) &&
912  (ScalarBits * Ty->getVectorNumElements() >= 128);
913  case Instruction::FCmp:
914  return Flags.NoNaN;
915  default:
916  llvm_unreachable("Unhandled reduction opcode");
917  }
918  return false;
919 }
920 
921 int AArch64TTIImpl::getArithmeticReductionCost(unsigned Opcode, Type *ValTy,
922                                                bool IsPairwiseForm) {
923 
924  if (IsPairwiseForm)
925  return BaseT::getArithmeticReductionCost(Opcode, ValTy, IsPairwiseForm);
926 
927  std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
928  MVT MTy = LT.second;
929  int ISD = TLI->InstructionOpcodeToISD(Opcode);
930  assert(ISD && "Invalid opcode");
931 
932  // Horizontal adds can use the 'addv' instruction. We model the cost of these
933  // instructions as normal vector adds. This is the only arithmetic vector
934  // reduction operation for which we have an instruction.
935  static const CostTblEntry CostTblNoPairwise[]{
936  {ISD::ADD, MVT::v8i8, 1},
937  {ISD::ADD, MVT::v16i8, 1},
938  {ISD::ADD, MVT::v4i16, 1},
939  {ISD::ADD, MVT::v8i16, 1},
940  {ISD::ADD, MVT::v4i32, 1},
941  };
942 
943  if (const auto *Entry = CostTableLookup(CostTblNoPairwise, ISD, MTy))
944  return LT.first * Entry->Cost;
945 
946  return BaseT::getArithmeticReductionCost(Opcode, ValTy, IsPairwiseForm);
947 }
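// For example, a non-pairwise add reduction of <4 x i32> maps to a single
// ADDV, so the table gives LT.first * 1 = 1.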
948 
949 int AArch64TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
950                                    Type *SubTp) {
951  if (Kind == TTI::SK_Broadcast || Kind == TTI::SK_Transpose ||
952  Kind == TTI::SK_Select || Kind == TTI::SK_PermuteSingleSrc) {
953  static const CostTblEntry ShuffleTbl[] = {
954  // Broadcast shuffle kinds can be performed with 'dup'.
965  // Transpose shuffle kinds can be performed with 'trn1/trn2' and
966  // 'zip1/zip2' instructions.
977  // Select shuffle kinds.
978  // TODO: handle vXi8/vXi16.
979  { TTI::SK_Select, MVT::v2i32, 1 }, // mov.
980  { TTI::SK_Select, MVT::v4i32, 2 }, // rev+trn (or similar).
981  { TTI::SK_Select, MVT::v2i64, 1 }, // mov.
982  { TTI::SK_Select, MVT::v2f32, 1 }, // mov.
983  { TTI::SK_Select, MVT::v4f32, 2 }, // rev+trn (or similar).
984  { TTI::SK_Select, MVT::v2f64, 1 }, // mov.
985  // PermuteSingleSrc shuffle kinds.
986  // TODO: handle vXi8/vXi16.
987  { TTI::SK_PermuteSingleSrc, MVT::v2i32, 1 }, // mov.
988  { TTI::SK_PermuteSingleSrc, MVT::v4i32, 3 }, // perfectshuffle worst case.
989  { TTI::SK_PermuteSingleSrc, MVT::v2i64, 1 }, // mov.
990  { TTI::SK_PermuteSingleSrc, MVT::v2f32, 1 }, // mov.
991  { TTI::SK_PermuteSingleSrc, MVT::v4f32, 3 }, // perfectshuffle worst case.
992  { TTI::SK_PermuteSingleSrc, MVT::v2f64, 1 }, // mov.
993  };
994  std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
995  if (const auto *Entry = CostTableLookup(ShuffleTbl, Kind, LT.second))
996  return LT.first * Entry->Cost;
997  }
998 
999  return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
1000 }