//===-- AArch64TargetTransformInfo.cpp - AArch64 specific TTI -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AArch64ExpandImm.h"
#include "AArch64TargetTransformInfo.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/CodeGen/CostTable.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Support/Debug.h"
#include <algorithm>
using namespace llvm;

#define DEBUG_TYPE "aarch64tti"

static cl::opt<bool> EnableFalkorHWPFUnrollFix("enable-falkor-hwpf-unroll-fix",
                                               cl::init(true), cl::Hidden);

bool AArch64TTIImpl::areInlineCompatible(const Function *Caller,
                                         const Function *Callee) const {
  const TargetMachine &TM = getTLI()->getTargetMachine();

  const FeatureBitset &CallerBits =
      TM.getSubtargetImpl(*Caller)->getFeatureBits();
  const FeatureBitset &CalleeBits =
      TM.getSubtargetImpl(*Callee)->getFeatureBits();

  // Inline a callee if its target-features are a subset of the caller's
  // target-features.
  return (CallerBits & CalleeBits) == CalleeBits;
}

/// Calculate the cost of materializing a 64-bit value. This helper
/// method might only calculate a fraction of a larger immediate. Therefore it
/// is valid to return a cost of ZERO.
int AArch64TTIImpl::getIntImmCost(int64_t Val) {
  // Check if the immediate can be encoded within an instruction.
  if (Val == 0 || AArch64_AM::isLogicalImmediate(Val, 64))
    return 0;

  if (Val < 0)
    Val = ~Val;

  // Calculate how many moves we will need to materialize this constant.
  SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
  AArch64_IMM::expandMOVImm(Val, 64, Insn);
  return Insn.size();
}

/// Calculate the cost of materializing the given constant.
int AArch64TTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
  assert(Ty->isIntegerTy());

  unsigned BitSize = Ty->getPrimitiveSizeInBits();
  if (BitSize == 0)
    return ~0U;

  // Sign-extend all constants to a multiple of 64 bits.
  APInt ImmVal = Imm;
  if (BitSize & 0x3f)
    ImmVal = Imm.sext((BitSize + 63) & ~0x3fU);

  // Split the constant into 64-bit chunks and calculate the cost for each
  // chunk.
  int Cost = 0;
  for (unsigned ShiftVal = 0; ShiftVal < BitSize; ShiftVal += 64) {
    APInt Tmp = ImmVal.ashr(ShiftVal).sextOrTrunc(64);
    int64_t Val = Tmp.getSExtValue();
    Cost += getIntImmCost(Val);
  }
  // We need at least one instruction to materialize the constant.
  return std::max(1, Cost);
}

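// Return the cost of the immediate at operand index Idx of an instruction
// with the given opcode. Immediates that the instruction can encode directly
// or that are cheap to materialize are reported as TCC_Free so that constant
// hoisting leaves them in place.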
int AArch64TTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx,
                                  const APInt &Imm, Type *Ty) {
  assert(Ty->isIntegerTy());

  unsigned BitSize = Ty->getPrimitiveSizeInBits();
  // There is no cost model for constants with a bit size of 0. Return TCC_Free
  // here, so that constant hoisting will ignore this constant.
  if (BitSize == 0)
    return TTI::TCC_Free;

  unsigned ImmIdx = ~0U;
  switch (Opcode) {
  default:
    return TTI::TCC_Free;
  case Instruction::GetElementPtr:
    // Always hoist the base address of a GetElementPtr.
    if (Idx == 0)
      return 2 * TTI::TCC_Basic;
    return TTI::TCC_Free;
  case Instruction::Store:
    ImmIdx = 0;
    break;
  case Instruction::Add:
  case Instruction::Sub:
  case Instruction::Mul:
  case Instruction::UDiv:
  case Instruction::SDiv:
  case Instruction::URem:
  case Instruction::SRem:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::ICmp:
    ImmIdx = 1;
    break;
  // Always return TCC_Free for the shift value of a shift instruction.
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
    if (Idx == 1)
      return TTI::TCC_Free;
    break;
  case Instruction::Trunc:
  case Instruction::ZExt:
  case Instruction::SExt:
  case Instruction::IntToPtr:
  case Instruction::PtrToInt:
  case Instruction::BitCast:
  case Instruction::PHI:
  case Instruction::Call:
  case Instruction::Select:
  case Instruction::Ret:
  case Instruction::Load:
    break;
  }

  if (Idx == ImmIdx) {
    int NumConstants = (BitSize + 63) / 64;
    int Cost = AArch64TTIImpl::getIntImmCost(Imm, Ty);
    return (Cost <= NumConstants * TTI::TCC_Basic)
               ? static_cast<int>(TTI::TCC_Free)
               : Cost;
  }
  return AArch64TTIImpl::getIntImmCost(Imm, Ty);
}

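// As above, but for the immediate operands of intrinsic calls such as the
// overflow-checking arithmetic intrinsics, stackmaps, and patchpoints.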
int AArch64TTIImpl::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
                                  const APInt &Imm, Type *Ty) {
  assert(Ty->isIntegerTy());

  unsigned BitSize = Ty->getPrimitiveSizeInBits();
  // There is no cost model for constants with a bit size of 0. Return TCC_Free
  // here, so that constant hoisting will ignore this constant.
  if (BitSize == 0)
    return TTI::TCC_Free;

  switch (IID) {
  default:
    return TTI::TCC_Free;
  case Intrinsic::sadd_with_overflow:
  case Intrinsic::uadd_with_overflow:
  case Intrinsic::ssub_with_overflow:
  case Intrinsic::usub_with_overflow:
  case Intrinsic::smul_with_overflow:
  case Intrinsic::umul_with_overflow:
    if (Idx == 1) {
      int NumConstants = (BitSize + 63) / 64;
      int Cost = AArch64TTIImpl::getIntImmCost(Imm, Ty);
      return (Cost <= NumConstants * TTI::TCC_Basic)
                 ? static_cast<int>(TTI::TCC_Free)
                 : Cost;
    }
    break;
  case Intrinsic::experimental_stackmap:
    if ((Idx < 2) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
      return TTI::TCC_Free;
    break;
  case Intrinsic::experimental_patchpoint_void:
  case Intrinsic::experimental_patchpoint_i64:
    if ((Idx < 4) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
      return TTI::TCC_Free;
    break;
  }
  return AArch64TTIImpl::getIntImmCost(Imm, Ty);
}

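// AArch64 lowers 32- and 64-bit CTPOP to a short NEON CNT-based sequence, so
// population count is reported as fast hardware for those widths.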
TargetTransformInfo::PopcntSupportKind
AArch64TTIImpl::getPopcntSupport(unsigned TyWidth) {
  assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
  if (TyWidth == 32 || TyWidth == 64)
    return TTI::PSK_FastHardware;
  // TODO: AArch64TargetLowering::LowerCTPOP() supports 128bit popcount.
  return TTI::PSK_Software;
}

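// Return true if an add/sub with the given operands can be matched to one of
// the AArch64 widening instructions (e.g., uaddl or saddw), in which case the
// extend feeding the second operand is folded into the operation.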
bool AArch64TTIImpl::isWideningInstruction(Type *DstTy, unsigned Opcode,
                                           ArrayRef<const Value *> Args) {

  // A helper that returns a vector type from the given type. The number of
  // elements in type Ty determines the vector width.
  auto toVectorTy = [&](Type *ArgTy) {
    return VectorType::get(ArgTy->getScalarType(),
                           DstTy->getVectorNumElements());
  };

  // Exit early if DstTy is not a vector type whose elements are at least
  // 16 bits wide.
  if (!DstTy->isVectorTy() || DstTy->getScalarSizeInBits() < 16)
    return false;

  // Determine if the operation has a widening variant. We consider both the
  // "long" (e.g., usubl) and "wide" (e.g., usubw) versions of the
  // instructions.
  //
  // TODO: Add additional widening operations (e.g., mul, shl, etc.) once we
  // verify that their extending operands are eliminated during code
  // generation.
  switch (Opcode) {
  case Instruction::Add: // UADDL(2), SADDL(2), UADDW(2), SADDW(2).
  case Instruction::Sub: // USUBL(2), SSUBL(2), USUBW(2), SSUBW(2).
    break;
  default:
    return false;
  }

  // To be a widening instruction (either the "wide" or "long" version), the
  // second operand must be a sign- or zero-extend having a single user. We
  // only consider extends having a single user because they may otherwise not
  // be eliminated.
  if (Args.size() != 2 ||
      (!isa<SExtInst>(Args[1]) && !isa<ZExtInst>(Args[1])) ||
      !Args[1]->hasOneUse())
    return false;
  auto *Extend = cast<CastInst>(Args[1]);

  // Legalize the destination type and ensure it can be used in a widening
  // operation.
  auto DstTyL = TLI->getTypeLegalizationCost(DL, DstTy);
  unsigned DstElTySize = DstTyL.second.getScalarSizeInBits();
  if (!DstTyL.second.isVector() || DstElTySize != DstTy->getScalarSizeInBits())
    return false;

  // Legalize the source type and ensure it can be used in a widening
  // operation.
  Type *SrcTy = toVectorTy(Extend->getSrcTy());
  auto SrcTyL = TLI->getTypeLegalizationCost(DL, SrcTy);
  unsigned SrcElTySize = SrcTyL.second.getScalarSizeInBits();
  if (!SrcTyL.second.isVector() || SrcElTySize != SrcTy->getScalarSizeInBits())
    return false;

  // Get the total number of vector elements in the legalized types.
  unsigned NumDstEls = DstTyL.first * DstTyL.second.getVectorNumElements();
  unsigned NumSrcEls = SrcTyL.first * SrcTyL.second.getVectorNumElements();

  // Return true if the legalized types have the same number of vector elements
  // and the destination element type size is twice that of the source type.
  return NumDstEls == NumSrcEls && 2 * SrcElTySize == DstElTySize;
}

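// Cost of a cast. Extends that are consumed by a widening add/sub are free;
// otherwise the cost comes from the conversion table, falling back to the
// generic implementation for conversions it does not cover.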
int AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                                     const Instruction *I) {
  int ISD = TLI->InstructionOpcodeToISD(Opcode);
  assert(ISD && "Invalid opcode");

  // If the cast is observable, and it is used by a widening instruction (e.g.,
  // uaddl, saddw, etc.), it may be free.
  if (I && I->hasOneUse()) {
    auto *SingleUser = cast<Instruction>(*I->user_begin());
    SmallVector<const Value *, 4> Operands(SingleUser->operand_values());
    if (isWideningInstruction(Dst, SingleUser->getOpcode(), Operands)) {
      // If the cast is the second operand, it is free. We will generate either
      // a "wide" or "long" version of the widening instruction.
      if (I == SingleUser->getOperand(1))
        return 0;
      // If the cast is not the second operand, it will be free if it looks the
      // same as the second operand. In this case, we will generate a "long"
      // version of the widening instruction.
      if (auto *Cast = dyn_cast<CastInst>(SingleUser->getOperand(1)))
        if (I->getOpcode() == unsigned(Cast->getOpcode()) &&
            cast<CastInst>(I)->getSrcTy() == Cast->getSrcTy())
          return 0;
    }
  }

  EVT SrcTy = TLI->getValueType(DL, Src);
  EVT DstTy = TLI->getValueType(DL, Dst);

  if (!SrcTy.isSimple() || !DstTy.isSimple())
    return BaseT::getCastInstrCost(Opcode, Dst, Src);

  static const TypeConversionCostTblEntry
  ConversionTbl[] = {

    // The number of shll instructions for the extension.

    // LowerVectorINT_TO_FP:

    // Complex: to v2f32

    // Complex: to v4f32

    // Complex: to v8f32

    // Complex: to v16f32

    // Complex: to v2f64

    // LowerVectorFP_TO_INT

    // Complex, from v2f32: legal type is v2i32 (no cost) or v2i64 (1 ext).

    // Complex, from v4f32: legal type is v4i16, 1 narrowing => ~2

    // Complex, from v2f64: legal type is v2i32, 1 narrowing => ~2.
  };

  if (const auto *Entry = ConvertCostTableLookup(ConversionTbl, ISD,
                                                 DstTy.getSimpleVT(),
                                                 SrcTy.getSimpleVT()))
    return Entry->Cost;

  return BaseT::getCastInstrCost(Opcode, Dst, Src);
}

int AArch64TTIImpl::getExtractWithExtendCost(unsigned Opcode, Type *Dst,
                                             VectorType *VecTy,
                                             unsigned Index) {

  // Make sure we were given a valid extend opcode.
  assert((Opcode == Instruction::SExt || Opcode == Instruction::ZExt) &&
         "Invalid opcode");

  // We are extending an element we extract from a vector, so the source type
  // of the extend is the element type of the vector.
  auto *Src = VecTy->getElementType();

  // Sign- and zero-extends are for integer types only.
  assert(isa<IntegerType>(Dst) && isa<IntegerType>(Src) && "Invalid type");

  // Get the cost for the extract. We compute the cost (if any) for the extend
  // below.
  auto Cost = getVectorInstrCost(Instruction::ExtractElement, VecTy, Index);

  // Legalize the types.
  auto VecLT = TLI->getTypeLegalizationCost(DL, VecTy);
  auto DstVT = TLI->getValueType(DL, Dst);
  auto SrcVT = TLI->getValueType(DL, Src);

  // If the resulting type is still a vector and the destination type is legal,
  // we may get the extension for free. If not, get the default cost for the
  // extend.
  if (!VecLT.second.isVector() || !TLI->isTypeLegal(DstVT))
    return Cost + getCastInstrCost(Opcode, Dst, Src);

  // The destination type should be larger than the element type. If not, get
  // the default cost for the extend.
  if (DstVT.getSizeInBits() < SrcVT.getSizeInBits())
    return Cost + getCastInstrCost(Opcode, Dst, Src);

  switch (Opcode) {
  default:
    llvm_unreachable("Opcode should be either SExt or ZExt");

  // For sign-extends, we only need a smov, which performs the extension
  // automatically.
  case Instruction::SExt:
    return Cost;

  // For zero-extends, the extend is performed automatically by a umov unless
  // the destination type is i64 and the element type is i8 or i16.
  case Instruction::ZExt:
    if (DstVT.getSizeInBits() != 64u || SrcVT.getSizeInBits() == 32u)
      return Cost;
  }

  // If we are unable to perform the extend for free, get the default cost.
  return Cost + getCastInstrCost(Opcode, Dst, Src);
}

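// Cost of inserting or extracting a vector element. On a legalized vector,
// lane zero is free; every other lane pays the subtarget's base
// insert/extract cost.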
int AArch64TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
                                       unsigned Index) {
  assert(Val->isVectorTy() && "This must be a vector type");

  if (Index != -1U) {
    // Legalize the type.
    std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Val);

    // This type is legalized to a scalar type.
    if (!LT.second.isVector())
      return 0;

    // The type may be split. Normalize the index to the new type.
    unsigned Width = LT.second.getVectorNumElements();
    Index = Index % Width;

    // The element at index zero is already inside the vector.
    if (Index == 0)
      return 0;
  }

  // All other insert/extracts cost this much.
  return ST->getVectorInsertExtractBaseCost();
}

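// Cost of an arithmetic instruction. Widening adds/subs carry the extra
// widening overhead, and integer divisions are priced according to the
// instruction sequences they are expanded into.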
int AArch64TTIImpl::getArithmeticInstrCost(
    unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info,
    TTI::OperandValueKind Opd2Info, TTI::OperandValueProperties Opd1PropInfo,
    TTI::OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args) {
  // Legalize the type.
  std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);

  // If the instruction is a widening instruction (e.g., uaddl, saddw, etc.),
  // add in the widening overhead specified by the sub-target. Since the
  // extends feeding widening instructions are performed automatically, they
  // aren't present in the generated code and have a zero cost. By adding a
  // widening overhead here, we attach the total cost of the combined operation
  // to the widening instruction.
  int Cost = 0;
  if (isWideningInstruction(Ty, Opcode, Args))
    Cost += ST->getWideningBaseCost();

  int ISD = TLI->InstructionOpcodeToISD(Opcode);

  switch (ISD) {
  default:
    return Cost + BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
                                                Opd1PropInfo, Opd2PropInfo);
  case ISD::SDIV:
    if (Opd2Info == TargetTransformInfo::OK_UniformConstantValue &&
        Opd2PropInfo == TargetTransformInfo::OP_PowerOf2) {
      // On AArch64, scalar signed division by a power-of-two constant is
      // normally expanded to the sequence ADD + CMP + SELECT + SRA.
      // The OperandValue properties may not be the same as those of the
      // previous operation; conservatively assume OP_None.
      Cost += getArithmeticInstrCost(Instruction::Add, Ty, Opd1Info, Opd2Info,
                                     TargetTransformInfo::OP_None,
                                     TargetTransformInfo::OP_None);
      Cost += getArithmeticInstrCost(Instruction::Sub, Ty, Opd1Info, Opd2Info,
                                     TargetTransformInfo::OP_None,
                                     TargetTransformInfo::OP_None);
      Cost += getArithmeticInstrCost(Instruction::Select, Ty, Opd1Info,
                                     Opd2Info, TargetTransformInfo::OP_None,
                                     TargetTransformInfo::OP_None);
      Cost += getArithmeticInstrCost(Instruction::AShr, Ty, Opd1Info, Opd2Info,
                                     TargetTransformInfo::OP_None,
                                     TargetTransformInfo::OP_None);
      return Cost;
    }
    LLVM_FALLTHROUGH;
  case ISD::UDIV:
    if (Opd2Info == TargetTransformInfo::OK_UniformConstantValue) {
      auto VT = TLI->getValueType(DL, Ty);
      if (TLI->isOperationLegalOrCustom(ISD::MULHU, VT)) {
        // Vector signed division by constant is expanded to the sequence
        // MULHS + ADD/SUB + SRA + SRL + ADD, and unsigned division to
        // MULHS + SUB + SRL + ADD + SRL.
        int MulCost = getArithmeticInstrCost(Instruction::Mul, Ty, Opd1Info,
                                             Opd2Info,
                                             TargetTransformInfo::OP_None,
                                             TargetTransformInfo::OP_None);
        int AddCost = getArithmeticInstrCost(Instruction::Add, Ty, Opd1Info,
                                             Opd2Info,
                                             TargetTransformInfo::OP_None,
                                             TargetTransformInfo::OP_None);
        int ShrCost = getArithmeticInstrCost(Instruction::AShr, Ty, Opd1Info,
                                             Opd2Info,
                                             TargetTransformInfo::OP_None,
                                             TargetTransformInfo::OP_None);
        return MulCost * 2 + AddCost * 2 + ShrCost * 2 + 1;
      }
    }

    Cost += BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
                                          Opd1PropInfo, Opd2PropInfo);
    if (Ty->isVectorTy()) {
      // On AArch64, vector divisions are not supported natively and are
      // expanded into scalar divisions of each pair of elements.
      Cost += getArithmeticInstrCost(Instruction::ExtractElement, Ty, Opd1Info,
                                     Opd2Info, Opd1PropInfo, Opd2PropInfo);
      Cost += getArithmeticInstrCost(Instruction::InsertElement, Ty, Opd1Info,
                                     Opd2Info, Opd1PropInfo, Opd2PropInfo);
      // TODO: if one of the arguments is scalar, then it's not necessary to
      // double the cost of handling the vector elements.
      Cost += Cost;
    }
    return Cost;

  case ISD::ADD:
  case ISD::MUL:
  case ISD::XOR:
  case ISD::OR:
  case ISD::AND:
    // These nodes are marked as 'custom' for combining purposes only.
    // We know that they are legal. See LowerAdd in ISelLowering.
    return (Cost + 1) * LT.first;
  }
}

int AArch64TTIImpl::getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
                                              const SCEV *Ptr) {
  // Address computations in vectorized code with non-consecutive addresses will
  // likely result in more instructions compared to scalar code where the
  // computation can more often be merged into the index mode. The resulting
  // extra micro-ops can significantly decrease throughput.
  unsigned NumVectorInstToHideOverhead = 10;
  int MaxMergeDistance = 64;

  if (Ty->isVectorTy() && SE &&
      !BaseT::isConstantStridedAccessLessThan(SE, Ptr, MaxMergeDistance + 1))
    return NumVectorInstToHideOverhead;

  // In many cases the address computation is not merged into the instruction
  // addressing mode.
  return 1;
}

int AArch64TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
                                       Type *CondTy, const Instruction *I) {

  int ISD = TLI->InstructionOpcodeToISD(Opcode);
  // We don't lower some vector selects well when they are wider than the
  // register width.
  if (ValTy->isVectorTy() && ISD == ISD::SELECT) {
    // We would need this many instructions to hide the scalarization happening.
    const int AmortizationCost = 20;
    static const TypeConversionCostTblEntry
    VectorSelectTbl[] = {
      { ISD::SELECT, MVT::v4i1, MVT::v4i64, 4 * AmortizationCost },
      { ISD::SELECT, MVT::v8i1, MVT::v8i64, 8 * AmortizationCost },
      { ISD::SELECT, MVT::v16i1, MVT::v16i64, 16 * AmortizationCost }
    };

    EVT SelCondTy = TLI->getValueType(DL, CondTy);
    EVT SelValTy = TLI->getValueType(DL, ValTy);
    if (SelCondTy.isSimple() && SelValTy.isSimple()) {
      if (const auto *Entry = ConvertCostTableLookup(VectorSelectTbl, ISD,
                                                     SelCondTy.getSimpleVT(),
                                                     SelValTy.getSimpleVT()))
        return Entry->Cost;
    }
  }
  return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
}

TTI::MemCmpExpansionOptions
AArch64TTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
  TTI::MemCmpExpansionOptions Options;
  Options.AllowOverlappingLoads = !ST->requiresStrictAlign();
  Options.MaxNumLoads = TLI->getMaxExpandSizeMemcmp(OptSize);
  Options.NumLoadsPerBlock = Options.MaxNumLoads;
  // TODO: Though vector loads usually perform well on AArch64, in some targets
  // they may wake up the FP unit, which raises the power consumption. Perhaps
  // they could be used with no holds barred (-O3).
  Options.LoadSizes = {8, 4, 2, 1};
  return Options;
}

int AArch64TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Ty,
                                    unsigned Alignment, unsigned AddressSpace,
                                    const Instruction *I) {
  auto LT = TLI->getTypeLegalizationCost(DL, Ty);

  if (ST->isMisaligned128StoreSlow() && Opcode == Instruction::Store &&
      LT.second.is128BitVector() && Alignment < 16) {
    // Unaligned stores are extremely inefficient. We don't split all
    // unaligned 128-bit stores because of the negative impact that has been
    // shown in practice on inlined block copy code.
    // We make such stores expensive so that we will only vectorize if there
    // are 6 other instructions getting vectorized.
    const int AmortizationCost = 6;

    return LT.first * 2 * AmortizationCost;
  }

  if (Ty->isVectorTy() && Ty->getVectorElementType()->isIntegerTy(8)) {
    unsigned ProfitableNumElements;
    if (Opcode == Instruction::Store)
      // We use a custom trunc store lowering so v.4b should be profitable.
      ProfitableNumElements = 4;
    else
      // We scalarize the loads because there is no v.4b register and we
      // have to promote the elements to v.2.
      ProfitableNumElements = 8;

    if (Ty->getVectorNumElements() < ProfitableNumElements) {
      unsigned NumVecElts = Ty->getVectorNumElements();
      unsigned NumVectorizableInstsToAmortize = NumVecElts * 2;
      // We generate 2 instructions per vector element.
      return NumVectorizableInstsToAmortize * NumVecElts * 2;
    }
  }

  return LT.first;
}

int AArch64TTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
                                               unsigned Factor,
                                               ArrayRef<unsigned> Indices,
                                               unsigned Alignment,
                                               unsigned AddressSpace,
                                               bool UseMaskForCond,
                                               bool UseMaskForGaps) {
  assert(Factor >= 2 && "Invalid interleave factor");
  assert(isa<VectorType>(VecTy) && "Expect a vector type");

  if (!UseMaskForCond && !UseMaskForGaps &&
      Factor <= TLI->getMaxSupportedInterleaveFactor()) {
    unsigned NumElts = VecTy->getVectorNumElements();
    auto *SubVecTy = VectorType::get(VecTy->getScalarType(), NumElts / Factor);

    // ldN/stN only support legal vector types of size 64 or 128 in bits.
    // Accesses having vector types that are a multiple of 128 bits can be
    // matched to more than one ldN/stN instruction.
    if (NumElts % Factor == 0 &&
        TLI->isLegalInterleavedAccessType(SubVecTy, DL))
      return Factor * TLI->getNumInterleavedAccesses(SubVecTy, DL);
  }

  return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
                                           Alignment, AddressSpace,
                                           UseMaskForCond, UseMaskForGaps);
}

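// Vector values that are live across a call must be spilled and reloaded
// around it, so charge one store plus one load for each 128-bit vector type.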
int AArch64TTIImpl::getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) {
  int Cost = 0;
  for (auto *I : Tys) {
    if (!I->isVectorTy())
      continue;
    if (I->getScalarSizeInBits() * I->getVectorNumElements() == 128)
      Cost += getMemoryOpCost(Instruction::Store, I, 128, 0) +
              getMemoryOpCost(Instruction::Load, I, 128, 0);
  }
  return Cost;
}

unsigned AArch64TTIImpl::getMaxInterleaveFactor(unsigned VF) {
  return ST->getMaxInterleaveFactor();
}

// For Falkor, we want to avoid having too many strided loads in a loop since
// that can exhaust the HW prefetcher resources. We adjust the unroller
// MaxCount preference below to attempt to ensure unrolling doesn't create too
// many strided loads.
static void
getFalkorUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                              TargetTransformInfo::UnrollingPreferences &UP) {
  enum { MaxStridedLoads = 7 };
  auto countStridedLoads = [](Loop *L, ScalarEvolution &SE) {
    int StridedLoads = 0;
    // FIXME? We could make this more precise by looking at the CFG and
    // e.g. not counting loads in each side of an if-then-else diamond.
    for (const auto BB : L->blocks()) {
      for (auto &I : *BB) {
        LoadInst *LMemI = dyn_cast<LoadInst>(&I);
        if (!LMemI)
          continue;

        Value *PtrValue = LMemI->getPointerOperand();
        if (L->isLoopInvariant(PtrValue))
          continue;

        const SCEV *LSCEV = SE.getSCEV(PtrValue);
        const SCEVAddRecExpr *LSCEVAddRec = dyn_cast<SCEVAddRecExpr>(LSCEV);
        if (!LSCEVAddRec || !LSCEVAddRec->isAffine())
          continue;

        // FIXME? We could take pairing of unrolled load copies into account
        // by looking at the AddRec, but we would probably have to limit this
        // to loops with no stores or other memory optimization barriers.
        ++StridedLoads;
        // We've seen enough strided loads that seeing more won't make a
        // difference.
        if (StridedLoads > MaxStridedLoads / 2)
          return StridedLoads;
      }
    }
    return StridedLoads;
  };

  int StridedLoads = countStridedLoads(L, SE);
  LLVM_DEBUG(dbgs() << "falkor-hwpf: detected " << StridedLoads
                    << " strided loads\n");
  // Pick the largest power of 2 unroll count that won't result in too many
  // strided loads.
  if (StridedLoads) {
    UP.MaxCount = 1 << Log2_32(MaxStridedLoads / StridedLoads);
    LLVM_DEBUG(dbgs() << "falkor-hwpf: setting unroll MaxCount to "
                      << UP.MaxCount << '\n');
  }
}

void AArch64TTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                                             TTI::UnrollingPreferences &UP) {
  // Enable partial unrolling and runtime unrolling.
  BaseT::getUnrollingPreferences(L, SE, UP);

  // An inner loop is more likely to be hot, and the runtime check can be
  // promoted out by the LICM pass, so the overhead is lower; try a larger
  // threshold to unroll more loops.
  if (L->getLoopDepth() > 1)
    UP.PartialThreshold *= 2;

  // Disable partial & runtime unrolling on -Os.
  UP.PartialOptSizeThreshold = 0;

  if (ST->getProcFamily() == AArch64Subtarget::Falkor &&
      EnableFalkorHWPFUnrollFix)
    getFalkorUnrollingPreferences(L, SE, UP);
}

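// Try to produce a value of ExpectedType from a structured NEON load/store
// intrinsic: rebuild the stored struct from the operands of st2/st3/st4, or
// reuse the result of ld2/ld3/ld4 directly when the types already match.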
Value *AArch64TTIImpl::getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
                                                         Type *ExpectedType) {
  switch (Inst->getIntrinsicID()) {
  default:
    return nullptr;
  case Intrinsic::aarch64_neon_st2:
  case Intrinsic::aarch64_neon_st3:
  case Intrinsic::aarch64_neon_st4: {
    // Create a struct type
    StructType *ST = dyn_cast<StructType>(ExpectedType);
    if (!ST)
      return nullptr;
    unsigned NumElts = Inst->getNumArgOperands() - 1;
    if (ST->getNumElements() != NumElts)
      return nullptr;
    for (unsigned i = 0, e = NumElts; i != e; ++i) {
      if (Inst->getArgOperand(i)->getType() != ST->getElementType(i))
        return nullptr;
    }
    Value *Res = UndefValue::get(ExpectedType);
    IRBuilder<> Builder(Inst);
    for (unsigned i = 0, e = NumElts; i != e; ++i) {
      Value *L = Inst->getArgOperand(i);
      Res = Builder.CreateInsertValue(Res, L, i);
    }
    return Res;
  }
  case Intrinsic::aarch64_neon_ld2:
  case Intrinsic::aarch64_neon_ld3:
  case Intrinsic::aarch64_neon_ld4:
    if (Inst->getType() == ExpectedType)
      return Inst;
    return nullptr;
  }
}

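// Describe the memory behavior of the structured NEON load/store intrinsics
// (ld2/ld3/ld4 and st2/st3/st4): the pointer operand, whether they read or
// write, and a MatchingId keyed on the number of interleaved elements.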
bool AArch64TTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst,
                                        MemIntrinsicInfo &Info) {
  switch (Inst->getIntrinsicID()) {
  default:
    break;
  case Intrinsic::aarch64_neon_ld2:
  case Intrinsic::aarch64_neon_ld3:
  case Intrinsic::aarch64_neon_ld4:
    Info.ReadMem = true;
    Info.WriteMem = false;
    Info.PtrVal = Inst->getArgOperand(0);
    break;
  case Intrinsic::aarch64_neon_st2:
  case Intrinsic::aarch64_neon_st3:
  case Intrinsic::aarch64_neon_st4:
    Info.ReadMem = false;
    Info.WriteMem = true;
    Info.PtrVal = Inst->getArgOperand(Inst->getNumArgOperands() - 1);
    break;
  }

  switch (Inst->getIntrinsicID()) {
  default:
    return false;
  case Intrinsic::aarch64_neon_ld2:
  case Intrinsic::aarch64_neon_st2:
    Info.MatchingId = VECTOR_LDST_TWO_ELEMENTS;
    break;
  case Intrinsic::aarch64_neon_ld3:
  case Intrinsic::aarch64_neon_st3:
    Info.MatchingId = VECTOR_LDST_THREE_ELEMENTS;
    break;
  case Intrinsic::aarch64_neon_ld4:
  case Intrinsic::aarch64_neon_st4:
    Info.MatchingId = VECTOR_LDST_FOUR_ELEMENTS;
    break;
  }
  return true;
}

/// See if \p I should be considered for address type promotion. We check if \p
/// I is a sext with the right type and used in memory accesses. If it is used
/// in a "complex" getelementptr, we allow it to be promoted without finding
/// other sext instructions that sign extended the same initial value. A
/// getelementptr is considered "complex" if it has more than 2 operands.
bool AArch64TTIImpl::shouldConsiderAddressTypePromotion(
    const Instruction &I, bool &AllowPromotionWithoutCommonHeader) {
  bool Considerable = false;
  AllowPromotionWithoutCommonHeader = false;
  if (!isa<SExtInst>(&I))
    return false;
  Type *ConsideredSExtType =
      Type::getInt64Ty(I.getParent()->getParent()->getContext());
  if (I.getType() != ConsideredSExtType)
    return false;
  // See if the sext is the one with the right type and used in at least one
  // GetElementPtrInst.
  for (const User *U : I.users()) {
    if (const GetElementPtrInst *GEPInst = dyn_cast<GetElementPtrInst>(U)) {
      Considerable = true;
      // A getelementptr is considered as "complex" if it has more than 2
      // operands. We will promote a SExt used in such complex GEP as we
      // expect some computation to be merged if they are done on 64 bits.
      if (GEPInst->getNumOperands() > 2) {
        AllowPromotionWithoutCommonHeader = true;
        break;
      }
    }
  }
  return Considerable;
}

unsigned AArch64TTIImpl::getCacheLineSize() {
  return ST->getCacheLineSize();
}

unsigned AArch64TTIImpl::getPrefetchDistance() {
  return ST->getPrefetchDistance();
}

unsigned AArch64TTIImpl::getMinPrefetchStride() {
  return ST->getMinPrefetchStride();
}

unsigned AArch64TTIImpl::getMaxPrefetchIterationsAhead() {
  return ST->getMaxPrefetchIterationsAhead();
}

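// Decide whether a horizontal reduction should be emitted with the
// vector-reduce intrinsics rather than a shuffle sequence: integer add and
// integer min/max (ICmp) reductions are profitable once the vector fills a
// 128-bit register; FP min/max (FCmp) reductions only when NaNs can be
// ignored.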
bool AArch64TTIImpl::useReductionIntrinsic(unsigned Opcode, Type *Ty,
                                           TTI::ReductionFlags Flags) const {
  assert(isa<VectorType>(Ty) && "Expected Ty to be a vector type");
  unsigned ScalarBits = Ty->getScalarSizeInBits();
  switch (Opcode) {
  case Instruction::FAdd:
  case Instruction::FMul:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::Mul:
    return false;
  case Instruction::Add:
    return ScalarBits * Ty->getVectorNumElements() >= 128;
  case Instruction::ICmp:
    return (ScalarBits < 64) &&
           (ScalarBits * Ty->getVectorNumElements() >= 128);
  case Instruction::FCmp:
    return Flags.NoNaN;
  default:
    llvm_unreachable("Unhandled reduction opcode");
  }
  return false;
}

int AArch64TTIImpl::getArithmeticReductionCost(unsigned Opcode, Type *ValTy,
                                               bool IsPairwiseForm) {

  if (IsPairwiseForm)
    return BaseT::getArithmeticReductionCost(Opcode, ValTy, IsPairwiseForm);

  std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
  MVT MTy = LT.second;
  int ISD = TLI->InstructionOpcodeToISD(Opcode);
  assert(ISD && "Invalid opcode");

  // Horizontal adds can use the 'addv' instruction. We model the cost of these
  // instructions as normal vector adds. This is the only arithmetic vector
  // reduction operation for which we have an instruction.
  static const CostTblEntry CostTblNoPairwise[]{
      {ISD::ADD, MVT::v8i8,  1},
      {ISD::ADD, MVT::v16i8, 1},
      {ISD::ADD, MVT::v4i16, 1},
      {ISD::ADD, MVT::v8i16, 1},
      {ISD::ADD, MVT::v4i32, 1},
  };

  if (const auto *Entry = CostTableLookup(CostTblNoPairwise, ISD, MTy))
    return LT.first * Entry->Cost;

  return BaseT::getArithmeticReductionCost(Opcode, ValTy, IsPairwiseForm);
}

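// Broadcast, transpose, select, and single-source permute shuffles on legal
// vector types map to short dup/trn/zip/mov sequences; other shuffle kinds
// fall back to the generic cost model.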
int AArch64TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
                                   Type *SubTp) {
  if (Kind == TTI::SK_Broadcast || Kind == TTI::SK_Transpose ||
      Kind == TTI::SK_Select || Kind == TTI::SK_PermuteSingleSrc) {
    static const CostTblEntry ShuffleTbl[] = {
      // Broadcast shuffle kinds can be performed with 'dup'.
      // Transpose shuffle kinds can be performed with 'trn1/trn2' and
      // 'zip1/zip2' instructions.
      // Select shuffle kinds.
      // TODO: handle vXi8/vXi16.
      { TTI::SK_Select, MVT::v2i32, 1 }, // mov.
      { TTI::SK_Select, MVT::v4i32, 2 }, // rev+trn (or similar).
      { TTI::SK_Select, MVT::v2i64, 1 }, // mov.
      { TTI::SK_Select, MVT::v2f32, 1 }, // mov.
      { TTI::SK_Select, MVT::v4f32, 2 }, // rev+trn (or similar).
      { TTI::SK_Select, MVT::v2f64, 1 }, // mov.
      // PermuteSingleSrc shuffle kinds.
      // TODO: handle vXi8/vXi16.
      { TTI::SK_PermuteSingleSrc, MVT::v2i32, 1 }, // mov.
      { TTI::SK_PermuteSingleSrc, MVT::v4i32, 3 }, // perfectshuffle worst case.
      { TTI::SK_PermuteSingleSrc, MVT::v2i64, 1 }, // mov.
      { TTI::SK_PermuteSingleSrc, MVT::v2f32, 1 }, // mov.
      { TTI::SK_PermuteSingleSrc, MVT::v4f32, 3 }, // perfectshuffle worst case.
      { TTI::SK_PermuteSingleSrc, MVT::v2f64, 1 }, // mov.
    };
    std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
    if (const auto *Entry = CostTableLookup(ShuffleTbl, Kind, LT.second))
      return LT.first * Entry->Cost;
  }

  return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
}