AArch64TargetTransformInfo.cpp (LLVM 7.0.0svn)
//===-- AArch64TargetTransformInfo.cpp - AArch64 specific TTI -------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

#include "AArch64TargetTransformInfo.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/CodeGen/CostTable.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Support/Debug.h"
#include <algorithm>
using namespace llvm;

#define DEBUG_TYPE "aarch64tti"

static cl::opt<bool> EnableFalkorHWPFUnrollFix("enable-falkor-hwpf-unroll-fix",
                                               cl::init(true), cl::Hidden);

bool AArch64TTIImpl::areInlineCompatible(const Function *Caller,
                                         const Function *Callee) const {
  const TargetMachine &TM = getTLI()->getTargetMachine();

  const FeatureBitset &CallerBits =
      TM.getSubtargetImpl(*Caller)->getFeatureBits();
  const FeatureBitset &CalleeBits =
      TM.getSubtargetImpl(*Callee)->getFeatureBits();

  // Inline a callee if its target-features are a subset of the caller's
  // target-features.
  return (CallerBits & CalleeBits) == CalleeBits;
}

/// Calculate the cost of materializing a 64-bit value. This helper
/// method might only calculate a fraction of a larger immediate. Therefore it
/// is valid to return a cost of ZERO.
int AArch64TTIImpl::getIntImmCost(int64_t Val) {
  // Check if the immediate can be encoded within an instruction.
  if (Val == 0 || AArch64_AM::isLogicalImmediate(Val, 64))
    return 0;

  if (Val < 0)
    Val = ~Val;

  // Calculate how many moves we will need to materialize this constant.
  unsigned LZ = countLeadingZeros((uint64_t)Val);
  return (64 - LZ + 15) / 16;
}

/// Calculate the cost of materializing the given constant.
int AArch64TTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
  assert(Ty->isIntegerTy());

  unsigned BitSize = Ty->getPrimitiveSizeInBits();
  if (BitSize == 0)
    return ~0U;

  // Sign-extend all constants to a multiple of 64 bits.
  APInt ImmVal = Imm;
  if (BitSize & 0x3f)
    ImmVal = Imm.sext((BitSize + 63) & ~0x3fU);

  // Split the constant into 64-bit chunks and calculate the cost for each
  // chunk.
  int Cost = 0;
  for (unsigned ShiftVal = 0; ShiftVal < BitSize; ShiftVal += 64) {
    APInt Tmp = ImmVal.ashr(ShiftVal).sextOrTrunc(64);
    int64_t Val = Tmp.getSExtValue();
    Cost += getIntImmCost(Val);
  }
  // We need at least one instruction to materialize the constant.
  return std::max(1, Cost);
}

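// Calculate the cost of materializing the integer immediate Imm when it
// appears as operand Idx of an instruction with the given Opcode. Immediates
// that can be folded into the instruction are reported as TTI::TCC_Free so
// that constant hoisting leaves them in place.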
int AArch64TTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx,
                                  const APInt &Imm, Type *Ty) {
  assert(Ty->isIntegerTy());

  unsigned BitSize = Ty->getPrimitiveSizeInBits();
  // There is no cost model for constants with a bit size of 0. Return TCC_Free
  // here, so that constant hoisting will ignore this constant.
  if (BitSize == 0)
    return TTI::TCC_Free;

  unsigned ImmIdx = ~0U;
  switch (Opcode) {
  default:
    return TTI::TCC_Free;
  case Instruction::GetElementPtr:
    // Always hoist the base address of a GetElementPtr.
    if (Idx == 0)
      return 2 * TTI::TCC_Basic;
    return TTI::TCC_Free;
  case Instruction::Store:
    ImmIdx = 0;
    break;
  case Instruction::Add:
  case Instruction::Sub:
  case Instruction::Mul:
  case Instruction::UDiv:
  case Instruction::SDiv:
  case Instruction::URem:
  case Instruction::SRem:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::ICmp:
    ImmIdx = 1;
    break;
  // Always return TCC_Free for the shift value of a shift instruction.
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
    if (Idx == 1)
      return TTI::TCC_Free;
    break;
  case Instruction::Trunc:
  case Instruction::ZExt:
  case Instruction::SExt:
  case Instruction::IntToPtr:
  case Instruction::PtrToInt:
  case Instruction::BitCast:
  case Instruction::PHI:
  case Instruction::Call:
  case Instruction::Select:
  case Instruction::Ret:
  case Instruction::Load:
    break;
  }

  if (Idx == ImmIdx) {
    int NumConstants = (BitSize + 63) / 64;
    int Cost = AArch64TTIImpl::getIntImmCost(Imm, Ty);
    return (Cost <= NumConstants * TTI::TCC_Basic)
               ? static_cast<int>(TTI::TCC_Free)
               : Cost;
  }
  return AArch64TTIImpl::getIntImmCost(Imm, Ty);
}

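// As above, but for an immediate used as operand Idx of a call to the
// intrinsic IID.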
int AArch64TTIImpl::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
                                  const APInt &Imm, Type *Ty) {
  assert(Ty->isIntegerTy());

  unsigned BitSize = Ty->getPrimitiveSizeInBits();
  // There is no cost model for constants with a bit size of 0. Return TCC_Free
  // here, so that constant hoisting will ignore this constant.
  if (BitSize == 0)
    return TTI::TCC_Free;

  switch (IID) {
  default:
    return TTI::TCC_Free;
  case Intrinsic::sadd_with_overflow:
  case Intrinsic::uadd_with_overflow:
  case Intrinsic::ssub_with_overflow:
  case Intrinsic::usub_with_overflow:
  case Intrinsic::smul_with_overflow:
  case Intrinsic::umul_with_overflow:
    if (Idx == 1) {
      int NumConstants = (BitSize + 63) / 64;
      int Cost = AArch64TTIImpl::getIntImmCost(Imm, Ty);
      return (Cost <= NumConstants * TTI::TCC_Basic)
                 ? static_cast<int>(TTI::TCC_Free)
                 : Cost;
    }
    break;
  case Intrinsic::experimental_stackmap:
    if ((Idx < 2) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
      return TTI::TCC_Free;
    break;
  case Intrinsic::experimental_patchpoint_void:
  case Intrinsic::experimental_patchpoint_i64:
    if ((Idx < 4) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
      return TTI::TCC_Free;
    break;
  }
  return AArch64TTIImpl::getIntImmCost(Imm, Ty);
}

TargetTransformInfo::PopcntSupportKind
AArch64TTIImpl::getPopcntSupport(unsigned TyWidth) {
  assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
  if (TyWidth == 32 || TyWidth == 64)
    return TTI::PSK_FastHardware;
  // TODO: AArch64TargetLowering::LowerCTPOP() supports 128bit popcount.
  return TTI::PSK_Software;
}

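// Return true if the given add/sub with operands Args can be matched to an
// AArch64 widening instruction (e.g., uaddl, saddw), in which case the
// feeding extend is folded away during instruction selection.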
bool AArch64TTIImpl::isWideningInstruction(Type *DstTy, unsigned Opcode,
                                           ArrayRef<const Value *> Args) {

  // A helper that returns a vector type from the given type. The number of
  // elements in type Ty determines the vector width.
  auto toVectorTy = [&](Type *ArgTy) {
    return VectorType::get(ArgTy->getScalarType(),
                           DstTy->getVectorNumElements());
  };

  // Exit early if DstTy is not a vector type whose elements are at least
  // 16 bits wide.
  if (!DstTy->isVectorTy() || DstTy->getScalarSizeInBits() < 16)
    return false;

  // Determine if the operation has a widening variant. We consider both the
  // "long" (e.g., usubl) and "wide" (e.g., usubw) versions of the
  // instructions.
  //
  // TODO: Add additional widening operations (e.g., mul, shl, etc.) once we
  // verify that their extending operands are eliminated during code
  // generation.
  switch (Opcode) {
  case Instruction::Add: // UADDL(2), SADDL(2), UADDW(2), SADDW(2).
  case Instruction::Sub: // USUBL(2), SSUBL(2), USUBW(2), SSUBW(2).
    break;
  default:
    return false;
  }

  // To be a widening instruction (either the "wide" or "long" version), the
  // second operand must be a sign- or zero-extend having a single user. We
  // only consider extends having a single user because they may otherwise not
  // be eliminated.
  if (Args.size() != 2 ||
      (!isa<SExtInst>(Args[1]) && !isa<ZExtInst>(Args[1])) ||
      !Args[1]->hasOneUse())
    return false;
  auto *Extend = cast<CastInst>(Args[1]);

  // Legalize the destination type and ensure it can be used in a widening
  // operation.
  auto DstTyL = TLI->getTypeLegalizationCost(DL, DstTy);
  unsigned DstElTySize = DstTyL.second.getScalarSizeInBits();
  if (!DstTyL.second.isVector() || DstElTySize != DstTy->getScalarSizeInBits())
    return false;

  // Legalize the source type and ensure it can be used in a widening
  // operation.
  Type *SrcTy = toVectorTy(Extend->getSrcTy());
  auto SrcTyL = TLI->getTypeLegalizationCost(DL, SrcTy);
  unsigned SrcElTySize = SrcTyL.second.getScalarSizeInBits();
  if (!SrcTyL.second.isVector() || SrcElTySize != SrcTy->getScalarSizeInBits())
    return false;

  // Get the total number of vector elements in the legalized types.
  unsigned NumDstEls = DstTyL.first * DstTyL.second.getVectorNumElements();
  unsigned NumSrcEls = SrcTyL.first * SrcTyL.second.getVectorNumElements();

  // Return true if the legalized types have the same number of vector elements
  // and the destination element type size is twice that of the source type.
  return NumDstEls == NumSrcEls && 2 * SrcElTySize == DstElTySize;
}

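// Compute the cost of a cast. Extends that feed a widening add/sub, and
// conversions covered by the target cost table below, can be cheaper than the
// generic estimate.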
int AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                                     const Instruction *I) {
  int ISD = TLI->InstructionOpcodeToISD(Opcode);
  assert(ISD && "Invalid opcode");

  // If the cast is observable, and it is used by a widening instruction (e.g.,
  // uaddl, saddw, etc.), it may be free.
  if (I && I->hasOneUse()) {
    auto *SingleUser = cast<Instruction>(*I->user_begin());
    SmallVector<const Value *, 4> Operands(SingleUser->operand_values());
    if (isWideningInstruction(Dst, SingleUser->getOpcode(), Operands)) {
      // If the cast is the second operand, it is free. We will generate either
      // a "wide" or "long" version of the widening instruction.
      if (I == SingleUser->getOperand(1))
        return 0;
      // If the cast is not the second operand, it will be free if it looks the
      // same as the second operand. In this case, we will generate a "long"
      // version of the widening instruction.
      if (auto *Cast = dyn_cast<CastInst>(SingleUser->getOperand(1)))
        if (I->getOpcode() == unsigned(Cast->getOpcode()) &&
            cast<CastInst>(I)->getSrcTy() == Cast->getSrcTy())
          return 0;
    }
  }

  EVT SrcTy = TLI->getValueType(DL, Src);
  EVT DstTy = TLI->getValueType(DL, Dst);

  if (!SrcTy.isSimple() || !DstTy.isSimple())
    return BaseT::getCastInstrCost(Opcode, Dst, Src);

  static const TypeConversionCostTblEntry
  ConversionTbl[] = {

    // The number of shll instructions for the extension.

    // LowerVectorINT_TO_FP:

    // Complex: to v2f32

    // Complex: to v4f32

    // Complex: to v8f32

    // Complex: to v16f32

    // Complex: to v2f64


    // LowerVectorFP_TO_INT

    // Complex, from v2f32: legal type is v2i32 (no cost) or v2i64 (1 ext).

    // Complex, from v4f32: legal type is v4i16, 1 narrowing => ~2

    // Complex, from v2f64: legal type is v2i32, 1 narrowing => ~2.
  };

  if (const auto *Entry = ConvertCostTableLookup(ConversionTbl, ISD,
                                                 DstTy.getSimpleVT(),
                                                 SrcTy.getSimpleVT()))
    return Entry->Cost;

  return BaseT::getCastInstrCost(Opcode, Dst, Src);
}

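// Cost of extracting a vector element and then sign/zero-extending it. The
// extension is often free because smov/umov extend as part of the extract.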
int AArch64TTIImpl::getExtractWithExtendCost(unsigned Opcode, Type *Dst,
                                             VectorType *VecTy,
                                             unsigned Index) {

  // Make sure we were given a valid extend opcode.
  assert((Opcode == Instruction::SExt || Opcode == Instruction::ZExt) &&
         "Invalid opcode");

  // We are extending an element we extract from a vector, so the source type
  // of the extend is the element type of the vector.
  auto *Src = VecTy->getElementType();

  // Sign- and zero-extends are for integer types only.
  assert(isa<IntegerType>(Dst) && isa<IntegerType>(Src) && "Invalid type");

  // Get the cost for the extract. We compute the cost (if any) for the extend
  // below.
  auto Cost = getVectorInstrCost(Instruction::ExtractElement, VecTy, Index);

  // Legalize the types.
  auto VecLT = TLI->getTypeLegalizationCost(DL, VecTy);
  auto DstVT = TLI->getValueType(DL, Dst);
  auto SrcVT = TLI->getValueType(DL, Src);

  // If the resulting type is still a vector and the destination type is legal,
  // we may get the extension for free. If not, get the default cost for the
  // extend.
  if (!VecLT.second.isVector() || !TLI->isTypeLegal(DstVT))
    return Cost + getCastInstrCost(Opcode, Dst, Src);

  // The destination type should be larger than the element type. If not, get
  // the default cost for the extend.
  if (DstVT.getSizeInBits() < SrcVT.getSizeInBits())
    return Cost + getCastInstrCost(Opcode, Dst, Src);

  switch (Opcode) {
  default:
    llvm_unreachable("Opcode should be either SExt or ZExt");

  // For sign-extends, we only need a smov, which performs the extension
  // automatically.
  case Instruction::SExt:
    return Cost;

  // For zero-extends, the extend is performed automatically by a umov unless
  // the destination type is i64 and the element type is i8 or i16.
  case Instruction::ZExt:
    if (DstVT.getSizeInBits() != 64u || SrcVT.getSizeInBits() == 32u)
      return Cost;
  }

  // If we are unable to perform the extend for free, get the default cost.
  return Cost + getCastInstrCost(Opcode, Dst, Src);
}

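// Cost of a single insertelement/extractelement. Element zero of a legal
// vector is free to access; everything else pays the subtarget's base cost.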
int AArch64TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
                                       unsigned Index) {
  assert(Val->isVectorTy() && "This must be a vector type");

  if (Index != -1U) {
    // Legalize the type.
    std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Val);

    // This type is legalized to a scalar type.
    if (!LT.second.isVector())
      return 0;

    // The type may be split. Normalize the index to the new type.
    unsigned Width = LT.second.getVectorNumElements();
    Index = Index % Width;

    // The element at index zero is already inside the vector.
    if (Index == 0)
      return 0;
  }

  // All other insert/extracts cost this much.
  return ST->getVectorInsertExtractBaseCost();
}

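// Cost of an integer/FP arithmetic instruction. Widening adds/subs carry the
// subtarget's widening overhead, and division by a constant is priced as the
// instruction sequence it is expanded into.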
int AArch64TTIImpl::getArithmeticInstrCost(
    unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info,
    TTI::OperandValueKind Opd2Info, TTI::OperandValueProperties Opd1PropInfo,
    TTI::OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args) {
  // Legalize the type.
  std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);

  // If the instruction is a widening instruction (e.g., uaddl, saddw, etc.),
  // add in the widening overhead specified by the sub-target. Since the
  // extends feeding widening instructions are performed automatically, they
  // aren't present in the generated code and have a zero cost. By adding a
  // widening overhead here, we attach the total cost of the combined operation
  // to the widening instruction.
  int Cost = 0;
  if (isWideningInstruction(Ty, Opcode, Args))
    Cost += ST->getWideningBaseCost();

  int ISD = TLI->InstructionOpcodeToISD(Opcode);

  switch (ISD) {
  default:
    return Cost + BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
                                                Opd1PropInfo, Opd2PropInfo);
  case ISD::SDIV:
    if (Opd2Info == TargetTransformInfo::OK_UniformConstantValue &&
        Opd2PropInfo == TargetTransformInfo::OP_PowerOf2) {
      // On AArch64, scalar signed division by a power-of-two constant is
      // normally expanded to the sequence ADD + CMP + SELECT + SRA.
      // The OperandValue properties may not be the same as those of the
      // previous operation; conservatively assume OP_None.
      Cost += getArithmeticInstrCost(Instruction::Add, Ty, Opd1Info, Opd2Info,
                                     TargetTransformInfo::OP_None,
                                     TargetTransformInfo::OP_None);
      Cost += getArithmeticInstrCost(Instruction::Sub, Ty, Opd1Info, Opd2Info,
                                     TargetTransformInfo::OP_None,
                                     TargetTransformInfo::OP_None);
      Cost += getArithmeticInstrCost(Instruction::Select, Ty, Opd1Info,
                                     Opd2Info,
                                     TargetTransformInfo::OP_None,
                                     TargetTransformInfo::OP_None);
      Cost += getArithmeticInstrCost(Instruction::AShr, Ty, Opd1Info, Opd2Info,
                                     TargetTransformInfo::OP_None,
                                     TargetTransformInfo::OP_None);
      return Cost;
    }
    LLVM_FALLTHROUGH;
  case ISD::UDIV:
    if (Opd2Info == TargetTransformInfo::OK_UniformConstantValue) {
      auto VT = TLI->getValueType(DL, Ty);
      if (TLI->isOperationLegalOrCustom(ISD::MULHU, VT)) {
        // Vector signed division by a constant is expanded to the
        // sequence MULHS + ADD/SUB + SRA + SRL + ADD, and unsigned division
        // to MULHS + SUB + SRL + ADD + SRL.
        int MulCost = getArithmeticInstrCost(Instruction::Mul, Ty, Opd1Info,
                                             Opd2Info,
                                             TargetTransformInfo::OP_None,
                                             TargetTransformInfo::OP_None);
        int AddCost = getArithmeticInstrCost(Instruction::Add, Ty, Opd1Info,
                                             Opd2Info,
                                             TargetTransformInfo::OP_None,
                                             TargetTransformInfo::OP_None);
        int ShrCost = getArithmeticInstrCost(Instruction::AShr, Ty, Opd1Info,
                                             Opd2Info,
                                             TargetTransformInfo::OP_None,
                                             TargetTransformInfo::OP_None);
        return MulCost * 2 + AddCost * 2 + ShrCost * 2 + 1;
      }
    }

    Cost += BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
                                          Opd1PropInfo, Opd2PropInfo);
    if (Ty->isVectorTy()) {
      // On AArch64, vector divisions are not supported natively and are
      // expanded into scalar divisions of each pair of elements.
      Cost += getArithmeticInstrCost(Instruction::ExtractElement, Ty, Opd1Info,
                                     Opd2Info, Opd1PropInfo, Opd2PropInfo);
      Cost += getArithmeticInstrCost(Instruction::InsertElement, Ty, Opd1Info,
                                     Opd2Info, Opd1PropInfo, Opd2PropInfo);
      // TODO: if one of the arguments is scalar, then it's not necessary to
      // double the cost of handling the vector elements.
      Cost += Cost;
    }
    return Cost;

  case ISD::ADD:
  case ISD::MUL:
  case ISD::XOR:
  case ISD::OR:
  case ISD::AND:
    // These nodes are marked as 'custom' for combining purposes only.
    // We know that they are legal. See LowerAdd in ISelLowering.
    return (Cost + 1) * LT.first;
  }
}

int AArch64TTIImpl::getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
                                              const SCEV *Ptr) {
  // Address computations in vectorized code with non-consecutive addresses
  // will likely result in more instructions compared to scalar code where the
  // computation can more often be merged into the index mode. The resulting
  // extra micro-ops can significantly decrease throughput.
  unsigned NumVectorInstToHideOverhead = 10;
  int MaxMergeDistance = 64;

  if (Ty->isVectorTy() && SE &&
      !BaseT::isConstantStridedAccessLessThan(SE, Ptr, MaxMergeDistance + 1))
    return NumVectorInstToHideOverhead;

  // In many cases the address computation is not merged into the instruction
  // addressing mode.
  return 1;
}

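// Cost of compare and select instructions. Wide vector selects get scalarized
// and are penalized via the table below.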
int AArch64TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
                                       Type *CondTy, const Instruction *I) {

  int ISD = TLI->InstructionOpcodeToISD(Opcode);
  // We don't lower some vector selects well when they are wider than the
  // register width.
  if (ValTy->isVectorTy() && ISD == ISD::SELECT) {
    // We would need this many instructions to hide the scalarization happening.
    const int AmortizationCost = 20;
    static const TypeConversionCostTblEntry
    VectorSelectTbl[] = {
      { ISD::SELECT, MVT::v4i1, MVT::v4i64, 4 * AmortizationCost },
      { ISD::SELECT, MVT::v8i1, MVT::v8i64, 8 * AmortizationCost },
      { ISD::SELECT, MVT::v16i1, MVT::v16i64, 16 * AmortizationCost }
    };

    EVT SelCondTy = TLI->getValueType(DL, CondTy);
    EVT SelValTy = TLI->getValueType(DL, ValTy);
    if (SelCondTy.isSimple() && SelValTy.isSimple()) {
      if (const auto *Entry = ConvertCostTableLookup(VectorSelectTbl, ISD,
                                                     SelCondTy.getSimpleVT(),
                                                     SelValTy.getSimpleVT()))
        return Entry->Cost;
    }
  }
  return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
}

int AArch64TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Ty,
                                    unsigned Alignment, unsigned AddressSpace,
                                    const Instruction *I) {
  auto LT = TLI->getTypeLegalizationCost(DL, Ty);

  if (ST->isMisaligned128StoreSlow() && Opcode == Instruction::Store &&
      LT.second.is128BitVector() && Alignment < 16) {
    // Unaligned stores are extremely inefficient. We don't split all
    // unaligned 128-bit stores because of the negative impact that has been
    // observed in practice on inlined block copy code.
    // We make such stores expensive so that we will only vectorize if there
    // are 6 other instructions getting vectorized.
    const int AmortizationCost = 6;

    return LT.first * 2 * AmortizationCost;
  }

  if (Ty->isVectorTy() && Ty->getVectorElementType()->isIntegerTy(8) &&
      Ty->getVectorNumElements() < 8) {
    // We scalarize the loads/stores because there is no v.4b register and we
    // have to promote the elements to v.4h.
    unsigned NumVecElts = Ty->getVectorNumElements();
    unsigned NumVectorizableInstsToAmortize = NumVecElts * 2;
    // We generate 2 instructions per vector element.
    return NumVectorizableInstsToAmortize * NumVecElts * 2;
  }

  return LT.first;
}

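// Cost of an interleaved load/store group. Legal groups map directly onto
// ldN/stN instructions.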
int AArch64TTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
                                               unsigned Factor,
                                               ArrayRef<unsigned> Indices,
                                               unsigned Alignment,
                                               unsigned AddressSpace) {
  assert(Factor >= 2 && "Invalid interleave factor");
  assert(isa<VectorType>(VecTy) && "Expect a vector type");

  if (Factor <= TLI->getMaxSupportedInterleaveFactor()) {
    unsigned NumElts = VecTy->getVectorNumElements();
    auto *SubVecTy = VectorType::get(VecTy->getScalarType(), NumElts / Factor);

    // ldN/stN only support legal vector types of size 64 or 128 in bits.
    // Accesses having vector types that are a multiple of 128 bits can be
    // matched to more than one ldN/stN instruction.
    if (NumElts % Factor == 0 &&
        TLI->isLegalInterleavedAccessType(SubVecTy, DL))
      return Factor * TLI->getNumInterleavedAccesses(SubVecTy, DL);
  }

  return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
                                           Alignment, AddressSpace);
}

int AArch64TTIImpl::getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) {
  int Cost = 0;
  for (auto *I : Tys) {
    if (!I->isVectorTy())
      continue;
    if (I->getScalarSizeInBits() * I->getVectorNumElements() == 128)
      Cost += getMemoryOpCost(Instruction::Store, I, 128, 0) +
              getMemoryOpCost(Instruction::Load, I, 128, 0);
  }
  return Cost;
}

unsigned AArch64TTIImpl::getMaxInterleaveFactor(unsigned VF) {
  return ST->getMaxInterleaveFactor();
}

// For Falkor, we want to avoid having too many strided loads in a loop since
// that can exhaust the HW prefetcher resources. We adjust the unroller
// MaxCount preference below to attempt to ensure unrolling doesn't create too
// many strided loads.
static void
getFalkorUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                              TargetTransformInfo::UnrollingPreferences &UP) {
  enum { MaxStridedLoads = 7 };
  auto countStridedLoads = [](Loop *L, ScalarEvolution &SE) {
    int StridedLoads = 0;
    // FIXME? We could make this more precise by looking at the CFG and
    // e.g. not counting loads in each side of an if-then-else diamond.
    for (const auto BB : L->blocks()) {
      for (auto &I : *BB) {
        LoadInst *LMemI = dyn_cast<LoadInst>(&I);
        if (!LMemI)
          continue;

        Value *PtrValue = LMemI->getPointerOperand();
        if (L->isLoopInvariant(PtrValue))
          continue;

        const SCEV *LSCEV = SE.getSCEV(PtrValue);
        const SCEVAddRecExpr *LSCEVAddRec = dyn_cast<SCEVAddRecExpr>(LSCEV);
        if (!LSCEVAddRec || !LSCEVAddRec->isAffine())
          continue;

        // FIXME? We could take pairing of unrolled load copies into account
        // by looking at the AddRec, but we would probably have to limit this
        // to loops with no stores or other memory optimization barriers.
        ++StridedLoads;
        // We've seen enough strided loads that seeing more won't make a
        // difference.
        if (StridedLoads > MaxStridedLoads / 2)
          return StridedLoads;
      }
    }
    return StridedLoads;
  };

  int StridedLoads = countStridedLoads(L, SE);
  LLVM_DEBUG(dbgs() << "falkor-hwpf: detected " << StridedLoads
                    << " strided loads\n");
  // Pick the largest power of 2 unroll count that won't result in too many
  // strided loads.
  if (StridedLoads) {
    UP.MaxCount = 1 << Log2_32(MaxStridedLoads / StridedLoads);
    LLVM_DEBUG(dbgs() << "falkor-hwpf: setting unroll MaxCount to "
                      << UP.MaxCount << '\n');
  }
}

void AArch64TTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                                             TTI::UnrollingPreferences &UP) {
  // Enable partial unrolling and runtime unrolling.
  BaseT::getUnrollingPreferences(L, SE, UP);

  // An inner loop is more likely to be hot, and the runtime check can be
  // promoted out by the LICM pass, so the overhead is lower; try a larger
  // threshold to unroll more loops.
  if (L->getLoopDepth() > 1)
    UP.PartialThreshold *= 2;

  // Disable partial & runtime unrolling on -Os.
  UP.PartialOptSizeThreshold = 0;

  if (ST->getProcFamily() == AArch64Subtarget::Falkor &&
      EnableFalkorHWPFUnrollFix)
    getFalkorUnrollingPreferences(L, SE, UP);
}

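// Given a NEON structured load/store intrinsic, try to produce a value of
// ExpectedType from it (rebuilding the stored struct for stN, or reusing the
// ldN result directly), so a redundant load can be eliminated.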
Value *AArch64TTIImpl::getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
                                                         Type *ExpectedType) {
  switch (Inst->getIntrinsicID()) {
  default:
    return nullptr;
  case Intrinsic::aarch64_neon_st2:
  case Intrinsic::aarch64_neon_st3:
  case Intrinsic::aarch64_neon_st4: {
    // Create a struct type
    StructType *ST = dyn_cast<StructType>(ExpectedType);
    if (!ST)
      return nullptr;
    unsigned NumElts = Inst->getNumArgOperands() - 1;
    if (ST->getNumElements() != NumElts)
      return nullptr;
    for (unsigned i = 0, e = NumElts; i != e; ++i) {
      if (Inst->getArgOperand(i)->getType() != ST->getElementType(i))
        return nullptr;
    }
    Value *Res = UndefValue::get(ExpectedType);
    IRBuilder<> Builder(Inst);
    for (unsigned i = 0, e = NumElts; i != e; ++i) {
      Value *L = Inst->getArgOperand(i);
      Res = Builder.CreateInsertValue(Res, L, i);
    }
    return Res;
  }
  case Intrinsic::aarch64_neon_ld2:
  case Intrinsic::aarch64_neon_ld3:
  case Intrinsic::aarch64_neon_ld4:
    if (Inst->getType() == ExpectedType)
      return Inst;
    return nullptr;
  }
}

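// Describe the memory behaviour of the NEON structured load/store intrinsics
// so that generic passes can reason about them.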
bool AArch64TTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst,
                                        MemIntrinsicInfo &Info) {
  switch (Inst->getIntrinsicID()) {
  default:
    break;
  case Intrinsic::aarch64_neon_ld2:
  case Intrinsic::aarch64_neon_ld3:
  case Intrinsic::aarch64_neon_ld4:
    Info.ReadMem = true;
    Info.WriteMem = false;
    Info.PtrVal = Inst->getArgOperand(0);
    break;
  case Intrinsic::aarch64_neon_st2:
  case Intrinsic::aarch64_neon_st3:
  case Intrinsic::aarch64_neon_st4:
    Info.ReadMem = false;
    Info.WriteMem = true;
    Info.PtrVal = Inst->getArgOperand(Inst->getNumArgOperands() - 1);
    break;
  }

  switch (Inst->getIntrinsicID()) {
  default:
    return false;
  case Intrinsic::aarch64_neon_ld2:
  case Intrinsic::aarch64_neon_st2:
    Info.MatchingId = VECTOR_LDST_TWO_ELEMENTS;
    break;
  case Intrinsic::aarch64_neon_ld3:
  case Intrinsic::aarch64_neon_st3:
    Info.MatchingId = VECTOR_LDST_THREE_ELEMENTS;
    break;
  case Intrinsic::aarch64_neon_ld4:
  case Intrinsic::aarch64_neon_st4:
    Info.MatchingId = VECTOR_LDST_FOUR_ELEMENTS;
    break;
  }
  return true;
}

/// See if \p I should be considered for address type promotion. We check if
/// \p I is a sext with the right type and used in memory accesses. If it is
/// used in a "complex" getelementptr, we allow it to be promoted without
/// finding other sext instructions that sign extended the same initial value.
/// A getelementptr is considered "complex" if it has more than 2 operands.
bool AArch64TTIImpl::shouldConsiderAddressTypePromotion(
    const Instruction &I, bool &AllowPromotionWithoutCommonHeader) {
  bool Considerable = false;
  AllowPromotionWithoutCommonHeader = false;
  if (!isa<SExtInst>(&I))
    return false;
  Type *ConsideredSExtType =
      Type::getInt64Ty(I.getParent()->getParent()->getContext());
  if (I.getType() != ConsideredSExtType)
    return false;
  // See if the sext is the one with the right type and used in at least one
  // GetElementPtrInst.
  for (const User *U : I.users()) {
    if (const GetElementPtrInst *GEPInst = dyn_cast<GetElementPtrInst>(U)) {
      Considerable = true;
      // A getelementptr is considered as "complex" if it has more than 2
      // operands. We will promote a SExt used in such complex GEP as we
      // expect some computation to be merged if they are done on 64 bits.
      if (GEPInst->getNumOperands() > 2) {
        AllowPromotionWithoutCommonHeader = true;
        break;
      }
    }
  }
  return Considerable;
}

unsigned AArch64TTIImpl::getCacheLineSize() {
  return ST->getCacheLineSize();
}

unsigned AArch64TTIImpl::getPrefetchDistance() {
  return ST->getPrefetchDistance();
}

unsigned AArch64TTIImpl::getMinPrefetchStride() {
  return ST->getMinPrefetchStride();
}

unsigned AArch64TTIImpl::getMaxPrefetchIterationsAhead() {
  return ST->getMaxPrefetchIterationsAhead();
}

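// Decide whether a horizontal reduction with this opcode and type should be
// emitted as a reduction intrinsic rather than a shuffle-based reduction.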
bool AArch64TTIImpl::useReductionIntrinsic(unsigned Opcode, Type *Ty,
                                           TTI::ReductionFlags Flags) const {
  assert(isa<VectorType>(Ty) && "Expected Ty to be a vector type");
  unsigned ScalarBits = Ty->getScalarSizeInBits();
  switch (Opcode) {
  case Instruction::FAdd:
  case Instruction::FMul:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::Mul:
    return false;
  case Instruction::Add:
    return ScalarBits * Ty->getVectorNumElements() >= 128;
  case Instruction::ICmp:
    return (ScalarBits < 64) &&
           (ScalarBits * Ty->getVectorNumElements() >= 128);
  case Instruction::FCmp:
    return Flags.NoNaN;
  default:
    llvm_unreachable("Unhandled reduction opcode");
  }
  return false;
}

int AArch64TTIImpl::getArithmeticReductionCost(unsigned Opcode, Type *ValTy,
                                               bool IsPairwiseForm) {

  if (IsPairwiseForm)
    return BaseT::getArithmeticReductionCost(Opcode, ValTy, IsPairwiseForm);

  std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
  MVT MTy = LT.second;
  int ISD = TLI->InstructionOpcodeToISD(Opcode);
  assert(ISD && "Invalid opcode");

  // Horizontal adds can use the 'addv' instruction. We model the cost of these
  // instructions as normal vector adds. This is the only arithmetic vector
  // reduction operation for which we have an instruction.
  static const CostTblEntry CostTblNoPairwise[]{
      {ISD::ADD, MVT::v8i8,  1},
      {ISD::ADD, MVT::v16i8, 1},
      {ISD::ADD, MVT::v4i16, 1},
      {ISD::ADD, MVT::v8i16, 1},
      {ISD::ADD, MVT::v4i32, 1},
  };

  if (const auto *Entry = CostTableLookup(CostTblNoPairwise, ISD, MTy))
    return LT.first * Entry->Cost;

  return BaseT::getArithmeticReductionCost(Opcode, ValTy, IsPairwiseForm);
}

int AArch64TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
                                   Type *SubTp) {

  // Transpose shuffle kinds can be performed with 'trn1/trn2' and 'zip1/zip2'
  // instructions.
  if (Kind == TTI::SK_Transpose) {
    static const CostTblEntry TransposeTbl[] = {
    };
    std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
    if (const auto *Entry =
            CostTableLookup(TransposeTbl, ISD::VECTOR_SHUFFLE, LT.second))
      return LT.first * Entry->Cost;
  }

  return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
}