Source listing of PPCTargetTransformInfo.cpp, extracted from the LLVM 14.0.0git doxygen documentation.
1 //===-- PPCTargetTransformInfo.cpp - PPC specific TTI ---------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
#include "PPCTargetTransformInfo.h"
#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/CodeGen/CostTable.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetSchedule.h"
#include "llvm/IR/IntrinsicsPowerPC.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Transforms/InstCombine/InstCombiner.h"
#include "llvm/Transforms/Utils/Local.h"
23 
24 using namespace llvm;
25 
26 #define DEBUG_TYPE "ppctti"
27 
28 static cl::opt<bool> DisablePPCConstHoist("disable-ppc-constant-hoisting",
29 cl::desc("disable constant hoisting on PPC"), cl::init(false), cl::Hidden);
30 
31 // This is currently only used for the data prefetch pass
32 static cl::opt<unsigned>
33 CacheLineSize("ppc-loop-prefetch-cache-line", cl::Hidden, cl::init(64),
34  cl::desc("The loop prefetch cache line size"));
35 
36 static cl::opt<bool>
37 EnablePPCColdCC("ppc-enable-coldcc", cl::Hidden, cl::init(false),
38  cl::desc("Enable using coldcc calling conv for cold "
39  "internal functions"));
40 
41 static cl::opt<bool>
42 LsrNoInsnsCost("ppc-lsr-no-insns-cost", cl::Hidden, cl::init(false),
43  cl::desc("Do not add instruction count to lsr cost model"));
44 
45 // The latency of mtctr is only justified if there are more than 4
46 // comparisons that will be removed as a result.
47 static cl::opt<unsigned>
48 SmallCTRLoopThreshold("min-ctr-loop-threshold", cl::init(4), cl::Hidden,
49  cl::desc("Loops with a constant trip count smaller than "
50  "this value will not use the count register."));
51 
52 //===----------------------------------------------------------------------===//
53 //
54 // PPC cost model.
55 //
56 //===----------------------------------------------------------------------===//
57 
59 PPCTTIImpl::getPopcntSupport(unsigned TyWidth) {
60  assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
61  if (ST->hasPOPCNTD() != PPCSubtarget::POPCNTD_Unavailable && TyWidth <= 64)
62  return ST->hasPOPCNTD() == PPCSubtarget::POPCNTD_Slow ?
64  return TTI::PSK_Software;
65 }
66 
69  Intrinsic::ID IID = II.getIntrinsicID();
70  switch (IID) {
71  default:
72  break;
73  case Intrinsic::ppc_altivec_lvx:
74  case Intrinsic::ppc_altivec_lvxl:
75  // Turn PPC lvx -> load if the pointer is known aligned.
77  II.getArgOperand(0), Align(16), IC.getDataLayout(), &II,
78  &IC.getAssumptionCache(), &IC.getDominatorTree()) >= 16) {
79  Value *Ptr = IC.Builder.CreateBitCast(
81  return new LoadInst(II.getType(), Ptr, "", false, Align(16));
82  }
83  break;
84  case Intrinsic::ppc_vsx_lxvw4x:
85  case Intrinsic::ppc_vsx_lxvd2x: {
86  // Turn PPC VSX loads into normal loads.
87  Value *Ptr = IC.Builder.CreateBitCast(II.getArgOperand(0),
89  return new LoadInst(II.getType(), Ptr, Twine(""), false, Align(1));
90  }
91  case Intrinsic::ppc_altivec_stvx:
92  case Intrinsic::ppc_altivec_stvxl:
93  // Turn stvx -> store if the pointer is known aligned.
95  II.getArgOperand(1), Align(16), IC.getDataLayout(), &II,
96  &IC.getAssumptionCache(), &IC.getDominatorTree()) >= 16) {
97  Type *OpPtrTy = PointerType::getUnqual(II.getArgOperand(0)->getType());
98  Value *Ptr = IC.Builder.CreateBitCast(II.getArgOperand(1), OpPtrTy);
99  return new StoreInst(II.getArgOperand(0), Ptr, false, Align(16));
100  }
101  break;
102  case Intrinsic::ppc_vsx_stxvw4x:
103  case Intrinsic::ppc_vsx_stxvd2x: {
104  // Turn PPC VSX stores into normal stores.
105  Type *OpPtrTy = PointerType::getUnqual(II.getArgOperand(0)->getType());
106  Value *Ptr = IC.Builder.CreateBitCast(II.getArgOperand(1), OpPtrTy);
107  return new StoreInst(II.getArgOperand(0), Ptr, false, Align(1));
108  }
109  case Intrinsic::ppc_altivec_vperm:
110  // Turn vperm(V1,V2,mask) -> shuffle(V1,V2,mask) if mask is a constant.
111  // Note that ppc_altivec_vperm has a big-endian bias, so when creating
112  // a vectorshuffle for little endian, we must undo the transformation
113  // performed on vec_perm in altivec.h. That is, we must complement
114  // the permutation mask with respect to 31 and reverse the order of
115  // V1 and V2.
116  if (Constant *Mask = dyn_cast<Constant>(II.getArgOperand(2))) {
117  assert(cast<FixedVectorType>(Mask->getType())->getNumElements() == 16 &&
118  "Bad type for intrinsic!");
119 
120  // Check that all of the elements are integer constants or undefs.
121  bool AllEltsOk = true;
122  for (unsigned i = 0; i != 16; ++i) {
123  Constant *Elt = Mask->getAggregateElement(i);
124  if (!Elt || !(isa<ConstantInt>(Elt) || isa<UndefValue>(Elt))) {
125  AllEltsOk = false;
126  break;
127  }
128  }
129 
130  if (AllEltsOk) {
131  // Cast the input vectors to byte vectors.
132  Value *Op0 =
133  IC.Builder.CreateBitCast(II.getArgOperand(0), Mask->getType());
134  Value *Op1 =
135  IC.Builder.CreateBitCast(II.getArgOperand(1), Mask->getType());
136  Value *Result = UndefValue::get(Op0->getType());
137 
138  // Only extract each element once.
139  Value *ExtractedElts[32];
140  memset(ExtractedElts, 0, sizeof(ExtractedElts));
141 
142  for (unsigned i = 0; i != 16; ++i) {
143  if (isa<UndefValue>(Mask->getAggregateElement(i)))
144  continue;
145  unsigned Idx =
146  cast<ConstantInt>(Mask->getAggregateElement(i))->getZExtValue();
147  Idx &= 31; // Match the hardware behavior.
148  if (DL.isLittleEndian())
149  Idx = 31 - Idx;
150 
151  if (!ExtractedElts[Idx]) {
152  Value *Op0ToUse = (DL.isLittleEndian()) ? Op1 : Op0;
153  Value *Op1ToUse = (DL.isLittleEndian()) ? Op0 : Op1;
154  ExtractedElts[Idx] = IC.Builder.CreateExtractElement(
155  Idx < 16 ? Op0ToUse : Op1ToUse, IC.Builder.getInt32(Idx & 15));
156  }
157 
158  // Insert this value into the result vector.
159  Result = IC.Builder.CreateInsertElement(Result, ExtractedElts[Idx],
160  IC.Builder.getInt32(i));
161  }
162  return CastInst::Create(Instruction::BitCast, Result, II.getType());
163  }
164  }
165  break;
166  }
167  return None;
168 }
169 
173  return BaseT::getIntImmCost(Imm, Ty, CostKind);
174 
175  assert(Ty->isIntegerTy());
176 
177  unsigned BitSize = Ty->getPrimitiveSizeInBits();
178  if (BitSize == 0)
179  return ~0U;
180 
181  if (Imm == 0)
182  return TTI::TCC_Free;
183 
184  if (Imm.getBitWidth() <= 64) {
185  if (isInt<16>(Imm.getSExtValue()))
186  return TTI::TCC_Basic;
187 
188  if (isInt<32>(Imm.getSExtValue())) {
189  // A constant that can be materialized using lis.
190  if ((Imm.getZExtValue() & 0xFFFF) == 0)
191  return TTI::TCC_Basic;
192 
193  return 2 * TTI::TCC_Basic;
194  }
195  }
196 
197  return 4 * TTI::TCC_Basic;
198 }
199 
201  const APInt &Imm, Type *Ty,
204  return BaseT::getIntImmCostIntrin(IID, Idx, Imm, Ty, CostKind);
205 
206  assert(Ty->isIntegerTy());
207 
208  unsigned BitSize = Ty->getPrimitiveSizeInBits();
209  if (BitSize == 0)
210  return ~0U;
211 
212  switch (IID) {
213  default:
214  return TTI::TCC_Free;
215  case Intrinsic::sadd_with_overflow:
216  case Intrinsic::uadd_with_overflow:
217  case Intrinsic::ssub_with_overflow:
218  case Intrinsic::usub_with_overflow:
219  if ((Idx == 1) && Imm.getBitWidth() <= 64 && isInt<16>(Imm.getSExtValue()))
220  return TTI::TCC_Free;
221  break;
222  case Intrinsic::experimental_stackmap:
223  if ((Idx < 2) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
224  return TTI::TCC_Free;
225  break;
226  case Intrinsic::experimental_patchpoint_void:
227  case Intrinsic::experimental_patchpoint_i64:
228  if ((Idx < 4) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
229  return TTI::TCC_Free;
230  break;
231  }
232  return PPCTTIImpl::getIntImmCost(Imm, Ty, CostKind);
233 }
234 
235 InstructionCost PPCTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx,
236  const APInt &Imm, Type *Ty,
238  Instruction *Inst) {
240  return BaseT::getIntImmCostInst(Opcode, Idx, Imm, Ty, CostKind, Inst);
241 
242  assert(Ty->isIntegerTy());
243 
244  unsigned BitSize = Ty->getPrimitiveSizeInBits();
245  if (BitSize == 0)
246  return ~0U;
247 
248  unsigned ImmIdx = ~0U;
249  bool ShiftedFree = false, RunFree = false, UnsignedFree = false,
250  ZeroFree = false;
251  switch (Opcode) {
252  default:
253  return TTI::TCC_Free;
254  case Instruction::GetElementPtr:
255  // Always hoist the base address of a GetElementPtr. This prevents the
256  // creation of new constants for every base constant that gets constant
257  // folded with the offset.
258  if (Idx == 0)
259  return 2 * TTI::TCC_Basic;
260  return TTI::TCC_Free;
261  case Instruction::And:
262  RunFree = true; // (for the rotate-and-mask instructions)
264  case Instruction::Add:
265  case Instruction::Or:
266  case Instruction::Xor:
267  ShiftedFree = true;
269  case Instruction::Sub:
270  case Instruction::Mul:
271  case Instruction::Shl:
272  case Instruction::LShr:
273  case Instruction::AShr:
274  ImmIdx = 1;
275  break;
276  case Instruction::ICmp:
277  UnsignedFree = true;
278  ImmIdx = 1;
279  // Zero comparisons can use record-form instructions.
281  case Instruction::Select:
282  ZeroFree = true;
283  break;
284  case Instruction::PHI:
285  case Instruction::Call:
286  case Instruction::Ret:
287  case Instruction::Load:
288  case Instruction::Store:
289  break;
290  }
291 
292  if (ZeroFree && Imm == 0)
293  return TTI::TCC_Free;
294 
295  if (Idx == ImmIdx && Imm.getBitWidth() <= 64) {
296  if (isInt<16>(Imm.getSExtValue()))
297  return TTI::TCC_Free;
298 
299  if (RunFree) {
300  if (Imm.getBitWidth() <= 32 &&
301  (isShiftedMask_32(Imm.getZExtValue()) ||
303  return TTI::TCC_Free;
304 
305  if (ST->isPPC64() &&
306  (isShiftedMask_64(Imm.getZExtValue()) ||
308  return TTI::TCC_Free;
309  }
310 
311  if (UnsignedFree && isUInt<16>(Imm.getZExtValue()))
312  return TTI::TCC_Free;
313 
314  if (ShiftedFree && (Imm.getZExtValue() & 0xFFFF) == 0)
315  return TTI::TCC_Free;
316  }
317 
318  return PPCTTIImpl::getIntImmCost(Imm, Ty, CostKind);
319 }
320 
324  // We already implement getCastInstrCost and getMemoryOpCost where we perform
325  // the vector adjustment there.
326  if (isa<CastInst>(U) || isa<LoadInst>(U) || isa<StoreInst>(U))
328 
329  if (U->getType()->isVectorTy()) {
330  // Instructions that need to be split should cost more.
331  std::pair<InstructionCost, MVT> LT =
332  TLI->getTypeLegalizationCost(DL, U->getType());
333  return LT.first * BaseT::getUserCost(U, Operands, CostKind);
334  }
335 
337 }
338 
339 // Determining the address of a TLS variable results in a function call in
340 // certain TLS models.
341 static bool memAddrUsesCTR(const Value *MemAddr, const PPCTargetMachine &TM,
343  // No need to traverse again if we already checked this operand.
344  if (!Visited.insert(MemAddr).second)
345  return false;
346  const auto *GV = dyn_cast<GlobalValue>(MemAddr);
347  if (!GV) {
348  // Recurse to check for constants that refer to TLS global variables.
349  if (const auto *CV = dyn_cast<Constant>(MemAddr))
350  for (const auto &CO : CV->operands())
351  if (memAddrUsesCTR(CO, TM, Visited))
352  return true;
353  return false;
354  }
355 
356  if (!GV->isThreadLocal())
357  return false;
358  TLSModel::Model Model = TM.getTLSModel(GV);
360 }
361 
362 bool PPCTTIImpl::mightUseCTR(BasicBlock *BB, TargetLibraryInfo *LibInfo,
364  const PPCTargetMachine &TM = ST->getTargetMachine();
365 
366  // Loop through the inline asm constraints and look for something that
367  // clobbers ctr.
368  auto asmClobbersCTR = [](InlineAsm *IA) {
369  InlineAsm::ConstraintInfoVector CIV = IA->ParseConstraints();
370  for (unsigned i = 0, ie = CIV.size(); i < ie; ++i) {
371  InlineAsm::ConstraintInfo &C = CIV[i];
372  if (C.Type != InlineAsm::isInput)
373  for (unsigned j = 0, je = C.Codes.size(); j < je; ++j)
374  if (StringRef(C.Codes[j]).equals_insensitive("{ctr}"))
375  return true;
376  }
377  return false;
378  };
379 
380  auto isLargeIntegerTy = [](bool Is32Bit, Type *Ty) {
381  if (IntegerType *ITy = dyn_cast<IntegerType>(Ty))
382  return ITy->getBitWidth() > (Is32Bit ? 32U : 64U);
383 
384  return false;
385  };
386 
387  auto supportedHalfPrecisionOp = [](Instruction *Inst) {
388  switch (Inst->getOpcode()) {
389  default:
390  return false;
391  case Instruction::FPTrunc:
392  case Instruction::FPExt:
393  case Instruction::Load:
394  case Instruction::Store:
395  case Instruction::FPToUI:
396  case Instruction::UIToFP:
397  case Instruction::FPToSI:
398  case Instruction::SIToFP:
399  return true;
400  }
401  };
402 
403  for (BasicBlock::iterator J = BB->begin(), JE = BB->end();
404  J != JE; ++J) {
405  // There are no direct operations on half precision so assume that
406  // anything with that type requires a call except for a few select
407  // operations with Power9.
408  if (Instruction *CurrInst = dyn_cast<Instruction>(J)) {
409  for (const auto &Op : CurrInst->operands()) {
410  if (Op->getType()->getScalarType()->isHalfTy() ||
411  CurrInst->getType()->getScalarType()->isHalfTy())
412  return !(ST->isISA3_0() && supportedHalfPrecisionOp(CurrInst));
413  }
414  }
415  if (CallInst *CI = dyn_cast<CallInst>(J)) {
416  // Inline ASM is okay, unless it clobbers the ctr register.
417  if (InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledOperand())) {
418  if (asmClobbersCTR(IA))
419  return true;
420  continue;
421  }
422 
423  if (Function *F = CI->getCalledFunction()) {
424  // Most intrinsics don't become function calls, but some might.
425  // sin, cos, exp and log are always calls.
426  unsigned Opcode = 0;
427  if (F->getIntrinsicID() != Intrinsic::not_intrinsic) {
428  switch (F->getIntrinsicID()) {
429  default: continue;
430  // If we have a call to loop_decrement or set_loop_iterations,
431  // we're definitely using CTR.
432  case Intrinsic::set_loop_iterations:
433  case Intrinsic::loop_decrement:
434  return true;
435 
436  // Binary operations on 128-bit value will use CTR.
437  case Intrinsic::experimental_constrained_fadd:
438  case Intrinsic::experimental_constrained_fsub:
439  case Intrinsic::experimental_constrained_fmul:
440  case Intrinsic::experimental_constrained_fdiv:
441  case Intrinsic::experimental_constrained_frem:
442  if (F->getType()->getScalarType()->isFP128Ty() ||
443  F->getType()->getScalarType()->isPPC_FP128Ty())
444  return true;
445  break;
446 
447  case Intrinsic::experimental_constrained_fptosi:
448  case Intrinsic::experimental_constrained_fptoui:
449  case Intrinsic::experimental_constrained_sitofp:
450  case Intrinsic::experimental_constrained_uitofp: {
451  Type *SrcType = CI->getArgOperand(0)->getType()->getScalarType();
452  Type *DstType = CI->getType()->getScalarType();
453  if (SrcType->isPPC_FP128Ty() || DstType->isPPC_FP128Ty() ||
454  isLargeIntegerTy(!TM.isPPC64(), SrcType) ||
455  isLargeIntegerTy(!TM.isPPC64(), DstType))
456  return true;
457  break;
458  }
459 
460  // Exclude eh_sjlj_setjmp; we don't need to exclude eh_sjlj_longjmp
461  // because, although it does clobber the counter register, the
462  // control can't then return to inside the loop unless there is also
463  // an eh_sjlj_setjmp.
464  case Intrinsic::eh_sjlj_setjmp:
465 
466  case Intrinsic::memcpy:
467  case Intrinsic::memmove:
468  case Intrinsic::memset:
469  case Intrinsic::powi:
470  case Intrinsic::log:
471  case Intrinsic::log2:
472  case Intrinsic::log10:
473  case Intrinsic::exp:
474  case Intrinsic::exp2:
475  case Intrinsic::pow:
476  case Intrinsic::sin:
477  case Intrinsic::cos:
478  case Intrinsic::experimental_constrained_powi:
479  case Intrinsic::experimental_constrained_log:
480  case Intrinsic::experimental_constrained_log2:
481  case Intrinsic::experimental_constrained_log10:
482  case Intrinsic::experimental_constrained_exp:
483  case Intrinsic::experimental_constrained_exp2:
484  case Intrinsic::experimental_constrained_pow:
485  case Intrinsic::experimental_constrained_sin:
486  case Intrinsic::experimental_constrained_cos:
487  return true;
488  // There is no corresponding FMA instruction for PPC double double.
489  // Thus, we need to disable CTR loop generation for this type.
490  case Intrinsic::fmuladd:
491  case Intrinsic::copysign:
492  if (CI->getArgOperand(0)->getType()->getScalarType()->
493  isPPC_FP128Ty())
494  return true;
495  else
496  continue; // ISD::FCOPYSIGN is never a library call.
497  case Intrinsic::fma: Opcode = ISD::FMA; break;
498  case Intrinsic::sqrt: Opcode = ISD::FSQRT; break;
499  case Intrinsic::floor: Opcode = ISD::FFLOOR; break;
500  case Intrinsic::ceil: Opcode = ISD::FCEIL; break;
501  case Intrinsic::trunc: Opcode = ISD::FTRUNC; break;
502  case Intrinsic::rint: Opcode = ISD::FRINT; break;
503  case Intrinsic::lrint: Opcode = ISD::LRINT; break;
504  case Intrinsic::llrint: Opcode = ISD::LLRINT; break;
505  case Intrinsic::nearbyint: Opcode = ISD::FNEARBYINT; break;
506  case Intrinsic::round: Opcode = ISD::FROUND; break;
507  case Intrinsic::lround: Opcode = ISD::LROUND; break;
508  case Intrinsic::llround: Opcode = ISD::LLROUND; break;
509  case Intrinsic::minnum: Opcode = ISD::FMINNUM; break;
510  case Intrinsic::maxnum: Opcode = ISD::FMAXNUM; break;
511  case Intrinsic::experimental_constrained_fcmp:
512  Opcode = ISD::STRICT_FSETCC;
513  break;
514  case Intrinsic::experimental_constrained_fcmps:
515  Opcode = ISD::STRICT_FSETCCS;
516  break;
517  case Intrinsic::experimental_constrained_fma:
518  Opcode = ISD::STRICT_FMA;
519  break;
520  case Intrinsic::experimental_constrained_sqrt:
521  Opcode = ISD::STRICT_FSQRT;
522  break;
523  case Intrinsic::experimental_constrained_floor:
524  Opcode = ISD::STRICT_FFLOOR;
525  break;
526  case Intrinsic::experimental_constrained_ceil:
527  Opcode = ISD::STRICT_FCEIL;
528  break;
529  case Intrinsic::experimental_constrained_trunc:
530  Opcode = ISD::STRICT_FTRUNC;
531  break;
532  case Intrinsic::experimental_constrained_rint:
533  Opcode = ISD::STRICT_FRINT;
534  break;
535  case Intrinsic::experimental_constrained_lrint:
536  Opcode = ISD::STRICT_LRINT;
537  break;
538  case Intrinsic::experimental_constrained_llrint:
539  Opcode = ISD::STRICT_LLRINT;
540  break;
541  case Intrinsic::experimental_constrained_nearbyint:
542  Opcode = ISD::STRICT_FNEARBYINT;
543  break;
544  case Intrinsic::experimental_constrained_round:
545  Opcode = ISD::STRICT_FROUND;
546  break;
547  case Intrinsic::experimental_constrained_lround:
548  Opcode = ISD::STRICT_LROUND;
549  break;
550  case Intrinsic::experimental_constrained_llround:
551  Opcode = ISD::STRICT_LLROUND;
552  break;
553  case Intrinsic::experimental_constrained_minnum:
554  Opcode = ISD::STRICT_FMINNUM;
555  break;
556  case Intrinsic::experimental_constrained_maxnum:
557  Opcode = ISD::STRICT_FMAXNUM;
558  break;
559  case Intrinsic::umul_with_overflow: Opcode = ISD::UMULO; break;
560  case Intrinsic::smul_with_overflow: Opcode = ISD::SMULO; break;
561  }
562  }
563 
564  // PowerPC does not use [US]DIVREM or other library calls for
565  // operations on regular types which are not otherwise library calls
566  // (i.e. soft float or atomics). If adapting for targets that do,
567  // additional care is required here.
568 
569  LibFunc Func;
570  if (!F->hasLocalLinkage() && F->hasName() && LibInfo &&
571  LibInfo->getLibFunc(F->getName(), Func) &&
572  LibInfo->hasOptimizedCodeGen(Func)) {
573  // Non-read-only functions are never treated as intrinsics.
574  if (!CI->onlyReadsMemory())
575  return true;
576 
577  // Conversion happens only for FP calls.
578  if (!CI->getArgOperand(0)->getType()->isFloatingPointTy())
579  return true;
580 
581  switch (Func) {
582  default: return true;
583  case LibFunc_copysign:
584  case LibFunc_copysignf:
585  continue; // ISD::FCOPYSIGN is never a library call.
586  case LibFunc_copysignl:
587  return true;
588  case LibFunc_fabs:
589  case LibFunc_fabsf:
590  case LibFunc_fabsl:
591  continue; // ISD::FABS is never a library call.
592  case LibFunc_sqrt:
593  case LibFunc_sqrtf:
594  case LibFunc_sqrtl:
595  Opcode = ISD::FSQRT; break;
596  case LibFunc_floor:
597  case LibFunc_floorf:
598  case LibFunc_floorl:
599  Opcode = ISD::FFLOOR; break;
600  case LibFunc_nearbyint:
601  case LibFunc_nearbyintf:
602  case LibFunc_nearbyintl:
603  Opcode = ISD::FNEARBYINT; break;
604  case LibFunc_ceil:
605  case LibFunc_ceilf:
606  case LibFunc_ceill:
607  Opcode = ISD::FCEIL; break;
608  case LibFunc_rint:
609  case LibFunc_rintf:
610  case LibFunc_rintl:
611  Opcode = ISD::FRINT; break;
612  case LibFunc_round:
613  case LibFunc_roundf:
614  case LibFunc_roundl:
615  Opcode = ISD::FROUND; break;
616  case LibFunc_trunc:
617  case LibFunc_truncf:
618  case LibFunc_truncl:
619  Opcode = ISD::FTRUNC; break;
620  case LibFunc_fmin:
621  case LibFunc_fminf:
622  case LibFunc_fminl:
623  Opcode = ISD::FMINNUM; break;
624  case LibFunc_fmax:
625  case LibFunc_fmaxf:
626  case LibFunc_fmaxl:
627  Opcode = ISD::FMAXNUM; break;
628  }
629  }
630 
631  if (Opcode) {
632  EVT EVTy =
633  TLI->getValueType(DL, CI->getArgOperand(0)->getType(), true);
634 
635  if (EVTy == MVT::Other)
636  return true;
637 
638  if (TLI->isOperationLegalOrCustom(Opcode, EVTy))
639  continue;
640  else if (EVTy.isVector() &&
641  TLI->isOperationLegalOrCustom(Opcode, EVTy.getScalarType()))
642  continue;
643 
644  return true;
645  }
646  }
647 
648  return true;
649  } else if (isa<BinaryOperator>(J) &&
650  (J->getType()->getScalarType()->isFP128Ty() ||
651  J->getType()->getScalarType()->isPPC_FP128Ty())) {
652  // Most operations on f128 or ppc_f128 values become calls.
653  return true;
654  } else if (isa<UIToFPInst>(J) || isa<SIToFPInst>(J) ||
655  isa<FPToUIInst>(J) || isa<FPToSIInst>(J)) {
656  CastInst *CI = cast<CastInst>(J);
657  if (CI->getSrcTy()->getScalarType()->isPPC_FP128Ty() ||
658  CI->getDestTy()->getScalarType()->isPPC_FP128Ty() ||
659  isLargeIntegerTy(!TM.isPPC64(), CI->getSrcTy()->getScalarType()) ||
660  isLargeIntegerTy(!TM.isPPC64(), CI->getDestTy()->getScalarType()))
661  return true;
662  } else if (isLargeIntegerTy(!TM.isPPC64(),
663  J->getType()->getScalarType()) &&
664  (J->getOpcode() == Instruction::UDiv ||
665  J->getOpcode() == Instruction::SDiv ||
666  J->getOpcode() == Instruction::URem ||
667  J->getOpcode() == Instruction::SRem)) {
668  return true;
669  } else if (!TM.isPPC64() &&
670  isLargeIntegerTy(false, J->getType()->getScalarType()) &&
671  (J->getOpcode() == Instruction::Shl ||
672  J->getOpcode() == Instruction::AShr ||
673  J->getOpcode() == Instruction::LShr)) {
674  // Only on PPC32, for 128-bit integers (specifically not 64-bit
675  // integers), these might be runtime calls.
676  return true;
677  } else if (isa<IndirectBrInst>(J) || isa<InvokeInst>(J)) {
678  // On PowerPC, indirect jumps use the counter register.
679  return true;
680  } else if (SwitchInst *SI = dyn_cast<SwitchInst>(J)) {
681  if (SI->getNumCases() + 1 >= (unsigned)TLI->getMinimumJumpTableEntries())
682  return true;
683  }
684 
685  // FREM is always a call.
686  if (J->getOpcode() == Instruction::FRem)
687  return true;
688 
689  if (ST->useSoftFloat()) {
690  switch(J->getOpcode()) {
691  case Instruction::FAdd:
692  case Instruction::FSub:
693  case Instruction::FMul:
694  case Instruction::FDiv:
695  case Instruction::FPTrunc:
696  case Instruction::FPExt:
697  case Instruction::FPToUI:
698  case Instruction::FPToSI:
699  case Instruction::UIToFP:
700  case Instruction::SIToFP:
701  case Instruction::FCmp:
702  return true;
703  }
704  }
705 
706  for (Value *Operand : J->operands())
707  if (memAddrUsesCTR(Operand, TM, Visited))
708  return true;
709  }
710 
711  return false;
712 }
713 
715  AssumptionCache &AC,
716  TargetLibraryInfo *LibInfo,
717  HardwareLoopInfo &HWLoopInfo) {
718  const PPCTargetMachine &TM = ST->getTargetMachine();
719  TargetSchedModel SchedModel;
720  SchedModel.init(ST);
721 
722  // Do not convert small short loops to CTR loop.
723  unsigned ConstTripCount = SE.getSmallConstantTripCount(L);
724  if (ConstTripCount && ConstTripCount < SmallCTRLoopThreshold) {
726  CodeMetrics::collectEphemeralValues(L, &AC, EphValues);
728  for (BasicBlock *BB : L->blocks())
729  Metrics.analyzeBasicBlock(BB, *this, EphValues);
730  // 6 is an approximate latency for the mtctr instruction.
731  if (Metrics.NumInsts <= (6 * SchedModel.getIssueWidth()))
732  return false;
733  }
734 
735  // We don't want to spill/restore the counter register, and so we don't
736  // want to use the counter register if the loop contains calls.
738  for (Loop::block_iterator I = L->block_begin(), IE = L->block_end();
739  I != IE; ++I)
740  if (mightUseCTR(*I, LibInfo, Visited))
741  return false;
742 
743  SmallVector<BasicBlock*, 4> ExitingBlocks;
744  L->getExitingBlocks(ExitingBlocks);
745 
746  // If there is an exit edge known to be frequently taken,
747  // we should not transform this loop.
748  for (auto &BB : ExitingBlocks) {
749  Instruction *TI = BB->getTerminator();
750  if (!TI) continue;
751 
752  if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
753  uint64_t TrueWeight = 0, FalseWeight = 0;
754  if (!BI->isConditional() ||
755  !BI->extractProfMetadata(TrueWeight, FalseWeight))
756  continue;
757 
758  // If the exit path is more frequent than the loop path,
759  // we return here without further analysis for this loop.
760  bool TrueIsExit = !L->contains(BI->getSuccessor(0));
761  if (( TrueIsExit && FalseWeight < TrueWeight) ||
762  (!TrueIsExit && FalseWeight > TrueWeight))
763  return false;
764  }
765  }
766 
767  // If an exit block has a PHI that accesses a TLS variable as one of the
768  // incoming values from the loop, we cannot produce a CTR loop because the
769  // address for that value will be computed in the loop.
770  SmallVector<BasicBlock *, 4> ExitBlocks;
771  L->getExitBlocks(ExitBlocks);
772  for (auto &BB : ExitBlocks) {
773  for (auto &PHI : BB->phis()) {
774  for (int Idx = 0, EndIdx = PHI.getNumIncomingValues(); Idx < EndIdx;
775  Idx++) {
776  const BasicBlock *IncomingBB = PHI.getIncomingBlock(Idx);
777  const Value *IncomingValue = PHI.getIncomingValue(Idx);
778  if (L->contains(IncomingBB) &&
779  memAddrUsesCTR(IncomingValue, TM, Visited))
780  return false;
781  }
782  }
783  }
784 
785  LLVMContext &C = L->getHeader()->getContext();
786  HWLoopInfo.CountType = TM.isPPC64() ?
788  HWLoopInfo.LoopDecrement = ConstantInt::get(HWLoopInfo.CountType, 1);
789  return true;
790 }
791 
795  if (ST->getCPUDirective() == PPC::DIR_A2) {
796  // The A2 is in-order with a deep pipeline, and concatenation unrolling
797  // helps expose latency-hiding opportunities to the instruction scheduler.
798  UP.Partial = UP.Runtime = true;
799 
800  // We unroll a lot on the A2 (hundreds of instructions), and the benefits
801  // often outweigh the cost of a division to compute the trip count.
802  UP.AllowExpensiveTripCount = true;
803  }
804 
805  BaseT::getUnrollingPreferences(L, SE, UP, ORE);
806 }
807 
810  BaseT::getPeelingPreferences(L, SE, PP);
811 }
812 // This function returns true to allow using coldcc calling convention.
813 // Returning true results in coldcc being used for functions which are cold at
814 // all call sites when the callers of the functions are not calling any other
815 // non coldcc functions.
817  return EnablePPCColdCC;
818 }
819 
820 bool PPCTTIImpl::enableAggressiveInterleaving(bool LoopHasReductions) {
821  // On the A2, always unroll aggressively.
822  if (ST->getCPUDirective() == PPC::DIR_A2)
823  return true;
824 
825  return LoopHasReductions;
826 }
827 
829 PPCTTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
831  Options.LoadSizes = {8, 4, 2, 1};
832  Options.MaxNumLoads = TLI->getMaxExpandSizeMemcmp(OptSize);
833  return Options;
834 }
835 
837  return true;
838 }
839 
840 unsigned PPCTTIImpl::getNumberOfRegisters(unsigned ClassID) const {
841  assert(ClassID == GPRRC || ClassID == FPRRC ||
842  ClassID == VRRC || ClassID == VSXRC);
843  if (ST->hasVSX()) {
844  assert(ClassID == GPRRC || ClassID == VSXRC || ClassID == VRRC);
845  return ClassID == VSXRC ? 64 : 32;
846  }
847  assert(ClassID == GPRRC || ClassID == FPRRC || ClassID == VRRC);
848  return 32;
849 }
850 
851 unsigned PPCTTIImpl::getRegisterClassForType(bool Vector, Type *Ty) const {
852  if (Vector)
853  return ST->hasVSX() ? VSXRC : VRRC;
854  else if (Ty && (Ty->getScalarType()->isFloatTy() ||
855  Ty->getScalarType()->isDoubleTy()))
856  return ST->hasVSX() ? VSXRC : FPRRC;
857  else if (Ty && (Ty->getScalarType()->isFP128Ty() ||
858  Ty->getScalarType()->isPPC_FP128Ty()))
859  return VRRC;
860  else if (Ty && Ty->getScalarType()->isHalfTy())
861  return VSXRC;
862  else
863  return GPRRC;
864 }
865 
866 const char* PPCTTIImpl::getRegisterClassName(unsigned ClassID) const {
867 
868  switch (ClassID) {
869  default:
870  llvm_unreachable("unknown register class");
871  return "PPC::unknown register class";
872  case GPRRC: return "PPC::GPRRC";
873  case FPRRC: return "PPC::FPRRC";
874  case VRRC: return "PPC::VRRC";
875  case VSXRC: return "PPC::VSXRC";
876  }
877 }
878 
879 TypeSize
881  switch (K) {
883  return TypeSize::getFixed(ST->isPPC64() ? 64 : 32);
885  return TypeSize::getFixed(ST->hasAltivec() ? 128 : 0);
887  return TypeSize::getScalable(0);
888  }
889 
890  llvm_unreachable("Unsupported register kind");
891 }
892 
894  // Check first if the user specified a custom line size.
895  if (CacheLineSize.getNumOccurrences() > 0)
896  return CacheLineSize;
897 
898  // Starting with P7 we have a cache line size of 128.
899  unsigned Directive = ST->getCPUDirective();
900  // Assume that Future CPU has the same cache line size as the others.
904  return 128;
905 
906  // On other processors return a default of 64 bytes.
907  return 64;
908 }
909 
911  return 300;
912 }
913 
914 unsigned PPCTTIImpl::getMaxInterleaveFactor(unsigned VF) {
915  unsigned Directive = ST->getCPUDirective();
916  // The 440 has no SIMD support, but floating-point instructions
917  // have a 5-cycle latency, so unroll by 5x for latency hiding.
918  if (Directive == PPC::DIR_440)
919  return 5;
920 
921  // The A2 has no SIMD support, but floating-point instructions
922  // have a 6-cycle latency, so unroll by 6x for latency hiding.
923  if (Directive == PPC::DIR_A2)
924  return 6;
925 
926  // FIXME: For lack of any better information, do no harm...
928  return 1;
929 
930  // For P7 and P8, floating-point instructions have a 6-cycle latency and
931  // there are two execution units, so unroll by 12x for latency hiding.
932  // FIXME: the same for P9 as previous gen until POWER9 scheduling is ready
933  // FIXME: the same for P10 as previous gen until POWER10 scheduling is ready
934  // Assume that future is the same as the others.
938  return 12;
939 
940  // For most things, modern systems have two execution units (and
941  // out-of-order execution).
942  return 2;
943 }
944 
945 // Adjust the cost of vector instructions on targets which there is overlap
946 // between the vector and scalar units, thereby reducing the overall throughput
947 // of vector code wrt. scalar code.
949  unsigned Opcode, Type *Ty1,
950  Type *Ty2) {
951  if (!ST->vectorsUseTwoUnits() || !Ty1->isVectorTy())
952  return Cost;
953 
954  std::pair<InstructionCost, MVT> LT1 = TLI->getTypeLegalizationCost(DL, Ty1);
955  // If type legalization involves splitting the vector, we don't want to
956  // double the cost at every step - only the last step.
957  if (LT1.first != 1 || !LT1.second.isVector())
958  return Cost;
959 
960  int ISD = TLI->InstructionOpcodeToISD(Opcode);
961  if (TLI->isOperationExpand(ISD, LT1.second))
962  return Cost;
963 
964  if (Ty2) {
965  std::pair<InstructionCost, MVT> LT2 = TLI->getTypeLegalizationCost(DL, Ty2);
966  if (LT2.first != 1 || !LT2.second.isVector())
967  return Cost;
968  }
969 
970  return Cost * 2;
971 }
972 
974  unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
976  TTI::OperandValueProperties Opd1PropInfo,
978  const Instruction *CxtI) {
979  assert(TLI->InstructionOpcodeToISD(Opcode) && "Invalid opcode");
980  // TODO: Handle more cost kinds.
982  return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info,
983  Op2Info, Opd1PropInfo,
984  Opd2PropInfo, Args, CxtI);
985 
986  // Fallback to the default implementation.
988  Opcode, Ty, CostKind, Op1Info, Op2Info, Opd1PropInfo, Opd2PropInfo);
989  return vectorCostAdjustment(Cost, Opcode, Ty, nullptr);
990 }
991 
993  ArrayRef<int> Mask, int Index,
994  Type *SubTp) {
995  // Legalize the type.
996  std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
997 
998  // PPC, for both Altivec/VSX, support cheap arbitrary permutations
999  // (at least in the sense that there need only be one non-loop-invariant
1000  // instruction). We need one such shuffle instruction for each actual
1001  // register (this is not true for arbitrary shuffles, but is true for the
1002  // structured types of shuffles covered by TTI::ShuffleKind).
1003  return vectorCostAdjustment(LT.first, Instruction::ShuffleVector, Tp,
1004  nullptr);
1005 }
1006 
1009  const Instruction *I) {
1011  return Opcode == Instruction::PHI ? 0 : 1;
1012  // Branches are assumed to be predicted.
1013  return 0;
1014 }
1015 
1017  Type *Src,
1020  const Instruction *I) {
1021  assert(TLI->InstructionOpcodeToISD(Opcode) && "Invalid opcode");
1022 
1023  InstructionCost Cost =
1024  BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I);
1025  Cost = vectorCostAdjustment(Cost, Opcode, Dst, Src);
1026  // TODO: Allow non-throughput costs that aren't binary.
1028  return Cost == 0 ? 0 : 1;
1029  return Cost;
1030 }
1031 
1033  Type *CondTy,
1034  CmpInst::Predicate VecPred,
1036  const Instruction *I) {
1037  InstructionCost Cost =
1038  BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I);
1039  // TODO: Handle other cost kinds.
1041  return Cost;
1042  return vectorCostAdjustment(Cost, Opcode, ValTy, nullptr);
1043 }
1044 
1046  unsigned Index) {
1047  assert(Val->isVectorTy() && "This must be a vector type");
1048 
1049  int ISD = TLI->InstructionOpcodeToISD(Opcode);
1050  assert(ISD && "Invalid opcode");
1051 
1052  InstructionCost Cost = BaseT::getVectorInstrCost(Opcode, Val, Index);
1053  Cost = vectorCostAdjustment(Cost, Opcode, Val, nullptr);
1054 
1055  if (ST->hasVSX() && Val->getScalarType()->isDoubleTy()) {
1056  // Double-precision scalars are already located in index #0 (or #1 if LE).
1057  if (ISD == ISD::EXTRACT_VECTOR_ELT &&
1058  Index == (ST->isLittleEndian() ? 1 : 0))
1059  return 0;
1060 
1061  return Cost;
1062 
1063  } else if (Val->getScalarType()->isIntegerTy() && Index != -1U) {
1064  if (ST->hasP9Altivec()) {
1065  if (ISD == ISD::INSERT_VECTOR_ELT)
1066  // A move-to VSR and a permute/insert. Assume vector operation cost
1067  // for both (cost will be 2x on P9).
1068  return vectorCostAdjustment(2, Opcode, Val, nullptr);
1069 
1070  // It's an extract. Maybe we can do a cheap move-from VSR.
1071  unsigned EltSize = Val->getScalarSizeInBits();
1072  if (EltSize == 64) {
1073  unsigned MfvsrdIndex = ST->isLittleEndian() ? 1 : 0;
1074  if (Index == MfvsrdIndex)
1075  return 1;
1076  } else if (EltSize == 32) {
1077  unsigned MfvsrwzIndex = ST->isLittleEndian() ? 2 : 1;
1078  if (Index == MfvsrwzIndex)
1079  return 1;
1080  }
1081 
1082  // We need a vector extract (or mfvsrld). Assume vector operation cost.
1083  // The cost of the load constant for a vector extract is disregarded
1084  // (invariant, easily schedulable).
1085  return vectorCostAdjustment(1, Opcode, Val, nullptr);
1086 
1087  } else if (ST->hasDirectMove())
1088  // Assume permute has standard cost.
1089  // Assume move-to/move-from VSR have 2x standard cost.
1090  return 3;
1091  }
1092 
1093  // Estimated cost of a load-hit-store delay. This was obtained
1094  // experimentally as a minimum needed to prevent unprofitable
1095  // vectorization for the paq8p benchmark. It may need to be
1096  // raised further if other unprofitable cases remain.
1097  unsigned LHSPenalty = 2;
1098  if (ISD == ISD::INSERT_VECTOR_ELT)
1099  LHSPenalty += 7;
1100 
1101  // Vector element insert/extract with Altivec is very expensive,
1102  // because they require store and reload with the attendant
1103  // processor stall for load-hit-store. Until VSX is available,
1104  // these need to be estimated as very costly.
1105  if (ISD == ISD::EXTRACT_VECTOR_ELT ||
1106  ISD == ISD::INSERT_VECTOR_ELT)
1107  return LHSPenalty + Cost;
1108 
1109  return Cost;
1110 }
1111 
1113  MaybeAlign Alignment,
1114  unsigned AddressSpace,
1116  const Instruction *I) {
1117  if (TLI->getValueType(DL, Src, true) == MVT::Other)
1118  return BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
1119  CostKind);
1120  // Legalize the type.
1121  std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);
1122  assert((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
1123  "Invalid Opcode");
1124 
1125  InstructionCost Cost =
1126  BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, CostKind);
1127  // TODO: Handle other cost kinds.
1129  return Cost;
1130 
1131  Cost = vectorCostAdjustment(Cost, Opcode, Src, nullptr);
1132 
1133  bool IsAltivecType = ST->hasAltivec() &&
1134  (LT.second == MVT::v16i8 || LT.second == MVT::v8i16 ||
1135  LT.second == MVT::v4i32 || LT.second == MVT::v4f32);
1136  bool IsVSXType = ST->hasVSX() &&
1137  (LT.second == MVT::v2f64 || LT.second == MVT::v2i64);
1138 
1139  // VSX has 32b/64b load instructions. Legalization can handle loading of
1140  // 32b/64b to VSR correctly and cheaply. But BaseT::getMemoryOpCost and
1141  // PPCTargetLowering can't compute the cost appropriately. So here we
1142  // explicitly check this case.
1143  unsigned MemBytes = Src->getPrimitiveSizeInBits();
1144  if (Opcode == Instruction::Load && ST->hasVSX() && IsAltivecType &&
1145  (MemBytes == 64 || (ST->hasP8Vector() && MemBytes == 32)))
1146  return 1;
1147 
1148  // Aligned loads and stores are easy.
1149  unsigned SrcBytes = LT.second.getStoreSize();
1150  if (!SrcBytes || !Alignment || *Alignment >= SrcBytes)
1151  return Cost;
1152 
1153  // If we can use the permutation-based load sequence, then this is also
1154  // relatively cheap (not counting loop-invariant instructions): one load plus
1155  // one permute (the last load in a series has extra cost, but we're
1156  // neglecting that here). Note that on the P7, we could do unaligned loads
1157  // for Altivec types using the VSX instructions, but that's more expensive
1158  // than using the permutation-based load sequence. On the P8, that's no
1159  // longer true.
1160  if (Opcode == Instruction::Load && (!ST->hasP8Vector() && IsAltivecType) &&
1161  *Alignment >= LT.second.getScalarType().getStoreSize())
1162  return Cost + LT.first; // Add the cost of the permutations.
1163 
1164  // For VSX, we can do unaligned loads and stores on Altivec/VSX types. On the
1165  // P7, unaligned vector loads are more expensive than the permutation-based
1166  // load sequence, so that might be used instead, but regardless, the net cost
1167  // is about the same (not counting loop-invariant instructions).
1168  if (IsVSXType || (ST->hasVSX() && IsAltivecType))
1169  return Cost;
1170 
1171  // Newer PPC supports unaligned memory access.
1172  if (TLI->allowsMisalignedMemoryAccesses(LT.second, 0))
1173  return Cost;
1174 
1175  // PPC in general does not support unaligned loads and stores. They'll need
1176  // to be decomposed based on the alignment factor.
1177 
1178  // Add the cost of each scalar load or store.
1179  assert(Alignment);
1180  Cost += LT.first * ((SrcBytes / Alignment->value()) - 1);
1181 
1182  // For a vector type, there is also scalarization overhead (only for
1183  // stores, loads are expanded using the vector-load + permutation sequence,
1184  // which is much less expensive).
1185  if (Src->isVectorTy() && Opcode == Instruction::Store)
1186  for (int i = 0, e = cast<FixedVectorType>(Src)->getNumElements(); i < e;
1187  ++i)
1188  Cost += getVectorInstrCost(Instruction::ExtractElement, Src, i);
1189 
1190  return Cost;
1191 }
1192 
1194  unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
1195  Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
1196  bool UseMaskForCond, bool UseMaskForGaps) {
1197  if (UseMaskForCond || UseMaskForGaps)
1198  return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
1199  Alignment, AddressSpace, CostKind,
1200  UseMaskForCond, UseMaskForGaps);
1201 
1202  assert(isa<VectorType>(VecTy) &&
1203  "Expect a vector type for interleaved memory op");
1204 
1205  // Legalize the type.
1206  std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, VecTy);
1207 
1208  // Firstly, the cost of load/store operation.
1209  InstructionCost Cost = getMemoryOpCost(Opcode, VecTy, MaybeAlign(Alignment),
1211 
1212  // PPC, for both Altivec/VSX, support cheap arbitrary permutations
1213  // (at least in the sense that there need only be one non-loop-invariant
1214  // instruction). For each result vector, we need one shuffle per incoming
1215  // vector (except that the first shuffle can take two incoming vectors
1216  // because it does not need to take itself).
1217  Cost += Factor*(LT.first-1);
1218 
1219  return Cost;
1220 }
1221 
1226 }
1227 
1229  const Function *Caller, const Function *Callee,
1231 
1232  // We need to ensure that argument promotion does not
1233  // attempt to promote pointers to MMA types (__vector_pair
1234  // and __vector_quad) since these types explicitly cannot be
1235  // passed as arguments. Both of these types are larger than
1236  // the 128-bit Altivec vectors and have a scalar size of 1 bit.
1238  return false;
1239 
1240  return llvm::none_of(Args, [](Argument *A) {
1241  auto *EltTy = cast<PointerType>(A->getType())->getElementType();
1242  if (EltTy->isSized())
1243  return (EltTy->isIntOrIntVectorTy(1) &&
1244  EltTy->getPrimitiveSizeInBits() > 128);
1245  return false;
1246  });
1247 }
1248 
1250  LoopInfo *LI, DominatorTree *DT,
1251  AssumptionCache *AC, TargetLibraryInfo *LibInfo) {
1252  // Process nested loops first.
1253  for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I)
1254  if (canSaveCmp(*I, BI, SE, LI, DT, AC, LibInfo))
1255  return false; // Stop search.
1256 
1257  HardwareLoopInfo HWLoopInfo(L);
1258 
1259  if (!HWLoopInfo.canAnalyze(*LI))
1260  return false;
1261 
1262  if (!isHardwareLoopProfitable(L, *SE, *AC, LibInfo, HWLoopInfo))
1263  return false;
1264 
1265  if (!HWLoopInfo.isHardwareLoopCandidate(*SE, *LI, *DT))
1266  return false;
1267 
1268  *BI = HWLoopInfo.ExitBranch;
1269  return true;
1270 }
1271 
1274  // PowerPC default behaviour here is "instruction number 1st priority".
1275  // If LsrNoInsnsCost is set, call default implementation.
1276  if (!LsrNoInsnsCost)
1277  return std::tie(C1.Insns, C1.NumRegs, C1.AddRecCost, C1.NumIVMuls,
1278  C1.NumBaseAdds, C1.ScaleCost, C1.ImmCost, C1.SetupCost) <
1279  std::tie(C2.Insns, C2.NumRegs, C2.AddRecCost, C2.NumIVMuls,
1280  C2.NumBaseAdds, C2.ScaleCost, C2.ImmCost, C2.SetupCost);
1281  else
1283 }
1284 
1286  return false;
1287 }
1288 
1290  const PPCTargetMachine &TM = ST->getTargetMachine();
1291  // XCOFF hasn't implemented lowerRelativeReference, disable non-ELF for now.
1292  if (!TM.isELFv2ABI())
1293  return false;
1295 }
1296 
1299  switch (Inst->getIntrinsicID()) {
1300  case Intrinsic::ppc_altivec_lvx:
1301  case Intrinsic::ppc_altivec_lvxl:
1302  case Intrinsic::ppc_altivec_lvebx:
1303  case Intrinsic::ppc_altivec_lvehx:
1304  case Intrinsic::ppc_altivec_lvewx:
1305  case Intrinsic::ppc_vsx_lxvd2x:
1306  case Intrinsic::ppc_vsx_lxvw4x:
1307  case Intrinsic::ppc_vsx_lxvd2x_be:
1308  case Intrinsic::ppc_vsx_lxvw4x_be:
1309  case Intrinsic::ppc_vsx_lxvl:
1310  case Intrinsic::ppc_vsx_lxvll:
1311  case Intrinsic::ppc_vsx_lxvp: {
1312  Info.PtrVal = Inst->getArgOperand(0);
1313  Info.ReadMem = true;
1314  Info.WriteMem = false;
1315  return true;
1316  }
1317  case Intrinsic::ppc_altivec_stvx:
1318  case Intrinsic::ppc_altivec_stvxl:
1319  case Intrinsic::ppc_altivec_stvebx:
1320  case Intrinsic::ppc_altivec_stvehx:
1321  case Intrinsic::ppc_altivec_stvewx:
1322  case Intrinsic::ppc_vsx_stxvd2x:
1323  case Intrinsic::ppc_vsx_stxvw4x:
1324  case Intrinsic::ppc_vsx_stxvd2x_be:
1325  case Intrinsic::ppc_vsx_stxvw4x_be:
1326  case Intrinsic::ppc_vsx_stxvl:
1327  case Intrinsic::ppc_vsx_stxvll:
1328  case Intrinsic::ppc_vsx_stxvp: {
1329  Info.PtrVal = Inst->getArgOperand(1);
1330  Info.ReadMem = false;
1331  Info.WriteMem = true;
1332  return true;
1333  }
1334  default:
1335  break;
1336  }
1337 
1338  return false;
1339 }
i
i
Definition: README.txt:29
llvm::InstructionCost
Definition: InstructionCost.h:29
llvm::TargetTransformInfo::PSK_FastHardware
@ PSK_FastHardware
Definition: TargetTransformInfo.h:592
llvm::PPCTTIImpl::VSXRC
@ VSXRC
Definition: PPCTargetTransformInfo.h:94
llvm::Argument
This class represents an incoming formal argument to a Function.
Definition: Argument.h:29
llvm::ISD::STRICT_FSETCC
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition: ISDOpcodes.h:462
llvm::BasicTTIImplBase< PPCTTIImpl >::DL
const DataLayout & DL
Definition: TargetTransformInfoImpl.h:38
llvm::TargetTransformInfo::UnrollingPreferences::Runtime
bool Runtime
Allow runtime unrolling (unrolling of loops to expand the size of the loop body even when the number ...
Definition: TargetTransformInfo.h:485
llvm::TargetTransformInfo::TargetCostKind
TargetCostKind
The kind of cost model.
Definition: TargetTransformInfo.h:211
llvm::PPCSubtarget::hasPOPCNTD
POPCNTDKind hasPOPCNTD() const
Definition: PPCSubtarget.h:337
llvm::PPCTTIImpl::getUnrollingPreferences
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE)
Definition: PPCTargetTransformInfo.cpp:792
llvm::TargetTransformInfo::LSRCost::NumRegs
unsigned NumRegs
Definition: TargetTransformInfo.h:418
llvm::BasicTTIImplBase< PPCTTIImpl >::shouldBuildRelLookupTables
bool shouldBuildRelLookupTables() const
Definition: BasicTTIImpl.h:436
llvm::ISD::UMULO
@ UMULO
Definition: ISDOpcodes.h:319
llvm::ISD::STRICT_FSQRT
@ STRICT_FSQRT
Constrained versions of libm-equivalent floating point intrinsics.
Definition: ISDOpcodes.h:398
llvm
---------------------— PointerInfo ------------------------------------—
Definition: AllocatorList.h:23
llvm::none_of
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1561
llvm::LoopBase::getExitBlocks
void getExitBlocks(SmallVectorImpl< BlockT * > &ExitBlocks) const
Return all of the successor blocks of this loop.
Definition: LoopInfoImpl.h:62
llvm::PPCTTIImpl::getMaxInterleaveFactor
unsigned getMaxInterleaveFactor(unsigned VF)
Definition: PPCTargetTransformInfo.cpp:914
llvm::PPCTTIImpl::getIntImmCost
InstructionCost getIntImmCost(const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind)
Definition: PPCTargetTransformInfo.cpp:170
llvm::HardwareLoopInfo::LoopDecrement
Value * LoopDecrement
Definition: TargetTransformInfo.h:103
InstCombiner.h
llvm::CmpInst::Predicate
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:720
llvm::BasicBlock::iterator
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:90
llvm::InstCombiner::getDominatorTree
DominatorTree & getDominatorTree() const
Definition: InstCombiner.h:368
llvm::InlineAsm::ConstraintInfoVector
std::vector< ConstraintInfo > ConstraintInfoVector
Definition: InlineAsm.h:118
ceil
We have fiadd patterns now but the followings have the same cost and complexity We need a way to specify the later is more profitable def def The FP stackifier should handle simple permutates to reduce number of shuffle e g ceil
Definition: README-FPStack.txt:54
llvm::ISD::FMINNUM
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
Definition: ISDOpcodes.h:898
llvm::ISD::STRICT_FMAXNUM
@ STRICT_FMAXNUM
Definition: ISDOpcodes.h:410
llvm::PPCTTIImpl::getIntImmCostIntrin
InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind)
Definition: PPCTargetTransformInfo.cpp:200
llvm::Function
Definition: Function.h:61
llvm::Loop
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:530
llvm::LoopBase::contains
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
Definition: LoopInfo.h:122
llvm::TargetTransformInfo::PopcntSupportKind
PopcntSupportKind
Flags indicating the kind of support for population count.
Definition: TargetTransformInfo.h:592
llvm::ISD::STRICT_FMINNUM
@ STRICT_FMINNUM
Definition: ISDOpcodes.h:411
llvm::IntrinsicInst::getIntrinsicID
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
Definition: IntrinsicInst.h:52
llvm::TargetTransformInfoImplCRTPBase< PPCTTIImpl >::getUserCost
InstructionCost getUserCost(const User *U, ArrayRef< const Value * > Operands, TTI::TargetCostKind CostKind)
Definition: TargetTransformInfoImpl.h:929
C1
instcombine should handle this C2 when C1
Definition: README.txt:263
llvm::Type::getScalarType
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:319
llvm::TLSModel::GeneralDynamic
@ GeneralDynamic
Definition: CodeGen.h:43
llvm::PPCTTIImpl::isLSRCostLess
bool isLSRCostLess(TargetTransformInfo::LSRCost &C1, TargetTransformInfo::LSRCost &C2)
Definition: PPCTargetTransformInfo.cpp:1272
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1168
llvm::InstCombiner::Builder
BuilderTy & Builder
Definition: InstCombiner.h:56
llvm::CodeMetrics
Utility to calculate the size and a few similar metrics for a set of basic blocks.
Definition: CodeMetrics.h:30
llvm::PPCTTIImpl::getCastInstrCost
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
Definition: PPCTargetTransformInfo.cpp:1016
llvm::TargetTransformInfoImplBase::isLSRCostLess
bool isLSRCostLess(TTI::LSRCost &C1, TTI::LSRCost &C2) const
Definition: TargetTransformInfoImpl.h:211
llvm::APInt::getSExtValue
int64_t getSExtValue() const
Get sign extended value.
Definition: APInt.h:1467
llvm::PPCSubtarget::hasP8Vector
bool hasP8Vector() const
Definition: PPCSubtarget.h:271
llvm::BasicTTIImplBase< PPCTTIImpl >::getArithmeticInstrCost
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, TTI::OperandValueKind Opd1Info=TTI::OK_AnyValue, TTI::OperandValueKind Opd2Info=TTI::OK_AnyValue, TTI::OperandValueProperties Opd1PropInfo=TTI::OP_None, TTI::OperandValueProperties Opd2PropInfo=TTI::OP_None, ArrayRef< const Value * > Args=ArrayRef< const Value * >(), const Instruction *CxtI=nullptr)
Definition: BasicTTIImpl.h:750
llvm::PPCSubtarget::isLittleEndian
bool isLittleEndian() const
Definition: PPCSubtarget.h:251
llvm::CastInst::Create
static CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", Instruction *InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
Definition: Instructions.cpp:3038
llvm::ScalarEvolution
The main scalar evolution driver.
Definition: ScalarEvolution.h:461
llvm::TargetTransformInfo::RGK_Scalar
@ RGK_Scalar
Definition: TargetTransformInfo.h:907
llvm::PPCSubtarget::hasVSX
bool hasVSX() const
Definition: PPCSubtarget.h:269
Local.h
llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:151
DisablePPCConstHoist
static cl::opt< bool > DisablePPCConstHoist("disable-ppc-constant-hoisting", cl::desc("disable constant hoisting on PPC"), cl::init(false), cl::Hidden)
llvm::HardwareLoopInfo::ExitBranch
BranchInst * ExitBranch
Definition: TargetTransformInfo.h:100
llvm::ISD::FMA
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:466
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:143
llvm::TargetTransformInfo::LSRCost::NumIVMuls
unsigned NumIVMuls
Definition: TargetTransformInfo.h:420
llvm::isShiftedMask_32
constexpr bool isShiftedMask_32(uint32_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (32 bit ver...
Definition: MathExtras.h:479
llvm::HardwareLoopInfo::isHardwareLoopCandidate
bool isHardwareLoopCandidate(ScalarEvolution &SE, LoopInfo &LI, DominatorTree &DT, bool ForceNestedLoop=false, bool ForceHardwareLoopPHI=false)
Definition: TargetTransformInfo.cpp:100
llvm::TargetTransformInfo::UnrollingPreferences::Partial
bool Partial
Allow partial unrolling (unrolling of loops to expand the size of the loop body, not only to eliminat...
Definition: TargetTransformInfo.h:481
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
llvm::APInt::getBitWidth
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1403
llvm::TargetTransformInfo::PeelingPreferences
Definition: TargetTransformInfo.h:535
llvm::getOrEnforceKnownAlignment
Align getOrEnforceKnownAlignment(Value *V, MaybeAlign PrefAlign, const DataLayout &DL, const Instruction *CxtI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr)
Try to ensure that the alignment of V is at least PrefAlign bytes.
Definition: Local.cpp:1343
llvm::Optional
Definition: APInt.h:33
llvm::LoopBase::begin
iterator begin() const
Definition: LoopInfo.h:154
llvm::SmallPtrSet
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:449
llvm::PPCTTIImpl::isHardwareLoopProfitable
bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE, AssumptionCache &AC, TargetLibraryInfo *LibInfo, HardwareLoopInfo &HWLoopInfo)
Definition: PPCTargetTransformInfo.cpp:714
llvm::PPCSubtarget::getTargetMachine
const PPCTargetMachine & getTargetMachine() const
Definition: PPCSubtarget.h:216
llvm::TargetLoweringBase::getTypeLegalizationCost
std::pair< InstructionCost, MVT > getTypeLegalizationCost(const DataLayout &DL, Type *Ty) const
Estimate the cost of type-legalization and the legalized type.
Definition: TargetLoweringBase.cpp:1842
llvm::TargetTransformInfoImplBase::areFunctionArgsABICompatible
bool areFunctionArgsABICompatible(const Function *Caller, const Function *Callee, SmallPtrSetImpl< Argument * > &Args) const
Definition: TargetTransformInfoImpl.h:691
llvm::PPCTTIImpl::getRegisterClassName
const char * getRegisterClassName(unsigned ClassID) const
Definition: PPCTargetTransformInfo.cpp:866
llvm::MipsISD::Ret
@ Ret
Definition: MipsISelLowering.h:116
llvm::Intrinsic::not_intrinsic
@ not_intrinsic
Definition: Intrinsics.h:45
llvm::TargetTransformInfoImplBase::getIntImmCost
InstructionCost getIntImmCost(const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind) const
Definition: TargetTransformInfoImpl.h:361
llvm::PPCSubtarget::vectorsUseTwoUnits
bool vectorsUseTwoUnits() const
Definition: PPCSubtarget.h:294
llvm::isPowerOf2_32
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:491
llvm::MVT::v2f64
@ v2f64
Definition: MachineValueType.h:172
llvm::BitmaskEnumDetail::Mask
std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:80
llvm::TargetTransformInfo::UnrollingPreferences::AllowExpensiveTripCount
bool AllowExpensiveTripCount
Allow emitting expensive instructions (such as divisions) when computing the trip count of a loop for...
Definition: TargetTransformInfo.h:490
llvm::Type::getInt32Ty
static IntegerType * getInt32Ty(LLVMContext &C)
Definition: Type.cpp:203
llvm::TargetTransformInfo::LSRCost::Insns
unsigned Insns
TODO: Some of these could be merged.
Definition: TargetTransformInfo.h:417
llvm::PPCTTIImpl::isNumRegsMajorCostOfLSR
bool isNumRegsMajorCostOfLSR()
Definition: PPCTargetTransformInfo.cpp:1285
llvm::CastInst::getDestTy
Type * getDestTy() const
Return the destination type, as a convenience.
Definition: InstrTypes.h:684
F
#define F(x, y, z)
Definition: MD5.cpp:56
KnownBits.h
llvm::LoopBase::block_end
block_iterator block_end() const
Definition: LoopInfo.h:177
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
llvm::TargetTransformInfo::LSRCost::AddRecCost
unsigned AddRecCost
Definition: TargetTransformInfo.h:419
floor
We have fiadd patterns now but the followings have the same cost and complexity We need a way to specify the later is more profitable def def The FP stackifier should handle simple permutates to reduce number of shuffle e g floor
Definition: README-FPStack.txt:54
llvm::ISD::STRICT_FROUND
@ STRICT_FROUND
Definition: ISDOpcodes.h:414
llvm::Reloc::Model
Model
Definition: CodeGen.h:22
llvm::AArch64CC::LT
@ LT
Definition: AArch64BaseInfo.h:266
llvm::TargetSchedModel::init
void init(const TargetSubtargetInfo *TSInfo)
Initialize the machine model for instruction scheduling.
Definition: TargetSchedule.cpp:63
llvm::PPCTTIImpl::instCombineIntrinsic
Optional< Instruction * > instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const
Definition: PPCTargetTransformInfo.cpp:68
llvm::TargetTransformInfo::LSRCost::SetupCost
unsigned SetupCost
Definition: TargetTransformInfo.h:423
llvm::ISD::FFLOOR
@ FFLOOR
Definition: ISDOpcodes.h:885
llvm::PPCTTIImpl::GPRRC
@ GPRRC
Definition: PPCTargetTransformInfo.h:94
CommandLine.h
CodeMetrics.h
TargetLowering.h
llvm::PPC::DIR_PWR10
@ DIR_PWR10
Definition: PPCSubtarget.h:63
llvm::TargetLoweringBase::isOperationLegalOrCustom
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
Definition: TargetLowering.h:1114
llvm::BasicTTIImplBase< PPCTTIImpl >::getUnrollingPreferences
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE)
Definition: BasicTTIImpl.h:487
llvm::TargetSchedModel::getIssueWidth
unsigned getIssueWidth() const
Maximum number of micro-ops that may be scheduled per cycle.
Definition: TargetSchedule.h:99
llvm::PPCTTIImpl::getShuffleCost
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, ArrayRef< int > Mask, int Index, Type *SubTp)
Definition: PPCTargetTransformInfo.cpp:992
llvm::ISD::STRICT_FRINT
@ STRICT_FRINT
Definition: ISDOpcodes.h:408
llvm::BasicTTIImplBase< PPCTTIImpl >::getVectorInstrCost
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index)
Definition: BasicTTIImpl.h:1108
llvm::TargetTransformInfo::ShuffleKind
ShuffleKind
The various kinds of shuffle patterns for vector queries.
Definition: TargetTransformInfo.h:859
llvm::PPCTTIImpl::getNumberOfRegisters
unsigned getNumberOfRegisters(unsigned ClassID) const
Definition: PPCTargetTransformInfo.cpp:840
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
llvm::TargetTransformInfo::CastContextHint
CastContextHint
Represents a hint about the context in which a cast is used.
Definition: TargetTransformInfo.h:1062
llvm::User
Definition: User.h:44
llvm::LibFunc
LibFunc
Definition: TargetLibraryInfo.h:34
llvm::ISD::STRICT_FNEARBYINT
@ STRICT_FNEARBYINT
Definition: ISDOpcodes.h:409
llvm::EVT
Extended Value Type.
Definition: ValueTypes.h:35
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
LsrNoInsnsCost
static cl::opt< bool > LsrNoInsnsCost("ppc-lsr-no-insns-cost", cl::Hidden, cl::init(false), cl::desc("Do not add instruction count to lsr cost model"))
round
static uint64_t round(uint64_t Acc, uint64_t Input)
Definition: xxhash.cpp:57
llvm::isShiftedMask_64
constexpr bool isShiftedMask_64(uint64_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (64 bit ver...
Definition: MathExtras.h:485
llvm::PPC::DIR_A2
@ DIR_A2
Definition: PPCSubtarget.h:50
llvm::ISD::FROUND
@ FROUND
Definition: ISDOpcodes.h:883
llvm::LoopBase::end
iterator end() const
Definition: LoopInfo.h:155
llvm::LoopBase::blocks
iterator_range< block_iterator > blocks() const
Definition: LoopInfo.h:178
llvm::ISD::LLROUND
@ LLROUND
Definition: ISDOpcodes.h:887
TargetLibraryInfo.h
llvm::Type::isVectorTy
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:237
llvm::PPCSubtarget::isISA3_0
bool isISA3_0() const
Definition: PPCSubtarget.h:326
llvm::InlineAsm::isInput
@ isInput
Definition: InlineAsm.h:94
llvm::MaybeAlign
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:109
llvm::IntegerType
Class to represent integer types.
Definition: DerivedTypes.h:40
llvm::TargetLibraryInfo::getLibFunc
bool getLibFunc(StringRef funcName, LibFunc &F) const
Searches for a particular function name.
Definition: TargetLibraryInfo.h:289
llvm::Instruction
Definition: Instruction.h:45
llvm::Type::getScalarSizeInBits
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition: Type.cpp:153
llvm::TargetTransformInfoImplBase::getIntImmCostInst
InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind, Instruction *Inst=nullptr) const
Definition: TargetTransformInfoImpl.h:366
llvm::PPCSubtarget::isPPC64
bool isPPC64() const
isPPC64 - Return true if we are generating code for 64-bit pointer mode.
Definition: PPCSubtarget.cpp:243
llvm::APInt::getZExtValue
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1453
Options
const char LLVMTargetMachineRef LLVMPassBuilderOptionsRef Options
Definition: PassBuilderBindings.cpp:48
llvm::IRBuilderBase::CreateInsertElement
Value * CreateInsertElement(Value *Vec, Value *NewElt, Value *Idx, const Twine &Name="")
Definition: IRBuilder.h:2455
llvm::ISD::FNEARBYINT
@ FNEARBYINT
Definition: ISDOpcodes.h:882
llvm::ISD::FRINT
@ FRINT
Definition: ISDOpcodes.h:881
llvm::LoopBase::getExitingBlocks
void getExitingBlocks(SmallVectorImpl< BlockT * > &ExitingBlocks) const
Return all blocks inside the loop that have successors outside of the loop.
Definition: LoopInfoImpl.h:34
llvm::UndefValue::get
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Definition: Constants.cpp:1771
llvm::PPCTTIImpl::getMemoryOpCost
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
Definition: PPCTargetTransformInfo.cpp:1112
llvm::ConstantInt::get
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:900
llvm::PPCTTIImpl::getPeelingPreferences
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP)
Definition: PPCTargetTransformInfo.cpp:808
llvm::CodeMetrics::collectEphemeralValues
static void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value * > &EphValues)
Collect a loop's ephemeral values (those used only by an assume or similar intrinsics in the loop).
Definition: CodeMetrics.cpp:70
Info
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
llvm::PPCTTIImpl::getVectorInstrCost
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index)
Definition: PPCTargetTransformInfo.cpp:1045
llvm::ISD::SMULO
@ SMULO
Same for multiplication.
Definition: ISDOpcodes.h:318
Align
uint64_t Align
Definition: ELFObjHandler.cpp:83
llvm::TargetTransformInfo::RGK_FixedWidthVector
@ RGK_FixedWidthVector
Definition: TargetTransformInfo.h:907
llvm::PPCTTIImpl::FPRRC
@ FPRRC
Definition: PPCTargetTransformInfo.h:94
llvm::Align
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
llvm::MCID::Call
@ Call
Definition: MCInstrDesc.h:153
llvm::SPII::Load
@ Load
Definition: SparcInstrInfo.h:32
llvm::CastInst::getSrcTy
Type * getSrcTy() const
Return the source type, as a convenience.
Definition: InstrTypes.h:682
llvm::AddressSpace
AddressSpace
Definition: NVPTXBaseInfo.h:21
llvm::BasicTTIImplBase< PPCTTIImpl >::getCmpSelInstrCost
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
Definition: BasicTTIImpl.h:1060
llvm::BasicTTIImplBase< PPCTTIImpl >::getCastInstrCost
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
Definition: BasicTTIImpl.h:890
llvm::None
const NoneType None
Definition: None.h:23
llvm::LinearPolySize< TypeSize >::getFixed
static TypeSize getFixed(ScalarTy MinVal)
Definition: TypeSize.h:284
llvm::lltok::Kind
Kind
Definition: LLToken.h:18
llvm::IntrinsicCostAttributes
Definition: TargetTransformInfo.h:118
llvm::PPCTTIImpl::getInterleavedMemoryOpCost
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind=TTI::TCK_SizeAndLatency, bool UseMaskForCond=false, bool UseMaskForGaps=false)
Definition: PPCTargetTransformInfo.cpp:1193
llvm::TargetTransformInfo::PSK_Software
@ PSK_Software
Definition: TargetTransformInfo.h:592
llvm::maxnum
LLVM_READONLY APFloat maxnum(const APFloat &A, const APFloat &B)
Implements IEEE maxNum semantics.
Definition: APFloat.h:1309
llvm::PPCTTIImpl::getRegisterBitWidth
TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const
Definition: PPCTargetTransformInfo.cpp:880
Operands
mir Rename Register Operands
Definition: MIRNamerPass.cpp:78
llvm::ARM_PROC::IE
@ IE
Definition: ARMBaseInfo.h:27
llvm::LoopBase::block_begin
block_iterator block_begin() const
Definition: LoopInfo.h:176
llvm::Type::isIntegerTy
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:201
llvm::ScalarEvolution::getSmallConstantTripCount
unsigned getSmallConstantTripCount(const Loop *L)
Returns the exact trip count of the loop if we can compute it, and the result is a small constant.
Definition: ScalarEvolution.cpp:7179
CacheLineSize
static cl::opt< unsigned > CacheLineSize("ppc-loop-prefetch-cache-line", cl::Hidden, cl::init(64), cl::desc("The loop prefetch cache line size"))
llvm::InlineAsm
Definition: InlineAsm.h:31
llvm::ISD::STRICT_FSETCCS
@ STRICT_FSETCCS
Definition: ISDOpcodes.h:463
llvm::cl::opt< bool >
llvm::LoopBase< BasicBlock, Loop >::block_iterator
ArrayRef< BasicBlock * >::const_iterator block_iterator
Definition: LoopInfo.h:175
llvm::TargetLoweringBase::getMinimumJumpTableEntries
virtual unsigned getMinimumJumpTableEntries() const
Return lower limit for number of blocks in a jump table.
Definition: TargetLoweringBase.cpp:2001
llvm::IRBuilderBase::CreateBitCast
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2117
llvm::StoreInst
An instruction for storing to memory.
Definition: Instructions.h:304
llvm::PPCTTIImpl::getIntrinsicInstrCost
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind)
Definition: PPCTargetTransformInfo.cpp:1223
llvm::Constant
This is an important base class in LLVM.
Definition: Constant.h:41
TargetSchedule.h
llvm::MVT::v16i8
@ v16i8
Definition: MachineValueType.h:80
llvm::isInt< 32 >
constexpr bool isInt< 32 >(int64_t x)
Definition: MathExtras.h:373
llvm::StringRef::equals_insensitive
LLVM_NODISCARD bool equals_insensitive(StringRef RHS) const
Check for string equality, ignoring case.
Definition: StringRef.h:194
llvm::PPCSubtarget::POPCNTD_Unavailable
@ POPCNTD_Unavailable
Definition: PPCSubtarget.h:74
llvm::PointerType::getUnqual
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address sp...
Definition: DerivedTypes.h:651
llvm::isUInt< 16 >
constexpr bool isUInt< 16 >(uint64_t x)
Definition: MathExtras.h:408
llvm::InstCombiner::getAssumptionCache
AssumptionCache & getAssumptionCache() const
Definition: InstCombiner.h:366
llvm::TargetSchedModel
Provide an instruction scheduling machine model to CodeGen passes.
Definition: TargetSchedule.h:31
Index
uint32_t Index
Definition: ELFObjHandler.cpp:84
llvm::MVT::v2i64
@ v2i64
Definition: MachineValueType.h:118
uint64_t
llvm::TLSModel::LocalDynamic
@ LocalDynamic
Definition: CodeGen.h:44
llvm::TargetTransformInfo::LSRCost
Definition: TargetTransformInfo.h:414
llvm::InstCombiner::getDataLayout
const DataLayout & getDataLayout() const
Definition: InstCombiner.h:369
llvm::PPCSubtarget::hasP9Altivec
bool hasP9Altivec() const
Definition: PPCSubtarget.h:275
llvm::PPCTTIImpl::getPopcntSupport
TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth)
Definition: PPCTargetTransformInfo.cpp:59
llvm::IRBuilderBase::getInt32
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Definition: IRBuilder.h:478
llvm::ISD::STRICT_LRINT
@ STRICT_LRINT
Definition: ISDOpcodes.h:419
llvm::TargetLoweringBase::getMaxExpandSizeMemcmp
unsigned getMaxExpandSizeMemcmp(bool OptSize) const
Get maximum # of load operations permitted for memcmp.
Definition: TargetLowering.h:1618
llvm::DataLayout::isLittleEndian
bool isLittleEndian() const
Layout endianness...
Definition: DataLayout.h:241
llvm::LLVMContext
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:68
llvm::numbers::e
constexpr double e
Definition: MathExtras.h:57
llvm::PPCTTIImpl::getTgtMemIntrinsic
bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info)
Definition: PPCTargetTransformInfo.cpp:1297
llvm::ISD::EXTRACT_VECTOR_ELT
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:511
llvm::TargetTransformInfo::UnrollingPreferences
Parameters that control the generic loop unrolling transformation.
Definition: TargetTransformInfo.h:428
I
#define I(x, y, z)
Definition: MD5.cpp:59
llvm::ISD::LRINT
@ LRINT
Definition: ISDOpcodes.h:888
llvm::TargetTransformInfo::OperandValueProperties
OperandValueProperties
Additional properties of an operand's values.
Definition: TargetTransformInfo.h:886
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
Metrics
Machine Trace Metrics
Definition: MachineTraceMetrics.cpp:53
llvm::Type::isHalfTy
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
Definition: Type.h:141
llvm::MVT::v4f32
@ v4f32
Definition: MachineValueType.h:157
llvm::InlineAsm::ConstraintInfo
Definition: InlineAsm.h:120
llvm::ISD::STRICT_LROUND
@ STRICT_LROUND
Definition: ISDOpcodes.h:417
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::MVT::Other
@ Other
Definition: MachineValueType.h:42
llvm::PPCTargetLowering::allowsMisalignedMemoryAccesses
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, bool *Fast=nullptr) const override
Is unaligned memory access allowed for the given type, and is it fast relative to software emulation.
Definition: PPCISelLowering.cpp:16362
memcpy
<%struct.s * > cast struct s *S to sbyte *< sbyte * > sbyte uint cast struct s *agg result to sbyte *< sbyte * > sbyte uint cast struct s *memtmp to sbyte *< sbyte * > sbyte uint ret void llc ends up issuing two memcpy or custom lower memcpy(of small size) to be ldmia/stmia. I think option 2 is better but the current register allocator cannot allocate a chunk of registers at a time. A feasible temporary solution is to use specific physical registers at the lowering time for small(<
SI
StandardInstrumentations SI(Debug, VerifyEach)
llvm::PPCTTIImpl::shouldBuildRelLookupTables
bool shouldBuildRelLookupTables() const
Definition: PPCTargetTransformInfo.cpp:1289
llvm::SPII::Store
@ Store
Definition: SparcInstrInfo.h:33
llvm::PPCTTIImpl::enableInterleavedAccessVectorization
bool enableInterleavedAccessVectorization()
Definition: PPCTargetTransformInfo.cpp:836
llvm::IRBuilderBase::CreateExtractElement
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
Definition: IRBuilder.h:2442
llvm::PPC::DIR_PWR7
@ DIR_PWR7
Definition: PPCSubtarget.h:60
llvm::TargetTransformInfo::LSRCost::ScaleCost
unsigned ScaleCost
Definition: TargetTransformInfo.h:424
llvm::BasicTTIImplBase< PPCTTIImpl >::getInterleavedMemoryOpCost
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false)
Definition: BasicTTIImpl.h:1176
llvm::PPC::DIR_440
@ DIR_440
Definition: PPCSubtarget.h:43
llvm::TargetTransformInfo::PSK_SlowHardware
@ PSK_SlowHardware
Definition: TargetTransformInfo.h:592
SmallCTRLoopThreshold
static cl::opt< unsigned > SmallCTRLoopThreshold("min-ctr-loop-threshold", cl::init(4), cl::Hidden, cl::desc("Loops with a constant trip count smaller than " "this value will not use the count register."))
llvm::ISD::STRICT_LLRINT
@ STRICT_LLRINT
Definition: ISDOpcodes.h:420
llvm::TargetTransformInfo::OperandValueKind
OperandValueKind
Additional information about an operand's possible values.
Definition: TargetTransformInfo.h:878
llvm::PPCTTIImpl::useColdCCForColdCall
bool useColdCCForColdCall(Function &F)
Definition: PPCTargetTransformInfo.cpp:816
llvm::APInt
Class for arbitrary precision integers.
Definition: APInt.h:75
llvm::TargetTransformInfo::MemCmpExpansionOptions
Returns options for expansion of memcmp. IsZeroCmp is.
Definition: TargetTransformInfo.h:761
llvm::PPCSubtarget::hasDirectMove
bool hasDirectMove() const
Definition: PPCSubtarget.h:307
llvm::TargetTransformInfo::TCC_Free
@ TCC_Free
Expected to fold away in lowering.
Definition: TargetTransformInfo.h:262
llvm::PPC::DIR_E500mc
@ DIR_E500mc
Definition: PPCSubtarget.h:52
llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: APInt.h:32
llvm::LoopInfo
Definition: LoopInfo.h:1083
llvm::EVT::isVector
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:155
llvm::OptimizationRemarkEmitter
The optimization diagnostic interface.
Definition: OptimizationRemarkEmitter.h:33
llvm::ISD::STRICT_FTRUNC
@ STRICT_FTRUNC
Definition: ISDOpcodes.h:416
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:58
llvm::AssumptionCache
A cache of @llvm.assume calls within a function.
Definition: AssumptionCache.h:41
llvm::PPC::DIR_PWR8
@ DIR_PWR8
Definition: PPCSubtarget.h:61
llvm::PPCTTIImpl::getIntImmCostInst
InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind, Instruction *Inst=nullptr)
Definition: PPCTargetTransformInfo.cpp:235
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:136
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:256
if
if(llvm_vc STREQUAL "") set(fake_version_inc "$
Definition: CMakeLists.txt:14
llvm::ISD::LLRINT
@ LLRINT
Definition: ISDOpcodes.h:889
EnablePPCColdCC
static cl::opt< bool > EnablePPCColdCC("ppc-enable-coldcc", cl::Hidden, cl::init(false), cl::desc("Enable using coldcc calling conv for cold " "internal functions"))
CostKind
static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))
trunc
We have fiadd patterns now but the followings have the same cost and complexity We need a way to specify the later is more profitable def def The FP stackifier should handle simple permutates to reduce number of shuffle e g trunc
Definition: README-FPStack.txt:63
llvm::PPCTTIImpl::canSaveCmp
bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC, TargetLibraryInfo *LibInfo)
Definition: PPCTargetTransformInfo.cpp:1249
llvm::TargetLoweringBase::InstructionOpcodeToISD
int InstructionOpcodeToISD(unsigned Opcode) const
Get the ISD node that corresponds to the Instruction class opcode.
Definition: TargetLoweringBase.cpp:1762
llvm::CastInst
This is the base class for all instructions that perform data casts.
Definition: InstrTypes.h:430
llvm::PPCTTIImpl::getRegisterClassForType
unsigned getRegisterClassForType(bool Vector, Type *Ty=nullptr) const
Definition: PPCTargetTransformInfo.cpp:851
llvm::MVT::v4i32
@ v4i32
Definition: MachineValueType.h:103
llvm::BasicTTIImplBase< PPCTTIImpl >::getPeelingPreferences
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP)
Definition: BasicTTIImpl.h:559
llvm::ifs::IFSSymbolType::Func
@ Func
LLVM_FALLTHROUGH
#define LLVM_FALLTHROUGH
LLVM_FALLTHROUGH - Mark fallthrough cases in switch statements.
Definition: Compiler.h:273
llvm::LoadInst
An instruction for reading from memory.
Definition: Instructions.h:175
llvm::PPCTTIImpl::enableAggressiveInterleaving
bool enableAggressiveInterleaving(bool LoopHasReductions)
Definition: PPCTargetTransformInfo.cpp:820
llvm::PPCTTIImpl::getArithmeticInstrCost
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, TTI::OperandValueKind Opd1Info=TTI::OK_AnyValue, TTI::OperandValueKind Opd2Info=TTI::OK_AnyValue, TTI::OperandValueProperties Opd1PropInfo=TTI::OP_None, TTI::OperandValueProperties Opd2PropInfo=TTI::OP_None, ArrayRef< const Value * > Args=ArrayRef< const Value * >(), const Instruction *CxtI=nullptr)
Definition: PPCTargetTransformInfo.cpp:973
llvm::PPCTTIImpl::areFunctionArgsABICompatible
bool areFunctionArgsABICompatible(const Function *Caller, const Function *Callee, SmallPtrSetImpl< Argument * > &Args) const
Definition: PPCTargetTransformInfo.cpp:1228
Callee
amdgpu Simplify well known AMD library false FunctionCallee Callee
Definition: AMDGPULibCalls.cpp:206
llvm::BasicBlock::getContext
LLVMContext & getContext() const
Get the context in which this basic block lives.
Definition: BasicBlock.cpp:32
llvm::ISD::FSQRT
@ FSQRT
Definition: ISDOpcodes.h:868
llvm::MCID::Select
@ Select
Definition: MCInstrDesc.h:162
llvm::isInt< 16 >
constexpr bool isInt< 16 >(int64_t x)
Definition: MathExtras.h:370
llvm::PPCTTIImpl::getUserCost
InstructionCost getUserCost(const User *U, ArrayRef< const Value * > Operands, TTI::TargetCostKind CostKind)
Definition: PPCTargetTransformInfo.cpp:321
j
return j(j<< 16)
llvm::Twine
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:83
llvm::minnum
LLVM_READONLY APFloat minnum(const APFloat &A, const APFloat &B)
Implements IEEE minNum semantics.
Definition: APFloat.h:1298
llvm::ISD::STRICT_FMA
@ STRICT_FMA
Definition: ISDOpcodes.h:392
llvm::ISD::FMAXNUM
@ FMAXNUM
Definition: ISDOpcodes.h:899
llvm::Type::getInt64Ty
static IntegerType * getInt64Ty(LLVMContext &C)
Definition: Type.cpp:204
llvm::TargetTransformInfo::LSRCost::NumBaseAdds
unsigned NumBaseAdds
Definition: TargetTransformInfo.h:421
llvm::Type::isFloatTy
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
Definition: Type.h:147
llvm::PPCSubtarget::useSoftFloat
bool useSoftFloat() const
Definition: PPCSubtarget.h:235
llvm::TargetTransformInfoImplBase::getIntImmCostIntrin
InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind) const
Definition: TargetTransformInfoImpl.h:373
CostTable.h
llvm::EVT::getScalarType
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition: ValueTypes.h:296
llvm::AMDGPU::SendMsg::Op
Op
Definition: SIDefines.h:321
llvm::ISD::FCEIL
@ FCEIL
Definition: ISDOpcodes.h:879
llvm::MVT::v8i16
@ v8i16
Definition: MachineValueType.h:92
llvm::TypeSize
Definition: TypeSize.h:417
llvm::PPC::DIR_PWR9
@ DIR_PWR9
Definition: PPCSubtarget.h:62
llvm::ISD::STRICT_FCEIL
@ STRICT_FCEIL
Definition: ISDOpcodes.h:412
llvm::TargetLoweringBase::isOperationExpand
bool isOperationExpand(unsigned Op, EVT VT) const
Return true if the specified operation is illegal on this target or unlikely to be made legal with cu...
Definition: TargetLowering.h:1213
llvm::LoopBase::getHeader
BlockT * getHeader() const
Definition: LoopInfo.h:104
llvm::LinearPolySize< TypeSize >::getScalable
static TypeSize getScalable(ScalarTy MinVal)
Definition: TypeSize.h:287
llvm::PPCTTIImpl::vectorCostAdjustment
InstructionCost vectorCostAdjustment(InstructionCost Cost, unsigned Opcode, Type *Ty1, Type *Ty2)
Definition: PPCTargetTransformInfo.cpp:948
llvm::TargetLibraryInfo
Provides information about what library functions are available for the current target.
Definition: TargetLibraryInfo.h:219
llvm::Type::isDoubleTy
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
Definition: Type.h:150
powi
This is blocked on not handling X *X *X powi(X, 3)(see note above). The issue is that we end up getting t
llvm::log2
static double log2(double V)
Definition: AMDGPULibCalls.cpp:842
llvm::Type::isPPC_FP128Ty
bool isPPC_FP128Ty() const
Return true if this is powerpc long double.
Definition: Type.h:159
llvm::TargetTransformInfo::LSRCost::ImmCost
unsigned ImmCost
Definition: TargetTransformInfo.h:422
llvm::TLSModel::Model
Model
Definition: CodeGen.h:42
llvm::MCID::Add
@ Add
Definition: MCInstrDesc.h:183
llvm::PPCTargetMachine
Common code between 32-bit and 64-bit PowerPC targets.
Definition: PPCTargetMachine.h:25
llvm::InstCombiner
The core instruction combiner logic.
Definition: InstCombiner.h:45
PPCTargetTransformInfo.h
llvm::TargetTransformInfo::RGK_ScalableVector
@ RGK_ScalableVector
Definition: TargetTransformInfo.h:907
llvm::IntrinsicInst
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:45
llvm::HardwareLoopInfo
Attributes of a target dependent hardware loop.
Definition: TargetTransformInfo.h:95
llvm::ISD::STRICT_FFLOOR
@ STRICT_FFLOOR
Definition: ISDOpcodes.h:413
Vector
So we should use XX3Form_Rcr to implement instrinsic Convert DP outs ins xscvdpsp No builtin are required Round &Convert QP DP(dword[1] is set to zero) No builtin are required Round to Quad Precision because you need to assign rounding mode in instruction Provide builtin(set f128:$vT,(int_ppc_vsx_xsrqpi f128:$vB))(set f128 yields< n x< ty > >< result > yields< ty >< result > No builtin are required Load Store Vector
Definition: README_P9.txt:497
llvm::PPCTTIImpl::getPrefetchDistance
unsigned getPrefetchDistance() const override
Definition: PPCTargetTransformInfo.cpp:910
llvm::PPCTTIImpl::enableMemCmpExpansion
TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const
Definition: PPCTargetTransformInfo.cpp:829
memAddrUsesCTR
static bool memAddrUsesCTR(const Value *MemAddr, const PPCTargetMachine &TM, SmallPtrSetImpl< const Value * > &Visited)
Definition: PPCTargetTransformInfo.cpp:341
llvm::PPCSubtarget::hasAltivec
bool hasAltivec() const
Definition: PPCSubtarget.h:265
llvm::TargetTransformInfo::RegisterKind
RegisterKind
Definition: TargetTransformInfo.h:907
llvm::LoopBase< BasicBlock, Loop >::iterator
std::vector< Loop * >::const_iterator iterator
Definition: LoopInfo.h:151
llvm::CallBase::getArgOperand
Value * getArgOperand(unsigned i) const
Definition: InstrTypes.h:1338
llvm::BasicTTIImplBase< PPCTTIImpl >::getIntrinsicInstrCost
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind)
Get intrinsic cost based on arguments.
Definition: BasicTTIImpl.h:1332
llvm::PPCTTIImpl::getCacheLineSize
unsigned getCacheLineSize() const override
Definition: PPCTargetTransformInfo.cpp:893
TargetTransformInfo.h
llvm::PPCTTIImpl::getCmpSelInstrCost
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
Definition: PPCTargetTransformInfo.cpp:1032
llvm::PPC::DIR_E5500
@ DIR_E5500
Definition: PPCSubtarget.h:53
llvm::BasicTTIImplBase< PPCTTIImpl >::getMemoryOpCost
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
Definition: BasicTTIImpl.h:1116
llvm::ISD::STRICT_LLROUND
@ STRICT_LLROUND
Definition: ISDOpcodes.h:418
llvm::HardwareLoopInfo::canAnalyze
bool canAnalyze(LoopInfo &LI)
Definition: TargetTransformInfo.cpp:47
llvm::PPC::DIR_PWR_FUTURE
@ DIR_PWR_FUTURE
Definition: PPCSubtarget.h:64
llvm::MemIntrinsicInfo
Information about a load/store intrinsic defined by the target.
Definition: TargetTransformInfo.h:70
llvm::ISD::LROUND
@ LROUND
Definition: ISDOpcodes.h:886
llvm::SmallPtrSetImpl< const Value * >
TM
const char LLVMTargetMachineRef TM
Definition: PassBuilderBindings.cpp:47
llvm::CallInst
This class represents a function call, abstracting a target machine's calling convention.
Definition: Instructions.h:1475
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
llvm::PPCSubtarget::getCPUDirective
unsigned getCPUDirective() const
getCPUDirective - Returns the -m directive specified for the cpu.
Definition: PPCSubtarget.h:195
llvm::TargetLibraryInfo::hasOptimizedCodeGen
bool hasOptimizedCodeGen(LibFunc F) const
Tests if the function is both available and a candidate for optimized code generation.
Definition: TargetLibraryInfo.h:338
llvm::TargetTransformInfo::TCC_Basic
@ TCC_Basic
The cost of a typical 'add' instruction.
Definition: TargetTransformInfo.h:263
llvm::SwitchInst
Multiway switch.
Definition: Instructions.h:3204
llvm::AMDGPU::HSAMD::Kernel::Key::Args
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
Definition: AMDGPUMetadata.h:389
BasicTTIImpl.h
llvm::cl::desc
Definition: CommandLine.h:414
llvm::TargetLoweringBase::getValueType
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
Definition: TargetLowering.h:1409
llvm::BranchInst
Conditional or Unconditional Branch instruction.
Definition: Instructions.h:3060
llvm::ISD::FTRUNC
@ FTRUNC
Definition: ISDOpcodes.h:880
llvm::PPCTTIImpl::getCFInstrCost
InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
Definition: PPCTargetTransformInfo.cpp:1007
llvm::PPCSubtarget::POPCNTD_Slow
@ POPCNTD_Slow
Definition: PPCSubtarget.h:75
llvm::ISD::INSERT_VECTOR_ELT
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition: ISDOpcodes.h:500
llvm::HardwareLoopInfo::CountType
IntegerType * CountType
Definition: TargetTransformInfo.h:102
llvm::Value
LLVM Value Representation.
Definition: Value.h:75
llvm::TargetTransformInfo::TCK_RecipThroughput
@ TCK_RecipThroughput
Reciprocal throughput.
Definition: TargetTransformInfo.h:212
llvm::Directive
Definition: DirectiveEmitter.h:100
llvm::Type::isFP128Ty
bool isFP128Ty() const
Return true if this is 'fp128'.
Definition: Type.h:156
Debug.h
llvm::PPCTTIImpl::VRRC
@ VRRC
Definition: PPCTargetTransformInfo.h:94
llvm::Type::getPrimitiveSizeInBits
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition: Type.cpp:128
llvm::SmallPtrSetImpl::insert
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:364
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:37