LLVM  14.0.0git
PPCTargetTransformInfo.cpp
//===-- PPCTargetTransformInfo.cpp - PPC specific TTI ---------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "PPCTargetTransformInfo.h"
#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/CodeGen/CostTable.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetSchedule.h"
#include "llvm/IR/IntrinsicsPowerPC.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Transforms/InstCombine/InstCombiner.h"
#include "llvm/Transforms/Utils/Local.h"

using namespace llvm;

#define DEBUG_TYPE "ppctti"

static cl::opt<bool> DisablePPCConstHoist("disable-ppc-constant-hoisting",
cl::desc("disable constant hoisting on PPC"), cl::init(false), cl::Hidden);

// This is currently only used for the data prefetch pass
static cl::opt<unsigned>
CacheLineSize("ppc-loop-prefetch-cache-line", cl::Hidden, cl::init(64),
              cl::desc("The loop prefetch cache line size"));

static cl::opt<bool>
EnablePPCColdCC("ppc-enable-coldcc", cl::Hidden, cl::init(false),
                cl::desc("Enable using coldcc calling conv for cold "
                         "internal functions"));

static cl::opt<bool>
LsrNoInsnsCost("ppc-lsr-no-insns-cost", cl::Hidden, cl::init(false),
               cl::desc("Do not add instruction count to lsr cost model"));

// The latency of mtctr is only justified if there are more than 4
// comparisons that will be removed as a result.
static cl::opt<unsigned>
SmallCTRLoopThreshold("min-ctr-loop-threshold", cl::init(4), cl::Hidden,
                      cl::desc("Loops with a constant trip count smaller than "
                               "this value will not use the count register."));

//===----------------------------------------------------------------------===//
//
// PPC cost model.
//
//===----------------------------------------------------------------------===//

TargetTransformInfo::PopcntSupportKind
PPCTTIImpl::getPopcntSupport(unsigned TyWidth) {
  assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
  if (ST->hasPOPCNTD() != PPCSubtarget::POPCNTD_Unavailable && TyWidth <= 64)
    return ST->hasPOPCNTD() == PPCSubtarget::POPCNTD_Slow ?
             TTI::PSK_SlowHardware : TTI::PSK_FastHardware;
  return TTI::PSK_Software;
}

Optional<Instruction *>
PPCTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
  Intrinsic::ID IID = II.getIntrinsicID();
  switch (IID) {
  default:
    break;
  case Intrinsic::ppc_altivec_lvx:
  case Intrinsic::ppc_altivec_lvxl:
    // Turn PPC lvx -> load if the pointer is known aligned.
    if (getOrEnforceKnownAlignment(
            II.getArgOperand(0), Align(16), IC.getDataLayout(), &II,
            &IC.getAssumptionCache(), &IC.getDominatorTree()) >= 16) {
      Value *Ptr = IC.Builder.CreateBitCast(
          II.getArgOperand(0), PointerType::getUnqual(II.getType()));
      return new LoadInst(II.getType(), Ptr, "", false, Align(16));
    }
    break;
  case Intrinsic::ppc_vsx_lxvw4x:
  case Intrinsic::ppc_vsx_lxvd2x: {
    // Turn PPC VSX loads into normal loads.
    Value *Ptr = IC.Builder.CreateBitCast(II.getArgOperand(0),
                                          PointerType::getUnqual(II.getType()));
    return new LoadInst(II.getType(), Ptr, Twine(""), false, Align(1));
  }
  case Intrinsic::ppc_altivec_stvx:
  case Intrinsic::ppc_altivec_stvxl:
    // Turn stvx -> store if the pointer is known aligned.
    if (getOrEnforceKnownAlignment(
            II.getArgOperand(1), Align(16), IC.getDataLayout(), &II,
            &IC.getAssumptionCache(), &IC.getDominatorTree()) >= 16) {
      Type *OpPtrTy = PointerType::getUnqual(II.getArgOperand(0)->getType());
      Value *Ptr = IC.Builder.CreateBitCast(II.getArgOperand(1), OpPtrTy);
      return new StoreInst(II.getArgOperand(0), Ptr, false, Align(16));
    }
    break;
  case Intrinsic::ppc_vsx_stxvw4x:
  case Intrinsic::ppc_vsx_stxvd2x: {
    // Turn PPC VSX stores into normal stores.
    Type *OpPtrTy = PointerType::getUnqual(II.getArgOperand(0)->getType());
    Value *Ptr = IC.Builder.CreateBitCast(II.getArgOperand(1), OpPtrTy);
    return new StoreInst(II.getArgOperand(0), Ptr, false, Align(1));
  }
  case Intrinsic::ppc_altivec_vperm:
    // Turn vperm(V1,V2,mask) -> shuffle(V1,V2,mask) if mask is a constant.
    // Note that ppc_altivec_vperm has a big-endian bias, so when creating
    // a vectorshuffle for little endian, we must undo the transformation
    // performed on vec_perm in altivec.h. That is, we must complement
    // the permutation mask with respect to 31 and reverse the order of
    // V1 and V2.
    if (Constant *Mask = dyn_cast<Constant>(II.getArgOperand(2))) {
      assert(cast<FixedVectorType>(Mask->getType())->getNumElements() == 16 &&
             "Bad type for intrinsic!");

      // Check that all of the elements are integer constants or undefs.
      bool AllEltsOk = true;
      for (unsigned i = 0; i != 16; ++i) {
        Constant *Elt = Mask->getAggregateElement(i);
        if (!Elt || !(isa<ConstantInt>(Elt) || isa<UndefValue>(Elt))) {
          AllEltsOk = false;
          break;
        }
      }

      if (AllEltsOk) {
        // Cast the input vectors to byte vectors.
        Value *Op0 =
            IC.Builder.CreateBitCast(II.getArgOperand(0), Mask->getType());
        Value *Op1 =
            IC.Builder.CreateBitCast(II.getArgOperand(1), Mask->getType());
        Value *Result = UndefValue::get(Op0->getType());

        // Only extract each element once.
        Value *ExtractedElts[32];
        memset(ExtractedElts, 0, sizeof(ExtractedElts));

        for (unsigned i = 0; i != 16; ++i) {
          if (isa<UndefValue>(Mask->getAggregateElement(i)))
            continue;
          unsigned Idx =
              cast<ConstantInt>(Mask->getAggregateElement(i))->getZExtValue();
          Idx &= 31; // Match the hardware behavior.
          if (DL.isLittleEndian())
            Idx = 31 - Idx;

          if (!ExtractedElts[Idx]) {
            Value *Op0ToUse = (DL.isLittleEndian()) ? Op1 : Op0;
            Value *Op1ToUse = (DL.isLittleEndian()) ? Op0 : Op1;
            ExtractedElts[Idx] = IC.Builder.CreateExtractElement(
                Idx < 16 ? Op0ToUse : Op1ToUse, IC.Builder.getInt32(Idx & 15));
          }

          // Insert this value into the result vector.
          Result = IC.Builder.CreateInsertElement(Result, ExtractedElts[Idx],
                                                  IC.Builder.getInt32(i));
        }
        return CastInst::Create(Instruction::BitCast, Result, II.getType());
      }
    }
    break;
  }
  return None;
}
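
// Illustrative example (not part of the upstream file): given a pointer
// known to be 16-byte aligned, the lvx combine above rewrites
//   %v = call <4 x i32> @llvm.ppc.altivec.lvx(i8* %p)
// into an ordinary aligned load:
//   %q = bitcast i8* %p to <4 x i32>*
//   %v = load <4 x i32>, <4 x i32>* %q, align 16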

InstructionCost PPCTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty,
                                          TTI::TargetCostKind CostKind) {
  if (DisablePPCConstHoist)
    return BaseT::getIntImmCost(Imm, Ty, CostKind);

  assert(Ty->isIntegerTy());

  unsigned BitSize = Ty->getPrimitiveSizeInBits();
  if (BitSize == 0)
    return ~0U;

  if (Imm == 0)
    return TTI::TCC_Free;

  if (Imm.getBitWidth() <= 64) {
    if (isInt<16>(Imm.getSExtValue()))
      return TTI::TCC_Basic;

    if (isInt<32>(Imm.getSExtValue())) {
      // A constant that can be materialized using lis.
      if ((Imm.getZExtValue() & 0xFFFF) == 0)
        return TTI::TCC_Basic;

      return 2 * TTI::TCC_Basic;
    }
  }

  return 4 * TTI::TCC_Basic;
}
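
// Worked example (illustrative, not in the upstream file):
//   Imm = 42         -> TCC_Basic      (single li)
//   Imm = 0x20000    -> TCC_Basic      (single lis; low 16 bits are zero)
//   Imm = 0x12345678 -> 2 * TCC_Basic  (lis + ori pair)
//   a 64-bit value outside the signed 32-bit range -> 4 * TCC_Basic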

InstructionCost PPCTTIImpl::getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
                                                const APInt &Imm, Type *Ty,
                                                TTI::TargetCostKind CostKind) {
  if (DisablePPCConstHoist)
    return BaseT::getIntImmCostIntrin(IID, Idx, Imm, Ty, CostKind);

  assert(Ty->isIntegerTy());

  unsigned BitSize = Ty->getPrimitiveSizeInBits();
  if (BitSize == 0)
    return ~0U;

  switch (IID) {
  default:
    return TTI::TCC_Free;
  case Intrinsic::sadd_with_overflow:
  case Intrinsic::uadd_with_overflow:
  case Intrinsic::ssub_with_overflow:
  case Intrinsic::usub_with_overflow:
    if ((Idx == 1) && Imm.getBitWidth() <= 64 && isInt<16>(Imm.getSExtValue()))
      return TTI::TCC_Free;
    break;
  case Intrinsic::experimental_stackmap:
    if ((Idx < 2) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
      return TTI::TCC_Free;
    break;
  case Intrinsic::experimental_patchpoint_void:
  case Intrinsic::experimental_patchpoint_i64:
    if ((Idx < 4) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
      return TTI::TCC_Free;
    break;
  }
  return PPCTTIImpl::getIntImmCost(Imm, Ty, CostKind);
}

InstructionCost PPCTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx,
                                              const APInt &Imm, Type *Ty,
                                              TTI::TargetCostKind CostKind,
                                              Instruction *Inst) {
  if (DisablePPCConstHoist)
    return BaseT::getIntImmCostInst(Opcode, Idx, Imm, Ty, CostKind, Inst);

  assert(Ty->isIntegerTy());

  unsigned BitSize = Ty->getPrimitiveSizeInBits();
  if (BitSize == 0)
    return ~0U;

  unsigned ImmIdx = ~0U;
  bool ShiftedFree = false, RunFree = false, UnsignedFree = false,
       ZeroFree = false;
  switch (Opcode) {
  default:
    return TTI::TCC_Free;
  case Instruction::GetElementPtr:
    // Always hoist the base address of a GetElementPtr. This prevents the
    // creation of new constants for every base constant that gets constant
    // folded with the offset.
    if (Idx == 0)
      return 2 * TTI::TCC_Basic;
    return TTI::TCC_Free;
  case Instruction::And:
    RunFree = true; // (for the rotate-and-mask instructions)
    LLVM_FALLTHROUGH;
  case Instruction::Add:
  case Instruction::Or:
  case Instruction::Xor:
    ShiftedFree = true;
    LLVM_FALLTHROUGH;
  case Instruction::Sub:
  case Instruction::Mul:
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
    ImmIdx = 1;
    break;
  case Instruction::ICmp:
    UnsignedFree = true;
    ImmIdx = 1;
    // Zero comparisons can use record-form instructions.
    LLVM_FALLTHROUGH;
  case Instruction::Select:
    ZeroFree = true;
    break;
  case Instruction::PHI:
  case Instruction::Call:
  case Instruction::Ret:
  case Instruction::Load:
  case Instruction::Store:
    break;
  }

  if (ZeroFree && Imm == 0)
    return TTI::TCC_Free;

  if (Idx == ImmIdx && Imm.getBitWidth() <= 64) {
    if (isInt<16>(Imm.getSExtValue()))
      return TTI::TCC_Free;

    if (RunFree) {
      if (Imm.getBitWidth() <= 32 &&
          (isShiftedMask_32(Imm.getZExtValue()) ||
           isShiftedMask_32(~Imm.getZExtValue())))
        return TTI::TCC_Free;

      if (ST->isPPC64() &&
          (isShiftedMask_64(Imm.getZExtValue()) ||
           isShiftedMask_64(~Imm.getZExtValue())))
        return TTI::TCC_Free;
    }

    if (UnsignedFree && isUInt<16>(Imm.getZExtValue()))
      return TTI::TCC_Free;

    if (ShiftedFree && (Imm.getZExtValue() & 0xFFFF) == 0)
      return TTI::TCC_Free;
  }

  return PPCTTIImpl::getIntImmCost(Imm, Ty, CostKind);
}

// Check if the current Type is an MMA vector type. Valid MMA types are
// v256i1 and v512i1.
static bool isMMAType(Type *Ty) {
  return Ty->isVectorTy() && (Ty->getScalarSizeInBits() == 1) &&
         (Ty->getPrimitiveSizeInBits() > 128);
}
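
// For example, <256 x i1> (__vector_pair) and <512 x i1> (__vector_quad)
// are MMA types, while <128 x i1> (exactly 128 bits) and <16 x i8> are not.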

InstructionCost PPCTTIImpl::getUserCost(const User *U,
                                        ArrayRef<const Value *> Operands,
                                        TTI::TargetCostKind CostKind) {
  // We already implement getCastInstrCost and getMemoryOpCost where we perform
  // the vector adjustment there.
  if (isa<CastInst>(U) || isa<LoadInst>(U) || isa<StoreInst>(U))
    return BaseT::getUserCost(U, Operands, CostKind);

  if (U->getType()->isVectorTy()) {
    // Instructions that need to be split should cost more.
    std::pair<InstructionCost, MVT> LT =
        TLI->getTypeLegalizationCost(DL, U->getType());
    return LT.first * BaseT::getUserCost(U, Operands, CostKind);
  }

  return BaseT::getUserCost(U, Operands, CostKind);
}

// Determining the address of a TLS variable results in a function call in
// certain TLS models.
static bool memAddrUsesCTR(const Value *MemAddr, const PPCTargetMachine &TM,
                           SmallPtrSetImpl<const Value *> &Visited) {
  // No need to traverse again if we already checked this operand.
  if (!Visited.insert(MemAddr).second)
    return false;
  const auto *GV = dyn_cast<GlobalValue>(MemAddr);
  if (!GV) {
    // Recurse to check for constants that refer to TLS global variables.
    if (const auto *CV = dyn_cast<Constant>(MemAddr))
      for (const auto &CO : CV->operands())
        if (memAddrUsesCTR(CO, TM, Visited))
          return true;
    return false;
  }

  if (!GV->isThreadLocal())
    return false;
  TLSModel::Model Model = TM.getTLSModel(GV);
  return Model == TLSModel::GeneralDynamic || Model == TLSModel::LocalDynamic;
}
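
// Illustrative note (not in the upstream file): under the general-dynamic
// and local-dynamic TLS models, the address is resolved by calling
// __tls_get_addr; since a call may clobber the count register, a loop that
// computes such an address cannot be converted into a CTR loop.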

bool PPCTTIImpl::mightUseCTR(BasicBlock *BB, TargetLibraryInfo *LibInfo,
                             SmallPtrSetImpl<const Value *> &Visited) {
  const PPCTargetMachine &TM = ST->getTargetMachine();

  // Loop through the inline asm constraints and look for something that
  // clobbers ctr.
  auto asmClobbersCTR = [](InlineAsm *IA) {
    InlineAsm::ConstraintInfoVector CIV = IA->ParseConstraints();
    for (unsigned i = 0, ie = CIV.size(); i < ie; ++i) {
      InlineAsm::ConstraintInfo &C = CIV[i];
      if (C.Type != InlineAsm::isInput)
        for (unsigned j = 0, je = C.Codes.size(); j < je; ++j)
          if (StringRef(C.Codes[j]).equals_insensitive("{ctr}"))
            return true;
    }
    return false;
  };

  auto isLargeIntegerTy = [](bool Is32Bit, Type *Ty) {
    if (IntegerType *ITy = dyn_cast<IntegerType>(Ty))
      return ITy->getBitWidth() > (Is32Bit ? 32U : 64U);

    return false;
  };

  auto supportedHalfPrecisionOp = [](Instruction *Inst) {
    switch (Inst->getOpcode()) {
    default:
      return false;
    case Instruction::FPTrunc:
    case Instruction::FPExt:
    case Instruction::Load:
    case Instruction::Store:
    case Instruction::FPToUI:
    case Instruction::UIToFP:
    case Instruction::FPToSI:
    case Instruction::SIToFP:
      return true;
    }
  };

  for (BasicBlock::iterator J = BB->begin(), JE = BB->end();
       J != JE; ++J) {
    // There are no direct operations on half precision so assume that
    // anything with that type requires a call except for a few select
    // operations with Power9.
    if (Instruction *CurrInst = dyn_cast<Instruction>(J)) {
      for (const auto &Op : CurrInst->operands()) {
        if (Op->getType()->getScalarType()->isHalfTy() ||
            CurrInst->getType()->getScalarType()->isHalfTy())
          return !(ST->isISA3_0() && supportedHalfPrecisionOp(CurrInst));
      }
    }
    if (CallInst *CI = dyn_cast<CallInst>(J)) {
      // Inline ASM is okay, unless it clobbers the ctr register.
      if (InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledOperand())) {
        if (asmClobbersCTR(IA))
          return true;
        continue;
      }

      if (Function *F = CI->getCalledFunction()) {
        // Most intrinsics don't become function calls, but some might.
        // sin, cos, exp and log are always calls.
        unsigned Opcode = 0;
        if (F->getIntrinsicID() != Intrinsic::not_intrinsic) {
          switch (F->getIntrinsicID()) {
          default: continue;
          // If we have a call to loop_decrement or set_loop_iterations,
          // we're definitely using CTR.
          case Intrinsic::set_loop_iterations:
          case Intrinsic::loop_decrement:
            return true;

          // Binary operations on 128-bit value will use CTR.
          case Intrinsic::experimental_constrained_fadd:
          case Intrinsic::experimental_constrained_fsub:
          case Intrinsic::experimental_constrained_fmul:
          case Intrinsic::experimental_constrained_fdiv:
          case Intrinsic::experimental_constrained_frem:
            if (F->getType()->getScalarType()->isFP128Ty() ||
                F->getType()->getScalarType()->isPPC_FP128Ty())
              return true;
            break;

          case Intrinsic::experimental_constrained_fptosi:
          case Intrinsic::experimental_constrained_fptoui:
          case Intrinsic::experimental_constrained_sitofp:
          case Intrinsic::experimental_constrained_uitofp: {
            Type *SrcType = CI->getArgOperand(0)->getType()->getScalarType();
            Type *DstType = CI->getType()->getScalarType();
            if (SrcType->isPPC_FP128Ty() || DstType->isPPC_FP128Ty() ||
                isLargeIntegerTy(!TM.isPPC64(), SrcType) ||
                isLargeIntegerTy(!TM.isPPC64(), DstType))
              return true;
            break;
          }

          // Exclude eh_sjlj_setjmp; we don't need to exclude eh_sjlj_longjmp
          // because, although it does clobber the counter register, the
          // control can't then return to inside the loop unless there is also
          // an eh_sjlj_setjmp.
          case Intrinsic::eh_sjlj_setjmp:

          case Intrinsic::memcpy:
          case Intrinsic::memmove:
          case Intrinsic::memset:
          case Intrinsic::powi:
          case Intrinsic::log:
          case Intrinsic::log2:
          case Intrinsic::log10:
          case Intrinsic::exp:
          case Intrinsic::exp2:
          case Intrinsic::pow:
          case Intrinsic::sin:
          case Intrinsic::cos:
          case Intrinsic::experimental_constrained_powi:
          case Intrinsic::experimental_constrained_log:
          case Intrinsic::experimental_constrained_log2:
          case Intrinsic::experimental_constrained_log10:
          case Intrinsic::experimental_constrained_exp:
          case Intrinsic::experimental_constrained_exp2:
          case Intrinsic::experimental_constrained_pow:
          case Intrinsic::experimental_constrained_sin:
          case Intrinsic::experimental_constrained_cos:
            return true;
          // There is no corresponding FMA instruction for PPC double double.
          // Thus, we need to disable CTR loop generation for this type.
          case Intrinsic::fmuladd:
          case Intrinsic::copysign:
            if (CI->getArgOperand(0)->getType()->getScalarType()->
                isPPC_FP128Ty())
              return true;
            else
              continue; // ISD::FCOPYSIGN is never a library call.
          case Intrinsic::fma: Opcode = ISD::FMA; break;
          case Intrinsic::sqrt: Opcode = ISD::FSQRT; break;
          case Intrinsic::floor: Opcode = ISD::FFLOOR; break;
          case Intrinsic::ceil: Opcode = ISD::FCEIL; break;
          case Intrinsic::trunc: Opcode = ISD::FTRUNC; break;
          case Intrinsic::rint: Opcode = ISD::FRINT; break;
          case Intrinsic::lrint: Opcode = ISD::LRINT; break;
          case Intrinsic::llrint: Opcode = ISD::LLRINT; break;
          case Intrinsic::nearbyint: Opcode = ISD::FNEARBYINT; break;
          case Intrinsic::round: Opcode = ISD::FROUND; break;
          case Intrinsic::lround: Opcode = ISD::LROUND; break;
          case Intrinsic::llround: Opcode = ISD::LLROUND; break;
          case Intrinsic::minnum: Opcode = ISD::FMINNUM; break;
          case Intrinsic::maxnum: Opcode = ISD::FMAXNUM; break;
          case Intrinsic::experimental_constrained_fcmp:
            Opcode = ISD::STRICT_FSETCC;
            break;
          case Intrinsic::experimental_constrained_fcmps:
            Opcode = ISD::STRICT_FSETCCS;
            break;
          case Intrinsic::experimental_constrained_fma:
            Opcode = ISD::STRICT_FMA;
            break;
          case Intrinsic::experimental_constrained_sqrt:
            Opcode = ISD::STRICT_FSQRT;
            break;
          case Intrinsic::experimental_constrained_floor:
            Opcode = ISD::STRICT_FFLOOR;
            break;
          case Intrinsic::experimental_constrained_ceil:
            Opcode = ISD::STRICT_FCEIL;
            break;
          case Intrinsic::experimental_constrained_trunc:
            Opcode = ISD::STRICT_FTRUNC;
            break;
          case Intrinsic::experimental_constrained_rint:
            Opcode = ISD::STRICT_FRINT;
            break;
          case Intrinsic::experimental_constrained_lrint:
            Opcode = ISD::STRICT_LRINT;
            break;
          case Intrinsic::experimental_constrained_llrint:
            Opcode = ISD::STRICT_LLRINT;
            break;
          case Intrinsic::experimental_constrained_nearbyint:
            Opcode = ISD::STRICT_FNEARBYINT;
            break;
          case Intrinsic::experimental_constrained_round:
            Opcode = ISD::STRICT_FROUND;
            break;
          case Intrinsic::experimental_constrained_lround:
            Opcode = ISD::STRICT_LROUND;
            break;
          case Intrinsic::experimental_constrained_llround:
            Opcode = ISD::STRICT_LLROUND;
            break;
          case Intrinsic::experimental_constrained_minnum:
            Opcode = ISD::STRICT_FMINNUM;
            break;
          case Intrinsic::experimental_constrained_maxnum:
            Opcode = ISD::STRICT_FMAXNUM;
            break;
          case Intrinsic::umul_with_overflow: Opcode = ISD::UMULO; break;
          case Intrinsic::smul_with_overflow: Opcode = ISD::SMULO; break;
          }
        }

        // PowerPC does not use [US]DIVREM or other library calls for
        // operations on regular types which are not otherwise library calls
        // (i.e. soft float or atomics). If adapting for targets that do,
        // additional care is required here.

        LibFunc Func;
        if (!F->hasLocalLinkage() && F->hasName() && LibInfo &&
            LibInfo->getLibFunc(F->getName(), Func) &&
            LibInfo->hasOptimizedCodeGen(Func)) {
          // Non-read-only functions are never treated as intrinsics.
          if (!CI->onlyReadsMemory())
            return true;

          // Conversion happens only for FP calls.
          if (!CI->getArgOperand(0)->getType()->isFloatingPointTy())
            return true;

          switch (Func) {
          default: return true;
          case LibFunc_copysign:
          case LibFunc_copysignf:
            continue; // ISD::FCOPYSIGN is never a library call.
          case LibFunc_copysignl:
            return true;
          case LibFunc_fabs:
          case LibFunc_fabsf:
          case LibFunc_fabsl:
            continue; // ISD::FABS is never a library call.
          case LibFunc_sqrt:
          case LibFunc_sqrtf:
          case LibFunc_sqrtl:
            Opcode = ISD::FSQRT; break;
          case LibFunc_floor:
          case LibFunc_floorf:
          case LibFunc_floorl:
            Opcode = ISD::FFLOOR; break;
          case LibFunc_nearbyint:
          case LibFunc_nearbyintf:
          case LibFunc_nearbyintl:
            Opcode = ISD::FNEARBYINT; break;
          case LibFunc_ceil:
          case LibFunc_ceilf:
          case LibFunc_ceill:
            Opcode = ISD::FCEIL; break;
          case LibFunc_rint:
          case LibFunc_rintf:
          case LibFunc_rintl:
            Opcode = ISD::FRINT; break;
          case LibFunc_round:
          case LibFunc_roundf:
          case LibFunc_roundl:
            Opcode = ISD::FROUND; break;
          case LibFunc_trunc:
          case LibFunc_truncf:
          case LibFunc_truncl:
            Opcode = ISD::FTRUNC; break;
          case LibFunc_fmin:
          case LibFunc_fminf:
          case LibFunc_fminl:
            Opcode = ISD::FMINNUM; break;
          case LibFunc_fmax:
          case LibFunc_fmaxf:
          case LibFunc_fmaxl:
            Opcode = ISD::FMAXNUM; break;
          }
        }

        if (Opcode) {
          EVT EVTy =
              TLI->getValueType(DL, CI->getArgOperand(0)->getType(), true);

          if (EVTy == MVT::Other)
            return true;

          if (TLI->isOperationLegalOrCustom(Opcode, EVTy))
            continue;
          else if (EVTy.isVector() &&
                   TLI->isOperationLegalOrCustom(Opcode, EVTy.getScalarType()))
            continue;

          return true;
        }
      }

      return true;
    } else if (isa<BinaryOperator>(J) &&
               (J->getType()->getScalarType()->isFP128Ty() ||
                J->getType()->getScalarType()->isPPC_FP128Ty())) {
      // Most operations on f128 or ppc_f128 values become calls.
      return true;
    } else if (isa<UIToFPInst>(J) || isa<SIToFPInst>(J) ||
               isa<FPToUIInst>(J) || isa<FPToSIInst>(J)) {
      CastInst *CI = cast<CastInst>(J);
      if (CI->getSrcTy()->getScalarType()->isPPC_FP128Ty() ||
          CI->getDestTy()->getScalarType()->isPPC_FP128Ty() ||
          isLargeIntegerTy(!TM.isPPC64(), CI->getSrcTy()->getScalarType()) ||
          isLargeIntegerTy(!TM.isPPC64(), CI->getDestTy()->getScalarType()))
        return true;
    } else if (isLargeIntegerTy(!TM.isPPC64(),
                                J->getType()->getScalarType()) &&
               (J->getOpcode() == Instruction::UDiv ||
                J->getOpcode() == Instruction::SDiv ||
                J->getOpcode() == Instruction::URem ||
                J->getOpcode() == Instruction::SRem)) {
      return true;
    } else if (!TM.isPPC64() &&
               isLargeIntegerTy(false, J->getType()->getScalarType()) &&
               (J->getOpcode() == Instruction::Shl ||
                J->getOpcode() == Instruction::AShr ||
                J->getOpcode() == Instruction::LShr)) {
      // Only on PPC32, for 128-bit integers (specifically not 64-bit
      // integers), these might be runtime calls.
      return true;
    } else if (isa<IndirectBrInst>(J) || isa<InvokeInst>(J)) {
      // On PowerPC, indirect jumps use the counter register.
      return true;
    } else if (SwitchInst *SI = dyn_cast<SwitchInst>(J)) {
      if (SI->getNumCases() + 1 >= (unsigned)TLI->getMinimumJumpTableEntries())
        return true;
    }

    // FREM is always a call.
    if (J->getOpcode() == Instruction::FRem)
      return true;

    if (ST->useSoftFloat()) {
      switch(J->getOpcode()) {
      case Instruction::FAdd:
      case Instruction::FSub:
      case Instruction::FMul:
      case Instruction::FDiv:
      case Instruction::FPTrunc:
      case Instruction::FPExt:
      case Instruction::FPToUI:
      case Instruction::FPToSI:
      case Instruction::UIToFP:
      case Instruction::SIToFP:
      case Instruction::FCmp:
        return true;
      }
    }

    for (Value *Operand : J->operands())
      if (memAddrUsesCTR(Operand, TM, Visited))
        return true;
  }

  return false;
}
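
// Illustrative example (not in the upstream file): a block containing
//   %r = call double @llvm.sin.f64(double %x)
// makes this function return true, because sin is always lowered to a
// library call, and a call in the loop would force the counter register
// to be spilled and restored.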

bool PPCTTIImpl::isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
                                          AssumptionCache &AC,
                                          TargetLibraryInfo *LibInfo,
                                          HardwareLoopInfo &HWLoopInfo) {
  const PPCTargetMachine &TM = ST->getTargetMachine();
  TargetSchedModel SchedModel;
  SchedModel.init(ST);

  // Do not convert small short loops to CTR loops.
  unsigned ConstTripCount = SE.getSmallConstantTripCount(L);
  if (ConstTripCount && ConstTripCount < SmallCTRLoopThreshold) {
    SmallPtrSet<const Value *, 32> EphValues;
    CodeMetrics::collectEphemeralValues(L, &AC, EphValues);
    CodeMetrics Metrics;
    for (BasicBlock *BB : L->blocks())
      Metrics.analyzeBasicBlock(BB, *this, EphValues);
    // 6 is an approximate latency for the mtctr instruction.
    if (Metrics.NumInsts <= (6 * SchedModel.getIssueWidth()))
      return false;
  }

  // We don't want to spill/restore the counter register, and so we don't
  // want to use the counter register if the loop contains calls.
  SmallPtrSet<const Value *, 4> Visited;
  for (Loop::block_iterator I = L->block_begin(), IE = L->block_end();
       I != IE; ++I)
    if (mightUseCTR(*I, LibInfo, Visited))
      return false;

  SmallVector<BasicBlock*, 4> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);

  // If there is an exit edge known to be frequently taken,
  // we should not transform this loop.
  for (auto &BB : ExitingBlocks) {
    Instruction *TI = BB->getTerminator();
    if (!TI) continue;

    if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
      uint64_t TrueWeight = 0, FalseWeight = 0;
      if (!BI->isConditional() ||
          !BI->extractProfMetadata(TrueWeight, FalseWeight))
        continue;

      // If the exit path is more frequent than the loop path,
      // we return here without further analysis for this loop.
      bool TrueIsExit = !L->contains(BI->getSuccessor(0));
      if (( TrueIsExit && FalseWeight < TrueWeight) ||
          (!TrueIsExit && FalseWeight > TrueWeight))
        return false;
    }
  }

  // If an exit block has a PHI that accesses a TLS variable as one of the
  // incoming values from the loop, we cannot produce a CTR loop because the
  // address for that value will be computed in the loop.
  SmallVector<BasicBlock *, 4> ExitBlocks;
  L->getExitBlocks(ExitBlocks);
  for (auto &BB : ExitBlocks) {
    for (auto &PHI : BB->phis()) {
      for (int Idx = 0, EndIdx = PHI.getNumIncomingValues(); Idx < EndIdx;
           Idx++) {
        const BasicBlock *IncomingBB = PHI.getIncomingBlock(Idx);
        const Value *IncomingValue = PHI.getIncomingValue(Idx);
        if (L->contains(IncomingBB) &&
            memAddrUsesCTR(IncomingValue, TM, Visited))
          return false;
      }
    }
  }

  LLVMContext &C = L->getHeader()->getContext();
  HWLoopInfo.CountType = TM.isPPC64() ?
    Type::getInt64Ty(C) : Type::getInt32Ty(C);
  HWLoopInfo.LoopDecrement = ConstantInt::get(HWLoopInfo.CountType, 1);
  return true;
}
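
// Illustrative note (not in the upstream file): when this returns true, the
// hardware-loops transformation lets the backend emit a counted loop such as
//   mtctr rN      ; move the trip count into the count register
//   ...
//   bdnz loop     ; decrement CTR and branch while non-zero
// replacing the explicit induction-variable increment, compare, and branch.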

void PPCTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                                         TTI::UnrollingPreferences &UP,
                                         OptimizationRemarkEmitter *ORE) {
  if (ST->getCPUDirective() == PPC::DIR_A2) {
    // The A2 is in-order with a deep pipeline, and concatenation unrolling
    // helps expose latency-hiding opportunities to the instruction scheduler.
    UP.Partial = UP.Runtime = true;

    // We unroll a lot on the A2 (hundreds of instructions), and the benefits
    // often outweigh the cost of a division to compute the trip count.
    UP.AllowExpensiveTripCount = true;
  }

  BaseT::getUnrollingPreferences(L, SE, UP, ORE);
}

void PPCTTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE,
                                       TTI::PeelingPreferences &PP) {
  BaseT::getPeelingPreferences(L, SE, PP);
}

// This function returns true to allow using coldcc calling convention.
// Returning true results in coldcc being used for functions which are cold at
// all call sites when the callers of the functions are not calling any other
// non-coldcc functions.
bool PPCTTIImpl::useColdCCForColdCall(Function &F) {
  return EnablePPCColdCC;
}

bool PPCTTIImpl::enableAggressiveInterleaving(bool LoopHasReductions) {
  // On the A2, always unroll aggressively.
  if (ST->getCPUDirective() == PPC::DIR_A2)
    return true;

  return LoopHasReductions;
}

PPCTTIImpl::TTI::MemCmpExpansionOptions
PPCTTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
  TTI::MemCmpExpansionOptions Options;
  Options.LoadSizes = {8, 4, 2, 1};
  Options.MaxNumLoads = TLI->getMaxExpandSizeMemcmp(OptSize);
  return Options;
}
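
// Worked example (illustrative, not in the upstream file): with
// LoadSizes = {8, 4, 2, 1}, a memcmp(a, b, 16) that feeds only an equality
// test expands to two 8-byte loads from each buffer, xor/or combining, and
// a single compare against zero instead of a library call.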

bool PPCTTIImpl::enableInterleavedAccessVectorization() {
  return true;
}

unsigned PPCTTIImpl::getNumberOfRegisters(unsigned ClassID) const {
  assert(ClassID == GPRRC || ClassID == FPRRC ||
         ClassID == VRRC || ClassID == VSXRC);
  if (ST->hasVSX()) {
    assert(ClassID == GPRRC || ClassID == VSXRC || ClassID == VRRC);
    return ClassID == VSXRC ? 64 : 32;
  }
  assert(ClassID == GPRRC || ClassID == FPRRC || ClassID == VRRC);
  return 32;
}

unsigned PPCTTIImpl::getRegisterClassForType(bool Vector, Type *Ty) const {
  if (Vector)
    return ST->hasVSX() ? VSXRC : VRRC;
  else if (Ty && (Ty->getScalarType()->isFloatTy() ||
                  Ty->getScalarType()->isDoubleTy()))
    return ST->hasVSX() ? VSXRC : FPRRC;
  else if (Ty && (Ty->getScalarType()->isFP128Ty() ||
                  Ty->getScalarType()->isPPC_FP128Ty()))
    return VRRC;
  else if (Ty && Ty->getScalarType()->isHalfTy())
    return VSXRC;
  else
    return GPRRC;
}

const char* PPCTTIImpl::getRegisterClassName(unsigned ClassID) const {

  switch (ClassID) {
  default:
    llvm_unreachable("unknown register class");
    return "PPC::unknown register class";
  case GPRRC: return "PPC::GPRRC";
  case FPRRC: return "PPC::FPRRC";
  case VRRC: return "PPC::VRRC";
  case VSXRC: return "PPC::VSXRC";
  }
}

TypeSize
PPCTTIImpl::getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const {
  switch (K) {
  case TargetTransformInfo::RGK_Scalar:
    return TypeSize::getFixed(ST->isPPC64() ? 64 : 32);
  case TargetTransformInfo::RGK_FixedWidthVector:
    return TypeSize::getFixed(ST->hasAltivec() ? 128 : 0);
  case TargetTransformInfo::RGK_ScalableVector:
    return TypeSize::getScalable(0);
  }

  llvm_unreachable("Unsupported register kind");
}

unsigned PPCTTIImpl::getCacheLineSize() const {
  // Check first if the user specified a custom line size.
  if (CacheLineSize.getNumOccurrences() > 0)
    return CacheLineSize;

  // Starting with P7 we have a cache line size of 128.
  unsigned Directive = ST->getCPUDirective();
  // Assume that Future CPU has the same cache line size as the others.
  if (Directive == PPC::DIR_PWR7 || Directive == PPC::DIR_PWR8 ||
      Directive == PPC::DIR_PWR9 || Directive == PPC::DIR_PWR10 ||
      Directive == PPC::DIR_PWR_FUTURE)
    return 128;

  // On other processors return a default of 64 bytes.
  return 64;
}

unsigned PPCTTIImpl::getPrefetchDistance() const {
  return 300;
}

unsigned PPCTTIImpl::getMaxInterleaveFactor(unsigned VF) {
  unsigned Directive = ST->getCPUDirective();
  // The 440 has no SIMD support, but floating-point instructions
  // have a 5-cycle latency, so unroll by 5x for latency hiding.
  if (Directive == PPC::DIR_440)
    return 5;

  // The A2 has no SIMD support, but floating-point instructions
  // have a 6-cycle latency, so unroll by 6x for latency hiding.
  if (Directive == PPC::DIR_A2)
    return 6;

  // FIXME: For lack of any better information, do no harm...
  if (Directive == PPC::DIR_E500mc || Directive == PPC::DIR_E5500)
    return 1;

  // For P7 and P8, floating-point instructions have a 6-cycle latency and
  // there are two execution units, so unroll by 12x for latency hiding.
  // FIXME: the same for P9 as previous gen until POWER9 scheduling is ready
  // FIXME: the same for P10 as previous gen until POWER10 scheduling is ready
  // Assume that future is the same as the others.
  if (Directive == PPC::DIR_PWR7 || Directive == PPC::DIR_PWR8 ||
      Directive == PPC::DIR_PWR9 || Directive == PPC::DIR_PWR10 ||
      Directive == PPC::DIR_PWR_FUTURE)
    return 12;

  // For most things, modern systems have two execution units (and
  // out-of-order execution).
  return 2;
}

// Returns a cost adjustment factor to adjust the cost of vector instructions
// on targets where there is overlap between the vector and scalar units,
// thereby reducing the overall throughput of vector code relative to scalar
// code. An invalid instruction cost is returned if the type is an MMA vector
// type.
InstructionCost PPCTTIImpl::vectorCostAdjustmentFactor(unsigned Opcode,
                                                       Type *Ty1, Type *Ty2) {
  // If the vector type is of an MMA type (v256i1, v512i1), an invalid
  // instruction cost is returned. This is to signify to other cost computing
  // functions to return the maximum instruction cost in order to prevent any
  // opportunities for the optimizer to produce MMA types within the IR.
  if (isMMAType(Ty1))
    return InstructionCost::getInvalid();

  if (!ST->vectorsUseTwoUnits() || !Ty1->isVectorTy())
    return InstructionCost(1);

  std::pair<InstructionCost, MVT> LT1 = TLI->getTypeLegalizationCost(DL, Ty1);
  // If type legalization involves splitting the vector, we don't want to
  // double the cost at every step - only the last step.
  if (LT1.first != 1 || !LT1.second.isVector())
    return InstructionCost(1);

  int ISD = TLI->InstructionOpcodeToISD(Opcode);
  if (TLI->isOperationExpand(ISD, LT1.second))
    return InstructionCost(1);

  if (Ty2) {
    std::pair<InstructionCost, MVT> LT2 = TLI->getTypeLegalizationCost(DL, Ty2);
    if (LT2.first != 1 || !LT2.second.isVector())
      return InstructionCost(1);
  }

  return InstructionCost(2);
}
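
// Worked example (illustrative, not in the upstream file): on a subtarget
// where vectorsUseTwoUnits() is true, a legal, non-split v4i32 operation
// gets a factor of 2, doubling its reported cost; scalar types, split
// vectors, and expanded operations keep a factor of 1; MMA types
// (v256i1/v512i1) yield an invalid cost, which callers turn into
// InstructionCost::getMax().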

InstructionCost PPCTTIImpl::getArithmeticInstrCost(
    unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
    TTI::OperandValueKind Op1Info, TTI::OperandValueKind Op2Info,
    TTI::OperandValueProperties Opd1PropInfo,
    TTI::OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args,
    const Instruction *CxtI) {
  assert(TLI->InstructionOpcodeToISD(Opcode) && "Invalid opcode");

  InstructionCost CostFactor = vectorCostAdjustmentFactor(Opcode, Ty, nullptr);
  if (!CostFactor.isValid())
    return InstructionCost::getMax();

  // TODO: Handle more cost kinds.
  if (CostKind != TTI::TCK_RecipThroughput)
    return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info,
                                         Op2Info, Opd1PropInfo,
                                         Opd2PropInfo, Args, CxtI);

  // Fallback to the default implementation.
  InstructionCost Cost = BaseT::getArithmeticInstrCost(
      Opcode, Ty, CostKind, Op1Info, Op2Info, Opd1PropInfo, Opd2PropInfo);
  return Cost * CostFactor;
}

InstructionCost PPCTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp,
                                           ArrayRef<int> Mask, int Index,
                                           Type *SubTp) {

  InstructionCost CostFactor =
      vectorCostAdjustmentFactor(Instruction::ShuffleVector, Tp, nullptr);
  if (!CostFactor.isValid())
    return InstructionCost::getMax();

  // Legalize the type.
  std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);

  // PPC, for both Altivec/VSX, supports cheap arbitrary permutations
  // (at least in the sense that there need only be one non-loop-invariant
  // instruction). We need one such shuffle instruction for each actual
  // register (this is not true for arbitrary shuffles, but is true for the
  // structured types of shuffles covered by TTI::ShuffleKind).
  return LT.first * CostFactor;
}

InstructionCost PPCTTIImpl::getCFInstrCost(unsigned Opcode,
                                           TTI::TargetCostKind CostKind,
                                           const Instruction *I) {
  if (CostKind != TTI::TCK_RecipThroughput)
    return Opcode == Instruction::PHI ? 0 : 1;
  // Branches are assumed to be predicted.
  return 0;
}

InstructionCost PPCTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
                                             Type *Src,
                                             TTI::CastContextHint CCH,
                                             TTI::TargetCostKind CostKind,
                                             const Instruction *I) {
  assert(TLI->InstructionOpcodeToISD(Opcode) && "Invalid opcode");

  InstructionCost CostFactor = vectorCostAdjustmentFactor(Opcode, Dst, Src);
  if (!CostFactor.isValid())
    return InstructionCost::getMax();

  InstructionCost Cost =
      BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I);
  Cost *= CostFactor;
  // TODO: Allow non-throughput costs that aren't binary.
  if (CostKind != TTI::TCK_RecipThroughput)
    return Cost == 0 ? 0 : 1;
  return Cost;
}

InstructionCost PPCTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
                                               Type *CondTy,
                                               CmpInst::Predicate VecPred,
                                               TTI::TargetCostKind CostKind,
                                               const Instruction *I) {
  InstructionCost CostFactor =
      vectorCostAdjustmentFactor(Opcode, ValTy, nullptr);
  if (!CostFactor.isValid())
    return InstructionCost::getMax();

  InstructionCost Cost =
      BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I);
  // TODO: Handle other cost kinds.
  if (CostKind != TTI::TCK_RecipThroughput)
    return Cost;
  return Cost * CostFactor;
}

InstructionCost PPCTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
                                               unsigned Index) {
  assert(Val->isVectorTy() && "This must be a vector type");

  int ISD = TLI->InstructionOpcodeToISD(Opcode);
  assert(ISD && "Invalid opcode");

  InstructionCost CostFactor = vectorCostAdjustmentFactor(Opcode, Val, nullptr);
  if (!CostFactor.isValid())
    return InstructionCost::getMax();

  InstructionCost Cost = BaseT::getVectorInstrCost(Opcode, Val, Index);
  Cost *= CostFactor;

  if (ST->hasVSX() && Val->getScalarType()->isDoubleTy()) {
    // Double-precision scalars are already located in index #0 (or #1 if LE).
    if (ISD == ISD::EXTRACT_VECTOR_ELT &&
        Index == (ST->isLittleEndian() ? 1 : 0))
      return 0;

    return Cost;

  } else if (Val->getScalarType()->isIntegerTy() && Index != -1U) {
    if (ST->hasP9Altivec()) {
      if (ISD == ISD::INSERT_VECTOR_ELT)
        // A move-to VSR and a permute/insert. Assume vector operation cost
        // for both (cost will be 2x on P9).
        return 2 * CostFactor;

      // It's an extract. Maybe we can do a cheap move-from VSR.
      unsigned EltSize = Val->getScalarSizeInBits();
      if (EltSize == 64) {
        unsigned MfvsrdIndex = ST->isLittleEndian() ? 1 : 0;
        if (Index == MfvsrdIndex)
          return 1;
      } else if (EltSize == 32) {
        unsigned MfvsrwzIndex = ST->isLittleEndian() ? 2 : 1;
        if (Index == MfvsrwzIndex)
          return 1;
      }

      // We need a vector extract (or mfvsrld). Assume vector operation cost.
      // The cost of the load constant for a vector extract is disregarded
      // (invariant, easily schedulable).
      return CostFactor;

    } else if (ST->hasDirectMove())
      // Assume permute has standard cost.
      // Assume move-to/move-from VSR have 2x standard cost.
      return 3;
  }

  // Estimated cost of a load-hit-store delay. This was obtained
  // experimentally as a minimum needed to prevent unprofitable
  // vectorization for the paq8p benchmark. It may need to be
  // raised further if other unprofitable cases remain.
  unsigned LHSPenalty = 2;
  if (ISD == ISD::INSERT_VECTOR_ELT)
    LHSPenalty += 7;

  // Vector element insert/extract with Altivec is very expensive,
  // because they require store and reload with the attendant
  // processor stall for load-hit-store. Until VSX is available,
  // these need to be estimated as very costly.
  if (ISD == ISD::EXTRACT_VECTOR_ELT ||
      ISD == ISD::INSERT_VECTOR_ELT)
    return LHSPenalty + Cost;

  return Cost;
}
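
// Worked example (illustrative, not in the upstream file): with VSX, an
// extractelement of the f64 lane that already sits in the scalar position
// (lane 0 big-endian, lane 1 little-endian) is free (cost 0); on P9, an i64
// extract of the lane reachable with mfvsrd costs 1; without VSX, an
// Altivec insertelement pays the load-hit-store penalty (2 + 7) on top of
// the base cost.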

InstructionCost PPCTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
                                            MaybeAlign Alignment,
                                            unsigned AddressSpace,
                                            TTI::TargetCostKind CostKind,
                                            const Instruction *I) {

  InstructionCost CostFactor = vectorCostAdjustmentFactor(Opcode, Src, nullptr);
  if (!CostFactor.isValid())
    return InstructionCost::getMax();

  if (TLI->getValueType(DL, Src, true) == MVT::Other)
    return BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
                                  CostKind);
  // Legalize the type.
  std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);
  assert((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
         "Invalid Opcode");

  InstructionCost Cost =
      BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, CostKind);
  // TODO: Handle other cost kinds.
  if (CostKind != TTI::TCK_RecipThroughput)
    return Cost;

  Cost *= CostFactor;

  bool IsAltivecType = ST->hasAltivec() &&
                       (LT.second == MVT::v16i8 || LT.second == MVT::v8i16 ||
                        LT.second == MVT::v4i32 || LT.second == MVT::v4f32);
  bool IsVSXType = ST->hasVSX() &&
                   (LT.second == MVT::v2f64 || LT.second == MVT::v2i64);

  // VSX has 32b/64b load instructions. Legalization can handle loading of
  // 32b/64b to VSR correctly and cheaply. But BaseT::getMemoryOpCost and
  // PPCTargetLowering can't compute the cost appropriately. So here we
  // explicitly check this case.
  unsigned MemBytes = Src->getPrimitiveSizeInBits();
  if (Opcode == Instruction::Load && ST->hasVSX() && IsAltivecType &&
      (MemBytes == 64 || (ST->hasP8Vector() && MemBytes == 32)))
    return 1;

  // Aligned loads and stores are easy.
  unsigned SrcBytes = LT.second.getStoreSize();
  if (!SrcBytes || !Alignment || *Alignment >= SrcBytes)
    return Cost;

  // If we can use the permutation-based load sequence, then this is also
  // relatively cheap (not counting loop-invariant instructions): one load plus
  // one permute (the last load in a series has extra cost, but we're
  // neglecting that here). Note that on the P7, we could do unaligned loads
  // for Altivec types using the VSX instructions, but that's more expensive
  // than using the permutation-based load sequence. On the P8, that's no
  // longer true.
  if (Opcode == Instruction::Load && (!ST->hasP8Vector() && IsAltivecType) &&
      *Alignment >= LT.second.getScalarType().getStoreSize())
    return Cost + LT.first; // Add the cost of the permutations.

  // For VSX, we can do unaligned loads and stores on Altivec/VSX types. On the
  // P7, unaligned vector loads are more expensive than the permutation-based
  // load sequence, so that might be used instead, but regardless, the net cost
  // is about the same (not counting loop-invariant instructions).
  if (IsVSXType || (ST->hasVSX() && IsAltivecType))
    return Cost;

  // Newer PPC supports unaligned memory access.
  if (TLI->allowsMisalignedMemoryAccesses(LT.second, 0))
    return Cost;

  // PPC in general does not support unaligned loads and stores. They'll need
  // to be decomposed based on the alignment factor.

  // Add the cost of each scalar load or store.
  assert(Alignment);
  Cost += LT.first * ((SrcBytes / Alignment->value()) - 1);

  // For a vector type, there is also scalarization overhead (only for
  // stores, loads are expanded using the vector-load + permutation sequence,
  // which is much less expensive).
  if (Src->isVectorTy() && Opcode == Instruction::Store)
    for (int i = 0, e = cast<FixedVectorType>(Src)->getNumElements(); i < e;
         ++i)
      Cost += getVectorInstrCost(Instruction::ExtractElement, Src, i);

  return Cost;
}
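
// Worked example (illustrative, not in the upstream file): storing a
// <4 x i32> with 4-byte alignment on an Altivec-only (pre-VSX) subtarget:
// SrcBytes = 16, so the store is decomposed into 16/4 = 4 pieces (adding
// LT.first * 3 to the cost), plus one ExtractElement cost per element to
// scalarize the stored vector.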

InstructionCost PPCTTIImpl::getInterleavedMemoryOpCost(
    unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
    Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
    bool UseMaskForCond, bool UseMaskForGaps) {
  InstructionCost CostFactor =
      vectorCostAdjustmentFactor(Opcode, VecTy, nullptr);
  if (!CostFactor.isValid())
    return InstructionCost::getMax();

  if (UseMaskForCond || UseMaskForGaps)
    return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
                                             Alignment, AddressSpace, CostKind,
                                             UseMaskForCond, UseMaskForGaps);

  assert(isa<VectorType>(VecTy) &&
         "Expect a vector type for interleaved memory op");

  // Legalize the type.
  std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, VecTy);

  // Firstly, the cost of load/store operation.
  InstructionCost Cost = getMemoryOpCost(Opcode, VecTy, MaybeAlign(Alignment),
                                         AddressSpace, CostKind);

  // PPC, for both Altivec/VSX, supports cheap arbitrary permutations
  // (at least in the sense that there need only be one non-loop-invariant
  // instruction). For each result vector, we need one shuffle per incoming
  // vector (except that the first shuffle can take two incoming vectors
  // because it does not need to take itself).
  Cost += Factor*(LT.first-1);

  return Cost;
}

InstructionCost
PPCTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
                                  TTI::TargetCostKind CostKind) {
  return BaseT::getIntrinsicInstrCost(ICA, CostKind);
}

bool PPCTTIImpl::areTypesABICompatible(const Function *Caller,
                                       const Function *Callee,
                                       const ArrayRef<Type *> &Types) const {

  // We need to ensure that argument promotion does not
  // attempt to promote pointers to MMA types (__vector_pair
  // and __vector_quad) since these types explicitly cannot be
  // passed as arguments. Both of these types are larger than
  // the 128-bit Altivec vectors and have a scalar size of 1 bit.
  if (!BaseT::areTypesABICompatible(Caller, Callee, Types))
    return false;

  return llvm::none_of(Types, [](Type *Ty) {
    if (Ty->isSized())
      return Ty->isIntOrIntVectorTy(1) && Ty->getPrimitiveSizeInBits() > 128;
    return false;
  });
}

bool PPCTTIImpl::canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE,
                            LoopInfo *LI, DominatorTree *DT,
                            AssumptionCache *AC, TargetLibraryInfo *LibInfo) {
  // Process nested loops first.
  for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I)
    if (canSaveCmp(*I, BI, SE, LI, DT, AC, LibInfo))
      return false; // Stop search.

  HardwareLoopInfo HWLoopInfo(L);

  if (!HWLoopInfo.canAnalyze(*LI))
    return false;

  if (!isHardwareLoopProfitable(L, *SE, *AC, LibInfo, HWLoopInfo))
    return false;

  if (!HWLoopInfo.isHardwareLoopCandidate(*SE, *LI, *DT))
    return false;

  *BI = HWLoopInfo.ExitBranch;
  return true;
}

bool PPCTTIImpl::isLSRCostLess(TargetTransformInfo::LSRCost &C1,
                               TargetTransformInfo::LSRCost &C2) {
  // PowerPC default behaviour here is "instruction number 1st priority".
  // If LsrNoInsnsCost is set, call the default implementation.
  if (!LsrNoInsnsCost)
    return std::tie(C1.Insns, C1.NumRegs, C1.AddRecCost, C1.NumIVMuls,
                    C1.NumBaseAdds, C1.ScaleCost, C1.ImmCost, C1.SetupCost) <
           std::tie(C2.Insns, C2.NumRegs, C2.AddRecCost, C2.NumIVMuls,
                    C2.NumBaseAdds, C2.ScaleCost, C2.ImmCost, C2.SetupCost);
  else
    return TargetTransformInfoImplBase::isLSRCostLess(C1, C2);
}

bool PPCTTIImpl::isNumRegsMajorCostOfLSR() {
  return false;
}

bool PPCTTIImpl::shouldBuildRelLookupTables() const {
  const PPCTargetMachine &TM = ST->getTargetMachine();
  // XCOFF hasn't implemented lowerRelativeReference, disable non-ELF for now.
  if (!TM.isELFv2ABI())
    return false;
  return BaseT::shouldBuildRelLookupTables();
}

bool PPCTTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst,
                                    MemIntrinsicInfo &Info) {
  switch (Inst->getIntrinsicID()) {
  case Intrinsic::ppc_altivec_lvx:
  case Intrinsic::ppc_altivec_lvxl:
  case Intrinsic::ppc_altivec_lvebx:
  case Intrinsic::ppc_altivec_lvehx:
  case Intrinsic::ppc_altivec_lvewx:
  case Intrinsic::ppc_vsx_lxvd2x:
  case Intrinsic::ppc_vsx_lxvw4x:
  case Intrinsic::ppc_vsx_lxvd2x_be:
  case Intrinsic::ppc_vsx_lxvw4x_be:
  case Intrinsic::ppc_vsx_lxvl:
  case Intrinsic::ppc_vsx_lxvll:
  case Intrinsic::ppc_vsx_lxvp: {
    Info.PtrVal = Inst->getArgOperand(0);
    Info.ReadMem = true;
    Info.WriteMem = false;
    return true;
  }
  case Intrinsic::ppc_altivec_stvx:
  case Intrinsic::ppc_altivec_stvxl:
  case Intrinsic::ppc_altivec_stvebx:
  case Intrinsic::ppc_altivec_stvehx:
  case Intrinsic::ppc_altivec_stvewx:
  case Intrinsic::ppc_vsx_stxvd2x:
  case Intrinsic::ppc_vsx_stxvw4x:
  case Intrinsic::ppc_vsx_stxvd2x_be:
  case Intrinsic::ppc_vsx_stxvw4x_be:
  case Intrinsic::ppc_vsx_stxvl:
  case Intrinsic::ppc_vsx_stxvll:
  case Intrinsic::ppc_vsx_stxvp: {
    Info.PtrVal = Inst->getArgOperand(1);
    Info.ReadMem = false;
    Info.WriteMem = true;
    return true;
  }
  default:
    break;
  }

  return false;
}

bool PPCTTIImpl::hasActiveVectorLength(unsigned Opcode, Type *DataType,
                                       Align Alignment) const {
  // Only load and store instructions can have a variable vector length on
  // Power.
  if (Opcode != Instruction::Load && Opcode != Instruction::Store)
    return false;
  // Loads/stores with length instructions use bits 0-7 of the GPR operand and
  // therefore cannot be used in 32-bit mode.
  if ((!ST->hasP9Vector() && !ST->hasP10Vector()) || !ST->isPPC64())
    return false;
  if (isa<FixedVectorType>(DataType)) {
    unsigned VecWidth = DataType->getPrimitiveSizeInBits();
    return VecWidth == 128;
  }
  Type *ScalarTy = DataType->getScalarType();

  if (ScalarTy->isPointerTy())
    return true;

  if (ScalarTy->isFloatTy() || ScalarTy->isDoubleTy())
    return true;

  if (!ScalarTy->isIntegerTy())
    return false;

  unsigned IntWidth = ScalarTy->getIntegerBitWidth();
  return IntWidth == 8 || IntWidth == 16 || IntWidth == 32 || IntWidth == 64;
}
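
// Illustrative example (not in the upstream file): on a 64-bit POWER9
// subtarget this returns true for a load of <16 x i8> (128 bits wide), which
// can be emitted as a load-with-length (lxvl); it returns false in 32-bit
// mode or for vectors that are not 128 bits wide.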

InstructionCost PPCTTIImpl::getVPMemoryOpCost(unsigned Opcode, Type *Src,
                                              Align Alignment,
                                              unsigned AddressSpace,
                                              TTI::TargetCostKind CostKind,
                                              const Instruction *I) {
  InstructionCost Cost = BaseT::getVPMemoryOpCost(Opcode, Src, Alignment,
                                                  AddressSpace, CostKind, I);
  if (TLI->getValueType(DL, Src, true) == MVT::Other)
    return Cost;
  // TODO: Handle other cost kinds.
  if (CostKind != TTI::TCK_RecipThroughput)
    return Cost;

  assert((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
         "Invalid Opcode");

  auto *SrcVTy = dyn_cast<FixedVectorType>(Src);
  assert(SrcVTy && "Expected a vector type for VP memory operations");

  if (hasActiveVectorLength(Opcode, Src, Alignment)) {
    std::pair<InstructionCost, MVT> LT =
        TLI->getTypeLegalizationCost(DL, SrcVTy);

    InstructionCost CostFactor =
        vectorCostAdjustmentFactor(Opcode, Src, nullptr);
    if (!CostFactor.isValid())
      return InstructionCost::getMax();

    InstructionCost Cost = LT.first * CostFactor;
    assert(Cost.isValid() && "Expected valid cost");

    // On P9 but not on P10, if the op is misaligned then it will cause a
    // pipeline flush. Otherwise the VSX masked memops cost the same as unmasked
    // ones.
    const Align DesiredAlignment(16);
    if (Alignment >= DesiredAlignment || ST->getCPUDirective() != PPC::DIR_PWR9)
      return Cost;

    // Since alignment may be underestimated, we try to compute the probability
    // that the actual address is aligned to the desired boundary. For example,
    // an 8-byte aligned load is assumed to be actually 16-byte aligned half the
    // time, while a 4-byte aligned load has a 25% chance of being 16-byte
    // aligned.
    float AlignmentProb = ((float)Alignment.value()) / DesiredAlignment.value();
    float MisalignmentProb = 1.0 - AlignmentProb;
    return (MisalignmentProb * P9PipelineFlushEstimate) +
           (AlignmentProb * *Cost.getValue());
  }

  // Usually we should not get to this point, but the following is an attempt
  // to model the cost of legalization. Currently we can only lower intrinsics
  // with evl but no mask, on Power 9/10. Otherwise, we must scalarize.
  return getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace, CostKind);
}
i
i
Definition: README.txt:29
llvm::InstructionCost
Definition: InstructionCost.h:29
llvm::TargetTransformInfo::PSK_FastHardware
@ PSK_FastHardware
Definition: TargetTransformInfo.h:595
llvm::PPCTTIImpl::VSXRC
@ VSXRC
Definition: PPCTargetTransformInfo.h:94
llvm::ISD::STRICT_FSETCC
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition: ISDOpcodes.h:462
llvm::Type::isSized
bool isSized(SmallPtrSetImpl< Type * > *Visited=nullptr) const
Return true if it makes sense to take the size of this type.
Definition: Type.h:263
llvm::BasicTTIImplBase< PPCTTIImpl >::DL
const DataLayout & DL
Definition: TargetTransformInfoImpl.h:39
llvm::TargetTransformInfo::UnrollingPreferences::Runtime
bool Runtime
Allow runtime unrolling (unrolling of loops to expand the size of the loop body even when the number ...
Definition: TargetTransformInfo.h:488
llvm::TargetTransformInfo::TargetCostKind
TargetCostKind
The kind of cost model.
Definition: TargetTransformInfo.h:211
llvm::PPCSubtarget::hasPOPCNTD
POPCNTDKind hasPOPCNTD() const
Definition: PPCSubtarget.h:357
llvm::PPCTTIImpl::getUnrollingPreferences
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE)
Definition: PPCTargetTransformInfo.cpp:799
llvm::TargetTransformInfo::LSRCost::NumRegs
unsigned NumRegs
Definition: TargetTransformInfo.h:421
llvm::BasicTTIImplBase< PPCTTIImpl >::shouldBuildRelLookupTables
bool shouldBuildRelLookupTables() const
Definition: BasicTTIImpl.h:442
llvm::ISD::UMULO
@ UMULO
Definition: ISDOpcodes.h:319
llvm::ISD::STRICT_FSQRT
@ STRICT_FSQRT
Constrained versions of libm-equivalent floating point intrinsics.
Definition: ISDOpcodes.h:398
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AllocatorList.h:23
llvm::none_of
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1663
llvm::LoopBase::getExitBlocks
void getExitBlocks(SmallVectorImpl< BlockT * > &ExitBlocks) const
Return all of the successor blocks of this loop.
Definition: LoopInfoImpl.h:62
llvm::PPC::DIR_PWR10
@ DIR_PWR10
Definition: PPCSubtarget.h:63
llvm::PPC::DIR_440
@ DIR_440
Definition: PPCSubtarget.h:43
llvm::PPCTTIImpl::getMaxInterleaveFactor
unsigned getMaxInterleaveFactor(unsigned VF)
Definition: PPCTargetTransformInfo.cpp:921
llvm::PPCTTIImpl::getIntImmCost
InstructionCost getIntImmCost(const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind)
Definition: PPCTargetTransformInfo.cpp:170
llvm::HardwareLoopInfo::LoopDecrement
Value * LoopDecrement
Definition: TargetTransformInfo.h:103
llvm::InstructionCost::getValue
Optional< CostType > getValue() const
This function is intended to be used as sparingly as possible, since the class provides the full rang...
Definition: InstructionCost.h:87
InstCombiner.h
llvm::PPCTTIImpl::getArithmeticInstrCost
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueKind Opd1Info=TTI::OK_AnyValue, TTI::OperandValueKind Opd2Info=TTI::OK_AnyValue, TTI::OperandValueProperties Opd1PropInfo=TTI::OP_None, TTI::OperandValueProperties Opd2PropInfo=TTI::OP_None, ArrayRef< const Value * > Args=ArrayRef< const Value * >(), const Instruction *CxtI=nullptr)
Definition: PPCTargetTransformInfo.cpp:987
llvm::CmpInst::Predicate
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:721
llvm::BasicBlock::iterator
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:90
llvm::InstCombiner::getDominatorTree
DominatorTree & getDominatorTree() const
Definition: InstCombiner.h:370
llvm::InlineAsm::ConstraintInfoVector
std::vector< ConstraintInfo > ConstraintInfoVector
Definition: InlineAsm.h:118
llvm::Type::isPointerTy
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:217
ceil
We have fiadd patterns now but the followings have the same cost and complexity We need a way to specify the later is more profitable def def The FP stackifier should handle simple permutates to reduce number of shuffle e g ceil
Definition: README-FPStack.txt:54
llvm::ISD::FMINNUM
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
Definition: ISDOpcodes.h:902
llvm::ISD::STRICT_FMAXNUM
@ STRICT_FMAXNUM
Definition: ISDOpcodes.h:410
llvm::PPCTTIImpl::getIntImmCostIntrin
InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind)
Definition: PPCTargetTransformInfo.cpp:200
llvm::Function
Definition: Function.h:62
llvm::Loop
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:530
llvm::LoopBase::contains
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
Definition: LoopInfo.h:122
llvm::TargetTransformInfo::PopcntSupportKind
PopcntSupportKind
Flags indicating the kind of support for population count.
Definition: TargetTransformInfo.h:595
llvm::ISD::STRICT_FMINNUM
@ STRICT_FMINNUM
Definition: ISDOpcodes.h:411
llvm::IntrinsicInst::getIntrinsicID
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
Definition: IntrinsicInst.h:52
llvm::TargetTransformInfoImplCRTPBase< PPCTTIImpl >::getUserCost
InstructionCost getUserCost(const User *U, ArrayRef< const Value * > Operands, TTI::TargetCostKind CostKind)
Definition: TargetTransformInfoImpl.h:961
C1
instcombine should handle this C2 when C1
Definition: README.txt:263
llvm::Type::getScalarType
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:308
llvm::TLSModel::GeneralDynamic
@ GeneralDynamic
Definition: CodeGen.h:43
llvm::PPCTTIImpl::isLSRCostLess
bool isLSRCostLess(TargetTransformInfo::LSRCost &C1, TargetTransformInfo::LSRCost &C2)
Definition: PPCTargetTransformInfo.cpp:1317
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1177
llvm::PPC::DIR_PWR8
@ DIR_PWR8
Definition: PPCSubtarget.h:61
llvm::InstCombiner::Builder
BuilderTy & Builder
Definition: InstCombiner.h:58
llvm::CodeMetrics
Utility to calculate the size and a few similar metrics for a set of basic blocks.
Definition: CodeMetrics.h:30
llvm::PPCTTIImpl::getCastInstrCost
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
Definition: PPCTargetTransformInfo.cpp:1040
llvm::TargetTransformInfoImplBase::isLSRCostLess
bool isLSRCostLess(TTI::LSRCost &C1, TTI::LSRCost &C2) const
Definition: TargetTransformInfoImpl.h:216
llvm::APInt::getSExtValue
int64_t getSExtValue() const
Get sign extended value.
Definition: APInt.h:1479
llvm::PPCSubtarget::hasP8Vector
bool hasP8Vector() const
Definition: PPCSubtarget.h:281
llvm::PPCSubtarget::isLittleEndian
bool isLittleEndian() const
Definition: PPCSubtarget.h:261
llvm::CastInst::Create
static CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", Instruction *InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's constructor.
Definition: Instructions.cpp:3152
llvm::ScalarEvolution
The main scalar evolution driver.
Definition: ScalarEvolution.h:460
llvm::TargetTransformInfo::RGK_Scalar
@ RGK_Scalar
Definition: TargetTransformInfo.h:916
llvm::PPCSubtarget::hasVSX
bool hasVSX() const
Definition: PPCSubtarget.h:279
llvm::BasicTTIImplBase< PPCTTIImpl >::getArithmeticInstrCost
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueKind Opd1Info=TTI::OK_AnyValue, TTI::OperandValueKind Opd2Info=TTI::OK_AnyValue, TTI::OperandValueProperties Opd1PropInfo=TTI::OP_None, TTI::OperandValueProperties Opd2PropInfo=TTI::OP_None, ArrayRef< const Value * > Args=ArrayRef< const Value * >(), const Instruction *CxtI=nullptr)
Definition: BasicTTIImpl.h:757
Local.h
llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:151
DisablePPCConstHoist
static cl::opt< bool > DisablePPCConstHoist("disable-ppc-constant-hoisting", cl::desc("disable constant hoisting on PPC"), cl::init(false), cl::Hidden)
llvm::HardwareLoopInfo::ExitBranch
BranchInst * ExitBranch
Definition: TargetTransformInfo.h:100
llvm::ISD::FMA
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:466
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:143
llvm::TargetTransformInfo::LSRCost::NumIVMuls
unsigned NumIVMuls
Definition: TargetTransformInfo.h:423
llvm::isShiftedMask_32
constexpr bool isShiftedMask_32(uint32_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (32 bit version.)
Definition: MathExtras.h:479
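A sketch of which values qualify as shifted masks (illustrative values only):

#include "llvm/Support/MathExtras.h"
#include <cassert>
using namespace llvm;

int main() {
  assert(isShiftedMask_32(0x000000F0u));  // 0b11110000: one contiguous run of ones
  assert(!isShiftedMask_32(0x000000F1u)); // 0b11110001: two separate runs
  assert(!isShiftedMask_32(0u));          // an empty sequence does not count
  return 0;
}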
llvm::PPCSubtarget::hasP9Vector
bool hasP9Vector() const
Definition: PPCSubtarget.h:284
llvm::HardwareLoopInfo::isHardwareLoopCandidate
bool isHardwareLoopCandidate(ScalarEvolution &SE, LoopInfo &LI, DominatorTree &DT, bool ForceNestedLoop=false, bool ForceHardwareLoopPHI=false)
Definition: TargetTransformInfo.cpp:100
llvm::TargetTransformInfo::UnrollingPreferences::Partial
bool Partial
Allow partial unrolling (unrolling of loops to expand the size of the loop body, not only to eliminat...
Definition: TargetTransformInfo.h:484
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
llvm::APInt::getBitWidth
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1412
llvm::TargetTransformInfo::PeelingPreferences
Definition: TargetTransformInfo.h:538
llvm::getOrEnforceKnownAlignment
Align getOrEnforceKnownAlignment(Value *V, MaybeAlign PrefAlign, const DataLayout &DL, const Instruction *CxtI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr)
Try to ensure that the alignment of V is at least PrefAlign bytes.
Definition: Local.cpp:1362
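A minimal sketch of this utility applied to an alloca, whose alignment can simply be raised to the preferred value; all setup names are illustrative.

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Module.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;

int main() {
  LLVMContext Ctx;
  Module M("demo", Ctx);
  Function *F = Function::Create(FunctionType::get(Type::getVoidTy(Ctx), false),
                                 Function::ExternalLinkage, "f", M);
  IRBuilder<> B(BasicBlock::Create(Ctx, "entry", F));
  AllocaInst *A = B.CreateAlloca(B.getInt8Ty());
  A->setAlignment(Align(4));
  // An alloca's alignment can be raised in place, so the result is >= 16 here.
  Align Known = getOrEnforceKnownAlignment(A, MaybeAlign(16), M.getDataLayout());
  B.CreateRetVoid();
  return Known.value() >= 16 ? 0 : 1;
}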
llvm::Optional
Definition: APInt.h:33
llvm::LoopBase::begin
iterator begin() const
Definition: LoopInfo.h:154
llvm::PPCTTIImpl::vectorCostAdjustmentFactor
InstructionCost vectorCostAdjustmentFactor(unsigned Opcode, Type *Ty1, Type *Ty2)
Definition: PPCTargetTransformInfo.cpp:956
llvm::SmallPtrSet
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:449
llvm::PPCTTIImpl::isHardwareLoopProfitable
bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE, AssumptionCache &AC, TargetLibraryInfo *LibInfo, HardwareLoopInfo &HWLoopInfo)
Definition: PPCTargetTransformInfo.cpp:721
llvm::PPCSubtarget::getTargetMachine
const PPCTargetMachine & getTargetMachine() const
Definition: PPCSubtarget.h:226
llvm::IRBuilderBase::CreateInsertElement
Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")
Definition: IRBuilder.h:2316
llvm::TargetLoweringBase::getTypeLegalizationCost
std::pair< InstructionCost, MVT > getTypeLegalizationCost(const DataLayout &DL, Type *Ty) const
Estimate the cost of type-legalization and the legalized type.
Definition: TargetLoweringBase.cpp:1848
llvm::PPCTTIImpl::getRegisterClassName
const char * getRegisterClassName(unsigned ClassID) const
Definition: PPCTargetTransformInfo.cpp:873
llvm::MipsISD::Ret
@ Ret
Definition: MipsISelLowering.h:116
llvm::Intrinsic::not_intrinsic
@ not_intrinsic
Definition: Intrinsics.h:45
llvm::TargetTransformInfoImplBase::getIntImmCost
InstructionCost getIntImmCost(const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind) const
Definition: TargetTransformInfoImpl.h:375
llvm::PPCSubtarget::vectorsUseTwoUnits
bool vectorsUseTwoUnits() const
Definition: PPCSubtarget.h:304
llvm::isPowerOf2_32
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:491
llvm::MVT::v2f64
@ v2f64
Definition: MachineValueType.h:172
llvm::BitmaskEnumDetail::Mask
std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:80
llvm::TargetTransformInfo::UnrollingPreferences::AllowExpensiveTripCount
bool AllowExpensiveTripCount
Allow emitting expensive instructions (such as divisions) when computing the trip count of a loop for runtime unrolling.
Definition: TargetTransformInfo.h:493
llvm::Type::getInt32Ty
static IntegerType * getInt32Ty(LLVMContext &C)
Definition: Type.cpp:241
llvm::TargetTransformInfo::LSRCost::Insns
unsigned Insns
TODO: Some of these could be merged.
Definition: TargetTransformInfo.h:420
llvm::PPCTTIImpl::isNumRegsMajorCostOfLSR
bool isNumRegsMajorCostOfLSR()
Definition: PPCTargetTransformInfo.cpp:1330
llvm::CastInst::getDestTy
Type * getDestTy() const
Return the destination type, as a convenience.
Definition: InstrTypes.h:685
F
#define F(x, y, z)
Definition: MD5.cpp:55
KnownBits.h
llvm::LoopBase::block_end
block_iterator block_end() const
Definition: LoopInfo.h:177
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
llvm::TargetTransformInfo::LSRCost::AddRecCost
unsigned AddRecCost
Definition: TargetTransformInfo.h:422
llvm::ISD::STRICT_FROUND
@ STRICT_FROUND
Definition: ISDOpcodes.h:414
llvm::Reloc::Model
Model
Definition: CodeGen.h:22
llvm::AArch64CC::LT
@ LT
Definition: AArch64BaseInfo.h:266
llvm::TargetSchedModel::init
void init(const TargetSubtargetInfo *TSInfo)
Initialize the machine model for instruction scheduling.
Definition: TargetSchedule.cpp:63
llvm::PPCTTIImpl::instCombineIntrinsic
Optional< Instruction * > instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const
Definition: PPCTargetTransformInfo.cpp:68
llvm::TargetTransformInfo::LSRCost::SetupCost
unsigned SetupCost
Definition: TargetTransformInfo.h:426
llvm::ISD::FFLOOR
@ FFLOOR
Definition: ISDOpcodes.h:889
llvm::PPCTTIImpl::GPRRC
@ GPRRC
Definition: PPCTargetTransformInfo.h:94
CommandLine.h
CodeMetrics.h
TargetLowering.h
llvm::TargetLoweringBase::isOperationLegalOrCustom
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lowering.
Definition: TargetLowering.h:1144
llvm::BasicTTIImplBase< PPCTTIImpl >::getUnrollingPreferences
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE)
Definition: BasicTTIImpl.h:493
llvm::TargetSchedModel::getIssueWidth
unsigned getIssueWidth() const
Maximum number of micro-ops that may be scheduled per cycle.
Definition: TargetSchedule.h:98
llvm::PPC::DIR_PWR7
@ DIR_PWR7
Definition: PPCSubtarget.h:60
llvm::PPCTTIImpl::getShuffleCost
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, ArrayRef< int > Mask, int Index, Type *SubTp)
Definition: PPCTargetTransformInfo.cpp:1011
llvm::ISD::STRICT_FRINT
@ STRICT_FRINT
Definition: ISDOpcodes.h:408
llvm::BasicTTIImplBase< PPCTTIImpl >::getVectorInstrCost
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index)
Definition: BasicTTIImpl.h:1118
llvm::TargetTransformInfo::ShuffleKind
ShuffleKind
The various kinds of shuffle patterns for vector queries.
Definition: TargetTransformInfo.h:868
llvm::PPCTTIImpl::getNumberOfRegisters
unsigned getNumberOfRegisters(unsigned ClassID) const
Definition: PPCTargetTransformInfo.cpp:847
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
llvm::TargetTransformInfo::CastContextHint
CastContextHint
Represents a hint about the context in which a cast is used.
Definition: TargetTransformInfo.h:1074
llvm::User
Definition: User.h:44
llvm::LibFunc
LibFunc
Definition: TargetLibraryInfo.h:34
llvm::ISD::STRICT_FNEARBYINT
@ STRICT_FNEARBYINT
Definition: ISDOpcodes.h:409
llvm::EVT
Extended Value Type.
Definition: ValueTypes.h:35
LsrNoInsnsCost
static cl::opt< bool > LsrNoInsnsCost("ppc-lsr-no-insns-cost", cl::Hidden, cl::init(false), cl::desc("Do not add instruction count to lsr cost model"))
round
static uint64_t round(uint64_t Acc, uint64_t Input)
Definition: xxhash.cpp:57
llvm::isShiftedMask_64
constexpr bool isShiftedMask_64(uint64_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (64 bit ver...
Definition: MathExtras.h:485
llvm::ISD::FROUND
@ FROUND
Definition: ISDOpcodes.h:887
llvm::PPC::DIR_PWR_FUTURE
@ DIR_PWR_FUTURE
Definition: PPCSubtarget.h:64
llvm::PPC::DIR_A2
@ DIR_A2
Definition: PPCSubtarget.h:50
llvm::LoopBase::end
iterator end() const
Definition: LoopInfo.h:155
llvm::LoopBase::blocks
iterator_range< block_iterator > blocks() const
Definition: LoopInfo.h:178
llvm::ISD::LLROUND
@ LLROUND
Definition: ISDOpcodes.h:891
TargetLibraryInfo.h
llvm::Type::isVectorTy
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:226
llvm::PPCSubtarget::isISA3_0
bool isISA3_0() const
Definition: PPCSubtarget.h:337
llvm::InlineAsm::isInput
@ isInput
Definition: InlineAsm.h:94
llvm::MaybeAlign
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:109
llvm::IntegerType
Class to represent integer types.
Definition: DerivedTypes.h:40
llvm::TargetLibraryInfo::getLibFunc
bool getLibFunc(StringRef funcName, LibFunc &F) const
Searches for a particular function name.
Definition: TargetLibraryInfo.h:291
llvm::Instruction
Definition: Instruction.h:45
llvm::Type::getScalarSizeInBits
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition: Type.cpp:191
llvm::TargetTransformInfoImplBase::getIntImmCostInst
InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind, Instruction *Inst=nullptr) const
Definition: TargetTransformInfoImpl.h:380
llvm::PPCSubtarget::isPPC64
bool isPPC64() const
isPPC64 - Return true if we are generating code for 64-bit pointer mode.
Definition: PPCSubtarget.cpp:253
llvm::APInt::getZExtValue
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1467
Options
const char LLVMTargetMachineRef LLVMPassBuilderOptionsRef Options
Definition: PassBuilderBindings.cpp:48
llvm::ISD::FNEARBYINT
@ FNEARBYINT
Definition: ISDOpcodes.h:886
llvm::ISD::FRINT
@ FRINT
Definition: ISDOpcodes.h:885
llvm::LoopBase::getExitingBlocks
void getExitingBlocks(SmallVectorImpl< BlockT * > &ExitingBlocks) const
Return all blocks inside the loop that have successors outside of the loop.
Definition: LoopInfoImpl.h:34
llvm::UndefValue::get
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Definition: Constants.cpp:1804
llvm::PPCTTIImpl::getMemoryOpCost
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
Definition: PPCTargetTransformInfo.cpp:1149
llvm::ConstantInt::get
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:932
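A short sketch of the scalar-versus-splat behavior described above (illustrative):

#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/LLVMContext.h"
using namespace llvm;

int main() {
  LLVMContext Ctx;
  // Scalar integer type: an ordinary i32 42.
  Constant *Scalar = ConstantInt::get(Type::getInt32Ty(Ctx), 42);
  // Vector type: the same call yields a <4 x i32> splat of 42.
  Constant *Splat =
      ConstantInt::get(FixedVectorType::get(Type::getInt32Ty(Ctx), 4), 42);
  return (Scalar != nullptr && Splat != nullptr) ? 0 : 1;
}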
llvm::PPCTTIImpl::getPeelingPreferences
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP)
Definition: PPCTargetTransformInfo.cpp:815
llvm::CodeMetrics::collectEphemeralValues
static void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value * > &EphValues)
Collect a loop's ephemeral values (those used only by an assume or similar intrinsics in the loop).
Definition: CodeMetrics.cpp:71
Info
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
llvm::PPCTTIImpl::getVectorInstrCost
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index)
Definition: PPCTargetTransformInfo.cpp:1078
llvm::ISD::SMULO
@ SMULO
Same for multiplication.
Definition: ISDOpcodes.h:318
llvm::PPC::DIR_PWR9
@ DIR_PWR9
Definition: PPCSubtarget.h:62
Align
uint64_t Align
Definition: ELFObjHandler.cpp:82
llvm::TargetTransformInfo::RGK_FixedWidthVector
@ RGK_FixedWidthVector
Definition: TargetTransformInfo.h:916
llvm::PPCTTIImpl::FPRRC
@ FPRRC
Definition: PPCTargetTransformInfo.h:94
llvm::Align
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
llvm::MCID::Call
@ Call
Definition: MCInstrDesc.h:153
llvm::CastInst::getSrcTy
Type * getSrcTy() const
Return the source type, as a convenience.
Definition: InstrTypes.h:683
llvm::AddressSpace
AddressSpace
Definition: NVPTXBaseInfo.h:21
llvm::PPC::DIR_E5500
@ DIR_E5500
Definition: PPCSubtarget.h:53
llvm::BasicTTIImplBase< PPCTTIImpl >::getCmpSelInstrCost
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
Definition: BasicTTIImpl.h:1070
llvm::BasicTTIImplBase< PPCTTIImpl >::getCastInstrCost
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
Definition: BasicTTIImpl.h:900
llvm::None
const NoneType None
Definition: None.h:23
llvm::LinearPolySize< TypeSize >::getFixed
static TypeSize getFixed(ScalarTy MinVal)
Definition: TypeSize.h:283
llvm::lltok::Kind
Kind
Definition: LLToken.h:18
llvm::Type::getIntegerBitWidth
unsigned getIntegerBitWidth() const
Definition: DerivedTypes.h:97
llvm::IntrinsicCostAttributes
Definition: TargetTransformInfo.h:118
llvm::TargetTransformInfo::PSK_Software
@ PSK_Software
Definition: TargetTransformInfo.h:595
llvm::maxnum
LLVM_READONLY APFloat maxnum(const APFloat &A, const APFloat &B)
Implements IEEE maxNum semantics.
Definition: APFloat.h:1307
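A sketch of the quiet-NaN handling shared by maxnum and the minnum entry below (illustrative values):

#include "llvm/ADT/APFloat.h"
using namespace llvm;

int main() {
  APFloat Num(1.5);
  APFloat NaN = APFloat::getNaN(APFloat::IEEEdouble());
  // IEEE-754 minNum/maxNum return the numeric operand when the other is NaN.
  bool OK = maxnum(Num, NaN).convertToDouble() == 1.5 &&
            minnum(NaN, Num).convertToDouble() == 1.5;
  return OK ? 0 : 1;
}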
llvm::PPCTTIImpl::getRegisterBitWidth
TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const
Definition: PPCTargetTransformInfo.cpp:887
llvm::ARM_PROC::IE
@ IE
Definition: ARMBaseInfo.h:27
llvm::LoopBase::block_begin
block_iterator block_begin() const
Definition: LoopInfo.h:176
llvm::Type::isIntegerTy
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:190
llvm::ScalarEvolution::getSmallConstantTripCount
unsigned getSmallConstantTripCount(const Loop *L)
Returns the exact trip count of the loop if we can compute it, and the result is a small constant.
Definition: ScalarEvolution.cpp:7490
CacheLineSize
static cl::opt< unsigned > CacheLineSize("ppc-loop-prefetch-cache-line", cl::Hidden, cl::init(64), cl::desc("The loop prefetch cache line size"))
llvm::InlineAsm
Definition: InlineAsm.h:31
llvm::ISD::STRICT_FSETCCS
@ STRICT_FSETCCS
Definition: ISDOpcodes.h:463
llvm::cl::opt< bool >
llvm::LoopBase< BasicBlock, Loop >::block_iterator
ArrayRef< BasicBlock * >::const_iterator block_iterator
Definition: LoopInfo.h:175
llvm::TargetLoweringBase::getMinimumJumpTableEntries
virtual unsigned getMinimumJumpTableEntries() const
Return lower limit for number of blocks in a jump table.
Definition: TargetLoweringBase.cpp:2014
llvm::IRBuilderBase::CreateBitCast
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1979
llvm::StoreInst
An instruction for storing to memory.
Definition: Instructions.h:309
llvm::PPCTTIImpl::getIntrinsicInstrCost
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind)
Definition: PPCTargetTransformInfo.cpp:1270
llvm::Constant
This is an important base class in LLVM.
Definition: Constant.h:41
TargetSchedule.h
llvm::MVT::v16i8
@ v16i8
Definition: MachineValueType.h:80
llvm::isInt< 32 >
constexpr bool isInt< 32 >(int64_t x)
Definition: MathExtras.h:373
llvm::StringRef::equals_insensitive
LLVM_NODISCARD bool equals_insensitive(StringRef RHS) const
Check for string equality, ignoring case.
Definition: StringRef.h:193
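A small illustrative sketch contrasting equals_insensitive with the case-sensitive equals:

#include "llvm/ADT/StringRef.h"
using namespace llvm;

int main() {
  StringRef Name("PowerPC");
  // The case-sensitive comparison fails; the insensitive form matches.
  bool OK = !Name.equals("powerpc") && Name.equals_insensitive("powerpc");
  return OK ? 0 : 1;
}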
llvm::PPCSubtarget::POPCNTD_Unavailable
@ POPCNTD_Unavailable
Definition: PPCSubtarget.h:74
llvm::PointerType::getUnqual
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address space zero).
Definition: DerivedTypes.h:651
llvm::isUInt< 16 >
constexpr bool isUInt< 16 >(uint64_t x)
Definition: MathExtras.h:408
llvm::InstCombiner::getAssumptionCache
AssumptionCache & getAssumptionCache() const
Definition: InstCombiner.h:368
llvm::TargetSchedModel
Provide an instruction scheduling machine model to CodeGen passes.
Definition: TargetSchedule.h:30
Index
uint32_t Index
Definition: ELFObjHandler.cpp:83
llvm::MVT::v2i64
@ v2i64
Definition: MachineValueType.h:118
uint64_t
llvm::TLSModel::LocalDynamic
@ LocalDynamic
Definition: CodeGen.h:44
llvm::TargetTransformInfo::LSRCost
Definition: TargetTransformInfo.h:417
llvm::InstCombiner::getDataLayout
const DataLayout & getDataLayout() const
Definition: InstCombiner.h:371
llvm::PPCSubtarget::hasP9Altivec
bool hasP9Altivec() const
Definition: PPCSubtarget.h:285
llvm::PPCTTIImpl::getPopcntSupport
TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth)
Definition: PPCTargetTransformInfo.cpp:59
llvm::IRBuilderBase::getInt32
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Definition: IRBuilder.h:478
llvm::ISD::STRICT_LRINT
@ STRICT_LRINT
Definition: ISDOpcodes.h:419
llvm::TargetLoweringBase::getMaxExpandSizeMemcmp
unsigned getMaxExpandSizeMemcmp(bool OptSize) const
Get maximum # of load operations permitted for memcmp.
Definition: TargetLowering.h:1648
llvm::DataLayout::isLittleEndian
bool isLittleEndian() const
Layout endianness...
Definition: DataLayout.h:244
llvm::LLVMContext
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
llvm::numbers::e
constexpr double e
Definition: MathExtras.h:57
llvm::PPCTTIImpl::getTgtMemIntrinsic
bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info)
Definition: PPCTargetTransformInfo.cpp:1342
llvm::ISD::EXTRACT_VECTOR_ELT
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially variable) element number IDX.
Definition: ISDOpcodes.h:511
llvm::TargetTransformInfo::UnrollingPreferences
Parameters that control the generic loop unrolling transformation.
Definition: TargetTransformInfo.h:431
I
#define I(x, y, z)
Definition: MD5.cpp:58
llvm::ISD::LRINT
@ LRINT
Definition: ISDOpcodes.h:892
llvm::TargetTransformInfo::OperandValueProperties
OperandValueProperties
Additional properties of an operand's values.
Definition: TargetTransformInfo.h:895
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:441
Metrics
Machine Trace Metrics
Definition: MachineTraceMetrics.cpp:53
llvm::Type::isHalfTy
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
Definition: Type.h:141
llvm::MVT::v4f32
@ v4f32
Definition: MachineValueType.h:157
llvm::InlineAsm::ConstraintInfo
Definition: InlineAsm.h:120
llvm::ISD::STRICT_LROUND
@ STRICT_LROUND
Definition: ISDOpcodes.h:417
llvm::MVT::Other
@ Other
Definition: MachineValueType.h:42
llvm::PPCTargetLowering::allowsMisalignedMemoryAccesses
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, bool *Fast=nullptr) const override
Is unaligned memory access allowed for the given type, and is it fast relative to software emulation.
Definition: PPCISelLowering.cpp:16518
llvm::PPCTTIImpl::getInterleavedMemoryOpCost
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false)
Definition: PPCTargetTransformInfo.cpp:1235
SI
StandardInstrumentations SI(Debug, VerifyEach)
llvm::PPCTTIImpl::shouldBuildRelLookupTables
bool shouldBuildRelLookupTables() const
Definition: PPCTargetTransformInfo.cpp:1334
llvm::PPCTTIImpl::enableInterleavedAccessVectorization
bool enableInterleavedAccessVectorization()
Definition: PPCTargetTransformInfo.cpp:843
llvm::IRBuilderBase::CreateExtractElement
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
Definition: IRBuilder.h:2303
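A minimal sketch combining CreateExtractElement with the CreateInsertElement entry indexed above; the module setup is illustrative.

#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Module.h"
using namespace llvm;

int main() {
  LLVMContext Ctx;
  Module M("demo", Ctx);
  Function *F = Function::Create(FunctionType::get(Type::getVoidTy(Ctx), false),
                                 Function::ExternalLinkage, "f", M);
  IRBuilder<> B(BasicBlock::Create(Ctx, "entry", F));
  // Build a fresh <4 x i32> with lane 0 set to 7, then read the lane back.
  auto *VecTy = FixedVectorType::get(B.getInt32Ty(), 4);
  Value *Vec = B.CreateInsertElement(VecTy, B.getInt32(7), B.getInt32(0));
  Value *Lane0 = B.CreateExtractElement(Vec, B.getInt32(0));
  B.CreateRetVoid();
  return Lane0 != nullptr ? 0 : 1;
}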
llvm::TargetTransformInfo::LSRCost::ScaleCost
unsigned ScaleCost
Definition: TargetTransformInfo.h:427
llvm::BasicTTIImplBase< PPCTTIImpl >::getInterleavedMemoryOpCost
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false)
Definition: BasicTTIImpl.h:1219
llvm::TargetTransformInfo::PSK_SlowHardware
@ PSK_SlowHardware
Definition: TargetTransformInfo.h:595
SmallCTRLoopThreshold
static cl::opt< unsigned > SmallCTRLoopThreshold("min-ctr-loop-threshold", cl::init(4), cl::Hidden, cl::desc("Loops with a constant trip count smaller than " "this value will not use the count register."))
llvm::ISD::STRICT_LLRINT
@ STRICT_LLRINT
Definition: ISDOpcodes.h:420
llvm::TargetTransformInfo::OperandValueKind
OperandValueKind
Additional information about an operand's possible values.
Definition: TargetTransformInfo.h:887
llvm::PPCTTIImpl::useColdCCForColdCall
bool useColdCCForColdCall(Function &F)
Definition: PPCTargetTransformInfo.cpp:823
llvm::APInt
Class for arbitrary precision integers.
Definition: APInt.h:75
llvm::TargetTransformInfo::MemCmpExpansionOptions
Returns options for expansion of memcmp. IsZeroCmp is true if this is the expansion of memcmp(p1, p2, s) == 0.
Definition: TargetTransformInfo.h:770
llvm::PPCSubtarget::hasDirectMove
bool hasDirectMove() const
Definition: PPCSubtarget.h:317
llvm::TargetTransformInfo::TCC_Free
@ TCC_Free
Expected to fold away in lowering.
Definition: TargetTransformInfo.h:262
llvm::TargetTransformInfoImplBase::getVPMemoryOpCost
InstructionCost getVPMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, const Instruction *I) const
Definition: TargetTransformInfoImpl.h:576
llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory), i.e. a start pointer and a length.
Definition: APInt.h:32
llvm::LoopInfo
Definition: LoopInfo.h:1086
llvm::EVT::isVector
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:155
llvm::OptimizationRemarkEmitter
The optimization diagnostic interface.
Definition: OptimizationRemarkEmitter.h:33
Mul
BinaryOperator * Mul
Definition: X86PartialReduction.cpp:68
llvm::ISD::STRICT_FTRUNC
@ STRICT_FTRUNC
Definition: ISDOpcodes.h:416
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
Definition: StringRef.h:57
llvm::AssumptionCache
A cache of @llvm.assume calls within a function.
Definition: AssumptionCache.h:42
llvm::PPCTTIImpl::getIntImmCostInst
InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind, Instruction *Inst=nullptr)
Definition: PPCTargetTransformInfo.cpp:235
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:134
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
llvm::ISD::LLRINT
@ LLRINT
Definition: ISDOpcodes.h:893
EnablePPCColdCC
static cl::opt< bool > EnablePPCColdCC("ppc-enable-coldcc", cl::Hidden, cl::init(false), cl::desc("Enable using coldcc calling conv for cold " "internal functions"))
CostKind
static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))
llvm::PPCTTIImpl::canSaveCmp
bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC, TargetLibraryInfo *LibInfo)
Definition: PPCTargetTransformInfo.cpp:1294
llvm::TargetLoweringBase::InstructionOpcodeToISD
int InstructionOpcodeToISD(unsigned Opcode) const
Get the ISD node that corresponds to the Instruction class opcode.
Definition: TargetLoweringBase.cpp:1768
llvm::CastInst
This is the base class for all instructions that perform data casts.
Definition: InstrTypes.h:431
llvm::PPCTTIImpl::getRegisterClassForType
unsigned getRegisterClassForType(bool Vector, Type *Ty=nullptr) const
Definition: PPCTargetTransformInfo.cpp:858
llvm::InstructionCost::isValid
bool isValid() const
Definition: InstructionCost.h:79
llvm::MVT::v4i32
@ v4i32
Definition: MachineValueType.h:103
llvm::BasicTTIImplBase< PPCTTIImpl >::getPeelingPreferences
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP)
Definition: BasicTTIImpl.h:565
llvm::ifs::IFSSymbolType::Func
@ Func
LLVM_FALLTHROUGH
#define LLVM_FALLTHROUGH
LLVM_FALLTHROUGH - Mark fallthrough cases in switch statements.
Definition: Compiler.h:290
llvm::LoadInst
An instruction for reading from memory.
Definition: Instructions.h:180
llvm::PPCTTIImpl::getVPMemoryOpCost
InstructionCost getVPMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
Definition: PPCTargetTransformInfo.cpp:1414
llvm::PPCTTIImpl::enableAggressiveInterleaving
bool enableAggressiveInterleaving(bool LoopHasReductions)
Definition: PPCTargetTransformInfo.cpp:827
llvm::BasicBlock::getContext
LLVMContext & getContext() const
Get the context in which this basic block lives.
Definition: BasicBlock.cpp:36
llvm::ISD::FSQRT
@ FSQRT
Definition: ISDOpcodes.h:872
llvm::MCID::Select
@ Select
Definition: MCInstrDesc.h:162
llvm::isInt< 16 >
constexpr bool isInt< 16 >(int64_t x)
Definition: MathExtras.h:370
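An illustrative sketch of the signed/unsigned range checks (isInt<16> here, isUInt<16> from the entry above):

#include "llvm/Support/MathExtras.h"
#include <cassert>
using namespace llvm;

int main() {
  assert(isInt<16>(32767) && !isInt<16>(32768));   // signed 16-bit range
  assert(isUInt<16>(65535) && !isUInt<16>(65536)); // unsigned 16-bit range
  return 0;
}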
llvm::PPCTTIImpl::getUserCost
InstructionCost getUserCost(const User *U, ArrayRef< const Value * > Operands, TTI::TargetCostKind CostKind)
Definition: PPCTargetTransformInfo.cpp:328
llvm::Twine
Twine - A lightweight data structure for efficiently representing the concatenation of temporary values as strings.
Definition: Twine.h:83
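A small sketch of the deferred-rendering behavior described above; the names are illustrative.

#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/Twine.h"
using namespace llvm;

int main() {
  SmallString<32> Buf;
  // The concatenation builds no intermediate std::strings; it is rendered
  // into Buf only when toStringRef is called.
  StringRef Name = (Twine("lane.") + Twine(3)).toStringRef(Buf);
  return Name == "lane.3" ? 0 : 1;
}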
llvm::minnum
LLVM_READONLY APFloat minnum(const APFloat &A, const APFloat &B)
Implements IEEE minNum semantics.
Definition: APFloat.h:1296
llvm::ISD::STRICT_FMA
@ STRICT_FMA
Definition: ISDOpcodes.h:392
llvm::ISD::FMAXNUM
@ FMAXNUM
Definition: ISDOpcodes.h:903
llvm::Type::getInt64Ty
static IntegerType * getInt64Ty(LLVMContext &C)
Definition: Type.cpp:242
llvm::TargetTransformInfo::LSRCost::NumBaseAdds
unsigned NumBaseAdds
Definition: TargetTransformInfo.h:424
llvm::Type::isFloatTy
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
Definition: Type.h:147
llvm::PPCSubtarget::useSoftFloat
bool useSoftFloat() const
Definition: PPCSubtarget.h:245
llvm::TargetTransformInfoImplBase::getIntImmCostIntrin
InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind) const
Definition: TargetTransformInfoImpl.h:387
llvm::InstructionCost::getMax
static InstructionCost getMax()
Definition: InstructionCost.h:71
CostTable.h
llvm::EVT::getScalarType
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition: ValueTypes.h:296
llvm::AMDGPU::SendMsg::Op
Op
Definition: SIDefines.h:325
llvm::Align::value
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
llvm::ISD::FCEIL
@ FCEIL
Definition: ISDOpcodes.h:883
llvm::Type::isIntOrIntVectorTy
bool isIntOrIntVectorTy() const
Return true if this is an integer type or a vector of integer types.
Definition: Type.h:196
llvm::MVT::v8i16
@ v8i16
Definition: MachineValueType.h:92
llvm::TypeSize
Definition: TypeSize.h:416
llvm::ISD::STRICT_FCEIL
@ STRICT_FCEIL
Definition: ISDOpcodes.h:412
llvm::BasicTTIImplBase< PPCTTIImpl >::getMaskedMemoryOpCost
InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *DataTy, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind)
Definition: BasicTTIImpl.h:1203
llvm::TargetLoweringBase::isOperationExpand
bool isOperationExpand(unsigned Op, EVT VT) const
Return true if the specified operation is illegal on this target or unlikely to be made legal with custom lowering.
Definition: TargetLowering.h:1243
llvm::LoopBase::getHeader
BlockT * getHeader() const
Definition: LoopInfo.h:104
llvm::LinearPolySize< TypeSize >::getScalable
static TypeSize getScalable(ScalarTy MinVal)
Definition: TypeSize.h:286
llvm::TargetLibraryInfo
Provides information about what library functions are available for the current target.
Definition: TargetLibraryInfo.h:221
llvm::Type::isDoubleTy
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
Definition: Type.h:150
llvm::PPCTTIImpl::hasActiveVectorLength
bool hasActiveVectorLength(unsigned Opcode, Type *DataType, Align Alignment) const
Definition: PPCTargetTransformInfo.cpp:1386
llvm::log2
static double log2(double V)
Definition: AMDGPULibCalls.cpp:801
llvm::Type::isPPC_FP128Ty
bool isPPC_FP128Ty() const
Return true if this is powerpc long double.
Definition: Type.h:159
llvm::TargetTransformInfo::LSRCost::ImmCost
unsigned ImmCost
Definition: TargetTransformInfo.h:425
llvm::TLSModel::Model
Model
Definition: CodeGen.h:42
llvm::MCID::Add
@ Add
Definition: MCInstrDesc.h:183
llvm::PPCTargetMachine
Common code between 32-bit and 64-bit PowerPC targets.
Definition: PPCTargetMachine.h:25
llvm::InstCombiner
The core instruction combiner logic.
Definition: InstCombiner.h:45
llvm::SPII::Load
@ Load
Definition: SparcInstrInfo.h:32
PPCTargetTransformInfo.h
llvm::TargetTransformInfo::RGK_ScalableVector
@ RGK_ScalableVector
Definition: TargetTransformInfo.h:916
llvm::IntrinsicInst
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:45
llvm::HardwareLoopInfo
Attributes of a target dependent hardware loop.
Definition: TargetTransformInfo.h:95
llvm::InstructionCost::getInvalid
static InstructionCost getInvalid(CostType Val=0)
Definition: InstructionCost.h:73
llvm::ISD::STRICT_FFLOOR
@ STRICT_FFLOOR
Definition: ISDOpcodes.h:413
llvm::PPCTTIImpl::getPrefetchDistance
unsigned getPrefetchDistance() const override
Definition: PPCTargetTransformInfo.cpp:917
llvm::PPCTTIImpl::enableMemCmpExpansion
TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const
Definition: PPCTargetTransformInfo.cpp:836
memAddrUsesCTR
static bool memAddrUsesCTR(const Value *MemAddr, const PPCTargetMachine &TM, SmallPtrSetImpl< const Value * > &Visited)
Definition: PPCTargetTransformInfo.cpp:348
llvm::SPII::Store
@ Store
Definition: SparcInstrInfo.h:33
llvm::PPCSubtarget::hasAltivec
bool hasAltivec() const
Definition: PPCSubtarget.h:275
llvm::TargetTransformInfo::RegisterKind
RegisterKind
Definition: TargetTransformInfo.h:916
isMMAType
static bool isMMAType(Type *Ty)
Definition: PPCTargetTransformInfo.cpp:323
llvm::LoopBase< BasicBlock, Loop >::iterator
std::vector< Loop * >::const_iterator iterator
Definition: LoopInfo.h:151
llvm::PPCTTIImpl::areTypesABICompatible
bool areTypesABICompatible(const Function *Caller, const Function *Callee, const ArrayRef< Type * > &Types) const
Definition: PPCTargetTransformInfo.cpp:1275
llvm::CallBase::getArgOperand
Value * getArgOperand(unsigned i) const
Definition: InstrTypes.h:1343
llvm::BasicTTIImplBase< PPCTTIImpl >::getIntrinsicInstrCost
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind)
Get intrinsic cost based on arguments.
Definition: BasicTTIImpl.h:1359
llvm::PPCTTIImpl::getCacheLineSize
unsigned getCacheLineSize() const override
Definition: PPCTargetTransformInfo.cpp:900
TargetTransformInfo.h
llvm::PPCTTIImpl::getCmpSelInstrCost
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
Definition: PPCTargetTransformInfo.cpp:1060
llvm::BasicTTIImplBase< PPCTTIImpl >::getMemoryOpCost
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
Definition: BasicTTIImpl.h:1159
llvm::ISD::STRICT_LLROUND
@ STRICT_LLROUND
Definition: ISDOpcodes.h:418
llvm::HardwareLoopInfo::canAnalyze
bool canAnalyze(LoopInfo &LI)
Definition: TargetTransformInfo.cpp:47
llvm::PPC::DIR_E500mc
@ DIR_E500mc
Definition: PPCSubtarget.h:52
llvm::MemIntrinsicInfo
Information about a load/store intrinsic defined by the target.
Definition: TargetTransformInfo.h:70
llvm::ISD::LROUND
@ LROUND
Definition: ISDOpcodes.h:890
llvm::SmallPtrSetImpl< const Value * >
TM
const char LLVMTargetMachineRef TM
Definition: PassBuilderBindings.cpp:47
llvm::CallInst
This class represents a function call, abstracting a target machine's calling convention.
Definition: Instructions.h:1478
llvm::PPCSubtarget::getCPUDirective
unsigned getCPUDirective() const
getCPUDirective - Returns the -m directive specified for the cpu.
Definition: PPCSubtarget.h:205
llvm::TargetLibraryInfo::hasOptimizedCodeGen
bool hasOptimizedCodeGen(LibFunc F) const
Tests if the function is both available and a candidate for optimized code generation.
Definition: TargetLibraryInfo.h:340
llvm::TargetTransformInfo::TCC_Basic
@ TCC_Basic
The cost of a typical 'add' instruction.
Definition: TargetTransformInfo.h:263
llvm::SwitchInst
Multiway switch.
Definition: Instructions.h:3236
llvm::AMDGPU::HSAMD::Kernel::Key::Args
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
Definition: AMDGPUMetadata.h:389
BasicTTIImpl.h
llvm::cl::desc
Definition: CommandLine.h:412
llvm::TargetLoweringBase::getValueType
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
Definition: TargetLowering.h:1439
llvm::BranchInst
Conditional or Unconditional Branch instruction.
Definition: Instructions.h:3092
llvm::ISD::FTRUNC
@ FTRUNC
Definition: ISDOpcodes.h:884
llvm::PPCTTIImpl::getCFInstrCost
InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
Definition: PPCTargetTransformInfo.cpp:1031
llvm::PPCSubtarget::POPCNTD_Slow
@ POPCNTD_Slow
Definition: PPCSubtarget.h:75
llvm::ISD::INSERT_VECTOR_ELT
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition: ISDOpcodes.h:500
llvm::HardwareLoopInfo::CountType
IntegerType * CountType
Definition: TargetTransformInfo.h:102
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
llvm::TargetTransformInfo::TCK_RecipThroughput
@ TCK_RecipThroughput
Reciprocal throughput.
Definition: TargetTransformInfo.h:212
llvm::Directive
Definition: DirectiveEmitter.h:100
llvm::Type::isFP128Ty
bool isFP128Ty() const
Return true if this is 'fp128'.
Definition: Type.h:156
Debug.h
llvm::PPCTTIImpl::VRRC
@ VRRC
Definition: PPCTargetTransformInfo.h:94
llvm::Type::getPrimitiveSizeInBits
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition: Type.cpp:166
llvm::SmallPtrSetImpl::insert
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:364
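An illustrative sketch of the insert-once contract described above:

#include "llvm/ADT/SmallPtrSet.h"
using namespace llvm;

int main() {
  int X = 0;
  SmallPtrSet<int *, 4> Visited;
  // The bool half of the returned pair reports whether insertion happened.
  bool First = Visited.insert(&X).second; // true: newly inserted
  bool Again = Visited.insert(&X).second; // false: already present
  return (First && !Again) ? 0 : 1;
}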
llvm::PPCSubtarget::hasP10Vector
bool hasP10Vector() const
Definition: PPCSubtarget.h:286
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:38
llvm::TargetTransformInfoImplBase::areTypesABICompatible
bool areTypesABICompatible(const Function *Caller, const Function *Callee, const ArrayRef< Type * > &Types) const
Definition: TargetTransformInfoImpl.h:720