//===-- PPCTargetTransformInfo.cpp - PPC specific TTI ---------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "PPCTargetTransformInfo.h"
#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/CodeGen/CostTable.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetSchedule.h"
#include "llvm/IR/IntrinsicsPowerPC.h"
#include "llvm/IR/ProfDataUtils.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Transforms/InstCombine/InstCombiner.h"
#include "llvm/Transforms/Utils/Local.h"
#include <optional>

using namespace llvm;

#define DEBUG_TYPE "ppctti"

static cl::opt<bool> DisablePPCConstHoist("disable-ppc-constant-hoisting",
cl::desc("disable constant hoisting on PPC"), cl::init(false), cl::Hidden);

static cl::opt<bool>
EnablePPCColdCC("ppc-enable-coldcc", cl::Hidden, cl::init(false),
                cl::desc("Enable using coldcc calling conv for cold "
                         "internal functions"));

static cl::opt<bool>
LsrNoInsnsCost("ppc-lsr-no-insns-cost", cl::Hidden, cl::init(false),
               cl::desc("Do not add instruction count to lsr cost model"));

// The latency of mtctr is only justified if there are more than 4
// comparisons that will be removed as a result.
static cl::opt<unsigned>
SmallCTRLoopThreshold("min-ctr-loop-threshold", cl::init(4), cl::Hidden,
                      cl::desc("Loops with a constant trip count smaller than "
                               "this value will not use the count register."));

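// For example, with the default threshold of 4, a loop with a known trip
// count of 3 (and a sufficiently small body) is left as an ordinary
// compare-and-branch loop; see isHardwareLoopProfitable below.
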
//===----------------------------------------------------------------------===//
//
// PPC cost model.
//
//===----------------------------------------------------------------------===//

TargetTransformInfo::PopcntSupportKind
PPCTTIImpl::getPopcntSupport(unsigned TyWidth) {
  assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
  if (ST->hasPOPCNTD() != PPCSubtarget::POPCNTD_Unavailable && TyWidth <= 64)
    return ST->hasPOPCNTD() == PPCSubtarget::POPCNTD_Slow ?
             TTI::PSK_SlowHardware : TTI::PSK_FastHardware;
  return TTI::PSK_Software;
}

std::optional<Instruction *>
PPCTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
  Intrinsic::ID IID = II.getIntrinsicID();
  switch (IID) {
  default:
    break;
  case Intrinsic::ppc_altivec_lvx:
  case Intrinsic::ppc_altivec_lvxl:
    // Turn PPC lvx -> load if the pointer is known aligned.
    if (getOrEnforceKnownAlignment(
            II.getArgOperand(0), Align(16), IC.getDataLayout(), &II,
            &IC.getAssumptionCache(), &IC.getDominatorTree()) >= 16) {
      Value *Ptr = IC.Builder.CreateBitCast(
          II.getArgOperand(0), PointerType::getUnqual(II.getType()));
      return new LoadInst(II.getType(), Ptr, "", false, Align(16));
    }
    break;
  case Intrinsic::ppc_vsx_lxvw4x:
  case Intrinsic::ppc_vsx_lxvd2x: {
    // Turn PPC VSX loads into normal loads.
    Value *Ptr = IC.Builder.CreateBitCast(
        II.getArgOperand(0), PointerType::getUnqual(II.getType()));
    return new LoadInst(II.getType(), Ptr, Twine(""), false, Align(1));
  }
  case Intrinsic::ppc_altivec_stvx:
  case Intrinsic::ppc_altivec_stvxl:
    // Turn stvx -> store if the pointer is known aligned.
    if (getOrEnforceKnownAlignment(
            II.getArgOperand(1), Align(16), IC.getDataLayout(), &II,
            &IC.getAssumptionCache(), &IC.getDominatorTree()) >= 16) {
      Type *OpPtrTy = PointerType::getUnqual(II.getArgOperand(0)->getType());
      Value *Ptr = IC.Builder.CreateBitCast(II.getArgOperand(1), OpPtrTy);
      return new StoreInst(II.getArgOperand(0), Ptr, false, Align(16));
    }
    break;
  case Intrinsic::ppc_vsx_stxvw4x:
  case Intrinsic::ppc_vsx_stxvd2x: {
    // Turn PPC VSX stores into normal stores.
    Type *OpPtrTy = PointerType::getUnqual(II.getArgOperand(0)->getType());
    Value *Ptr = IC.Builder.CreateBitCast(II.getArgOperand(1), OpPtrTy);
    return new StoreInst(II.getArgOperand(0), Ptr, false, Align(1));
  }
  case Intrinsic::ppc_altivec_vperm:
    // Turn vperm(V1,V2,mask) -> shuffle(V1,V2,mask) if mask is a constant.
    // Note that ppc_altivec_vperm has a big-endian bias, so when creating
    // a vector shuffle for little endian, we must undo the transformation
    // performed on vec_perm in altivec.h. That is, we must complement
    // the permutation mask with respect to 31 and reverse the order of
    // V1 and V2.
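    // For example, on little endian a constant mask element of 0 (selecting
    // byte 0 of V1 in the big-endian numbering) becomes 31 - 0 = 31, and the
    // code below swaps the roles of the two source vectors accordingly.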
    if (Constant *Mask = dyn_cast<Constant>(II.getArgOperand(2))) {
      assert(cast<FixedVectorType>(Mask->getType())->getNumElements() == 16 &&
             "Bad type for intrinsic!");

      // Check that all of the elements are integer constants or undefs.
      bool AllEltsOk = true;
      for (unsigned i = 0; i != 16; ++i) {
        Constant *Elt = Mask->getAggregateElement(i);
        if (!Elt || !(isa<ConstantInt>(Elt) || isa<UndefValue>(Elt))) {
          AllEltsOk = false;
          break;
        }
      }

      if (AllEltsOk) {
        // Cast the input vectors to byte vectors.
        Value *Op0 =
            IC.Builder.CreateBitCast(II.getArgOperand(0), Mask->getType());
        Value *Op1 =
            IC.Builder.CreateBitCast(II.getArgOperand(1), Mask->getType());
        Value *Result = UndefValue::get(Op0->getType());

        // Only extract each element once.
        Value *ExtractedElts[32];
        memset(ExtractedElts, 0, sizeof(ExtractedElts));

        for (unsigned i = 0; i != 16; ++i) {
          if (isa<UndefValue>(Mask->getAggregateElement(i)))
            continue;
          unsigned Idx =
              cast<ConstantInt>(Mask->getAggregateElement(i))->getZExtValue();
          Idx &= 31; // Match the hardware behavior.
          if (DL.isLittleEndian())
            Idx = 31 - Idx;

          if (!ExtractedElts[Idx]) {
            Value *Op0ToUse = (DL.isLittleEndian()) ? Op1 : Op0;
            Value *Op1ToUse = (DL.isLittleEndian()) ? Op0 : Op1;
            ExtractedElts[Idx] = IC.Builder.CreateExtractElement(
                Idx < 16 ? Op0ToUse : Op1ToUse, IC.Builder.getInt32(Idx & 15));
          }

          // Insert this value into the result vector.
          Result = IC.Builder.CreateInsertElement(Result, ExtractedElts[Idx],
                                                  IC.Builder.getInt32(i));
        }
        return CastInst::Create(Instruction::BitCast, Result, II.getType());
      }
    }
    break;
  }
  return std::nullopt;
}
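
// For example (an illustrative IR sketch): when %p can be proven 16-byte
// aligned,
//   %v = call <4 x i32> @llvm.ppc.altivec.lvx(ptr %p)
// is rewritten by instCombineIntrinsic above into a plain aligned load:
//   %v = load <4 x i32>, ptr %p, align 16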

InstructionCost PPCTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty,
                                          TTI::TargetCostKind CostKind) {
  if (DisablePPCConstHoist)
    return BaseT::getIntImmCost(Imm, Ty, CostKind);

  assert(Ty->isIntegerTy());

  unsigned BitSize = Ty->getPrimitiveSizeInBits();
  if (BitSize == 0)
    return ~0U;

  if (Imm == 0)
    return TTI::TCC_Free;

  if (Imm.getBitWidth() <= 64) {
    if (isInt<16>(Imm.getSExtValue()))
      return TTI::TCC_Basic;

    if (isInt<32>(Imm.getSExtValue())) {
      // A constant that can be materialized using lis.
      if ((Imm.getZExtValue() & 0xFFFF) == 0)
        return TTI::TCC_Basic;

      return 2 * TTI::TCC_Basic;
    }
  }

  return 4 * TTI::TCC_Basic;
}
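
// For example: 42 fits in a signed 16-bit immediate (a single li), so it is
// TCC_Basic; 0x12340000 can be built with a single lis, also TCC_Basic; a
// value like 0x12345678 needs a lis/ori pair, hence 2 * TCC_Basic.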

InstructionCost PPCTTIImpl::getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
                                                const APInt &Imm, Type *Ty,
                                                TTI::TargetCostKind CostKind) {
  if (DisablePPCConstHoist)
    return BaseT::getIntImmCostIntrin(IID, Idx, Imm, Ty, CostKind);

  assert(Ty->isIntegerTy());

  unsigned BitSize = Ty->getPrimitiveSizeInBits();
  if (BitSize == 0)
    return ~0U;

  switch (IID) {
  default:
    return TTI::TCC_Free;
  case Intrinsic::sadd_with_overflow:
  case Intrinsic::uadd_with_overflow:
  case Intrinsic::ssub_with_overflow:
  case Intrinsic::usub_with_overflow:
    if ((Idx == 1) && Imm.getBitWidth() <= 64 && isInt<16>(Imm.getSExtValue()))
      return TTI::TCC_Free;
    break;
  case Intrinsic::experimental_stackmap:
    if ((Idx < 2) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
      return TTI::TCC_Free;
    break;
  case Intrinsic::experimental_patchpoint_void:
  case Intrinsic::experimental_patchpoint_i64:
    if ((Idx < 4) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
      return TTI::TCC_Free;
    break;
  }
  return PPCTTIImpl::getIntImmCost(Imm, Ty, CostKind);
}

InstructionCost PPCTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx,
                                              const APInt &Imm, Type *Ty,
                                              TTI::TargetCostKind CostKind,
                                              Instruction *Inst) {
  if (DisablePPCConstHoist)
    return BaseT::getIntImmCostInst(Opcode, Idx, Imm, Ty, CostKind, Inst);

  assert(Ty->isIntegerTy());

  unsigned BitSize = Ty->getPrimitiveSizeInBits();
  if (BitSize == 0)
    return ~0U;

  unsigned ImmIdx = ~0U;
  bool ShiftedFree = false, RunFree = false, UnsignedFree = false,
       ZeroFree = false;
  switch (Opcode) {
  default:
    return TTI::TCC_Free;
  case Instruction::GetElementPtr:
    // Always hoist the base address of a GetElementPtr. This prevents the
    // creation of new constants for every base constant that gets constant
    // folded with the offset.
    if (Idx == 0)
      return 2 * TTI::TCC_Basic;
    return TTI::TCC_Free;
  case Instruction::And:
    RunFree = true; // (for the rotate-and-mask instructions)
    [[fallthrough]];
  case Instruction::Add:
  case Instruction::Or:
  case Instruction::Xor:
    ShiftedFree = true;
    [[fallthrough]];
  case Instruction::Sub:
  case Instruction::Mul:
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
    ImmIdx = 1;
    break;
  case Instruction::ICmp:
    UnsignedFree = true;
    ImmIdx = 1;
    // Zero comparisons can use record-form instructions.
    [[fallthrough]];
  case Instruction::Select:
    ZeroFree = true;
    break;
  case Instruction::PHI:
  case Instruction::Call:
  case Instruction::Ret:
  case Instruction::Load:
  case Instruction::Store:
    break;
  }

  if (ZeroFree && Imm == 0)
    return TTI::TCC_Free;

  if (Idx == ImmIdx && Imm.getBitWidth() <= 64) {
    if (isInt<16>(Imm.getSExtValue()))
      return TTI::TCC_Free;

    if (RunFree) {
      if (Imm.getBitWidth() <= 32 &&
          (isShiftedMask_32(Imm.getZExtValue()) ||
           isShiftedMask_32(~Imm.getZExtValue())))
        return TTI::TCC_Free;

      if (ST->isPPC64() &&
          (isShiftedMask_64(Imm.getZExtValue()) ||
           isShiftedMask_64(~Imm.getZExtValue())))
        return TTI::TCC_Free;
    }

    if (UnsignedFree && isUInt<16>(Imm.getZExtValue()))
      return TTI::TCC_Free;

    if (ShiftedFree && (Imm.getZExtValue() & 0xFFFF) == 0)
      return TTI::TCC_Free;
  }

  return PPCTTIImpl::getIntImmCost(Imm, Ty, CostKind);
}
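
// For example, the immediate in `and %x, 0x00FFFF00` is free here:
// 0x00FFFF00 is a contiguous shifted mask, so it can be encoded directly in
// a rotate-and-mask instruction such as rlwinm instead of being materialized.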

// Check if the current Type is an MMA vector type. The two valid MMA types
// are v256i1 and v512i1.
static bool isMMAType(Type *Ty) {
  return Ty->isVectorTy() && (Ty->getScalarSizeInBits() == 1) &&
         (Ty->getPrimitiveSizeInBits() > 128);
}

InstructionCost PPCTTIImpl::getInstructionCost(const User *U,
                                               ArrayRef<const Value *> Operands,
                                               TTI::TargetCostKind CostKind) {
  // We already implement getCastInstrCost and getMemoryOpCost where we perform
  // the vector adjustment there.
  if (isa<CastInst>(U) || isa<LoadInst>(U) || isa<StoreInst>(U))
    return BaseT::getInstructionCost(U, Operands, CostKind);

  if (U->getType()->isVectorTy()) {
    // Instructions that need to be split should cost more.
    std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(U->getType());
    return LT.first * BaseT::getInstructionCost(U, Operands, CostKind);
  }

  return BaseT::getInstructionCost(U, Operands, CostKind);
}

// Determining the address of a TLS variable results in a function call in
// certain TLS models.
static bool memAddrUsesCTR(const Value *MemAddr, const PPCTargetMachine &TM,
                           SmallPtrSetImpl<const Value *> &Visited) {
  // No need to traverse again if we already checked this operand.
  if (!Visited.insert(MemAddr).second)
    return false;
  const auto *GV = dyn_cast<GlobalValue>(MemAddr);
  if (!GV) {
    // Recurse to check for constants that refer to TLS global variables.
    if (const auto *CV = dyn_cast<Constant>(MemAddr))
      for (const auto &CO : CV->operands())
        if (memAddrUsesCTR(CO, TM, Visited))
          return true;
    return false;
  }

  if (!GV->isThreadLocal())
    return false;
  TLSModel::Model Model = TM.getTLSModel(GV);
  return Model == TLSModel::GeneralDynamic || Model == TLSModel::LocalDynamic;
}
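
// In the general-dynamic and local-dynamic models, the address is computed
// by a call to __tls_get_addr, and a call inside the loop rules out using
// the count register for it.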

bool PPCTTIImpl::mightUseCTR(BasicBlock *BB, TargetLibraryInfo *LibInfo,
                             SmallPtrSetImpl<const Value *> &Visited) {
  const PPCTargetMachine &TM = ST->getTargetMachine();

  // Loop through the inline asm constraints and look for something that
  // clobbers ctr.
  auto asmClobbersCTR = [](InlineAsm *IA) {
    InlineAsm::ConstraintInfoVector CIV = IA->ParseConstraints();
    for (const InlineAsm::ConstraintInfo &C : CIV) {
      if (C.Type != InlineAsm::isInput)
        for (const auto &Code : C.Codes)
          if (StringRef(Code).equals_insensitive("{ctr}"))
            return true;
    }
    return false;
  };
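
  // For example, inline asm written as
  //   asm volatile("mtctr %0" : : "r"(N) : "ctr");
  // carries a {ctr} clobber and so makes this block ineligible for a CTR
  // loop.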

  auto isLargeIntegerTy = [](bool Is32Bit, Type *Ty) {
    if (IntegerType *ITy = dyn_cast<IntegerType>(Ty))
      return ITy->getBitWidth() > (Is32Bit ? 32U : 64U);

    return false;
  };

  auto supportedHalfPrecisionOp = [](Instruction *Inst) {
    switch (Inst->getOpcode()) {
    default:
      return false;
    case Instruction::FPTrunc:
    case Instruction::FPExt:
    case Instruction::Load:
    case Instruction::Store:
    case Instruction::FPToUI:
    case Instruction::UIToFP:
    case Instruction::FPToSI:
    case Instruction::SIToFP:
      return true;
    }
  };

  for (BasicBlock::iterator J = BB->begin(), JE = BB->end();
       J != JE; ++J) {
    // There are no direct operations on half precision so assume that
    // anything with that type requires a call except for a few select
    // operations with Power9.
    if (Instruction *CurrInst = dyn_cast<Instruction>(J)) {
      for (const auto &Op : CurrInst->operands()) {
        if (Op->getType()->getScalarType()->isHalfTy() ||
            CurrInst->getType()->getScalarType()->isHalfTy())
          return !(ST->isISA3_0() && supportedHalfPrecisionOp(CurrInst));
      }
    }
    if (CallInst *CI = dyn_cast<CallInst>(J)) {
      // Inline ASM is okay, unless it clobbers the ctr register.
      if (InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledOperand())) {
        if (asmClobbersCTR(IA))
          return true;
        continue;
      }

      if (Function *F = CI->getCalledFunction()) {
        // Most intrinsics don't become function calls, but some might.
        // sin, cos, exp and log are always calls.
        unsigned Opcode = 0;
        if (F->getIntrinsicID() != Intrinsic::not_intrinsic) {
          switch (F->getIntrinsicID()) {
          default: continue;
          // If we have a call to loop_decrement or set_loop_iterations,
          // we're definitely using CTR.
          case Intrinsic::set_loop_iterations:
          case Intrinsic::loop_decrement:
            return true;

          // Binary operations on 128-bit values will use CTR.
          case Intrinsic::experimental_constrained_fadd:
          case Intrinsic::experimental_constrained_fsub:
          case Intrinsic::experimental_constrained_fmul:
          case Intrinsic::experimental_constrained_fdiv:
          case Intrinsic::experimental_constrained_frem:
            if (F->getType()->getScalarType()->isFP128Ty() ||
                F->getType()->getScalarType()->isPPC_FP128Ty())
              return true;
            break;

          case Intrinsic::experimental_constrained_fptosi:
          case Intrinsic::experimental_constrained_fptoui:
          case Intrinsic::experimental_constrained_sitofp:
          case Intrinsic::experimental_constrained_uitofp: {
            Type *SrcType = CI->getArgOperand(0)->getType()->getScalarType();
            Type *DstType = CI->getType()->getScalarType();
            if (SrcType->isPPC_FP128Ty() || DstType->isPPC_FP128Ty() ||
                isLargeIntegerTy(!TM.isPPC64(), SrcType) ||
                isLargeIntegerTy(!TM.isPPC64(), DstType))
              return true;
            break;
          }

          // Exclude eh_sjlj_setjmp; we don't need to exclude eh_sjlj_longjmp
          // because, although it does clobber the counter register, the
          // control can't then return to inside the loop unless there is also
          // an eh_sjlj_setjmp.
          case Intrinsic::eh_sjlj_setjmp:

          case Intrinsic::memcpy:
          case Intrinsic::memmove:
          case Intrinsic::memset:
          case Intrinsic::powi:
          case Intrinsic::log:
          case Intrinsic::log2:
          case Intrinsic::log10:
          case Intrinsic::exp:
          case Intrinsic::exp2:
          case Intrinsic::pow:
          case Intrinsic::sin:
          case Intrinsic::cos:
          case Intrinsic::experimental_constrained_powi:
          case Intrinsic::experimental_constrained_log:
          case Intrinsic::experimental_constrained_log2:
          case Intrinsic::experimental_constrained_log10:
          case Intrinsic::experimental_constrained_exp:
          case Intrinsic::experimental_constrained_exp2:
          case Intrinsic::experimental_constrained_pow:
          case Intrinsic::experimental_constrained_sin:
          case Intrinsic::experimental_constrained_cos:
            return true;
          case Intrinsic::copysign:
            if (CI->getArgOperand(0)->getType()->getScalarType()->
                isPPC_FP128Ty())
              return true;
            else
              continue; // ISD::FCOPYSIGN is never a library call.
          case Intrinsic::fmuladd:
          case Intrinsic::fma:       Opcode = ISD::FMA;        break;
          case Intrinsic::sqrt:      Opcode = ISD::FSQRT;      break;
          case Intrinsic::floor:     Opcode = ISD::FFLOOR;     break;
          case Intrinsic::ceil:      Opcode = ISD::FCEIL;      break;
          case Intrinsic::trunc:     Opcode = ISD::FTRUNC;     break;
          case Intrinsic::rint:      Opcode = ISD::FRINT;      break;
          case Intrinsic::lrint:     Opcode = ISD::LRINT;      break;
          case Intrinsic::llrint:    Opcode = ISD::LLRINT;     break;
          case Intrinsic::nearbyint: Opcode = ISD::FNEARBYINT; break;
          case Intrinsic::round:     Opcode = ISD::FROUND;     break;
          case Intrinsic::lround:    Opcode = ISD::LROUND;     break;
          case Intrinsic::llround:   Opcode = ISD::LLROUND;    break;
          case Intrinsic::minnum:    Opcode = ISD::FMINNUM;    break;
          case Intrinsic::maxnum:    Opcode = ISD::FMAXNUM;    break;
          case Intrinsic::experimental_constrained_fcmp:
            Opcode = ISD::STRICT_FSETCC;
            break;
          case Intrinsic::experimental_constrained_fcmps:
            Opcode = ISD::STRICT_FSETCCS;
            break;
          case Intrinsic::experimental_constrained_fma:
            Opcode = ISD::STRICT_FMA;
            break;
          case Intrinsic::experimental_constrained_sqrt:
            Opcode = ISD::STRICT_FSQRT;
            break;
          case Intrinsic::experimental_constrained_floor:
            Opcode = ISD::STRICT_FFLOOR;
            break;
          case Intrinsic::experimental_constrained_ceil:
            Opcode = ISD::STRICT_FCEIL;
            break;
          case Intrinsic::experimental_constrained_trunc:
            Opcode = ISD::STRICT_FTRUNC;
            break;
          case Intrinsic::experimental_constrained_rint:
            Opcode = ISD::STRICT_FRINT;
            break;
          case Intrinsic::experimental_constrained_lrint:
            Opcode = ISD::STRICT_LRINT;
            break;
          case Intrinsic::experimental_constrained_llrint:
            Opcode = ISD::STRICT_LLRINT;
            break;
          case Intrinsic::experimental_constrained_nearbyint:
            Opcode = ISD::STRICT_FNEARBYINT;
            break;
          case Intrinsic::experimental_constrained_round:
            Opcode = ISD::STRICT_FROUND;
            break;
          case Intrinsic::experimental_constrained_lround:
            Opcode = ISD::STRICT_LROUND;
            break;
          case Intrinsic::experimental_constrained_llround:
            Opcode = ISD::STRICT_LLROUND;
            break;
          case Intrinsic::experimental_constrained_minnum:
            Opcode = ISD::STRICT_FMINNUM;
            break;
          case Intrinsic::experimental_constrained_maxnum:
            Opcode = ISD::STRICT_FMAXNUM;
            break;
          case Intrinsic::umul_with_overflow: Opcode = ISD::UMULO; break;
          case Intrinsic::smul_with_overflow: Opcode = ISD::SMULO; break;
          }
        }

        // PowerPC does not use [US]DIVREM or other library calls for
        // operations on regular types which are not otherwise library calls
        // (i.e. soft float or atomics). If adapting for targets that do,
        // additional care is required here.

        LibFunc Func;
        if (!F->hasLocalLinkage() && F->hasName() && LibInfo &&
            LibInfo->getLibFunc(F->getName(), Func) &&
            LibInfo->hasOptimizedCodeGen(Func)) {
          // Non-read-only functions are never treated as intrinsics.
          if (!CI->onlyReadsMemory())
            return true;

          // Conversion happens only for FP calls.
          if (!CI->getArgOperand(0)->getType()->isFloatingPointTy())
            return true;

          switch (Func) {
          default: return true;
          case LibFunc_copysign:
          case LibFunc_copysignf:
            continue; // ISD::FCOPYSIGN is never a library call.
          case LibFunc_copysignl:
            return true;
          case LibFunc_fabs:
          case LibFunc_fabsf:
          case LibFunc_fabsl:
            continue; // ISD::FABS is never a library call.
          case LibFunc_sqrt:
          case LibFunc_sqrtf:
          case LibFunc_sqrtl:
            Opcode = ISD::FSQRT; break;
          case LibFunc_floor:
          case LibFunc_floorf:
          case LibFunc_floorl:
            Opcode = ISD::FFLOOR; break;
          case LibFunc_nearbyint:
          case LibFunc_nearbyintf:
          case LibFunc_nearbyintl:
            Opcode = ISD::FNEARBYINT; break;
          case LibFunc_ceil:
          case LibFunc_ceilf:
          case LibFunc_ceill:
            Opcode = ISD::FCEIL; break;
          case LibFunc_rint:
          case LibFunc_rintf:
          case LibFunc_rintl:
            Opcode = ISD::FRINT; break;
          case LibFunc_round:
          case LibFunc_roundf:
          case LibFunc_roundl:
            Opcode = ISD::FROUND; break;
          case LibFunc_trunc:
          case LibFunc_truncf:
          case LibFunc_truncl:
            Opcode = ISD::FTRUNC; break;
          case LibFunc_fmin:
          case LibFunc_fminf:
          case LibFunc_fminl:
            Opcode = ISD::FMINNUM; break;
          case LibFunc_fmax:
          case LibFunc_fmaxf:
          case LibFunc_fmaxl:
            Opcode = ISD::FMAXNUM; break;
          }
        }

        if (Opcode) {
          EVT EVTy =
              TLI->getValueType(DL, CI->getArgOperand(0)->getType(), true);

          if (EVTy == MVT::Other)
            return true;

          if (TLI->isOperationLegalOrCustom(Opcode, EVTy))
            continue;
          else if (EVTy.isVector() &&
                   TLI->isOperationLegalOrCustom(Opcode, EVTy.getScalarType()))
            continue;

          return true;
        }
      }

      return true;
    } else if ((J->getType()->getScalarType()->isFP128Ty() ||
                J->getType()->getScalarType()->isPPC_FP128Ty())) {
      // Most operations on f128 or ppc_f128 values become calls.
      return true;
    } else if (isa<FCmpInst>(J) &&
               J->getOperand(0)->getType()->getScalarType()->isFP128Ty()) {
      return true;
    } else if ((isa<FPTruncInst>(J) || isa<FPExtInst>(J)) &&
               (cast<CastInst>(J)->getSrcTy()->getScalarType()->isFP128Ty() ||
                cast<CastInst>(J)->getDestTy()->getScalarType()->isFP128Ty())) {
      return true;
    } else if (isa<UIToFPInst>(J) || isa<SIToFPInst>(J) ||
               isa<FPToUIInst>(J) || isa<FPToSIInst>(J)) {
      CastInst *CI = cast<CastInst>(J);
      if (CI->getSrcTy()->getScalarType()->isPPC_FP128Ty() ||
          CI->getDestTy()->getScalarType()->isPPC_FP128Ty() ||
          isLargeIntegerTy(!TM.isPPC64(), CI->getSrcTy()->getScalarType()) ||
          isLargeIntegerTy(!TM.isPPC64(), CI->getDestTy()->getScalarType()))
        return true;
    } else if (isLargeIntegerTy(!TM.isPPC64(),
                                J->getType()->getScalarType()) &&
               (J->getOpcode() == Instruction::UDiv ||
                J->getOpcode() == Instruction::SDiv ||
                J->getOpcode() == Instruction::URem ||
                J->getOpcode() == Instruction::SRem)) {
      return true;
    } else if (!TM.isPPC64() &&
               isLargeIntegerTy(false, J->getType()->getScalarType()) &&
               (J->getOpcode() == Instruction::Shl ||
                J->getOpcode() == Instruction::AShr ||
                J->getOpcode() == Instruction::LShr)) {
      // Only on PPC32, for 128-bit integers (specifically not 64-bit
      // integers), these might be runtime calls.
      return true;
    } else if (isa<IndirectBrInst>(J) || isa<InvokeInst>(J)) {
      // On PowerPC, indirect jumps use the counter register.
      return true;
    } else if (SwitchInst *SI = dyn_cast<SwitchInst>(J)) {
      if (SI->getNumCases() + 1 >= (unsigned)TLI->getMinimumJumpTableEntries())
        return true;
    }

    // FREM is always a call.
    if (J->getOpcode() == Instruction::FRem)
      return true;

    if (ST->useSoftFloat()) {
      switch(J->getOpcode()) {
      case Instruction::FAdd:
      case Instruction::FSub:
      case Instruction::FMul:
      case Instruction::FDiv:
      case Instruction::FPTrunc:
      case Instruction::FPExt:
      case Instruction::FPToUI:
      case Instruction::FPToSI:
      case Instruction::UIToFP:
      case Instruction::SIToFP:
      case Instruction::FCmp:
        return true;
      }
    }

    for (Value *Operand : J->operands())
      if (memAddrUsesCTR(Operand, TM, Visited))
        return true;
  }

  return false;
}

bool PPCTTIImpl::isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
                                          AssumptionCache &AC,
                                          TargetLibraryInfo *LibInfo,
                                          HardwareLoopInfo &HWLoopInfo) {
  const PPCTargetMachine &TM = ST->getTargetMachine();
  TargetSchedModel SchedModel;
  SchedModel.init(ST);

  // Do not convert small short loops to CTR loop.
  unsigned ConstTripCount = SE.getSmallConstantTripCount(L);
  if (ConstTripCount && ConstTripCount < SmallCTRLoopThreshold) {
    SmallPtrSet<const Value *, 32> EphValues;
    CodeMetrics::collectEphemeralValues(L, &AC, EphValues);
    CodeMetrics Metrics;
    for (BasicBlock *BB : L->blocks())
      Metrics.analyzeBasicBlock(BB, *this, EphValues);
    // 6 is an approximate latency for the mtctr instruction.
    if (Metrics.NumInsts <= (6 * SchedModel.getIssueWidth()))
      return false;
  }

  // We don't want to spill/restore the counter register, and so we don't
  // want to use the counter register if the loop contains calls.
  SmallPtrSet<const Value *, 4> Visited;
  for (Loop::block_iterator I = L->block_begin(), IE = L->block_end();
       I != IE; ++I)
    if (mightUseCTR(*I, LibInfo, Visited))
      return false;

  SmallVector<BasicBlock*, 4> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);

  // If there is an exit edge known to be frequently taken,
  // we should not transform this loop.
  for (auto &BB : ExitingBlocks) {
    Instruction *TI = BB->getTerminator();
    if (!TI) continue;

    if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
      uint64_t TrueWeight = 0, FalseWeight = 0;
      if (!BI->isConditional() ||
          !extractBranchWeights(*BI, TrueWeight, FalseWeight))
        continue;

      // If the exit path is more frequent than the loop path,
      // we return here without further analysis for this loop.
      bool TrueIsExit = !L->contains(BI->getSuccessor(0));
      if (( TrueIsExit && FalseWeight < TrueWeight) ||
          (!TrueIsExit && FalseWeight > TrueWeight))
        return false;
    }
  }

  // If an exit block has a PHI that accesses a TLS variable as one of the
  // incoming values from the loop, we cannot produce a CTR loop because the
  // address for that value will be computed in the loop.
  SmallVector<BasicBlock *, 4> ExitBlocks;
  L->getExitBlocks(ExitBlocks);
  for (auto &BB : ExitBlocks) {
    for (auto &PHI : BB->phis()) {
      for (int Idx = 0, EndIdx = PHI.getNumIncomingValues(); Idx < EndIdx;
           Idx++) {
        const BasicBlock *IncomingBB = PHI.getIncomingBlock(Idx);
        const Value *IncomingValue = PHI.getIncomingValue(Idx);
        if (L->contains(IncomingBB) &&
            memAddrUsesCTR(IncomingValue, TM, Visited))
          return false;
      }
    }
  }

  LLVMContext &C = L->getHeader()->getContext();
  HWLoopInfo.CountType = TM.isPPC64() ?
    Type::getInt64Ty(C) : Type::getInt32Ty(C);
  HWLoopInfo.LoopDecrement = ConstantInt::get(HWLoopInfo.CountType, 1);
  return true;
}
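
// When this returns true, the generic HardwareLoops pass rewrites the loop
// to use the llvm.set.loop.iterations and llvm.loop.decrement intrinsics
// (recognized above in mightUseCTR), which the PPC backend lowers to
// mtctr/bdnz.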

void PPCTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                                         TTI::UnrollingPreferences &UP,
                                         OptimizationRemarkEmitter *ORE) {
  if (ST->getCPUDirective() == PPC::DIR_A2) {
    // The A2 is in-order with a deep pipeline, and concatenation unrolling
    // helps expose latency-hiding opportunities to the instruction scheduler.
    UP.Partial = UP.Runtime = true;

    // We unroll a lot on the A2 (hundreds of instructions), and the benefits
    // often outweigh the cost of a division to compute the trip count.
    UP.AllowExpensiveTripCount = true;
  }

  BaseT::getUnrollingPreferences(L, SE, UP, ORE);
}

void PPCTTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE,
                                       TTI::PeelingPreferences &PP) {
  BaseT::getPeelingPreferences(L, SE, PP);
}
// This function returns true to allow using coldcc calling convention.
// Returning true results in coldcc being used for functions which are cold at
// all call sites when the callers of the functions are not calling any other
// non-coldcc functions.
bool PPCTTIImpl::useColdCCForColdCall(Function &F) {
  return EnablePPCColdCC;
}

bool PPCTTIImpl::enableAggressiveInterleaving(bool LoopHasReductions) {
  // On the A2, always unroll aggressively.
  if (ST->getCPUDirective() == PPC::DIR_A2)
    return true;

  return LoopHasReductions;
}

TTI::MemCmpExpansionOptions
PPCTTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
  TTI::MemCmpExpansionOptions Options;
  Options.LoadSizes = {8, 4, 2, 1};
  Options.MaxNumLoads = TLI->getMaxExpandSizeMemcmp(OptSize);
  return Options;
}
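
// With these options, a call such as memcmp(a, b, 15) can be expanded inline
// into an 8-, 4-, 2- and 1-byte load-and-compare sequence (sizes taken
// greedily from LoadSizes), subject to the MaxNumLoads limit.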

bool PPCTTIImpl::enableInterleavedAccessVectorization() {
  return true;
}

unsigned PPCTTIImpl::getNumberOfRegisters(unsigned ClassID) const {
  assert(ClassID == GPRRC || ClassID == FPRRC ||
         ClassID == VRRC || ClassID == VSXRC);
  if (ST->hasVSX()) {
    assert(ClassID == GPRRC || ClassID == VSXRC || ClassID == VRRC);
    return ClassID == VSXRC ? 64 : 32;
  }
  assert(ClassID == GPRRC || ClassID == FPRRC || ClassID == VRRC);
  return 32;
}

unsigned PPCTTIImpl::getRegisterClassForType(bool Vector, Type *Ty) const {
  if (Vector)
    return ST->hasVSX() ? VSXRC : VRRC;
  else if (Ty && (Ty->getScalarType()->isFloatTy() ||
                  Ty->getScalarType()->isDoubleTy()))
    return ST->hasVSX() ? VSXRC : FPRRC;
  else if (Ty && (Ty->getScalarType()->isFP128Ty() ||
                  Ty->getScalarType()->isPPC_FP128Ty()))
    return VRRC;
  else if (Ty && Ty->getScalarType()->isHalfTy())
    return VSXRC;
  else
    return GPRRC;
}

const char* PPCTTIImpl::getRegisterClassName(unsigned ClassID) const {

  switch (ClassID) {
  default:
    llvm_unreachable("unknown register class");
    return "PPC::unknown register class";
  case GPRRC: return "PPC::GPRRC";
  case FPRRC: return "PPC::FPRRC";
  case VRRC:  return "PPC::VRRC";
  case VSXRC: return "PPC::VSXRC";
  }
}

TypeSize
PPCTTIImpl::getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const {
  switch (K) {
  case TargetTransformInfo::RGK_Scalar:
    return TypeSize::getFixed(ST->isPPC64() ? 64 : 32);
  case TargetTransformInfo::RGK_FixedWidthVector:
    return TypeSize::getFixed(ST->hasAltivec() ? 128 : 0);
  case TargetTransformInfo::RGK_ScalableVector:
    return TypeSize::getScalable(0);
  }

  llvm_unreachable("Unsupported register kind");
}

unsigned PPCTTIImpl::getCacheLineSize() const {
  // Starting with P7 we have a cache line size of 128.
  unsigned Directive = ST->getCPUDirective();
  // Assume that Future CPU has the same cache line size as the others.
  if (Directive == PPC::DIR_PWR7 || Directive == PPC::DIR_PWR8 ||
      Directive == PPC::DIR_PWR9 || Directive == PPC::DIR_PWR10 ||
      Directive == PPC::DIR_PWR_FUTURE)
    return 128;

  // On other processors return a default of 64 bytes.
  return 64;
}

unsigned PPCTTIImpl::getPrefetchDistance() const {
  return 300;
}

unsigned PPCTTIImpl::getMaxInterleaveFactor(unsigned VF) {
  unsigned Directive = ST->getCPUDirective();
  // The 440 has no SIMD support, but floating-point instructions
  // have a 5-cycle latency, so unroll by 5x for latency hiding.
  if (Directive == PPC::DIR_440)
    return 5;

  // The A2 has no SIMD support, but floating-point instructions
  // have a 6-cycle latency, so unroll by 6x for latency hiding.
  if (Directive == PPC::DIR_A2)
    return 6;

  // FIXME: For lack of any better information, do no harm...
  if (Directive == PPC::DIR_E500mc || Directive == PPC::DIR_E5500)
    return 1;

  // For P7 and P8, floating-point instructions have a 6-cycle latency and
  // there are two execution units, so unroll by 12x for latency hiding.
  // FIXME: the same for P9 as previous gen until POWER9 scheduling is ready
  // FIXME: the same for P10 as previous gen until POWER10 scheduling is ready
  // Assume that future is the same as the others.
  if (Directive == PPC::DIR_PWR7 || Directive == PPC::DIR_PWR8 ||
      Directive == PPC::DIR_PWR9 || Directive == PPC::DIR_PWR10 ||
      Directive == PPC::DIR_PWR_FUTURE)
    return 12;

  // For most things, modern systems have two execution units (and
  // out-of-order execution).
  return 2;
}

// Returns a cost adjustment factor to adjust the cost of vector instructions
// on targets where there is overlap between the vector and scalar units,
// thereby reducing the overall throughput of vector code wrt. scalar code.
// An invalid instruction cost is returned if the type is an MMA vector type.
InstructionCost PPCTTIImpl::vectorCostAdjustmentFactor(unsigned Opcode,
                                                       Type *Ty1, Type *Ty2) {
  // If the vector type is of an MMA type (v256i1, v512i1), an invalid
  // instruction cost is returned. This is to signify to other cost computing
  // functions to return the maximum instruction cost in order to prevent any
  // opportunities for the optimizer to produce MMA types within the IR.
  if (isMMAType(Ty1))
    return InstructionCost::getInvalid();

  if (!ST->vectorsUseTwoUnits() || !Ty1->isVectorTy())
    return InstructionCost(1);

  std::pair<InstructionCost, MVT> LT1 = getTypeLegalizationCost(Ty1);
  // If type legalization involves splitting the vector, we don't want to
  // double the cost at every step - only the last step.
  if (LT1.first != 1 || !LT1.second.isVector())
    return InstructionCost(1);

  int ISD = TLI->InstructionOpcodeToISD(Opcode);
  if (TLI->isOperationExpand(ISD, LT1.second))
    return InstructionCost(1);

  if (Ty2) {
    std::pair<InstructionCost, MVT> LT2 = getTypeLegalizationCost(Ty2);
    if (LT2.first != 1 || !LT2.second.isVector())
      return InstructionCost(1);
  }

  return InstructionCost(2);
}
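
// In other words: on subtargets where vector and scalar operations share
// issue slots (vectorsUseTwoUnits()), a legal, non-expanded vector operation
// is charged a factor of 2; everything else keeps the neutral factor of 1.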

InstructionCost PPCTTIImpl::getArithmeticInstrCost(
    unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
    TTI::OperandValueInfo Op1Info, TTI::OperandValueInfo Op2Info,
    ArrayRef<const Value *> Args,
    const Instruction *CxtI) {
  assert(TLI->InstructionOpcodeToISD(Opcode) && "Invalid opcode");

  InstructionCost CostFactor = vectorCostAdjustmentFactor(Opcode, Ty, nullptr);
  if (!CostFactor.isValid())
    return InstructionCost::getMax();

  // TODO: Handle more cost kinds.
  if (CostKind != TTI::TCK_RecipThroughput)
    return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info,
                                         Op2Info, Args, CxtI);

  // Fallback to the default implementation.
  InstructionCost Cost = BaseT::getArithmeticInstrCost(
      Opcode, Ty, CostKind, Op1Info, Op2Info);
  return Cost * CostFactor;
}

InstructionCost PPCTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp,
                                           ArrayRef<int> Mask,
                                           TTI::TargetCostKind CostKind,
                                           int Index, Type *SubTp,
                                           ArrayRef<const Value *> Args) {

  InstructionCost CostFactor =
      vectorCostAdjustmentFactor(Instruction::ShuffleVector, Tp, nullptr);
  if (!CostFactor.isValid())
    return InstructionCost::getMax();

  // Legalize the type.
  std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Tp);

  // PPC, for both Altivec and VSX, supports cheap arbitrary permutations
  // (at least in the sense that there need only be one non-loop-invariant
  // instruction). We need one such shuffle instruction for each actual
  // register (this is not true for arbitrary shuffles, but is true for the
  // structured types of shuffles covered by TTI::ShuffleKind).
  return LT.first * CostFactor;
}

InstructionCost PPCTTIImpl::getCFInstrCost(unsigned Opcode,
                                           TTI::TargetCostKind CostKind,
                                           const Instruction *I) {
  if (CostKind != TTI::TCK_RecipThroughput)
    return Opcode == Instruction::PHI ? 0 : 1;
  // Branches are assumed to be predicted.
  return 0;
}

InstructionCost PPCTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
                                             Type *Src,
                                             TTI::CastContextHint CCH,
                                             TTI::TargetCostKind CostKind,
                                             const Instruction *I) {
  assert(TLI->InstructionOpcodeToISD(Opcode) && "Invalid opcode");

  InstructionCost CostFactor = vectorCostAdjustmentFactor(Opcode, Dst, Src);
  if (!CostFactor.isValid())
    return InstructionCost::getMax();

  InstructionCost Cost =
      BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I);
  Cost *= CostFactor;
  // TODO: Allow non-throughput costs that aren't binary.
  if (CostKind != TTI::TCK_RecipThroughput)
    return Cost == 0 ? 0 : 1;
  return Cost;
}

InstructionCost PPCTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
                                               Type *CondTy,
                                               CmpInst::Predicate VecPred,
                                               TTI::TargetCostKind CostKind,
                                               const Instruction *I) {
  InstructionCost CostFactor =
      vectorCostAdjustmentFactor(Opcode, ValTy, nullptr);
  if (!CostFactor.isValid())
    return InstructionCost::getMax();

  InstructionCost Cost =
      BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I);
  // TODO: Handle other cost kinds.
  if (CostKind != TTI::TCK_RecipThroughput)
    return Cost;
  return Cost * CostFactor;
}

InstructionCost PPCTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
                                               unsigned Index) {
  assert(Val->isVectorTy() && "This must be a vector type");

  int ISD = TLI->InstructionOpcodeToISD(Opcode);
  assert(ISD && "Invalid opcode");

  InstructionCost CostFactor = vectorCostAdjustmentFactor(Opcode, Val, nullptr);
  if (!CostFactor.isValid())
    return InstructionCost::getMax();

  InstructionCost Cost = BaseT::getVectorInstrCost(Opcode, Val, Index);
  Cost *= CostFactor;

  if (ST->hasVSX() && Val->getScalarType()->isDoubleTy()) {
    // Double-precision scalars are already located in index #0 (or #1 if LE).
    if (ISD == ISD::EXTRACT_VECTOR_ELT &&
        Index == (ST->isLittleEndian() ? 1 : 0))
      return 0;

    return Cost;

  } else if (Val->getScalarType()->isIntegerTy() && Index != -1U) {
    if (ST->hasP9Altivec()) {
      if (ISD == ISD::INSERT_VECTOR_ELT)
        // A move-to VSR and a permute/insert. Assume vector operation cost
        // for both (cost will be 2x on P9).
        return 2 * CostFactor;

      // It's an extract. Maybe we can do a cheap move-from VSR.
      unsigned EltSize = Val->getScalarSizeInBits();
      if (EltSize == 64) {
        unsigned MfvsrdIndex = ST->isLittleEndian() ? 1 : 0;
        if (Index == MfvsrdIndex)
          return 1;
      } else if (EltSize == 32) {
        unsigned MfvsrwzIndex = ST->isLittleEndian() ? 2 : 1;
        if (Index == MfvsrwzIndex)
          return 1;
      }

      // We need a vector extract (or mfvsrld). Assume vector operation cost.
      // The cost of the load constant for a vector extract is disregarded
      // (invariant, easily schedulable).
      return CostFactor;

    } else if (ST->hasDirectMove())
      // Assume permute has standard cost.
      // Assume move-to/move-from VSR have 2x standard cost.
      return 3;
  }

  // Estimated cost of a load-hit-store delay. This was obtained
  // experimentally as a minimum needed to prevent unprofitable
  // vectorization for the paq8p benchmark. It may need to be
  // raised further if other unprofitable cases remain.
  unsigned LHSPenalty = 2;
  if (ISD == ISD::INSERT_VECTOR_ELT)
    LHSPenalty += 7;

  // Vector element insert/extract with Altivec is very expensive,
  // because they require store and reload with the attendant
  // processor stall for load-hit-store. Until VSX is available,
  // these need to be estimated as very costly.
  if (ISD == ISD::EXTRACT_VECTOR_ELT ||
      ISD == ISD::INSERT_VECTOR_ELT)
    return LHSPenalty + Cost;

  return Cost;
}
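
// For example, on a little-endian VSX target, extracting element 1 of a
// <2 x double> costs 0 (the value already sits in the scalar position),
// while extracting element 0 pays the normal adjusted cost.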

InstructionCost PPCTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
                                            MaybeAlign Alignment,
                                            unsigned AddressSpace,
                                            TTI::TargetCostKind CostKind,
                                            TTI::OperandValueInfo OpInfo,
                                            const Instruction *I) {

  InstructionCost CostFactor = vectorCostAdjustmentFactor(Opcode, Src, nullptr);
  if (!CostFactor.isValid())
    return InstructionCost::getMax();

  if (TLI->getValueType(DL, Src, true) == MVT::Other)
    return BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
                                  CostKind);
  // Legalize the type.
  std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Src);
  assert((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
         "Invalid Opcode");

  InstructionCost Cost =
      BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, CostKind);
  // TODO: Handle other cost kinds.
  if (CostKind != TTI::TCK_RecipThroughput)
    return Cost;

  Cost *= CostFactor;

  bool IsAltivecType = ST->hasAltivec() &&
                       (LT.second == MVT::v16i8 || LT.second == MVT::v8i16 ||
                        LT.second == MVT::v4i32 || LT.second == MVT::v4f32);
  bool IsVSXType = ST->hasVSX() &&
                   (LT.second == MVT::v2f64 || LT.second == MVT::v2i64);

  // VSX has 32b/64b load instructions. Legalization can handle loading of
  // 32b/64b to VSR correctly and cheaply. But BaseT::getMemoryOpCost and
  // PPCTargetLowering can't compute the cost appropriately. So here we
  // explicitly check this case. (Note: the size below is in bits.)
  unsigned MemBits = Src->getPrimitiveSizeInBits();
  if (Opcode == Instruction::Load && ST->hasVSX() && IsAltivecType &&
      (MemBits == 64 || (ST->hasP8Vector() && MemBits == 32)))
    return 1;

  // Aligned loads and stores are easy.
  unsigned SrcBytes = LT.second.getStoreSize();
  if (!SrcBytes || !Alignment || *Alignment >= SrcBytes)
    return Cost;

  // If we can use the permutation-based load sequence, then this is also
  // relatively cheap (not counting loop-invariant instructions): one load plus
  // one permute (the last load in a series has extra cost, but we're
  // neglecting that here). Note that on the P7, we could do unaligned loads
  // for Altivec types using the VSX instructions, but that's more expensive
  // than using the permutation-based load sequence. On the P8, that's no
  // longer true.
  if (Opcode == Instruction::Load && (!ST->hasP8Vector() && IsAltivecType) &&
      *Alignment >= LT.second.getScalarType().getStoreSize())
    return Cost + LT.first; // Add the cost of the permutations.

  // For VSX, we can do unaligned loads and stores on Altivec/VSX types. On the
  // P7, unaligned vector loads are more expensive than the permutation-based
  // load sequence, so that might be used instead, but regardless, the net cost
  // is about the same (not counting loop-invariant instructions).
  if (IsVSXType || (ST->hasVSX() && IsAltivecType))
    return Cost;

  // Newer PPC supports unaligned memory access.
  if (TLI->allowsMisalignedMemoryAccesses(LT.second, 0))
    return Cost;

  // PPC in general does not support unaligned loads and stores. They'll need
  // to be decomposed based on the alignment factor.

  // Add the cost of each scalar load or store.
  assert(Alignment);
  Cost += LT.first * ((SrcBytes / Alignment->value()) - 1);

  // For a vector type, there is also scalarization overhead (only for
  // stores, loads are expanded using the vector-load + permutation sequence,
  // which is much less expensive).
  if (Src->isVectorTy() && Opcode == Instruction::Store)
    for (int i = 0, e = cast<FixedVectorType>(Src)->getNumElements(); i < e;
         ++i)
      Cost += getVectorInstrCost(Instruction::ExtractElement, Src, i);

  return Cost;
}
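
// For example, a 16-byte vector store with only 4-byte known alignment on a
// pre-VSX target is charged LT.first * (16/4 - 1) extra units above, plus
// one extractelement per lane for the scalarized store sequence.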

InstructionCost PPCTTIImpl::getInterleavedMemoryOpCost(
    unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
    Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
    bool UseMaskForCond, bool UseMaskForGaps) {
  InstructionCost CostFactor =
      vectorCostAdjustmentFactor(Opcode, VecTy, nullptr);
  if (!CostFactor.isValid())
    return InstructionCost::getMax();

  if (UseMaskForCond || UseMaskForGaps)
    return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
                                             Alignment, AddressSpace, CostKind,
                                             UseMaskForCond, UseMaskForGaps);

  assert(isa<VectorType>(VecTy) &&
         "Expect a vector type for interleaved memory op");

  // Legalize the type.
  std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(VecTy);

  // Firstly, the cost of load/store operation.
  InstructionCost Cost = getMemoryOpCost(Opcode, VecTy, MaybeAlign(Alignment),
                                         AddressSpace, CostKind);

  // PPC, for both Altivec and VSX, supports cheap arbitrary permutations
  // (at least in the sense that there need only be one non-loop-invariant
  // instruction). For each result vector, we need one shuffle per incoming
  // vector (except that the first shuffle can take two incoming vectors
  // because it does not need to take itself).
  Cost += Factor*(LT.first-1);

  return Cost;
}

InstructionCost
PPCTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
                                  TTI::TargetCostKind CostKind) {
  return BaseT::getIntrinsicInstrCost(ICA, CostKind);
}

bool PPCTTIImpl::areTypesABICompatible(const Function *Caller,
                                       const Function *Callee,
                                       const ArrayRef<Type *> &Types) const {

  // We need to ensure that argument promotion does not
  // attempt to promote pointers to MMA types (__vector_pair
  // and __vector_quad) since these types explicitly cannot be
  // passed as arguments. Both of these types are larger than
  // the 128-bit Altivec vectors and have a scalar size of 1 bit.
  if (!BaseT::areTypesABICompatible(Caller, Callee, Types))
    return false;

  return llvm::none_of(Types, [](Type *Ty) {
    if (Ty->isSized())
      return Ty->isIntOrIntVectorTy(1) && Ty->getPrimitiveSizeInBits() > 128;
    return false;
  });
}

bool PPCTTIImpl::canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE,
                            LoopInfo *LI, DominatorTree *DT,
                            AssumptionCache *AC, TargetLibraryInfo *LibInfo) {
  // Process nested loops first.
  for (Loop *I : *L)
    if (canSaveCmp(I, BI, SE, LI, DT, AC, LibInfo))
      return false; // Stop search.

  HardwareLoopInfo HWLoopInfo(L);

  if (!HWLoopInfo.canAnalyze(*LI))
    return false;

  if (!isHardwareLoopProfitable(L, *SE, *AC, LibInfo, HWLoopInfo))
    return false;

  if (!HWLoopInfo.isHardwareLoopCandidate(*SE, *LI, *DT))
    return false;

  *BI = HWLoopInfo.ExitBranch;
  return true;
}

bool PPCTTIImpl::isLSRCostLess(const TargetTransformInfo::LSRCost &C1,
                               const TargetTransformInfo::LSRCost &C2) {
  // PowerPC default behaviour here is "instruction number 1st priority".
  // If LsrNoInsnsCost is set, call default implementation.
  if (!LsrNoInsnsCost)
    return std::tie(C1.Insns, C1.NumRegs, C1.AddRecCost, C1.NumIVMuls,
                    C1.NumBaseAdds, C1.ScaleCost, C1.ImmCost, C1.SetupCost) <
           std::tie(C2.Insns, C2.NumRegs, C2.AddRecCost, C2.NumIVMuls,
                    C2.NumBaseAdds, C2.ScaleCost, C2.ImmCost, C2.SetupCost);
  else
    return TargetTransformInfoImplBase::isLSRCostLess(C1, C2);
}

bool PPCTTIImpl::isNumRegsMajorCostOfLSR() {
  return false;
}

bool PPCTTIImpl::shouldBuildRelLookupTables() const {
  const PPCTargetMachine &TM = ST->getTargetMachine();
  // XCOFF hasn't implemented lowerRelativeReference, disable non-ELF for now.
  if (!TM.isELFv2ABI())
    return false;
  return BaseT::shouldBuildRelLookupTables();
}

bool PPCTTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst,
                                    MemIntrinsicInfo &Info) {
  switch (Inst->getIntrinsicID()) {
  case Intrinsic::ppc_altivec_lvx:
  case Intrinsic::ppc_altivec_lvxl:
  case Intrinsic::ppc_altivec_lvebx:
  case Intrinsic::ppc_altivec_lvehx:
  case Intrinsic::ppc_altivec_lvewx:
  case Intrinsic::ppc_vsx_lxvd2x:
  case Intrinsic::ppc_vsx_lxvw4x:
  case Intrinsic::ppc_vsx_lxvd2x_be:
  case Intrinsic::ppc_vsx_lxvw4x_be:
  case Intrinsic::ppc_vsx_lxvl:
  case Intrinsic::ppc_vsx_lxvll:
  case Intrinsic::ppc_vsx_lxvp: {
    Info.PtrVal = Inst->getArgOperand(0);
    Info.ReadMem = true;
    Info.WriteMem = false;
    return true;
  }
  case Intrinsic::ppc_altivec_stvx:
  case Intrinsic::ppc_altivec_stvxl:
  case Intrinsic::ppc_altivec_stvebx:
  case Intrinsic::ppc_altivec_stvehx:
  case Intrinsic::ppc_altivec_stvewx:
  case Intrinsic::ppc_vsx_stxvd2x:
  case Intrinsic::ppc_vsx_stxvw4x:
  case Intrinsic::ppc_vsx_stxvd2x_be:
  case Intrinsic::ppc_vsx_stxvw4x_be:
  case Intrinsic::ppc_vsx_stxvl:
  case Intrinsic::ppc_vsx_stxvll:
  case Intrinsic::ppc_vsx_stxvp: {
    Info.PtrVal = Inst->getArgOperand(1);
    Info.ReadMem = false;
    Info.WriteMem = true;
    return true;
  }
  case Intrinsic::ppc_stbcx:
  case Intrinsic::ppc_sthcx:
  case Intrinsic::ppc_stdcx:
  case Intrinsic::ppc_stwcx: {
    Info.PtrVal = Inst->getArgOperand(0);
    Info.ReadMem = false;
    Info.WriteMem = true;
    return true;
  }
  default:
    break;
  }

  return false;
}

bool PPCTTIImpl::hasActiveVectorLength(unsigned Opcode, Type *DataType,
                                       Align Alignment) const {
  // Only load and store instructions can have a variable vector length on
  // Power.
  if (Opcode != Instruction::Load && Opcode != Instruction::Store)
    return false;
  // Loads/stores with length instructions use bits 0-7 of the GPR operand and
  // therefore cannot be used in 32-bit mode.
  if ((!ST->hasP9Vector() && !ST->hasP10Vector()) || !ST->isPPC64())
    return false;
  if (isa<FixedVectorType>(DataType)) {
    unsigned VecWidth = DataType->getPrimitiveSizeInBits();
    return VecWidth == 128;
  }
  Type *ScalarTy = DataType->getScalarType();

  if (ScalarTy->isPointerTy())
    return true;

  if (ScalarTy->isFloatTy() || ScalarTy->isDoubleTy())
    return true;

  if (!ScalarTy->isIntegerTy())
    return false;

  unsigned IntWidth = ScalarTy->getIntegerBitWidth();
  return IntWidth == 8 || IntWidth == 16 || IntWidth == 32 || IntWidth == 64;
}

InstructionCost PPCTTIImpl::getVPMemoryOpCost(unsigned Opcode, Type *Src,
                                              Align Alignment,
                                              unsigned AddressSpace,
                                              TTI::TargetCostKind CostKind,
                                              const Instruction *I) {
  InstructionCost Cost = BaseT::getVPMemoryOpCost(Opcode, Src, Alignment,
                                                  AddressSpace, CostKind, I);
  if (TLI->getValueType(DL, Src, true) == MVT::Other)
    return Cost;
  // TODO: Handle other cost kinds.
  if (CostKind != TTI::TCK_RecipThroughput)
    return Cost;

  assert((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
         "Invalid Opcode");

  auto *SrcVTy = dyn_cast<FixedVectorType>(Src);
  assert(SrcVTy && "Expected a vector type for VP memory operations");

  if (hasActiveVectorLength(Opcode, Src, Alignment)) {
    std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(SrcVTy);

    InstructionCost CostFactor =
        vectorCostAdjustmentFactor(Opcode, Src, nullptr);
    if (!CostFactor.isValid())
      return InstructionCost::getMax();

    InstructionCost Cost = LT.first * CostFactor;
    assert(Cost.isValid() && "Expected valid cost");

    // On P9 but not on P10, if the op is misaligned then it will cause a
    // pipeline flush. Otherwise the VSX masked memops cost the same as unmasked
    // ones.
    const Align DesiredAlignment(16);
    if (Alignment >= DesiredAlignment || ST->getCPUDirective() != PPC::DIR_PWR9)
      return Cost;

    // Since alignment may be underestimated, we try to compute the probability
    // that the actual address is aligned to the desired boundary. For example
    // an 8-byte aligned load is assumed to be actually 16-byte aligned half the
    // time, while a 4-byte aligned load has a 25% chance of being 16-byte
    // aligned.
    float AlignmentProb = ((float)Alignment.value()) / DesiredAlignment.value();
    float MisalignmentProb = 1.0 - AlignmentProb;
    return (MisalignmentProb * P9PipelineFlushEstimate) +
           (AlignmentProb * *Cost.getValue());
  }

  // Usually we should not get to this point, but the following is an attempt to
  // model the cost of legalization. Currently we can only lower intrinsics with
  // evl but no mask, on Power 9/10. Otherwise, we must scalarize.
  return getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace, CostKind);
}

bool PPCTTIImpl::supportsTailCallFor(const CallBase *CB) const {
  // Subtargets using PC-relative addressing are supported.
  if (ST->isUsingPCRelativeCalls())
    return true;

  const Function *Callee = CB->getCalledFunction();
  // Indirect calls and variadic argument functions are not supported.
  if (!Callee || Callee->isVarArg())
    return false;

  const Function *Caller = CB->getCaller();
  // Support the call if we can share the TOC base.
  return ST->getTargetMachine().shouldAssumeDSOLocal(*Caller->getParent(),
                                                     Callee);
}
i
i
Definition: README.txt:29
llvm::InstructionCost
Definition: InstructionCost.h:30
llvm::SPII::Load
@ Load
Definition: SparcInstrInfo.h:32
llvm::TargetTransformInfo::PSK_FastHardware
@ PSK_FastHardware
Definition: TargetTransformInfo.h:585
llvm::PPCTTIImpl::VSXRC
@ VSXRC
Definition: PPCTargetTransformInfo.h:96
llvm::ISD::STRICT_FSETCC
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition: ISDOpcodes.h:475
llvm::Type::isSized
bool isSized(SmallPtrSetImpl< Type * > *Visited=nullptr) const
Return true if it makes sense to take the size of this type.
Definition: Type.h:283
llvm::BasicTTIImplBase< PPCTTIImpl >::DL
const DataLayout & DL
Definition: TargetTransformInfoImpl.h:38
llvm::TargetTransformInfo::UnrollingPreferences::Runtime
bool Runtime
Allow runtime unrolling (unrolling of loops to expand the size of the loop body even when the number ...
Definition: TargetTransformInfo.h:474
llvm::TargetTransformInfo::TargetCostKind
TargetCostKind
The kind of cost model.
Definition: TargetTransformInfo.h:218
llvm::PPCSubtarget::hasPOPCNTD
POPCNTDKind hasPOPCNTD() const
Definition: PPCSubtarget.h:361
llvm::PPCTTIImpl::getUnrollingPreferences
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE)
Definition: PPCTargetTransformInfo.cpp:798
llvm::TargetTransformInfo::LSRCost::NumRegs
unsigned NumRegs
Definition: TargetTransformInfo.h:407
llvm::BasicTTIImplBase< PPCTTIImpl >::shouldBuildRelLookupTables
bool shouldBuildRelLookupTables() const
Definition: BasicTTIImpl.h:474
llvm::ISD::UMULO
@ UMULO
Definition: ISDOpcodes.h:332
llvm::ISD::STRICT_FSQRT
@ STRICT_FSQRT
Constrained versions of libm-equivalent floating point intrinsics.
Definition: ISDOpcodes.h:411
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
llvm::PPC::DIR_440
@ DIR_440
Definition: PPCSubtarget.h:43
llvm::none_of
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1748
llvm::LoopBase::getExitBlocks
void getExitBlocks(SmallVectorImpl< BlockT * > &ExitBlocks) const
Return all of the successor blocks of this loop.
Definition: LoopInfoImpl.h:64
llvm::PPCTTIImpl::getMaxInterleaveFactor
unsigned getMaxInterleaveFactor(unsigned VF)
Definition: PPCTargetTransformInfo.cpp:916
llvm::PPCTTIImpl::getIntImmCost
InstructionCost getIntImmCost(const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind)
Definition: PPCTargetTransformInfo.cpp:167
llvm::HardwareLoopInfo::LoopDecrement
Value * LoopDecrement
Definition: TargetTransformInfo.h:105
InstCombiner.h
llvm::CmpInst::Predicate
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:720
llvm::BasicBlock::iterator
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:87
llvm::InstCombiner::getDominatorTree
DominatorTree & getDominatorTree() const
Definition: InstCombiner.h:370
PHI
Rewrite undef for PHI
Definition: AMDGPURewriteUndefForPHI.cpp:101
llvm::InlineAsm::ConstraintInfoVector
std::vector< ConstraintInfo > ConstraintInfoVector
Definition: InlineAsm.h:120
llvm::Type::isPointerTy
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:237
llvm::TargetTransformInfoImplBase::isLSRCostLess
bool isLSRCostLess(const TTI::LSRCost &C1, const TTI::LSRCost &C2) const
Definition: TargetTransformInfoImpl.h:217
ceil
We have fiadd patterns now but the followings have the same cost and complexity We need a way to specify the later is more profitable def def The FP stackifier should handle simple permutates to reduce number of shuffle e g ceil
Definition: README-FPStack.txt:54
llvm::ISD::FMINNUM
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
Definition: ISDOpcodes.h:943
llvm::ISD::STRICT_FMAXNUM
@ STRICT_FMAXNUM
Definition: ISDOpcodes.h:423
llvm::PPCTTIImpl::getIntImmCostIntrin
InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind)
Definition: PPCTargetTransformInfo.cpp:197
llvm::Function
Definition: Function.h:60
llvm::Loop
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:547
llvm::PPCTTIImpl::isLSRCostLess
bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1, const TargetTransformInfo::LSRCost &C2)
Definition: PPCTargetTransformInfo.cpp:1313
llvm::LoopBase::contains
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
Definition: LoopInfo.h:139
llvm::TargetTransformInfo::PopcntSupportKind
PopcntSupportKind
Flags indicating the kind of support for population count.
Definition: TargetTransformInfo.h:585
llvm::ISD::STRICT_FMINNUM
@ STRICT_FMINNUM
Definition: ISDOpcodes.h:424
llvm::IntrinsicInst::getIntrinsicID
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
Definition: IntrinsicInst.h:54
llvm::TargetTransformInfoImplCRTPBase< PPCTTIImpl >::getInstructionCost
InstructionCost getInstructionCost(const User *U, ArrayRef< const Value * > Operands, TTI::TargetCostKind CostKind)
Definition: TargetTransformInfoImpl.h:1017
C1
instcombine should handle this C2 when C1
Definition: README.txt:263
llvm::Type::getScalarType
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:328
llvm::TLSModel::GeneralDynamic
@ GeneralDynamic
Definition: CodeGen.h:43
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1199
llvm::InstCombiner::Builder
BuilderTy & Builder
Definition: InstCombiner.h:58
llvm::CodeMetrics
Utility to calculate the size and a few similar metrics for a set of basic blocks.
Definition: CodeMetrics.h:31
llvm::PPCTTIImpl::getCastInstrCost
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
Definition: PPCTargetTransformInfo.cpp:1035
llvm::PPCSubtarget::hasP8Vector
bool hasP8Vector() const
Definition: PPCSubtarget.h:283
llvm::PPCSubtarget::isLittleEndian
bool isLittleEndian() const
Definition: PPCSubtarget.h:263
llvm::CastInst::Create
static CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", Instruction *InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
Definition: Instructions.cpp:3341
llvm::ScalarEvolution
The main scalar evolution driver.
Definition: ScalarEvolution.h:449
llvm::TargetTransformInfo::RGK_Scalar
@ RGK_Scalar
Definition: TargetTransformInfo.h:967
llvm::PPCSubtarget::hasVSX
bool hasVSX() const
Definition: PPCSubtarget.h:281
llvm::PPC::DIR_E5500
@ DIR_E5500
Definition: PPCSubtarget.h:53
Local.h
llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:166
DisablePPCConstHoist
static cl::opt< bool > DisablePPCConstHoist("disable-ppc-constant-hoisting", cl::desc("disable constant hoisting on PPC"), cl::init(false), cl::Hidden)
llvm::HardwareLoopInfo::ExitBranch
BranchInst * ExitBranch
Definition: TargetTransformInfo.h:102
llvm::ISD::FMA
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:482
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:140
llvm::TargetTransformInfo::LSRCost::NumIVMuls
unsigned NumIVMuls
Definition: TargetTransformInfo.h:409
llvm::isShiftedMask_32
constexpr bool isShiftedMask_32(uint32_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (32 bit version).
Definition: MathExtras.h:446
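By way of illustration (a sketch relying only on the constexpr semantics stated above), a few sample values:
  // isShiftedMask_32: exactly one contiguous, non-empty run of ones.
  static_assert(llvm::isShiftedMask_32(0x000000F0u), "one run of four ones");
  static_assert(!llvm::isShiftedMask_32(0u), "the empty sequence is rejected");
  static_assert(!llvm::isShiftedMask_32(0x00000F0Fu), "two separate runs");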
llvm::PPCSubtarget::hasP9Vector
bool hasP9Vector() const
Definition: PPCSubtarget.h:286
llvm::PPC::DIR_PWR7
@ DIR_PWR7
Definition: PPCSubtarget.h:60
llvm::HardwareLoopInfo::isHardwareLoopCandidate
bool isHardwareLoopCandidate(ScalarEvolution &SE, LoopInfo &LI, DominatorTree &DT, bool ForceNestedLoop=false, bool ForceHardwareLoopPHI=false)
Definition: TargetTransformInfo.cpp:106
llvm::TargetTransformInfo::UnrollingPreferences::Partial
bool Partial
Allow partial unrolling (unrolling of loops to expand the size of the loop body, not only to eliminat...
Definition: TargetTransformInfo.h:470
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
llvm::TargetTransformInfo::PeelingPreferences
Definition: TargetTransformInfo.h:529
llvm::getOrEnforceKnownAlignment
Align getOrEnforceKnownAlignment(Value *V, MaybeAlign PrefAlign, const DataLayout &DL, const Instruction *CxtI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr)
Try to ensure that the alignment of V is at least PrefAlign bytes.
Definition: Local.cpp:1418
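A minimal usage sketch (V, DL, AC, and DT are assumed locals; the 16-byte target alignment is arbitrary): the helper both computes and, where legal, raises the known alignment, so callers typically compare the result against the alignment they need.
  // Only emit the naturally aligned 16-byte access if that alignment is provable.
  if (getOrEnforceKnownAlignment(V, Align(16), DL, /*CxtI=*/nullptr, &AC, &DT) >= 16) {
    // ... safe to use a 16-byte-aligned load or store of V ...
  }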
llvm::PPCTTIImpl::supportsTailCallFor
bool supportsTailCallFor(const CallBase *CB) const
Definition: PPCTargetTransformInfo.cpp:1473
Vector
Notes from README_P9.txt: use XX3Form_Rcr to implement the intrinsic; converting DP (xscvdpsp) and round/convert QP to DP (dword[1] set to zero) need no builtin; round to quad precision needs a builtin, (set f128:$vT, (int_ppc_vsx_xsrqpi f128:$vB)); plus load/store vector builtins.
Definition: README_P9.txt:497
llvm::PPCTTIImpl::vectorCostAdjustmentFactor
InstructionCost vectorCostAdjustmentFactor(unsigned Opcode, Type *Ty1, Type *Ty2)
Definition: PPCTargetTransformInfo.cpp:951
llvm::SmallPtrSet
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:450
llvm::PPCTTIImpl::isHardwareLoopProfitable
bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE, AssumptionCache &AC, TargetLibraryInfo *LibInfo, HardwareLoopInfo &HWLoopInfo)
Definition: PPCTargetTransformInfo.cpp:720
llvm::PPCSubtarget::getTargetMachine
const PPCTargetMachine & getTargetMachine() const
Definition: PPCSubtarget.h:228
llvm::IRBuilderBase::CreateInsertElement
Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")
Definition: IRBuilder.h:2336
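As a hedged sketch of this API together with its CreateExtractElement counterpart indexed further below (Builder is an assumed IRBuilder in scope):
  // Build a <4 x i32> one lane at a time, then read lane 0 back.
  Value *Vec = UndefValue::get(FixedVectorType::get(Builder.getInt32Ty(), 4));
  Vec = Builder.CreateInsertElement(Vec, Builder.getInt32(42), Builder.getInt32(0));
  Value *Lane0 = Builder.CreateExtractElement(Vec, Builder.getInt32(0));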
llvm::PPCTTIImpl::getRegisterClassName
const char * getRegisterClassName(unsigned ClassID) const
Definition: PPCTargetTransformInfo.cpp:872
llvm::MipsISD::Ret
@ Ret
Definition: MipsISelLowering.h:119
llvm::Intrinsic::not_intrinsic
@ not_intrinsic
Definition: Intrinsics.h:45
llvm::TargetTransformInfoImplBase::getIntImmCost
InstructionCost getIntImmCost(const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind) const
Definition: TargetTransformInfoImpl.h:395
llvm::PPC::DIR_PWR9
@ DIR_PWR9
Definition: PPCSubtarget.h:62
llvm::PPCSubtarget::vectorsUseTwoUnits
bool vectorsUseTwoUnits() const
Definition: PPCSubtarget.h:306
llvm::isPowerOf2_32
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:458
llvm::MVT::v2f64
@ v2f64
Definition: MachineValueType.h:190
llvm::TargetTransformInfo::OperandValueInfo
Definition: TargetTransformInfo.h:926
llvm::TargetTransformInfo::UnrollingPreferences::AllowExpensiveTripCount
bool AllowExpensiveTripCount
Allow emitting expensive instructions (such as divisions) when computing the trip count of a loop for...
Definition: TargetTransformInfo.h:479
llvm::Type::getInt32Ty
static IntegerType * getInt32Ty(LLVMContext &C)
Definition: Type.cpp:239
llvm::TargetTransformInfo::LSRCost::Insns
unsigned Insns
TODO: Some of these could be merged.
Definition: TargetTransformInfo.h:406
llvm::PPCTTIImpl::isNumRegsMajorCostOfLSR
bool isNumRegsMajorCostOfLSR()
Definition: PPCTargetTransformInfo.cpp:1326
llvm::CastInst::getDestTy
Type * getDestTy() const
Return the destination type, as a convenience.
Definition: InstrTypes.h:684
F
#define F(x, y, z)
Definition: MD5.cpp:55
KnownBits.h
llvm::LoopBase::block_end
block_iterator block_end() const
Definition: LoopInfo.h:194
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:55
llvm::TargetTransformInfo::LSRCost::AddRecCost
unsigned AddRecCost
Definition: TargetTransformInfo.h:408
floor
We have fiadd patterns now, but the following have the same cost and complexity; we need a way to specify that the latter is more profitable. The FP stackifier should handle simple permutes to reduce the number of shuffles, e.g. floor.
Definition: README-FPStack.txt:54
llvm::ISD::STRICT_FROUND
@ STRICT_FROUND
Definition: ISDOpcodes.h:427
llvm::Reloc::Model
Model
Definition: CodeGen.h:22
llvm::AArch64CC::LT
@ LT
Definition: AArch64BaseInfo.h:266
llvm::TargetSchedModel::init
void init(const TargetSubtargetInfo *TSInfo)
Initialize the machine model for instruction scheduling.
Definition: TargetSchedule.cpp:47
llvm::PPC::DIR_A2
@ DIR_A2
Definition: PPCSubtarget.h:50
llvm::TargetTransformInfo::LSRCost::SetupCost
unsigned SetupCost
Definition: TargetTransformInfo.h:412
llvm::ISD::FFLOOR
@ FFLOOR
Definition: ISDOpcodes.h:930
llvm::BitmaskEnumDetail::Mask
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:80
llvm::PPCTTIImpl::GPRRC
@ GPRRC
Definition: PPCTargetTransformInfo.h:96
CommandLine.h
CodeMetrics.h
TargetLowering.h
llvm::PPCTTIImpl::getMemoryOpCost
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr)
Definition: PPCTargetTransformInfo.cpp:1144
llvm::BasicTTIImplBase< PPCTTIImpl >::getTypeLegalizationCost
std::pair< InstructionCost, MVT > getTypeLegalizationCost(Type *Ty) const
Estimate the cost of type-legalization and the legalized type.
Definition: BasicTTIImpl.h:789
llvm::TargetLoweringBase::isOperationLegalOrCustom
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
Definition: TargetLowering.h:1184
llvm::BasicTTIImplBase< PPCTTIImpl >::getUnrollingPreferences
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE)
Definition: BasicTTIImpl.h:525
llvm::TargetSchedModel::getIssueWidth
unsigned getIssueWidth() const
Maximum number of micro-ops that may be scheduled per cycle.
Definition: TargetSchedule.h:98
llvm::ISD::STRICT_FRINT
@ STRICT_FRINT
Definition: ISDOpcodes.h:421
llvm::BasicTTIImplBase< PPCTTIImpl >::getVectorInstrCost
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index)
Definition: BasicTTIImpl.h:1187
InlinePriorityMode::Cost
@ Cost
llvm::TargetTransformInfo::ShuffleKind
ShuffleKind
The various kinds of shuffle patterns for vector queries.
Definition: TargetTransformInfo.h:889
llvm::PPCTTIImpl::getNumberOfRegisters
unsigned getNumberOfRegisters(unsigned ClassID) const
Definition: PPCTargetTransformInfo.cpp:846
llvm::TargetTransformInfo::CastContextHint
CastContextHint
Represents a hint about the context in which a cast is used.
Definition: TargetTransformInfo.h:1142
llvm::User
Definition: User.h:44
llvm::LibFunc
LibFunc
Definition: TargetLibraryInfo.h:36
llvm::ISD::STRICT_FNEARBYINT
@ STRICT_FNEARBYINT
Definition: ISDOpcodes.h:422
llvm::EVT
Extended Value Type.
Definition: ValueTypes.h:34
C
C = (vector float) vec_cmpeq(*A, *B);
Definition: README_ALTIVEC.txt:86
LsrNoInsnsCost
static cl::opt< bool > LsrNoInsnsCost("ppc-lsr-no-insns-cost", cl::Hidden, cl::init(false), cl::desc("Do not add instruction count to lsr cost model"))
round
static uint64_t round(uint64_t Acc, uint64_t Input)
Definition: xxhash.cpp:56
llvm::isShiftedMask_64
constexpr bool isShiftedMask_64(uint64_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (64 bit version).
Definition: MathExtras.h:452
llvm::CallBase::getCalledFunction
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
Definition: InstrTypes.h:1397
SI
@ SI
Definition: SIInstrInfo.cpp:7966
llvm::ISD::FROUND
@ FROUND
Definition: ISDOpcodes.h:928
llvm::PPCTargetLowering::allowsMisalignedMemoryAccesses
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const override
Is unaligned memory access allowed for the given type, and is it fast relative to software emulation.
Definition: PPCISelLowering.cpp:16871
llvm::LoopBase::blocks
iterator_range< block_iterator > blocks() const
Definition: LoopInfo.h:195
llvm::ISD::LLROUND
@ LLROUND
Definition: ISDOpcodes.h:932
TargetLibraryInfo.h
llvm::Type::isVectorTy
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:246
llvm::PPCSubtarget::isISA3_0
bool isISA3_0() const
Definition: PPCSubtarget.h:340
llvm::InlineAsm::isInput
@ isInput
Definition: InlineAsm.h:95
llvm::dwarf::Index
Index
Definition: Dwarf.h:472
llvm::MaybeAlign
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117
llvm::BasicTTIImplBase< PPCTTIImpl >::getArithmeticInstrCost
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args=ArrayRef< const Value * >(), const Instruction *CxtI=nullptr)
Definition: BasicTTIImpl.h:825
llvm::IntegerType
Class to represent integer types.
Definition: DerivedTypes.h:40
llvm::TargetLibraryInfo::getLibFunc
bool getLibFunc(StringRef funcName, LibFunc &F) const
Searches for a particular function name.
Definition: TargetLibraryInfo.h:298
llvm::Instruction
Definition: Instruction.h:42
llvm::Type::getScalarSizeInBits
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition: Type.cpp:189
llvm::TargetTransformInfoImplBase::getIntImmCostInst
InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind, Instruction *Inst=nullptr) const
Definition: TargetTransformInfoImpl.h:400
llvm::PPCSubtarget::isPPC64
bool isPPC64() const
isPPC64 - Return true if we are generating code for 64-bit pointer mode.
Definition: PPCSubtarget.cpp:256
Options
const char *Passes, LLVMTargetMachineRef TM, LLVMPassBuilderOptionsRef Options
Definition: PassBuilderBindings.cpp:48
llvm::ISD::FNEARBYINT
@ FNEARBYINT
Definition: ISDOpcodes.h:927
llvm::ISD::FRINT
@ FRINT
Definition: ISDOpcodes.h:926
llvm::LoopBase::getExitingBlocks
void getExitingBlocks(SmallVectorImpl< BlockT * > &ExitingBlocks) const
Return all blocks inside the loop that have successors outside of the loop.
Definition: LoopInfoImpl.h:33
llvm::UndefValue::get
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Definition: Constants.cpp:1713
llvm::ConstantInt::get
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:879
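The splat behavior means one call handles both scalar and vector types; a sketch (Ctx and VecTy, an assumed <4 x i32> type, are hypothetical locals):
  Constant *Scalar = ConstantInt::get(Type::getInt32Ty(Ctx), 7); // i32 7
  Constant *Splat  = ConstantInt::get(VecTy, 7);                 // <4 x i32> of all 7s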
llvm::PPCTTIImpl::getPeelingPreferences
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP)
Definition: PPCTargetTransformInfo.cpp:814
llvm::AArch64PACKey::IA
@ IA
Definition: AArch64BaseInfo.h:819
llvm::CodeMetrics::collectEphemeralValues
static void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value * > &EphValues)
Collect a loop's ephemeral values (those used only by an assume or similar intrinsics in the loop).
Definition: CodeMetrics.cpp:70
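A sketch of the usual call pattern before sizing a loop (L and AC assumed): ephemeral values are gathered first so assume-only instructions can be skipped when counting.
  SmallPtrSet<const Value *, 32> EphValues;
  CodeMetrics::collectEphemeralValues(L, &AC, EphValues);
  // ... ignore any instruction found in EphValues while accumulating size ...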
Info
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
llvm::PPCTTIImpl::getVectorInstrCost
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index)
Definition: PPCTargetTransformInfo.cpp:1073
llvm::ISD::SMULO
@ SMULO
Same as the overflow-aware add/subtract nodes, but for multiplication.
Definition: ISDOpcodes.h:331
Align
uint64_t Align
Definition: ELFObjHandler.cpp:82
llvm::TargetTransformInfo::RGK_FixedWidthVector
@ RGK_FixedWidthVector
Definition: TargetTransformInfo.h:967
llvm::PPCTTIImpl::FPRRC
@ FPRRC
Definition: PPCTargetTransformInfo.h:96
llvm::Align
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
llvm::MCID::Call
@ Call
Definition: MCInstrDesc.h:155
llvm::CastInst::getSrcTy
Type * getSrcTy() const
Return the source type, as a convenience.
Definition: InstrTypes.h:682
llvm::AddressSpace
AddressSpace
Definition: NVPTXBaseInfo.h:21
llvm::BasicTTIImplBase< PPCTTIImpl >::getCmpSelInstrCost
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
Definition: BasicTTIImpl.h:1137
llvm::BasicTTIImplBase< PPCTTIImpl >::getCastInstrCost
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
Definition: BasicTTIImpl.h:969
llvm::LinearPolySize< TypeSize >::getFixed
static TypeSize getFixed(ScalarTy MinVal)
Definition: TypeSize.h:283
llvm::Type::getIntegerBitWidth
unsigned getIntegerBitWidth() const
Definition: DerivedTypes.h:97
llvm::IntrinsicCostAttributes
Definition: TargetTransformInfo.h:120
llvm::CallBase::getCaller
Function * getCaller()
Helper to get the caller (the parent function).
Definition: Instructions.cpp:284
llvm::TargetTransformInfo::PSK_Software
@ PSK_Software
Definition: TargetTransformInfo.h:585
llvm::maxnum
LLVM_READONLY APFloat maxnum(const APFloat &A, const APFloat &B)
Implements IEEE maxNum semantics.
Definition: APFloat.h:1322
llvm::PPCTTIImpl::getRegisterBitWidth
TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const
Definition: PPCTargetTransformInfo.cpp:886
Operands
mir-namer: Rename Register Operands.
Definition: MIRNamerPass.cpp:74
llvm::ARM_PROC::IE
@ IE
Definition: ARMBaseInfo.h:27
llvm::LoopBase::block_begin
block_iterator block_begin() const
Definition: LoopInfo.h:193
llvm::Type::isIntegerTy
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:210
llvm::ScalarEvolution::getSmallConstantTripCount
unsigned getSmallConstantTripCount(const Loop *L)
Returns the exact trip count of the loop if we can compute it, and the result is a small constant.
Definition: ScalarEvolution.cpp:8016
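Sketch of a typical guard (SE and L assumed; the threshold is illustrative): a return value of 0 means no small constant trip count is known.
  if (unsigned TripCount = SE.getSmallConstantTripCount(L))
    if (TripCount < 4)
      return false; // too few iterations to be worth transforming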
llvm::InlineAsm
Definition: InlineAsm.h:33
llvm::ISD::STRICT_FSETCCS
@ STRICT_FSETCCS
Definition: ISDOpcodes.h:476
llvm::cl::opt< bool >
llvm::LoopBase< BasicBlock, Loop >::block_iterator
ArrayRef< BasicBlock * >::const_iterator block_iterator
Definition: LoopInfo.h:192
llvm::TargetLoweringBase::getMinimumJumpTableEntries
virtual unsigned getMinimumJumpTableEntries() const
Return lower limit for number of blocks in a jump table.
Definition: TargetLoweringBase.cpp:1996
llvm::PPC::DIR_PWR10
@ DIR_PWR10
Definition: PPCSubtarget.h:63
llvm::IRBuilderBase::CreateBitCast
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1991
llvm::StoreInst
An instruction for storing to memory.
Definition: Instructions.h:298
llvm::PPC::DIR_E500mc
@ DIR_E500mc
Definition: PPCSubtarget.h:52
llvm::PPCTTIImpl::getIntrinsicInstrCost
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind)
Definition: PPCTargetTransformInfo.cpp:1266
llvm::Constant
This is an important base class in LLVM.
Definition: Constant.h:41
TargetSchedule.h
llvm::MVT::v16i8
@ v16i8
Definition: MachineValueType.h:89
llvm::PPCSubtarget::POPCNTD_Unavailable
@ POPCNTD_Unavailable
Definition: PPCSubtarget.h:74
llvm::PointerType::getUnqual
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address sp...
Definition: DerivedTypes.h:651
llvm::InstCombiner::getAssumptionCache
AssumptionCache & getAssumptionCache() const
Definition: InstCombiner.h:368
llvm::TargetSchedModel
Provide an instruction scheduling machine model to CodeGen passes.
Definition: TargetSchedule.h:30
llvm::MVT::v2i64
@ v2i64
Definition: MachineValueType.h:131
uint64_t
llvm::TLSModel::LocalDynamic
@ LocalDynamic
Definition: CodeGen.h:44
llvm::TargetTransformInfo::LSRCost
Definition: TargetTransformInfo.h:403
llvm::InstCombiner::getDataLayout
const DataLayout & getDataLayout() const
Definition: InstCombiner.h:371
llvm::PPCSubtarget::hasP9Altivec
bool hasP9Altivec() const
Definition: PPCSubtarget.h:287
llvm::PPCTTIImpl::instCombineIntrinsic
std::optional< Instruction * > instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const
Definition: PPCTargetTransformInfo.cpp:65
llvm::PPCTTIImpl::getPopcntSupport
TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth)
Definition: PPCTargetTransformInfo.cpp:56
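A hedged caller-side sketch (TTI is an assumed TargetTransformInfo reference):
  // Prefer a ctpop-based sequence only when hardware popcount is fast.
  bool FastPopcnt =
      TTI.getPopcntSupport(64) == TargetTransformInfo::PSK_FastHardware;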
llvm::IRBuilderBase::getInt32
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Definition: IRBuilder.h:472
llvm::ISD::STRICT_LRINT
@ STRICT_LRINT
Definition: ISDOpcodes.h:432
llvm::TargetLoweringBase::getMaxExpandSizeMemcmp
unsigned getMaxExpandSizeMemcmp(bool OptSize) const
Get maximum # of load operations permitted for memcmp.
Definition: TargetLowering.h:1706
llvm::DataLayout::isLittleEndian
bool isLittleEndian() const
Layout endianness...
Definition: DataLayout.h:244
ProfDataUtils.h
llvm::LLVMContext
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
llvm::numbers::e
constexpr double e
Definition: MathExtras.h:53
llvm::PPCTTIImpl::getTgtMemIntrinsic
bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info)
Definition: PPCTargetTransformInfo.cpp:1338
llvm::ISD::EXTRACT_VECTOR_ELT
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:534
llvm::TargetTransformInfo::UnrollingPreferences
Parameters that control the generic loop unrolling transformation.
Definition: TargetTransformInfo.h:417
I
#define I(x, y, z)
Definition: MD5.cpp:58
llvm::ISD::LRINT
@ LRINT
Definition: ISDOpcodes.h:933
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:447
Metrics
Machine Trace Metrics
Definition: MachineTraceMetrics.cpp:53
llvm::Type::isHalfTy
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
Definition: Type.h:142
llvm::SPII::Store
@ Store
Definition: SparcInstrInfo.h:33
llvm::MVT::v4f32
@ v4f32
Definition: MachineValueType.h:170
llvm::InlineAsm::ConstraintInfo
Definition: InlineAsm.h:122
llvm::ISD::STRICT_LROUND
@ STRICT_LROUND
Definition: ISDOpcodes.h:430
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::MVT::Other
@ Other
Definition: MachineValueType.h:42
llvm::PPCTTIImpl::getInterleavedMemoryOpCost
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false)
Definition: PPCTargetTransformInfo.cpp:1231
memcpy
In this struct-copy example (struct s* values cast to sbyte*), llc ends up issuing two memcpys; alternatively, memcpy of small size could be custom lowered to ldmia/stmia. Option 2 is better, but the current register allocator cannot allocate a chunk of registers at a time; a feasible temporary solution is to use specific physical registers at lowering time for small copies.
llvm::PPCTTIImpl::shouldBuildRelLookupTables
bool shouldBuildRelLookupTables() const
Definition: PPCTargetTransformInfo.cpp:1330
llvm::PPCTTIImpl::enableInterleavedAccessVectorization
bool enableInterleavedAccessVectorization()
Definition: PPCTargetTransformInfo.cpp:842
llvm::IRBuilderBase::CreateExtractElement
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
Definition: IRBuilder.h:2324
Ptr
@ Ptr
Definition: TargetLibraryInfo.cpp:60
llvm::TargetTransformInfo::LSRCost::ScaleCost
unsigned ScaleCost
Definition: TargetTransformInfo.h:413
llvm::BasicTTIImplBase< PPCTTIImpl >::getInterleavedMemoryOpCost
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false)
Definition: BasicTTIImpl.h:1291
llvm::BasicTTIImplBase< PPCTTIImpl >::getMemoryOpCost
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr)
Definition: BasicTTIImpl.h:1231
llvm::TargetTransformInfo::PSK_SlowHardware
@ PSK_SlowHardware
Definition: TargetTransformInfo.h:585
SmallCTRLoopThreshold
static cl::opt< unsigned > SmallCTRLoopThreshold("min-ctr-loop-threshold", cl::init(4), cl::Hidden, cl::desc("Loops with a constant trip count smaller than " "this value will not use the count register."))
llvm::ISD::STRICT_LLRINT
@ STRICT_LLRINT
Definition: ISDOpcodes.h:433
llvm::PPCTTIImpl::useColdCCForColdCall
bool useColdCCForColdCall(Function &F)
Definition: PPCTargetTransformInfo.cpp:822
llvm::APInt
Class for arbitrary precision integers.
Definition: APInt.h:75
llvm::TargetTransformInfo::MemCmpExpansionOptions
Returns options for expansion of memcmp. IsZeroCmp is true if this is the expansion of memcmp(p1, p2, s) == 0.
Definition: TargetTransformInfo.h:780
llvm::PPCSubtarget::hasDirectMove
bool hasDirectMove() const
Definition: PPCSubtarget.h:319
llvm::TargetTransformInfo::TCC_Free
@ TCC_Free
Expected to fold away in lowering.
Definition: TargetTransformInfo.h:244
llvm::TargetTransformInfoImplBase::getVPMemoryOpCost
InstructionCost getVPMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, const Instruction *I) const
Definition: TargetTransformInfoImpl.h:612
llvm::PPCTTIImpl::getInstructionCost
InstructionCost getInstructionCost(const User *U, ArrayRef< const Value * > Operands, TTI::TargetCostKind CostKind)
Definition: PPCTargetTransformInfo.cpp:325
llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: APInt.h:32
llvm::LoopInfo
Definition: LoopInfo.h:1108
llvm::EVT::isVector
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:154
llvm::OptimizationRemarkEmitter
The optimization diagnostic interface.
Definition: OptimizationRemarkEmitter.h:33
Mul
BinaryOperator * Mul
Definition: X86PartialReduction.cpp:70
llvm::ISD::STRICT_FTRUNC
@ STRICT_FTRUNC
Definition: ISDOpcodes.h:429
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
Definition: StringRef.h:50
llvm::AssumptionCache
A cache of @llvm.assume calls within a function.
Definition: AssumptionCache.h:42
llvm::PPCTTIImpl::getIntImmCostInst
InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind, Instruction *Inst=nullptr)
Definition: PPCTargetTransformInfo.cpp:232
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:143
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
if
if(llvm_vc STREQUAL "")
Definition: CMakeLists.txt:14
llvm::ISD::LLRINT
@ LLRINT
Definition: ISDOpcodes.h:934
EnablePPCColdCC
static cl::opt< bool > EnablePPCColdCC("ppc-enable-coldcc", cl::Hidden, cl::init(false), cl::desc("Enable using coldcc calling conv for cold " "internal functions"))
CostKind
static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))
trunc
We have fiadd patterns now, but the following have the same cost and complexity; we need a way to specify that the latter is more profitable. The FP stackifier should handle simple permutes to reduce the number of shuffles, e.g. trunc.
Definition: README-FPStack.txt:63
llvm::PPCTTIImpl::canSaveCmp
bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC, TargetLibraryInfo *LibInfo)
Definition: PPCTargetTransformInfo.cpp:1290
llvm::TargetLoweringBase::InstructionOpcodeToISD
int InstructionOpcodeToISD(unsigned Opcode) const
Get the ISD node that corresponds to the Instruction class opcode.
Definition: TargetLoweringBase.cpp:1785
llvm::TargetMachine::shouldAssumeDSOLocal
bool shouldAssumeDSOLocal(const Module &M, const GlobalValue *GV) const
Definition: TargetMachine.cpp:88
llvm::CastInst
This is the base class for all instructions that perform data casts.
Definition: InstrTypes.h:430
llvm::PPCTTIImpl::getRegisterClassForType
unsigned getRegisterClassForType(bool Vector, Type *Ty=nullptr) const
Definition: PPCTargetTransformInfo.cpp:857
llvm::InstructionCost::isValid
bool isValid() const
Definition: InstructionCost.h:80
llvm::MVT::v4i32
@ v4i32
Definition: MachineValueType.h:112
llvm::BasicTTIImplBase< PPCTTIImpl >::getPeelingPreferences
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP)
Definition: BasicTTIImpl.h:597
llvm::ifs::IFSSymbolType::Func
@ Func
llvm::LoadInst
An instruction for reading from memory.
Definition: Instructions.h:174
llvm::PPCTTIImpl::getVPMemoryOpCost
InstructionCost getVPMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
Definition: PPCTargetTransformInfo.cpp:1419
llvm::PPCTTIImpl::enableAggressiveInterleaving
bool enableAggressiveInterleaving(bool LoopHasReductions)
Definition: PPCTargetTransformInfo.cpp:826
Callee
FunctionCallee Callee (from the AMDGPU 'Simplify well-known AMD library calls' pass).
Definition: AMDGPULibCalls.cpp:187
llvm::BasicBlock::getContext
LLVMContext & getContext() const
Get the context in which this basic block lives.
Definition: BasicBlock.cpp:35
llvm::ISD::FSQRT
@ FSQRT
Definition: ISDOpcodes.h:913
llvm::MCID::Select
@ Select
Definition: MCInstrDesc.h:164
llvm::Twine
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
llvm::minnum
LLVM_READONLY APFloat minnum(const APFloat &A, const APFloat &B)
Implements IEEE minNum semantics.
Definition: APFloat.h:1311
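A worked example of the NaN rule shared by minnum and maxnum (a sketch): when exactly one operand is a NaN, the other operand is returned.
  APFloat QNaN = APFloat::getNaN(APFloat::IEEEdouble());
  APFloat One(1.0);
  APFloat R = minnum(QNaN, One); // yields 1.0, not NaN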
llvm::ISD::STRICT_FMA
@ STRICT_FMA
Definition: ISDOpcodes.h:405
llvm::ISD::FMAXNUM
@ FMAXNUM
Definition: ISDOpcodes.h:944
llvm::Type::getInt64Ty
static IntegerType * getInt64Ty(LLVMContext &C)
Definition: Type.cpp:240
llvm::TargetTransformInfo::LSRCost::NumBaseAdds
unsigned NumBaseAdds
Definition: TargetTransformInfo.h:410
llvm::Type::isFloatTy
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
Definition: Type.h:153
llvm::PPCSubtarget::useSoftFloat
bool useSoftFloat() const
Definition: PPCSubtarget.h:247
llvm::TargetTransformInfoImplBase::getIntImmCostIntrin
InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind) const
Definition: TargetTransformInfoImpl.h:407
llvm::InstructionCost::getMax
static InstructionCost getMax()
Definition: InstructionCost.h:72
CostTable.h
llvm::EVT::getScalarType
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition: ValueTypes.h:295
llvm::AMDGPU::SendMsg::Op
Op
Definition: SIDefines.h:348
llvm::Align::value
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
llvm::ISD::FCEIL
@ FCEIL
Definition: ISDOpcodes.h:924
llvm::Type::isIntOrIntVectorTy
bool isIntOrIntVectorTy() const
Return true if this is an integer type or a vector of integer types.
Definition: Type.h:216
llvm::MVT::v8i16
@ v8i16
Definition: MachineValueType.h:101
llvm::TypeSize
Definition: TypeSize.h:435
llvm::ISD::STRICT_FCEIL
@ STRICT_FCEIL
Definition: ISDOpcodes.h:425
llvm::BasicTTIImplBase< PPCTTIImpl >::getMaskedMemoryOpCost
InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *DataTy, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind)
Definition: BasicTTIImpl.h:1275
llvm::TargetLoweringBase::isOperationExpand
bool isOperationExpand(unsigned Op, EVT VT) const
Return true if the specified operation is illegal on this target or unlikely to be made legal with cu...
Definition: TargetLowering.h:1287
llvm::LoopBase::getHeader
BlockT * getHeader() const
Definition: LoopInfo.h:105
llvm::LinearPolySize< TypeSize >::getScalable
static TypeSize getScalable(ScalarTy MinVal)
Definition: TypeSize.h:286
llvm::TargetLibraryInfo
Provides information about what library functions are available for the current target.
Definition: TargetLibraryInfo.h:226
llvm::Type::isDoubleTy
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
Definition: Type.h:156
llvm::PPCTTIImpl::hasActiveVectorLength
bool hasActiveVectorLength(unsigned Opcode, Type *DataType, Align Alignment) const
Definition: PPCTargetTransformInfo.cpp:1391
powi
This is blocked on not handling X*X*X as powi(X, 3) (see note above).
llvm::log2
static double log2(double V)
Definition: AMDGPULibCalls.cpp:794
llvm::Type::isPPC_FP128Ty
bool isPPC_FP128Ty() const
Return true if this is powerpc long double.
Definition: Type.h:165
llvm::TargetTransformInfo::LSRCost::ImmCost
unsigned ImmCost
Definition: TargetTransformInfo.h:411
llvm::TLSModel::Model
Model
Definition: CodeGen.h:42
llvm::MCID::Add
@ Add
Definition: MCInstrDesc.h:185
llvm::PPCTargetMachine
Common code between 32-bit and 64-bit PowerPC targets.
Definition: PPCTargetMachine.h:26
llvm::InstCombiner
The core instruction combiner logic.
Definition: InstCombiner.h:45
PPCTargetTransformInfo.h
llvm::TargetTransformInfo::RGK_ScalableVector
@ RGK_ScalableVector
Definition: TargetTransformInfo.h:967
llvm::IntrinsicInst
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:47
llvm::HardwareLoopInfo
Attributes of a target dependent hardware loop.
Definition: TargetTransformInfo.h:97
llvm::InstructionCost::getInvalid
static InstructionCost getInvalid(CostType Val=0)
Definition: InstructionCost.h:74
llvm::ISD::STRICT_FFLOOR
@ STRICT_FFLOOR
Definition: ISDOpcodes.h:426
llvm::RISCVMatInt::Imm
@ Imm
Definition: RISCVMatInt.h:23
llvm::PPCTTIImpl::getPrefetchDistance
unsigned getPrefetchDistance() const override
Definition: PPCTargetTransformInfo.cpp:912
llvm::PPCTTIImpl::enableMemCmpExpansion
TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const
Definition: PPCTargetTransformInfo.cpp:835
memAddrUsesCTR
static bool memAddrUsesCTR(const Value *MemAddr, const PPCTargetMachine &TM, SmallPtrSetImpl< const Value * > &Visited)
Definition: PPCTargetTransformInfo.cpp:344
llvm::PPCSubtarget::isUsingPCRelativeCalls
bool isUsingPCRelativeCalls() const
Definition: PPCSubtarget.cpp:258
llvm::StringRef::equals_insensitive
bool equals_insensitive(StringRef RHS) const
Check for string equality, ignoring case.
Definition: StringRef.h:170
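For example:
  StringRef S("MemCmp");
  bool Same = S.equals_insensitive("memcmp"); // true: case is ignored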
llvm::PPCSubtarget::hasAltivec
bool hasAltivec() const
Definition: PPCSubtarget.h:277
llvm::TargetTransformInfo::RegisterKind
RegisterKind
Definition: TargetTransformInfo.h:967
isMMAType
static bool isMMAType(Type *Ty)
Definition: PPCTargetTransformInfo.cpp:320
llvm::PPCTTIImpl::areTypesABICompatible
bool areTypesABICompatible(const Function *Caller, const Function *Callee, const ArrayRef< Type * > &Types) const
Definition: PPCTargetTransformInfo.cpp:1271
llvm::PPC::DIR_PWR8
@ DIR_PWR8
Definition: PPCSubtarget.h:61
DataType
llvm::CallBase::getArgOperand
Value * getArgOperand(unsigned i) const
Definition: InstrTypes.h:1342
llvm::BasicTTIImplBase< PPCTTIImpl >::getIntrinsicInstrCost
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind)
Get intrinsic cost based on arguments.
Definition: BasicTTIImpl.h:1435
llvm::PPCTTIImpl::getCacheLineSize
unsigned getCacheLineSize() const override
Definition: PPCTargetTransformInfo.cpp:899
TargetTransformInfo.h
llvm::PPCTTIImpl::getCmpSelInstrCost
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
Definition: PPCTargetTransformInfo.cpp:1055
llvm::ISD::STRICT_LLROUND
@ STRICT_LLROUND
Definition: ISDOpcodes.h:431
llvm::Optional::value
constexpr const T & value() const &
Definition: Optional.h:281
llvm::HardwareLoopInfo::canAnalyze
bool canAnalyze(LoopInfo &LI)
Definition: TargetTransformInfo.cpp:51
llvm::PPCTTIImpl::getShuffleCost
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, Type *SubTp, ArrayRef< const Value * > Args=std::nullopt)
Definition: PPCTargetTransformInfo.cpp:1004
llvm::MemIntrinsicInfo
Information about a load/store intrinsic defined by the target.
Definition: TargetTransformInfo.h:72
llvm::ISD::LROUND
@ LROUND
Definition: ISDOpcodes.h:931
llvm::CallBase
Base class for all callable instructions (InvokeInst and CallInst); holds everything related to callin...
Definition: InstrTypes.h:1175
llvm::SmallPtrSetImpl< const Value * >
TM
const char *Passes, LLVMTargetMachineRef TM
Definition: PassBuilderBindings.cpp:47
llvm::CallInst
This class represents a function call, abstracting a target machine's calling convention.
Definition: Instructions.h:1474
llvm::extractBranchWeights
bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
Definition: ProfDataUtils.cpp:104
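A sketch of reading weights off a conditional branch (BI is an assumed BranchInst*):
  SmallVector<uint32_t, 2> Weights;
  if (const MDNode *ProfMD = BI->getMetadata(LLVMContext::MD_prof))
    if (extractBranchWeights(ProfMD, Weights))
      ; // Weights[0]/Weights[1] hold the taken/not-taken counts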
BB
Common register allocation / spilling note from README.txt: the lr/str/ldr/sxth/mla sequences can be improved by merging the mul into mla, which also increases the likelihood that the store may become dead (bb27 'Successors according to LLVM: BB').
Definition: README.txt:39
llvm::PPCSubtarget::getCPUDirective
unsigned getCPUDirective() const
getCPUDirective - Returns the -m directive specified for the cpu.
Definition: PPCSubtarget.h:207
llvm::TargetLibraryInfo::hasOptimizedCodeGen
bool hasOptimizedCodeGen(LibFunc F) const
Tests if the function is both available and a candidate for optimized code generation.
Definition: TargetLibraryInfo.h:347
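Sketch of the common two-step query (TLI and Callee assumed): map the callee's name to a LibFunc, then ask whether an optimized lowering exists.
  LibFunc LF;
  if (TLI.getLibFunc(Callee->getName(), LF) && TLI.hasOptimizedCodeGen(LF)) {
    // e.g. sqrt may be emitted as a native instruction rather than a call
  }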
llvm::TargetTransformInfo::TCC_Basic
@ TCC_Basic
The cost of a typical 'add' instruction.
Definition: TargetTransformInfo.h:245
llvm::SwitchInst
Multiway switch.
Definition: Instructions.h:3276
llvm::PPC::DIR_PWR_FUTURE
@ DIR_PWR_FUTURE
Definition: PPCSubtarget.h:64
llvm::AMDGPU::HSAMD::Kernel::Key::Args
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
Definition: AMDGPUMetadata.h:394
BasicTTIImpl.h
llvm::cl::desc
Definition: CommandLine.h:413
llvm::TargetLoweringBase::getValueType
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
Definition: TargetLowering.h:1497
llvm::BranchInst
Conditional or Unconditional Branch instruction.
Definition: Instructions.h:3132
llvm::ISD::FTRUNC
@ FTRUNC
Definition: ISDOpcodes.h:925
llvm::PPCTTIImpl::getCFInstrCost
InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
Definition: PPCTargetTransformInfo.cpp:1026
llvm::PPCTTIImpl::getArithmeticInstrCost
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args=ArrayRef< const Value * >(), const Instruction *CxtI=nullptr)
Definition: PPCTargetTransformInfo.cpp:982
llvm::PPCSubtarget::POPCNTD_Slow
@ POPCNTD_Slow
Definition: PPCSubtarget.h:75
llvm::ISD::INSERT_VECTOR_ELT
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition: ISDOpcodes.h:523
llvm::HardwareLoopInfo::CountType
IntegerType * CountType
Definition: TargetTransformInfo.h:104
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
llvm::TargetTransformInfo::TCK_RecipThroughput
@ TCK_RecipThroughput
Reciprocal throughput.
Definition: TargetTransformInfo.h:219
llvm::Directive
Definition: DirectiveEmitter.h:100
llvm::Type::isFP128Ty
bool isFP128Ty() const
Return true if this is 'fp128'.
Definition: Type.h:162
Debug.h
llvm::PPCTTIImpl::VRRC
@ VRRC
Definition: PPCTargetTransformInfo.h:96
llvm::Type::getPrimitiveSizeInBits
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition: Type.cpp:164
llvm::SmallPtrSetImpl::insert
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:365
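The returned pair enables the standard visited-set idiom (a sketch; Visited and V assumed):
  if (!Visited.insert(V).second)
    return false; // V was already present: stop instead of revisiting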
llvm::PPCSubtarget::hasP10Vector
bool hasP10Vector() const
Definition: PPCSubtarget.h:288
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:39
llvm::TargetTransformInfoImplBase::areTypesABICompatible
bool areTypesABICompatible(const Function *Caller, const Function *Callee, const ArrayRef< Type * > &Types) const
Definition: TargetTransformInfoImpl.h:770