LLVM  3.7.0
PPCTargetTransformInfo.cpp
//===-- PPCTargetTransformInfo.cpp - PPC specific TTI ---------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

#include "PPCTargetTransformInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/CostTable.h"
#include "llvm/Target/TargetLowering.h"
using namespace llvm;

#define DEBUG_TYPE "ppctti"

static cl::opt<bool> DisablePPCConstHoist("disable-ppc-constant-hoisting",
cl::desc("disable constant hoisting on PPC"), cl::init(false), cl::Hidden);

//===----------------------------------------------------------------------===//
//
// PPC cost model.
//
//===----------------------------------------------------------------------===//

TargetTransformInfo::PopcntSupportKind
PPCTTIImpl::getPopcntSupport(unsigned TyWidth) {
  assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
  if (ST->hasPOPCNTD() && TyWidth <= 64)
    return TTI::PSK_FastHardware;
  return TTI::PSK_Software;
}
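
// For example, on a subtarget that reports hasPOPCNTD() (e.g. POWER7 and
// later), a 32- or 64-bit population count is classified as
// PSK_FastHardware; otherwise the query falls back to PSK_Software.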

unsigned PPCTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
  if (DisablePPCConstHoist)
    return BaseT::getIntImmCost(Imm, Ty);

  assert(Ty->isIntegerTy());

  unsigned BitSize = Ty->getPrimitiveSizeInBits();
  if (BitSize == 0)
    return ~0U;

  if (Imm == 0)
    return TTI::TCC_Free;

  if (Imm.getBitWidth() <= 64) {
    if (isInt<16>(Imm.getSExtValue()))
      return TTI::TCC_Basic;

    if (isInt<32>(Imm.getSExtValue())) {
      // A constant that can be materialized using lis.
      if ((Imm.getZExtValue() & 0xFFFF) == 0)
        return TTI::TCC_Basic;

      return 2 * TTI::TCC_Basic;
    }
  }

  return 4 * TTI::TCC_Basic;
}
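
// A few concrete data points for the cost model above: an immediate that
// fits in a signed 16-bit field (e.g. 42 or -7) costs one TCC_Basic; a
// 32-bit value with a zero low halfword (e.g. 0x12340000) also costs one
// TCC_Basic, since a single lis can materialize it; any other 32-bit value
// (e.g. 0x12345678) costs 2 * TCC_Basic, typically an lis/ori pair; and
// wider constants are charged a flat 4 * TCC_Basic.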

unsigned PPCTTIImpl::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
                                   const APInt &Imm, Type *Ty) {
  if (DisablePPCConstHoist)
    return BaseT::getIntImmCost(IID, Idx, Imm, Ty);

  assert(Ty->isIntegerTy());

  unsigned BitSize = Ty->getPrimitiveSizeInBits();
  if (BitSize == 0)
    return ~0U;

  switch (IID) {
  default:
    return TTI::TCC_Free;
  case Intrinsic::sadd_with_overflow:
  case Intrinsic::uadd_with_overflow:
  case Intrinsic::ssub_with_overflow:
  case Intrinsic::usub_with_overflow:
    if ((Idx == 1) && Imm.getBitWidth() <= 64 && isInt<16>(Imm.getSExtValue()))
      return TTI::TCC_Free;
    break;
  case Intrinsic::experimental_stackmap:
    if ((Idx < 2) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
      return TTI::TCC_Free;
    break;
  case Intrinsic::experimental_patchpoint_void:
  case Intrinsic::experimental_patchpoint_i64:
    if ((Idx < 4) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
      return TTI::TCC_Free;
    break;
  }
  return PPCTTIImpl::getIntImmCost(Imm, Ty);
}

unsigned PPCTTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx,
                                   const APInt &Imm, Type *Ty) {
  if (DisablePPCConstHoist)
    return BaseT::getIntImmCost(Opcode, Idx, Imm, Ty);

  assert(Ty->isIntegerTy());

  unsigned BitSize = Ty->getPrimitiveSizeInBits();
  if (BitSize == 0)
    return ~0U;

  unsigned ImmIdx = ~0U;
  bool ShiftedFree = false, RunFree = false, UnsignedFree = false,
       ZeroFree = false;
  switch (Opcode) {
  default:
    return TTI::TCC_Free;
  case Instruction::GetElementPtr:
    // Always hoist the base address of a GetElementPtr. This prevents the
    // creation of new constants for every base constant that gets constant
    // folded with the offset.
    if (Idx == 0)
      return 2 * TTI::TCC_Basic;
    return TTI::TCC_Free;
  case Instruction::And:
    RunFree = true; // (for the rotate-and-mask instructions)
    // Fallthrough...
  case Instruction::Add:
  case Instruction::Or:
  case Instruction::Xor:
    ShiftedFree = true;
    // Fallthrough...
  case Instruction::Sub:
  case Instruction::Mul:
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
    ImmIdx = 1;
    break;
  case Instruction::ICmp:
    UnsignedFree = true;
    ImmIdx = 1;
    // Fallthrough... (zero comparisons can use record-form instructions)
  case Instruction::Select:
    ZeroFree = true;
    break;
  case Instruction::PHI:
  case Instruction::Call:
  case Instruction::Ret:
  case Instruction::Load:
  case Instruction::Store:
    break;
  }

  if (ZeroFree && Imm == 0)
    return TTI::TCC_Free;

  if (Idx == ImmIdx && Imm.getBitWidth() <= 64) {
    if (isInt<16>(Imm.getSExtValue()))
      return TTI::TCC_Free;

    if (RunFree) {
      if (Imm.getBitWidth() <= 32 &&
          (isShiftedMask_32(Imm.getZExtValue()) ||
           isShiftedMask_32(~Imm.getZExtValue())))
        return TTI::TCC_Free;

      if (ST->isPPC64() &&
          (isShiftedMask_64(Imm.getZExtValue()) ||
           isShiftedMask_64(~Imm.getZExtValue())))
        return TTI::TCC_Free;
    }

    if (UnsignedFree && isUInt<16>(Imm.getZExtValue()))
      return TTI::TCC_Free;

    if (ShiftedFree && (Imm.getZExtValue() & 0xFFFF) == 0)
      return TTI::TCC_Free;
  }

  return PPCTTIImpl::getIntImmCost(Imm, Ty);
}
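
// Concretely, under the rules above: an 'and' whose immediate is a shifted
// mask such as 0x00FF0000 (or the complement of one) is free, since it maps
// onto a rotate-and-mask instruction; an 'add', 'or', or 'xor' whose
// immediate has a zero low halfword (e.g. 0x12340000) is free via the
// shifted-immediate forms (addis/oris/xoris); and an icmp against a value
// that fits in an unsigned 16-bit immediate is free as well.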

void PPCTTIImpl::getUnrollingPreferences(Loop *L,
                                         TTI::UnrollingPreferences &UP) {
  if (ST->getDarwinDirective() == PPC::DIR_A2) {
    // The A2 is in-order with a deep pipeline, and concatenation unrolling
    // helps expose latency-hiding opportunities to the instruction scheduler.
    UP.Partial = UP.Runtime = true;

    // We unroll a lot on the A2 (hundreds of instructions), and the benefits
    // often outweigh the cost of a division to compute the trip count.
    UP.AllowExpensiveTripCount = true;
  }

  BaseT::getUnrollingPreferences(L, UP);
}

bool PPCTTIImpl::enableAggressiveInterleaving(bool LoopHasReductions) {
  return LoopHasReductions;
}

unsigned PPCTTIImpl::getNumberOfRegisters(bool Vector) {
  if (Vector && !ST->hasAltivec() && !ST->hasQPX())
    return 0;
  return ST->hasVSX() ? 64 : 32;
}

unsigned PPCTTIImpl::getRegisterBitWidth(bool Vector) {
  if (Vector) {
    if (ST->hasQPX()) return 256;
    if (ST->hasAltivec()) return 128;
    return 0;
  }

  if (ST->isPPC64())
    return 64;
  return 32;
}

unsigned PPCTTIImpl::getMaxInterleaveFactor(unsigned VF) {
  unsigned Directive = ST->getDarwinDirective();
  // The 440 has no SIMD support, but floating-point instructions
  // have a 5-cycle latency, so unroll by 5x for latency hiding.
  if (Directive == PPC::DIR_440)
    return 5;

  // The A2 has no SIMD support, but floating-point instructions
  // have a 6-cycle latency, so unroll by 6x for latency hiding.
  if (Directive == PPC::DIR_A2)
    return 6;

  // FIXME: For lack of any better information, do no harm...
  if (Directive == PPC::DIR_E500mc || Directive == PPC::DIR_E5500)
    return 1;

  // For P7 and P8, floating-point instructions have a 6-cycle latency and
  // there are two execution units, so unroll by 12x for latency hiding.
  if (Directive == PPC::DIR_PWR7 ||
      Directive == PPC::DIR_PWR8)
    return 12;

  // For most things, modern systems have two execution units (and
  // out-of-order execution).
  return 2;
}

unsigned PPCTTIImpl::getArithmeticInstrCost(
    unsigned Opcode, Type *Ty, TTI::OperandValueKind Op1Info,
    TTI::OperandValueKind Op2Info, TTI::OperandValueProperties Opd1PropInfo,
    TTI::OperandValueProperties Opd2PropInfo) {
  assert(TLI->InstructionOpcodeToISD(Opcode) && "Invalid opcode");

  // Fallback to the default implementation.
  return BaseT::getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info,
                                       Opd1PropInfo, Opd2PropInfo);
}

unsigned PPCTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
                                    Type *SubTp) {
  return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
}

unsigned PPCTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) {
  assert(TLI->InstructionOpcodeToISD(Opcode) && "Invalid opcode");

  return BaseT::getCastInstrCost(Opcode, Dst, Src);
}

unsigned PPCTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
                                        Type *CondTy) {
  return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy);
}

unsigned PPCTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
                                        unsigned Index) {
  assert(Val->isVectorTy() && "This must be a vector type");

  int ISD = TLI->InstructionOpcodeToISD(Opcode);
  assert(ISD && "Invalid opcode");

  if (ST->hasVSX() && Val->getScalarType()->isDoubleTy()) {
    // Double-precision scalars are already located in index #0.
    if (Index == 0)
      return 0;

    return BaseT::getVectorInstrCost(Opcode, Val, Index);
  } else if (ST->hasQPX() && Val->getScalarType()->isFloatingPointTy()) {
    // Floating point scalars are already located in index #0.
    if (Index == 0)
      return 0;

    return BaseT::getVectorInstrCost(Opcode, Val, Index);
  }

  // Estimated cost of a load-hit-store delay. This was obtained
  // experimentally as a minimum needed to prevent unprofitable
  // vectorization for the paq8p benchmark. It may need to be
  // raised further if other unprofitable cases remain.
  unsigned LHSPenalty = 2;
  if (ISD == ISD::INSERT_VECTOR_ELT)
    LHSPenalty += 7;

  // Vector element insert/extract with Altivec is very expensive,
  // because they require store and reload with the attendant
  // processor stall for load-hit-store. Until VSX is available,
  // these need to be estimated as very costly.
  if (ISD == ISD::EXTRACT_VECTOR_ELT ||
      ISD == ISD::INSERT_VECTOR_ELT)
    return LHSPenalty + BaseT::getVectorInstrCost(Opcode, Val, Index);

  return BaseT::getVectorInstrCost(Opcode, Val, Index);
}
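
// As a rough illustration of the numbers above: on an Altivec-only
// subtarget, extracting an element from a <4 x i32> costs the base cost
// plus the LHSPenalty of 2, and inserting one costs the base cost plus
// 2 + 7 = 9, reflecting the store/reload sequence and its load-hit-store
// stall. With VSX, extracting element 0 of a <2 x double> costs nothing,
// since the scalar already lives in that lane.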

unsigned PPCTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
                                     unsigned Alignment,
                                     unsigned AddressSpace) {
  // Legalize the type.
  std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);
  assert((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
         "Invalid Opcode");

  unsigned Cost = BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace);

  // VSX loads/stores support unaligned access.
  if (ST->hasVSX()) {
    if (LT.second == MVT::v2f64 || LT.second == MVT::v2i64)
      return Cost;
  }

  bool UnalignedAltivec =
    Src->isVectorTy() &&
    Src->getPrimitiveSizeInBits() >= LT.second.getSizeInBits() &&
    LT.second.getSizeInBits() == 128 &&
    Opcode == Instruction::Load;

  // PPC in general does not support unaligned loads and stores. They'll need
  // to be decomposed based on the alignment factor.
  unsigned SrcBytes = LT.second.getStoreSize();
  if (SrcBytes && Alignment && Alignment < SrcBytes && !UnalignedAltivec) {
    Cost += LT.first*(SrcBytes/Alignment-1);

    // For a vector type, there is also scalarization overhead (only for
    // stores, loads are expanded using the vector-load + permutation sequence,
    // which is much less expensive).
    if (Src->isVectorTy() && Opcode == Instruction::Store)
      for (int i = 0, e = Src->getVectorNumElements(); i < e; ++i)
        Cost += getVectorInstrCost(Instruction::ExtractElement, Src, i);
  }

  return Cost;
}
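
// A worked example of the unaligned-access adjustment above (a sketch; the
// base cost itself comes from BaseT): storing a <4 x i32> with 4-byte
// alignment on a non-VSX subtarget legalizes to a single v4i32 operation
// (LT.first == 1, SrcBytes == 16), so the cost grows by
// LT.first * (16 / 4 - 1) = 3, plus one ExtractElement charge per lane for
// the scalarized store. An aligned access, or a v2f64/v2i64 access with VSX,
// returns the base cost unchanged.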