LLVM  4.0.0
SystemZTargetTransformInfo.cpp
Go to the documentation of this file.
1 //===-- SystemZTargetTransformInfo.cpp - SystemZ-specific TTI -------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements a TargetTransformInfo analysis pass specific to the
11 // SystemZ target machine. It uses the target's detailed information to provide
12 // more precise answers to certain TTI queries, while letting the target
13 // independent and default TTI implementations handle the rest.
14 //
15 //===----------------------------------------------------------------------===//
16 
#include "SystemZTargetTransformInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/CostTable.h"
#include "llvm/Target/TargetLowering.h"
24 using namespace llvm;
25 
26 #define DEBUG_TYPE "systemztti"
27 
28 //===----------------------------------------------------------------------===//
29 //
30 // SystemZ cost model.
31 //
32 //===----------------------------------------------------------------------===//
33 
35  assert(Ty->isIntegerTy());
36 
37  unsigned BitSize = Ty->getPrimitiveSizeInBits();
38  // There is no cost model for constants with a bit size of 0. Return TCC_Free
39  // here, so that constant hoisting will ignore this constant.
40  if (BitSize == 0)
41  return TTI::TCC_Free;
42  // No cost model for operations on integers larger than 64 bit implemented yet.
43  if (BitSize > 64)
44  return TTI::TCC_Free;
45 
46  if (Imm == 0)
47  return TTI::TCC_Free;
48 
49  if (Imm.getBitWidth() <= 64) {
50  // Constants loaded via lgfi.
51  if (isInt<32>(Imm.getSExtValue()))
52  return TTI::TCC_Basic;
53  // Constants loaded via llilf.
54  if (isUInt<32>(Imm.getZExtValue()))
55  return TTI::TCC_Basic;
56  // Constants loaded via llihf:
57  if ((Imm.getZExtValue() & 0xffffffff) == 0)
58  return TTI::TCC_Basic;
59 
60  return 2 * TTI::TCC_Basic;
61  }
62 
63  return 4 * TTI::TCC_Basic;
64 }
65 
66 int SystemZTTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx,
67  const APInt &Imm, Type *Ty) {
68  assert(Ty->isIntegerTy());
69 
70  unsigned BitSize = Ty->getPrimitiveSizeInBits();
71  // There is no cost model for constants with a bit size of 0. Return TCC_Free
72  // here, so that constant hoisting will ignore this constant.
73  if (BitSize == 0)
74  return TTI::TCC_Free;
75  // No cost model for operations on integers larger than 64 bit implemented yet.
76  if (BitSize > 64)
77  return TTI::TCC_Free;
78 
79  switch (Opcode) {
80  default:
81  return TTI::TCC_Free;
82  case Instruction::GetElementPtr:
83  // Always hoist the base address of a GetElementPtr. This prevents the
84  // creation of new constants for every base constant that gets constant
85  // folded with the offset.
86  if (Idx == 0)
87  return 2 * TTI::TCC_Basic;
88  return TTI::TCC_Free;
89  case Instruction::Store:
90  if (Idx == 0 && Imm.getBitWidth() <= 64) {
91  // Any 8-bit immediate store can by implemented via mvi.
92  if (BitSize == 8)
93  return TTI::TCC_Free;
94  // 16-bit immediate values can be stored via mvhhi/mvhi/mvghi.
95  if (isInt<16>(Imm.getSExtValue()))
96  return TTI::TCC_Free;
97  }
98  break;
99  case Instruction::ICmp:
100  if (Idx == 1 && Imm.getBitWidth() <= 64) {
101  // Comparisons against signed 32-bit immediates implemented via cgfi.
102  if (isInt<32>(Imm.getSExtValue()))
103  return TTI::TCC_Free;
104  // Comparisons against unsigned 32-bit immediates implemented via clgfi.
105  if (isUInt<32>(Imm.getZExtValue()))
106  return TTI::TCC_Free;
107  }
108  break;
109  case Instruction::Add:
110  case Instruction::Sub:
111  if (Idx == 1 && Imm.getBitWidth() <= 64) {
112  // We use algfi/slgfi to add/subtract 32-bit unsigned immediates.
113  if (isUInt<32>(Imm.getZExtValue()))
114  return TTI::TCC_Free;
115  // Or their negation, by swapping addition vs. subtraction.
116  if (isUInt<32>(-Imm.getSExtValue()))
117  return TTI::TCC_Free;
118  }
119  break;
120  case Instruction::Mul:
121  if (Idx == 1 && Imm.getBitWidth() <= 64) {
122  // We use msgfi to multiply by 32-bit signed immediates.
123  if (isInt<32>(Imm.getSExtValue()))
124  return TTI::TCC_Free;
125  }
126  break;
127  case Instruction::Or:
128  case Instruction::Xor:
129  if (Idx == 1 && Imm.getBitWidth() <= 64) {
130  // Masks supported by oilf/xilf.
131  if (isUInt<32>(Imm.getZExtValue()))
132  return TTI::TCC_Free;
133  // Masks supported by oihf/xihf.
134  if ((Imm.getZExtValue() & 0xffffffff) == 0)
135  return TTI::TCC_Free;
136  }
137  break;
138  case Instruction::And:
139  if (Idx == 1 && Imm.getBitWidth() <= 64) {
140  // Any 32-bit AND operation can by implemented via nilf.
141  if (BitSize <= 32)
142  return TTI::TCC_Free;
143  // 64-bit masks supported by nilf.
144  if (isUInt<32>(~Imm.getZExtValue()))
145  return TTI::TCC_Free;
146  // 64-bit masks supported by nilh.
147  if ((Imm.getZExtValue() & 0xffffffff) == 0xffffffff)
148  return TTI::TCC_Free;
149  // Some 64-bit AND operations can be implemented via risbg.
150  const SystemZInstrInfo *TII = ST->getInstrInfo();
151  unsigned Start, End;
152  if (TII->isRxSBGMask(Imm.getZExtValue(), BitSize, Start, End))
153  return TTI::TCC_Free;
154  }
155  break;
156  case Instruction::Shl:
157  case Instruction::LShr:
158  case Instruction::AShr:
159  // Always return TCC_Free for the shift value of a shift instruction.
160  if (Idx == 1)
161  return TTI::TCC_Free;
162  break;
163  case Instruction::UDiv:
164  case Instruction::SDiv:
165  case Instruction::URem:
166  case Instruction::SRem:
167  case Instruction::Trunc:
168  case Instruction::ZExt:
169  case Instruction::SExt:
170  case Instruction::IntToPtr:
171  case Instruction::PtrToInt:
172  case Instruction::BitCast:
173  case Instruction::PHI:
174  case Instruction::Call:
175  case Instruction::Select:
176  case Instruction::Ret:
177  case Instruction::Load:
178  break;
179  }
180 
181  return SystemZTTIImpl::getIntImmCost(Imm, Ty);
182 }
183 
185  const APInt &Imm, Type *Ty) {
186  assert(Ty->isIntegerTy());
187 
188  unsigned BitSize = Ty->getPrimitiveSizeInBits();
189  // There is no cost model for constants with a bit size of 0. Return TCC_Free
190  // here, so that constant hoisting will ignore this constant.
191  if (BitSize == 0)
192  return TTI::TCC_Free;
193  // No cost model for operations on integers larger than 64 bit implemented yet.
194  if (BitSize > 64)
195  return TTI::TCC_Free;
196 
197  switch (IID) {
198  default:
199  return TTI::TCC_Free;
200  case Intrinsic::sadd_with_overflow:
201  case Intrinsic::uadd_with_overflow:
202  case Intrinsic::ssub_with_overflow:
203  case Intrinsic::usub_with_overflow:
204  // These get expanded to include a normal addition/subtraction.
205  if (Idx == 1 && Imm.getBitWidth() <= 64) {
206  if (isUInt<32>(Imm.getZExtValue()))
207  return TTI::TCC_Free;
208  if (isUInt<32>(-Imm.getSExtValue()))
209  return TTI::TCC_Free;
210  }
211  break;
212  case Intrinsic::smul_with_overflow:
213  case Intrinsic::umul_with_overflow:
214  // These get expanded to include a normal multiplication.
215  if (Idx == 1 && Imm.getBitWidth() <= 64) {
216  if (isInt<32>(Imm.getSExtValue()))
217  return TTI::TCC_Free;
218  }
219  break;
220  case Intrinsic::experimental_stackmap:
221  if ((Idx < 2) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
222  return TTI::TCC_Free;
223  break;
224  case Intrinsic::experimental_patchpoint_void:
225  case Intrinsic::experimental_patchpoint_i64:
226  if ((Idx < 4) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
227  return TTI::TCC_Free;
228  break;
229  }
230  return SystemZTTIImpl::getIntImmCost(Imm, Ty);
231 }
232 
235  assert(isPowerOf2_32(TyWidth) && "Type width must be power of 2");
236  if (ST->hasPopulationCount() && TyWidth <= 64)
237  return TTI::PSK_FastHardware;
238  return TTI::PSK_Software;
239 }
240 
243  // Find out if L contains a call, what the machine instruction count
244  // estimate is, and how many stores there are.
245  bool HasCall = false;
246  unsigned NumStores = 0;
247  for (auto &BB : L->blocks())
248  for (auto &I : *BB) {
249  if (isa<CallInst>(&I) || isa<InvokeInst>(&I)) {
250  ImmutableCallSite CS(&I);
251  if (const Function *F = CS.getCalledFunction()) {
252  if (isLoweredToCall(F))
253  HasCall = true;
254  if (F->getIntrinsicID() == Intrinsic::memcpy ||
255  F->getIntrinsicID() == Intrinsic::memset)
256  NumStores++;
257  } else { // indirect call.
258  HasCall = true;
259  }
260  }
261  if (isa<StoreInst>(&I)) {
262  NumStores++;
263  Type *MemAccessTy = I.getOperand(0)->getType();
264  if((MemAccessTy->isIntegerTy() || MemAccessTy->isFloatingPointTy()) &&
265  (getDataLayout().getTypeSizeInBits(MemAccessTy) == 128))
266  NumStores++; // 128 bit fp/int stores get split.
267  }
268  }
269 
270  // The z13 processor will run out of store tags if too many stores
271  // are fed into it too quickly. Therefore make sure there are not
272  // too many stores in the resulting unrolled loop.
273  unsigned const Max = (NumStores ? (12 / NumStores) : UINT_MAX);
274 
275  if (HasCall) {
276  // Only allow full unrolling if loop has any calls.
277  UP.FullUnrollMaxCount = Max;
278  UP.MaxCount = 1;
279  return;
280  }
281 
282  UP.MaxCount = Max;
283  if (UP.MaxCount <= 1)
284  return;
285 
286  // Allow partial and runtime trip count unrolling.
287  UP.Partial = UP.Runtime = true;
288 
289  UP.PartialThreshold = 75;
291 
292  // Allow expensive instructions in the pre-header of the loop.
293  UP.AllowExpensiveTripCount = true;
294 
295  UP.Force = true;
296 }
297 
299  if (!Vector)
300  // Discount the stack pointer. Also leave out %r0, since it can't
301  // be used in an address.
302  return 14;
303  if (ST->hasVector())
304  return 32;
305  return 0;
306 }
307 
308 unsigned SystemZTTIImpl::getRegisterBitWidth(bool Vector) {
309  if (!Vector)
310  return 64;
311  if (ST->hasVector())
312  return 128;
313  return 0;
314 }
315 
MachineLoop * L
constexpr bool isUInt< 32 >(uint64_t x)
Definition: MathExtras.h:315
bool Partial
Allow partial unrolling (unrolling of loops to expand the size of the loop body, not only to eliminat...
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1309
Cost tables and simple lookup functions.
unsigned PartialThreshold
The cost threshold for the unrolled loop, like Threshold, but used for partial/runtime unrolling (set...
bool Force
Apply loop unroll on any kind of loop (mainly to loops that fail runtime unrolling).
constexpr bool isInt< 16 >(int64_t x)
Definition: MathExtras.h:271
const HexagonInstrInfo * TII
TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth)
bool AllowExpensiveTripCount
Allow emitting expensive instructions (such as divisions) when computing the trip count of a loop for...
unsigned FullUnrollMaxCount
Set the maximum unrolling factor for full unrolling.
#define F(x, y, z)
Definition: MD5.cpp:51
unsigned getNumberOfRegisters(bool Vector)
PopcntSupportKind
Flags indicating the kind of support for population count.
bool isFloatingPointTy() const
Return true if this is one of the six floating-point types.
Definition: Type.h:160
void getUnrollingPreferences(Loop *L, TTI::UnrollingPreferences &UP)
iterator_range< block_iterator > blocks() const
Definition: LoopInfo.h:143
constexpr bool isPowerOf2_32(uint32_t Value)
isPowerOf2_32 - This function returns true if the argument is a power of two > 0. ...
Definition: MathExtras.h:399
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:45
int64_t getSExtValue() const
Get sign extended value.
Definition: APInt.h:1321
APInt Or(const APInt &LHS, const APInt &RHS)
Bitwise OR function for APInt.
Definition: APInt.h:1947
APInt Xor(const APInt &LHS, const APInt &RHS)
Bitwise XOR function for APInt.
Definition: APInt.h:1952
Expected to fold away in lowering.
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1255
static const unsigned End
This file provides a helper that implements much of the TTI interface in terms of the target-independ...
bool isRxSBGMask(uint64_t Mask, unsigned BitSize, unsigned &Start, unsigned &End) const
const SystemZInstrInfo * getInstrInfo() const override
bool hasPopulationCount() const
constexpr bool isInt< 32 >(int64_t x)
Definition: MathExtras.h:274
unsigned getRegisterBitWidth(bool Vector)
int getIntImmCost(const APInt &Imm, Type *Ty)
unsigned DefaultUnrollRuntimeCount
Default unroll count for loops with run-time trip count.
bool Runtime
Allow runtime unrolling (unrolling of loops to expand the size of the loop body even when the number ...
Class for arbitrary precision integers.
Definition: APInt.h:77
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:195
const DataLayout & getDataLayout() const
APInt And(const APInt &LHS, const APInt &RHS)
Bitwise AND function for APInt.
Definition: APInt.h:1942
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:368
Parameters that control the generic loop unrolling transformation.
ImmutableCallSite - establish a view to a call site for examination.
Definition: CallSite.h:665
#define I(x, y, z)
Definition: MD5.cpp:54
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
The cost of a typical 'add' instruction.
FunTy * getCalledFunction() const
getCalledFunction - Return the function being called if this is a direct call, otherwise return null ...
Definition: CallSite.h:110
unsigned getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition: Type.cpp:108
uint64_t getTypeSizeInBits(Type *Ty) const
Size examples:
Definition: DataLayout.h:533
This pass exposes codegen information to IR-level passes.
This file describes how to lower LLVM code to machine code.