File: | llvm/include/llvm/Analysis/TargetTransformInfoImpl.h |
Warning: | line 81, column 25 Called C++ object pointer is null |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | //===- AMDGPUTargetTransformInfo.cpp - AMDGPU specific TTI pass -----------===// | ||||||||||||||
2 | // | ||||||||||||||
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||||||||||||||
4 | // See https://llvm.org/LICENSE.txt for license information. | ||||||||||||||
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||||||||||||||
6 | // | ||||||||||||||
7 | //===----------------------------------------------------------------------===// | ||||||||||||||
8 | // | ||||||||||||||
9 | // \file | ||||||||||||||
10 | // This file implements a TargetTransformInfo analysis pass specific to the | ||||||||||||||
11 | // AMDGPU target machine. It uses the target's detailed information to provide | ||||||||||||||
12 | // more precise answers to certain TTI queries, while letting the target | ||||||||||||||
13 | // independent and default TTI implementations handle the rest. | ||||||||||||||
14 | // | ||||||||||||||
15 | //===----------------------------------------------------------------------===// | ||||||||||||||
16 | |||||||||||||||
#include "AMDGPUTargetTransformInfo.h"
#include "AMDGPUSubtarget.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/MC/SubtargetFeature.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <limits>
#include <utility>
52 | |||||||||||||||
53 | using namespace llvm; | ||||||||||||||
54 | |||||||||||||||
// Debug type tag consumed by the LLVM_DEBUG statements in this file.
#define DEBUG_TYPE "AMDGPUtti"
56 | |||||||||||||||
57 | static cl::opt<unsigned> UnrollThresholdPrivate( | ||||||||||||||
58 | "amdgpu-unroll-threshold-private", | ||||||||||||||
59 | cl::desc("Unroll threshold for AMDGPU if private memory used in a loop"), | ||||||||||||||
60 | cl::init(2700), cl::Hidden); | ||||||||||||||
61 | |||||||||||||||
62 | static cl::opt<unsigned> UnrollThresholdLocal( | ||||||||||||||
63 | "amdgpu-unroll-threshold-local", | ||||||||||||||
64 | cl::desc("Unroll threshold for AMDGPU if local memory used in a loop"), | ||||||||||||||
65 | cl::init(1000), cl::Hidden); | ||||||||||||||
66 | |||||||||||||||
67 | static cl::opt<unsigned> UnrollThresholdIf( | ||||||||||||||
68 | "amdgpu-unroll-threshold-if", | ||||||||||||||
69 | cl::desc("Unroll threshold increment for AMDGPU for each if statement inside loop"), | ||||||||||||||
70 | cl::init(150), cl::Hidden); | ||||||||||||||
71 | |||||||||||||||
72 | static cl::opt<bool> UnrollRuntimeLocal( | ||||||||||||||
73 | "amdgpu-unroll-runtime-local", | ||||||||||||||
74 | cl::desc("Allow runtime unroll for AMDGPU if local memory used in a loop"), | ||||||||||||||
75 | cl::init(true), cl::Hidden); | ||||||||||||||
76 | |||||||||||||||
77 | static cl::opt<bool> UseLegacyDA( | ||||||||||||||
78 | "amdgpu-use-legacy-divergence-analysis", | ||||||||||||||
79 | cl::desc("Enable legacy divergence analysis for AMDGPU"), | ||||||||||||||
80 | cl::init(false), cl::Hidden); | ||||||||||||||
81 | |||||||||||||||
82 | static bool dependsOnLocalPhi(const Loop *L, const Value *Cond, | ||||||||||||||
83 | unsigned Depth = 0) { | ||||||||||||||
84 | const Instruction *I = dyn_cast<Instruction>(Cond); | ||||||||||||||
85 | if (!I) | ||||||||||||||
86 | return false; | ||||||||||||||
87 | |||||||||||||||
88 | for (const Value *V : I->operand_values()) { | ||||||||||||||
89 | if (!L->contains(I)) | ||||||||||||||
90 | continue; | ||||||||||||||
91 | if (const PHINode *PHI = dyn_cast<PHINode>(V)) { | ||||||||||||||
92 | if (llvm::none_of(L->getSubLoops(), [PHI](const Loop* SubLoop) { | ||||||||||||||
93 | return SubLoop->contains(PHI); })) | ||||||||||||||
94 | return true; | ||||||||||||||
95 | } else if (Depth < 10 && dependsOnLocalPhi(L, V, Depth+1)) | ||||||||||||||
96 | return true; | ||||||||||||||
97 | } | ||||||||||||||
98 | return false; | ||||||||||||||
99 | } | ||||||||||||||
100 | |||||||||||||||
101 | void AMDGPUTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE, | ||||||||||||||
102 | TTI::UnrollingPreferences &UP) { | ||||||||||||||
103 | const Function &F = *L->getHeader()->getParent(); | ||||||||||||||
104 | UP.Threshold = AMDGPU::getIntegerAttribute(F, "amdgpu-unroll-threshold", 300); | ||||||||||||||
105 | UP.MaxCount = std::numeric_limits<unsigned>::max(); | ||||||||||||||
106 | UP.Partial = true; | ||||||||||||||
107 | |||||||||||||||
108 | // TODO: Do we want runtime unrolling? | ||||||||||||||
109 | |||||||||||||||
110 | // Maximum alloca size than can fit registers. Reserve 16 registers. | ||||||||||||||
111 | const unsigned MaxAlloca = (256 - 16) * 4; | ||||||||||||||
112 | unsigned ThresholdPrivate = UnrollThresholdPrivate; | ||||||||||||||
113 | unsigned ThresholdLocal = UnrollThresholdLocal; | ||||||||||||||
114 | unsigned MaxBoost = std::max(ThresholdPrivate, ThresholdLocal); | ||||||||||||||
115 | for (const BasicBlock *BB : L->getBlocks()) { | ||||||||||||||
116 | const DataLayout &DL = BB->getModule()->getDataLayout(); | ||||||||||||||
117 | unsigned LocalGEPsSeen = 0; | ||||||||||||||
118 | |||||||||||||||
119 | if (llvm::any_of(L->getSubLoops(), [BB](const Loop* SubLoop) { | ||||||||||||||
120 | return SubLoop->contains(BB); })) | ||||||||||||||
121 | continue; // Block belongs to an inner loop. | ||||||||||||||
122 | |||||||||||||||
123 | for (const Instruction &I : *BB) { | ||||||||||||||
124 | // Unroll a loop which contains an "if" statement whose condition | ||||||||||||||
125 | // defined by a PHI belonging to the loop. This may help to eliminate | ||||||||||||||
126 | // if region and potentially even PHI itself, saving on both divergence | ||||||||||||||
127 | // and registers used for the PHI. | ||||||||||||||
128 | // Add a small bonus for each of such "if" statements. | ||||||||||||||
129 | if (const BranchInst *Br = dyn_cast<BranchInst>(&I)) { | ||||||||||||||
130 | if (UP.Threshold < MaxBoost && Br->isConditional()) { | ||||||||||||||
131 | BasicBlock *Succ0 = Br->getSuccessor(0); | ||||||||||||||
132 | BasicBlock *Succ1 = Br->getSuccessor(1); | ||||||||||||||
133 | if ((L->contains(Succ0) && L->isLoopExiting(Succ0)) || | ||||||||||||||
134 | (L->contains(Succ1) && L->isLoopExiting(Succ1))) | ||||||||||||||
135 | continue; | ||||||||||||||
136 | if (dependsOnLocalPhi(L, Br->getCondition())) { | ||||||||||||||
137 | UP.Threshold += UnrollThresholdIf; | ||||||||||||||
138 | LLVM_DEBUG(dbgs() << "Set unroll threshold " << UP.Thresholddo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("AMDGPUtti")) { dbgs() << "Set unroll threshold " << UP.Threshold << " for loop:\n" << *L << " due to " << *Br << '\n'; } } while (false) | ||||||||||||||
139 | << " for loop:\n"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("AMDGPUtti")) { dbgs() << "Set unroll threshold " << UP.Threshold << " for loop:\n" << *L << " due to " << *Br << '\n'; } } while (false) | ||||||||||||||
140 | << *L << " due to " << *Br << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("AMDGPUtti")) { dbgs() << "Set unroll threshold " << UP.Threshold << " for loop:\n" << *L << " due to " << *Br << '\n'; } } while (false); | ||||||||||||||
141 | if (UP.Threshold >= MaxBoost) | ||||||||||||||
142 | return; | ||||||||||||||
143 | } | ||||||||||||||
144 | } | ||||||||||||||
145 | continue; | ||||||||||||||
146 | } | ||||||||||||||
147 | |||||||||||||||
148 | const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&I); | ||||||||||||||
149 | if (!GEP) | ||||||||||||||
150 | continue; | ||||||||||||||
151 | |||||||||||||||
152 | unsigned AS = GEP->getAddressSpace(); | ||||||||||||||
153 | unsigned Threshold = 0; | ||||||||||||||
154 | if (AS == AMDGPUAS::PRIVATE_ADDRESS) | ||||||||||||||
155 | Threshold = ThresholdPrivate; | ||||||||||||||
156 | else if (AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS) | ||||||||||||||
157 | Threshold = ThresholdLocal; | ||||||||||||||
158 | else | ||||||||||||||
159 | continue; | ||||||||||||||
160 | |||||||||||||||
161 | if (UP.Threshold >= Threshold) | ||||||||||||||
162 | continue; | ||||||||||||||
163 | |||||||||||||||
164 | if (AS == AMDGPUAS::PRIVATE_ADDRESS) { | ||||||||||||||
165 | const Value *Ptr = GEP->getPointerOperand(); | ||||||||||||||
166 | const AllocaInst *Alloca = | ||||||||||||||
167 | dyn_cast<AllocaInst>(GetUnderlyingObject(Ptr, DL)); | ||||||||||||||
168 | if (!Alloca || !Alloca->isStaticAlloca()) | ||||||||||||||
169 | continue; | ||||||||||||||
170 | Type *Ty = Alloca->getAllocatedType(); | ||||||||||||||
171 | unsigned AllocaSize = Ty->isSized() ? DL.getTypeAllocSize(Ty) : 0; | ||||||||||||||
172 | if (AllocaSize > MaxAlloca) | ||||||||||||||
173 | continue; | ||||||||||||||
174 | } else if (AS == AMDGPUAS::LOCAL_ADDRESS || | ||||||||||||||
175 | AS == AMDGPUAS::REGION_ADDRESS) { | ||||||||||||||
176 | LocalGEPsSeen++; | ||||||||||||||
177 | // Inhibit unroll for local memory if we have seen addressing not to | ||||||||||||||
178 | // a variable, most likely we will be unable to combine it. | ||||||||||||||
179 | // Do not unroll too deep inner loops for local memory to give a chance | ||||||||||||||
180 | // to unroll an outer loop for a more important reason. | ||||||||||||||
181 | if (LocalGEPsSeen > 1 || L->getLoopDepth() > 2 || | ||||||||||||||
182 | (!isa<GlobalVariable>(GEP->getPointerOperand()) && | ||||||||||||||
183 | !isa<Argument>(GEP->getPointerOperand()))) | ||||||||||||||
184 | continue; | ||||||||||||||
185 | LLVM_DEBUG(dbgs() << "Allow unroll runtime for loop:\n"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("AMDGPUtti")) { dbgs() << "Allow unroll runtime for loop:\n" << *L << " due to LDS use.\n"; } } while (false) | ||||||||||||||
186 | << *L << " due to LDS use.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("AMDGPUtti")) { dbgs() << "Allow unroll runtime for loop:\n" << *L << " due to LDS use.\n"; } } while (false); | ||||||||||||||
187 | UP.Runtime = UnrollRuntimeLocal; | ||||||||||||||
188 | } | ||||||||||||||
189 | |||||||||||||||
190 | // Check if GEP depends on a value defined by this loop itself. | ||||||||||||||
191 | bool HasLoopDef = false; | ||||||||||||||
192 | for (const Value *Op : GEP->operands()) { | ||||||||||||||
193 | const Instruction *Inst = dyn_cast<Instruction>(Op); | ||||||||||||||
194 | if (!Inst || L->isLoopInvariant(Op)) | ||||||||||||||
195 | continue; | ||||||||||||||
196 | |||||||||||||||
197 | if (llvm::any_of(L->getSubLoops(), [Inst](const Loop* SubLoop) { | ||||||||||||||
198 | return SubLoop->contains(Inst); })) | ||||||||||||||
199 | continue; | ||||||||||||||
200 | HasLoopDef = true; | ||||||||||||||
201 | break; | ||||||||||||||
202 | } | ||||||||||||||
203 | if (!HasLoopDef) | ||||||||||||||
204 | continue; | ||||||||||||||
205 | |||||||||||||||
206 | // We want to do whatever we can to limit the number of alloca | ||||||||||||||
207 | // instructions that make it through to the code generator. allocas | ||||||||||||||
208 | // require us to use indirect addressing, which is slow and prone to | ||||||||||||||
209 | // compiler bugs. If this loop does an address calculation on an | ||||||||||||||
210 | // alloca ptr, then we want to use a higher than normal loop unroll | ||||||||||||||
211 | // threshold. This will give SROA a better chance to eliminate these | ||||||||||||||
212 | // allocas. | ||||||||||||||
213 | // | ||||||||||||||
214 | // We also want to have more unrolling for local memory to let ds | ||||||||||||||
215 | // instructions with different offsets combine. | ||||||||||||||
216 | // | ||||||||||||||
217 | // Don't use the maximum allowed value here as it will make some | ||||||||||||||
218 | // programs way too big. | ||||||||||||||
219 | UP.Threshold = Threshold; | ||||||||||||||
220 | LLVM_DEBUG(dbgs() << "Set unroll threshold " << Thresholddo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("AMDGPUtti")) { dbgs() << "Set unroll threshold " << Threshold << " for loop:\n" << *L << " due to " << *GEP << '\n'; } } while (false) | ||||||||||||||
221 | << " for loop:\n"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("AMDGPUtti")) { dbgs() << "Set unroll threshold " << Threshold << " for loop:\n" << *L << " due to " << *GEP << '\n'; } } while (false) | ||||||||||||||
222 | << *L << " due to " << *GEP << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("AMDGPUtti")) { dbgs() << "Set unroll threshold " << Threshold << " for loop:\n" << *L << " due to " << *GEP << '\n'; } } while (false); | ||||||||||||||
223 | if (UP.Threshold >= MaxBoost) | ||||||||||||||
224 | return; | ||||||||||||||
225 | } | ||||||||||||||
226 | } | ||||||||||||||
227 | } | ||||||||||||||
228 | |||||||||||||||
229 | unsigned GCNTTIImpl::getHardwareNumberOfRegisters(bool Vec) const { | ||||||||||||||
230 | // The concept of vector registers doesn't really exist. Some packed vector | ||||||||||||||
231 | // operations operate on the normal 32-bit registers. | ||||||||||||||
232 | return 256; | ||||||||||||||
233 | } | ||||||||||||||
234 | |||||||||||||||
235 | unsigned GCNTTIImpl::getNumberOfRegisters(bool Vec) const { | ||||||||||||||
236 | // This is really the number of registers to fill when vectorizing / | ||||||||||||||
237 | // interleaving loops, so we lie to avoid trying to use all registers. | ||||||||||||||
238 | return getHardwareNumberOfRegisters(Vec) >> 3; | ||||||||||||||
239 | } | ||||||||||||||
240 | |||||||||||||||
241 | unsigned GCNTTIImpl::getRegisterBitWidth(bool Vector) const { | ||||||||||||||
242 | return 32; | ||||||||||||||
243 | } | ||||||||||||||
244 | |||||||||||||||
245 | unsigned GCNTTIImpl::getMinVectorRegisterBitWidth() const { | ||||||||||||||
246 | return 32; | ||||||||||||||
247 | } | ||||||||||||||
248 | |||||||||||||||
249 | unsigned GCNTTIImpl::getLoadVectorFactor(unsigned VF, unsigned LoadSize, | ||||||||||||||
250 | unsigned ChainSizeInBytes, | ||||||||||||||
251 | VectorType *VecTy) const { | ||||||||||||||
252 | unsigned VecRegBitWidth = VF * LoadSize; | ||||||||||||||
253 | if (VecRegBitWidth > 128 && VecTy->getScalarSizeInBits() < 32) | ||||||||||||||
254 | // TODO: Support element-size less than 32bit? | ||||||||||||||
255 | return 128 / LoadSize; | ||||||||||||||
256 | |||||||||||||||
257 | return VF; | ||||||||||||||
258 | } | ||||||||||||||
259 | |||||||||||||||
260 | unsigned GCNTTIImpl::getStoreVectorFactor(unsigned VF, unsigned StoreSize, | ||||||||||||||
261 | unsigned ChainSizeInBytes, | ||||||||||||||
262 | VectorType *VecTy) const { | ||||||||||||||
263 | unsigned VecRegBitWidth = VF * StoreSize; | ||||||||||||||
264 | if (VecRegBitWidth > 128) | ||||||||||||||
265 | return 128 / StoreSize; | ||||||||||||||
266 | |||||||||||||||
267 | return VF; | ||||||||||||||
268 | } | ||||||||||||||
269 | |||||||||||||||
270 | unsigned GCNTTIImpl::getLoadStoreVecRegBitWidth(unsigned AddrSpace) const { | ||||||||||||||
271 | if (AddrSpace == AMDGPUAS::GLOBAL_ADDRESS || | ||||||||||||||
272 | AddrSpace == AMDGPUAS::CONSTANT_ADDRESS || | ||||||||||||||
273 | AddrSpace == AMDGPUAS::CONSTANT_ADDRESS_32BIT || | ||||||||||||||
274 | AddrSpace == AMDGPUAS::BUFFER_FAT_POINTER) { | ||||||||||||||
275 | return 512; | ||||||||||||||
276 | } | ||||||||||||||
277 | |||||||||||||||
278 | if (AddrSpace == AMDGPUAS::FLAT_ADDRESS || | ||||||||||||||
279 | AddrSpace == AMDGPUAS::LOCAL_ADDRESS || | ||||||||||||||
280 | AddrSpace == AMDGPUAS::REGION_ADDRESS) | ||||||||||||||
281 | return 128; | ||||||||||||||
282 | |||||||||||||||
283 | if (AddrSpace == AMDGPUAS::PRIVATE_ADDRESS) | ||||||||||||||
284 | return 8 * ST->getMaxPrivateElementSize(); | ||||||||||||||
285 | |||||||||||||||
286 | llvm_unreachable("unhandled address space")::llvm::llvm_unreachable_internal("unhandled address space", "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp" , 286); | ||||||||||||||
287 | } | ||||||||||||||
288 | |||||||||||||||
289 | bool GCNTTIImpl::isLegalToVectorizeMemChain(unsigned ChainSizeInBytes, | ||||||||||||||
290 | unsigned Alignment, | ||||||||||||||
291 | unsigned AddrSpace) const { | ||||||||||||||
292 | // We allow vectorization of flat stores, even though we may need to decompose | ||||||||||||||
293 | // them later if they may access private memory. We don't have enough context | ||||||||||||||
294 | // here, and legalization can handle it. | ||||||||||||||
295 | if (AddrSpace == AMDGPUAS::PRIVATE_ADDRESS) { | ||||||||||||||
296 | return (Alignment >= 4 || ST->hasUnalignedScratchAccess()) && | ||||||||||||||
297 | ChainSizeInBytes <= ST->getMaxPrivateElementSize(); | ||||||||||||||
298 | } | ||||||||||||||
299 | return true; | ||||||||||||||
300 | } | ||||||||||||||
301 | |||||||||||||||
302 | bool GCNTTIImpl::isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, | ||||||||||||||
303 | unsigned Alignment, | ||||||||||||||
304 | unsigned AddrSpace) const { | ||||||||||||||
305 | return isLegalToVectorizeMemChain(ChainSizeInBytes, Alignment, AddrSpace); | ||||||||||||||
306 | } | ||||||||||||||
307 | |||||||||||||||
308 | bool GCNTTIImpl::isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, | ||||||||||||||
309 | unsigned Alignment, | ||||||||||||||
310 | unsigned AddrSpace) const { | ||||||||||||||
311 | return isLegalToVectorizeMemChain(ChainSizeInBytes, Alignment, AddrSpace); | ||||||||||||||
312 | } | ||||||||||||||
313 | |||||||||||||||
314 | unsigned GCNTTIImpl::getMaxInterleaveFactor(unsigned VF) { | ||||||||||||||
315 | // Disable unrolling if the loop is not vectorized. | ||||||||||||||
316 | // TODO: Enable this again. | ||||||||||||||
317 | if (VF == 1) | ||||||||||||||
318 | return 1; | ||||||||||||||
319 | |||||||||||||||
320 | return 8; | ||||||||||||||
321 | } | ||||||||||||||
322 | |||||||||||||||
323 | bool GCNTTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst, | ||||||||||||||
324 | MemIntrinsicInfo &Info) const { | ||||||||||||||
325 | switch (Inst->getIntrinsicID()) { | ||||||||||||||
326 | case Intrinsic::amdgcn_atomic_inc: | ||||||||||||||
327 | case Intrinsic::amdgcn_atomic_dec: | ||||||||||||||
328 | case Intrinsic::amdgcn_ds_ordered_add: | ||||||||||||||
329 | case Intrinsic::amdgcn_ds_ordered_swap: | ||||||||||||||
330 | case Intrinsic::amdgcn_ds_fadd: | ||||||||||||||
331 | case Intrinsic::amdgcn_ds_fmin: | ||||||||||||||
332 | case Intrinsic::amdgcn_ds_fmax: { | ||||||||||||||
333 | auto *Ordering = dyn_cast<ConstantInt>(Inst->getArgOperand(2)); | ||||||||||||||
334 | auto *Volatile = dyn_cast<ConstantInt>(Inst->getArgOperand(4)); | ||||||||||||||
335 | if (!Ordering || !Volatile) | ||||||||||||||
336 | return false; // Invalid. | ||||||||||||||
337 | |||||||||||||||
338 | unsigned OrderingVal = Ordering->getZExtValue(); | ||||||||||||||
339 | if (OrderingVal > static_cast<unsigned>(AtomicOrdering::SequentiallyConsistent)) | ||||||||||||||
340 | return false; | ||||||||||||||
341 | |||||||||||||||
342 | Info.PtrVal = Inst->getArgOperand(0); | ||||||||||||||
343 | Info.Ordering = static_cast<AtomicOrdering>(OrderingVal); | ||||||||||||||
344 | Info.ReadMem = true; | ||||||||||||||
345 | Info.WriteMem = true; | ||||||||||||||
346 | Info.IsVolatile = !Volatile->isNullValue(); | ||||||||||||||
347 | return true; | ||||||||||||||
348 | } | ||||||||||||||
349 | default: | ||||||||||||||
350 | return false; | ||||||||||||||
351 | } | ||||||||||||||
352 | } | ||||||||||||||
353 | |||||||||||||||
354 | int GCNTTIImpl::getArithmeticInstrCost(unsigned Opcode, Type *Ty, | ||||||||||||||
355 | TTI::OperandValueKind Opd1Info, | ||||||||||||||
356 | TTI::OperandValueKind Opd2Info, | ||||||||||||||
357 | TTI::OperandValueProperties Opd1PropInfo, | ||||||||||||||
358 | TTI::OperandValueProperties Opd2PropInfo, | ||||||||||||||
359 | ArrayRef<const Value *> Args, | ||||||||||||||
360 | const Instruction *CxtI) { | ||||||||||||||
361 | EVT OrigTy = TLI->getValueType(DL, Ty); | ||||||||||||||
362 | if (!OrigTy.isSimple()) { | ||||||||||||||
363 | return BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info, | ||||||||||||||
364 | Opd1PropInfo, Opd2PropInfo); | ||||||||||||||
365 | } | ||||||||||||||
366 | |||||||||||||||
367 | // Legalize the type. | ||||||||||||||
368 | std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty); | ||||||||||||||
369 | int ISD = TLI->InstructionOpcodeToISD(Opcode); | ||||||||||||||
370 | |||||||||||||||
371 | // Because we don't have any legal vector operations, but the legal types, we | ||||||||||||||
372 | // need to account for split vectors. | ||||||||||||||
373 | unsigned NElts = LT.second.isVector() ? | ||||||||||||||
374 | LT.second.getVectorNumElements() : 1; | ||||||||||||||
375 | |||||||||||||||
376 | MVT::SimpleValueType SLT = LT.second.getScalarType().SimpleTy; | ||||||||||||||
377 | |||||||||||||||
378 | switch (ISD) { | ||||||||||||||
379 | case ISD::SHL: | ||||||||||||||
380 | case ISD::SRL: | ||||||||||||||
381 | case ISD::SRA: | ||||||||||||||
382 | if (SLT == MVT::i64) | ||||||||||||||
383 | return get64BitInstrCost() * LT.first * NElts; | ||||||||||||||
384 | |||||||||||||||
385 | if (ST->has16BitInsts() && SLT == MVT::i16) | ||||||||||||||
386 | NElts = (NElts + 1) / 2; | ||||||||||||||
387 | |||||||||||||||
388 | // i32 | ||||||||||||||
389 | return getFullRateInstrCost() * LT.first * NElts; | ||||||||||||||
390 | case ISD::ADD: | ||||||||||||||
391 | case ISD::SUB: | ||||||||||||||
392 | case ISD::AND: | ||||||||||||||
393 | case ISD::OR: | ||||||||||||||
394 | case ISD::XOR: | ||||||||||||||
395 | if (SLT == MVT::i64) { | ||||||||||||||
396 | // and, or and xor are typically split into 2 VALU instructions. | ||||||||||||||
397 | return 2 * getFullRateInstrCost() * LT.first * NElts; | ||||||||||||||
398 | } | ||||||||||||||
399 | |||||||||||||||
400 | if (ST->has16BitInsts() && SLT == MVT::i16) | ||||||||||||||
401 | NElts = (NElts + 1) / 2; | ||||||||||||||
402 | |||||||||||||||
403 | return LT.first * NElts * getFullRateInstrCost(); | ||||||||||||||
404 | case ISD::MUL: { | ||||||||||||||
405 | const int QuarterRateCost = getQuarterRateInstrCost(); | ||||||||||||||
406 | if (SLT == MVT::i64) { | ||||||||||||||
407 | const int FullRateCost = getFullRateInstrCost(); | ||||||||||||||
408 | return (4 * QuarterRateCost + (2 * 2) * FullRateCost) * LT.first * NElts; | ||||||||||||||
409 | } | ||||||||||||||
410 | |||||||||||||||
411 | if (ST->has16BitInsts() && SLT == MVT::i16) | ||||||||||||||
412 | NElts = (NElts + 1) / 2; | ||||||||||||||
413 | |||||||||||||||
414 | // i32 | ||||||||||||||
415 | return QuarterRateCost * NElts * LT.first; | ||||||||||||||
416 | } | ||||||||||||||
417 | case ISD::FADD: | ||||||||||||||
418 | case ISD::FSUB: | ||||||||||||||
419 | case ISD::FMUL: | ||||||||||||||
420 | if (SLT == MVT::f64) | ||||||||||||||
421 | return LT.first * NElts * get64BitInstrCost(); | ||||||||||||||
422 | |||||||||||||||
423 | if (ST->has16BitInsts() && SLT == MVT::f16) | ||||||||||||||
424 | NElts = (NElts + 1) / 2; | ||||||||||||||
425 | |||||||||||||||
426 | if (SLT == MVT::f32 || SLT == MVT::f16) | ||||||||||||||
427 | return LT.first * NElts * getFullRateInstrCost(); | ||||||||||||||
428 | break; | ||||||||||||||
429 | case ISD::FDIV: | ||||||||||||||
430 | case ISD::FREM: | ||||||||||||||
431 | // FIXME: frem should be handled separately. The fdiv in it is most of it, | ||||||||||||||
432 | // but the current lowering is also not entirely correct. | ||||||||||||||
433 | if (SLT == MVT::f64) { | ||||||||||||||
434 | int Cost = 4 * get64BitInstrCost() + 7 * getQuarterRateInstrCost(); | ||||||||||||||
435 | // Add cost of workaround. | ||||||||||||||
436 | if (!ST->hasUsableDivScaleConditionOutput()) | ||||||||||||||
437 | Cost += 3 * getFullRateInstrCost(); | ||||||||||||||
438 | |||||||||||||||
439 | return LT.first * Cost * NElts; | ||||||||||||||
440 | } | ||||||||||||||
441 | |||||||||||||||
442 | if (!Args.empty() && match(Args[0], PatternMatch::m_FPOne())) { | ||||||||||||||
443 | // TODO: This is more complicated, unsafe flags etc. | ||||||||||||||
444 | if ((SLT == MVT::f32 && !HasFP32Denormals) || | ||||||||||||||
445 | (SLT == MVT::f16 && ST->has16BitInsts())) { | ||||||||||||||
446 | return LT.first * getQuarterRateInstrCost() * NElts; | ||||||||||||||
447 | } | ||||||||||||||
448 | } | ||||||||||||||
449 | |||||||||||||||
450 | if (SLT == MVT::f16 && ST->has16BitInsts()) { | ||||||||||||||
451 | // 2 x v_cvt_f32_f16 | ||||||||||||||
452 | // f32 rcp | ||||||||||||||
453 | // f32 fmul | ||||||||||||||
454 | // v_cvt_f16_f32 | ||||||||||||||
455 | // f16 div_fixup | ||||||||||||||
456 | int Cost = 4 * getFullRateInstrCost() + 2 * getQuarterRateInstrCost(); | ||||||||||||||
457 | return LT.first * Cost * NElts; | ||||||||||||||
458 | } | ||||||||||||||
459 | |||||||||||||||
460 | if (SLT == MVT::f32 || SLT == MVT::f16) { | ||||||||||||||
461 | int Cost = 7 * getFullRateInstrCost() + 1 * getQuarterRateInstrCost(); | ||||||||||||||
462 | |||||||||||||||
463 | if (!HasFP32Denormals) { | ||||||||||||||
464 | // FP mode switches. | ||||||||||||||
465 | Cost += 2 * getFullRateInstrCost(); | ||||||||||||||
466 | } | ||||||||||||||
467 | |||||||||||||||
468 | return LT.first * NElts * Cost; | ||||||||||||||
469 | } | ||||||||||||||
470 | break; | ||||||||||||||
471 | default: | ||||||||||||||
472 | break; | ||||||||||||||
473 | } | ||||||||||||||
474 | |||||||||||||||
475 | return BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info, | ||||||||||||||
476 | Opd1PropInfo, Opd2PropInfo); | ||||||||||||||
477 | } | ||||||||||||||
478 | |||||||||||||||
479 | template <typename T> | ||||||||||||||
480 | int GCNTTIImpl::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, | ||||||||||||||
481 | ArrayRef<T *> Args, | ||||||||||||||
482 | FastMathFlags FMF, unsigned VF) { | ||||||||||||||
483 | if (ID != Intrinsic::fma) | ||||||||||||||
484 | return BaseT::getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF); | ||||||||||||||
485 | |||||||||||||||
486 | EVT OrigTy = TLI->getValueType(DL, RetTy); | ||||||||||||||
487 | if (!OrigTy.isSimple()) { | ||||||||||||||
488 | return BaseT::getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF); | ||||||||||||||
489 | } | ||||||||||||||
490 | |||||||||||||||
491 | // Legalize the type. | ||||||||||||||
492 | std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, RetTy); | ||||||||||||||
493 | |||||||||||||||
494 | unsigned NElts = LT.second.isVector() ? | ||||||||||||||
495 | LT.second.getVectorNumElements() : 1; | ||||||||||||||
496 | |||||||||||||||
497 | MVT::SimpleValueType SLT = LT.second.getScalarType().SimpleTy; | ||||||||||||||
498 | |||||||||||||||
499 | if (SLT == MVT::f64) | ||||||||||||||
500 | return LT.first * NElts * get64BitInstrCost(); | ||||||||||||||
501 | |||||||||||||||
502 | if (ST->has16BitInsts() && SLT == MVT::f16) | ||||||||||||||
503 | NElts = (NElts + 1) / 2; | ||||||||||||||
504 | |||||||||||||||
505 | return LT.first * NElts * (ST->hasFastFMAF32() ? getHalfRateInstrCost() | ||||||||||||||
506 | : getQuarterRateInstrCost()); | ||||||||||||||
507 | } | ||||||||||||||
508 | |||||||||||||||
509 | int GCNTTIImpl::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, | ||||||||||||||
510 | ArrayRef<Value*> Args, FastMathFlags FMF, | ||||||||||||||
511 | unsigned VF) { | ||||||||||||||
512 | return getIntrinsicInstrCost<Value>(ID, RetTy, Args, FMF, VF); | ||||||||||||||
513 | } | ||||||||||||||
514 | |||||||||||||||
515 | int GCNTTIImpl::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, | ||||||||||||||
516 | ArrayRef<Type *> Tys, FastMathFlags FMF, | ||||||||||||||
517 | unsigned ScalarizationCostPassed) { | ||||||||||||||
518 | return getIntrinsicInstrCost<Type>(ID, RetTy, Tys, FMF, | ||||||||||||||
519 | ScalarizationCostPassed); | ||||||||||||||
520 | } | ||||||||||||||
521 | |||||||||||||||
522 | unsigned GCNTTIImpl::getCFInstrCost(unsigned Opcode) { | ||||||||||||||
523 | // XXX - For some reason this isn't called for switch. | ||||||||||||||
524 | switch (Opcode) { | ||||||||||||||
525 | case Instruction::Br: | ||||||||||||||
526 | case Instruction::Ret: | ||||||||||||||
527 | return 10; | ||||||||||||||
528 | default: | ||||||||||||||
529 | return BaseT::getCFInstrCost(Opcode); | ||||||||||||||
530 | } | ||||||||||||||
531 | } | ||||||||||||||
532 | |||||||||||||||
533 | int GCNTTIImpl::getArithmeticReductionCost(unsigned Opcode, Type *Ty, | ||||||||||||||
534 | bool IsPairwise) { | ||||||||||||||
535 | EVT OrigTy = TLI->getValueType(DL, Ty); | ||||||||||||||
536 | |||||||||||||||
537 | // Computes cost on targets that have packed math instructions(which support | ||||||||||||||
538 | // 16-bit types only). | ||||||||||||||
539 | if (IsPairwise || | ||||||||||||||
540 | !ST->hasVOP3PInsts() || | ||||||||||||||
541 | OrigTy.getScalarSizeInBits() != 16) | ||||||||||||||
542 | return BaseT::getArithmeticReductionCost(Opcode, Ty, IsPairwise); | ||||||||||||||
543 | |||||||||||||||
544 | std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty); | ||||||||||||||
545 | return LT.first * getFullRateInstrCost(); | ||||||||||||||
546 | } | ||||||||||||||
547 | |||||||||||||||
548 | int GCNTTIImpl::getMinMaxReductionCost(Type *Ty, Type *CondTy, | ||||||||||||||
549 | bool IsPairwise, | ||||||||||||||
550 | bool IsUnsigned) { | ||||||||||||||
551 | EVT OrigTy = TLI->getValueType(DL, Ty); | ||||||||||||||
552 | |||||||||||||||
553 | // Computes cost on targets that have packed math instructions(which support | ||||||||||||||
554 | // 16-bit types only). | ||||||||||||||
555 | if (IsPairwise || | ||||||||||||||
556 | !ST->hasVOP3PInsts() || | ||||||||||||||
557 | OrigTy.getScalarSizeInBits() != 16) | ||||||||||||||
558 | return BaseT::getMinMaxReductionCost(Ty, CondTy, IsPairwise, IsUnsigned); | ||||||||||||||
559 | |||||||||||||||
560 | std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty); | ||||||||||||||
561 | return LT.first * getHalfRateInstrCost(); | ||||||||||||||
562 | } | ||||||||||||||
563 | |||||||||||||||
564 | int GCNTTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy, | ||||||||||||||
565 | unsigned Index) { | ||||||||||||||
566 | switch (Opcode) { | ||||||||||||||
567 | case Instruction::ExtractElement: | ||||||||||||||
568 | case Instruction::InsertElement: { | ||||||||||||||
569 | unsigned EltSize | ||||||||||||||
570 | = DL.getTypeSizeInBits(cast<VectorType>(ValTy)->getElementType()); | ||||||||||||||
571 | if (EltSize < 32) { | ||||||||||||||
572 | if (EltSize == 16 && Index == 0 && ST->has16BitInsts()) | ||||||||||||||
573 | return 0; | ||||||||||||||
574 | return BaseT::getVectorInstrCost(Opcode, ValTy, Index); | ||||||||||||||
575 | } | ||||||||||||||
576 | |||||||||||||||
577 | // Extracts are just reads of a subregister, so are free. Inserts are | ||||||||||||||
578 | // considered free because we don't want to have any cost for scalarizing | ||||||||||||||
579 | // operations, and we don't have to copy into a different register class. | ||||||||||||||
580 | |||||||||||||||
581 | // Dynamic indexing isn't free and is best avoided. | ||||||||||||||
582 | return Index == ~0u ? 2 : 0; | ||||||||||||||
583 | } | ||||||||||||||
584 | default: | ||||||||||||||
585 | return BaseT::getVectorInstrCost(Opcode, ValTy, Index); | ||||||||||||||
586 | } | ||||||||||||||
587 | } | ||||||||||||||
588 | |||||||||||||||
589 | static bool isArgPassedInSGPR(const Argument *A) { | ||||||||||||||
590 | const Function *F = A->getParent(); | ||||||||||||||
591 | |||||||||||||||
592 | // Arguments to compute shaders are never a source of divergence. | ||||||||||||||
593 | CallingConv::ID CC = F->getCallingConv(); | ||||||||||||||
594 | switch (CC) { | ||||||||||||||
595 | case CallingConv::AMDGPU_KERNEL: | ||||||||||||||
596 | case CallingConv::SPIR_KERNEL: | ||||||||||||||
597 | return true; | ||||||||||||||
598 | case CallingConv::AMDGPU_VS: | ||||||||||||||
599 | case CallingConv::AMDGPU_LS: | ||||||||||||||
600 | case CallingConv::AMDGPU_HS: | ||||||||||||||
601 | case CallingConv::AMDGPU_ES: | ||||||||||||||
602 | case CallingConv::AMDGPU_GS: | ||||||||||||||
603 | case CallingConv::AMDGPU_PS: | ||||||||||||||
604 | case CallingConv::AMDGPU_CS: | ||||||||||||||
605 | // For non-compute shaders, SGPR inputs are marked with either inreg or byval. | ||||||||||||||
606 | // Everything else is in VGPRs. | ||||||||||||||
607 | return F->getAttributes().hasParamAttribute(A->getArgNo(), Attribute::InReg) || | ||||||||||||||
608 | F->getAttributes().hasParamAttribute(A->getArgNo(), Attribute::ByVal); | ||||||||||||||
609 | default: | ||||||||||||||
610 | // TODO: Should calls support inreg for SGPR inputs? | ||||||||||||||
611 | return false; | ||||||||||||||
612 | } | ||||||||||||||
613 | } | ||||||||||||||
614 | |||||||||||||||
615 | /// Analyze if the results of inline asm are divergent. If \p Indices is empty, | ||||||||||||||
616 | /// this is analyzing the collective result of all output registers. Otherwise, | ||||||||||||||
617 | /// this is only querying a specific result index if this returns multiple | ||||||||||||||
618 | /// registers in a struct. | ||||||||||||||
619 | bool GCNTTIImpl::isInlineAsmSourceOfDivergence( | ||||||||||||||
620 | const CallInst *CI, ArrayRef<unsigned> Indices) const { | ||||||||||||||
621 | // TODO: Handle complex extract indices | ||||||||||||||
622 | if (Indices.size() > 1) | ||||||||||||||
623 | return true; | ||||||||||||||
624 | |||||||||||||||
625 | const DataLayout &DL = CI->getModule()->getDataLayout(); | ||||||||||||||
626 | const SIRegisterInfo *TRI = ST->getRegisterInfo(); | ||||||||||||||
627 | ImmutableCallSite CS(CI); | ||||||||||||||
628 | TargetLowering::AsmOperandInfoVector TargetConstraints | ||||||||||||||
629 | = TLI->ParseConstraints(DL, ST->getRegisterInfo(), CS); | ||||||||||||||
630 | |||||||||||||||
631 | const int TargetOutputIdx = Indices.empty() ? -1 : Indices[0]; | ||||||||||||||
632 | |||||||||||||||
633 | int OutputIdx = 0; | ||||||||||||||
634 | for (auto &TC : TargetConstraints) { | ||||||||||||||
635 | if (TC.Type != InlineAsm::isOutput) | ||||||||||||||
636 | continue; | ||||||||||||||
637 | |||||||||||||||
638 | // Skip outputs we don't care about. | ||||||||||||||
639 | if (TargetOutputIdx != -1 && TargetOutputIdx != OutputIdx++) | ||||||||||||||
640 | continue; | ||||||||||||||
641 | |||||||||||||||
642 | TLI->ComputeConstraintToUse(TC, SDValue()); | ||||||||||||||
643 | |||||||||||||||
644 | Register AssignedReg; | ||||||||||||||
645 | const TargetRegisterClass *RC; | ||||||||||||||
646 | std::tie(AssignedReg, RC) = TLI->getRegForInlineAsmConstraint( | ||||||||||||||
647 | TRI, TC.ConstraintCode, TC.ConstraintVT); | ||||||||||||||
648 | if (AssignedReg) { | ||||||||||||||
649 | // FIXME: This is a workaround for getRegForInlineAsmConstraint | ||||||||||||||
650 | // returning VS_32 | ||||||||||||||
651 | RC = TRI->getPhysRegClass(AssignedReg); | ||||||||||||||
652 | } | ||||||||||||||
653 | |||||||||||||||
654 | // For AGPR constraints null is returned on subtargets without AGPRs, so | ||||||||||||||
655 | // assume divergent for null. | ||||||||||||||
656 | if (!RC || !TRI->isSGPRClass(RC)) | ||||||||||||||
657 | return true; | ||||||||||||||
658 | } | ||||||||||||||
659 | |||||||||||||||
660 | return false; | ||||||||||||||
661 | } | ||||||||||||||
662 | |||||||||||||||
663 | /// \returns true if the new GPU divergence analysis is enabled. | ||||||||||||||
664 | bool GCNTTIImpl::useGPUDivergenceAnalysis() const { | ||||||||||||||
665 | return !UseLegacyDA; | ||||||||||||||
666 | } | ||||||||||||||
667 | |||||||||||||||
668 | /// \returns true if the result of the value could potentially be | ||||||||||||||
669 | /// different across workitems in a wavefront. | ||||||||||||||
670 | bool GCNTTIImpl::isSourceOfDivergence(const Value *V) const { | ||||||||||||||
671 | if (const Argument *A = dyn_cast<Argument>(V)) | ||||||||||||||
672 | return !isArgPassedInSGPR(A); | ||||||||||||||
673 | |||||||||||||||
674 | // Loads from the private and flat address spaces are divergent, because | ||||||||||||||
675 | // threads can execute the load instruction with the same inputs and get | ||||||||||||||
676 | // different results. | ||||||||||||||
677 | // | ||||||||||||||
678 | // All other loads are not divergent, because if threads issue loads with the | ||||||||||||||
679 | // same arguments, they will always get the same result. | ||||||||||||||
680 | if (const LoadInst *Load = dyn_cast<LoadInst>(V)) | ||||||||||||||
681 | return Load->getPointerAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS || | ||||||||||||||
682 | Load->getPointerAddressSpace() == AMDGPUAS::FLAT_ADDRESS; | ||||||||||||||
683 | |||||||||||||||
684 | // Atomics are divergent because they are executed sequentially: when an | ||||||||||||||
685 | // atomic operation refers to the same address in each thread, then each | ||||||||||||||
686 | // thread after the first sees the value written by the previous thread as | ||||||||||||||
687 | // original value. | ||||||||||||||
688 | if (isa<AtomicRMWInst>(V) || isa<AtomicCmpXchgInst>(V)) | ||||||||||||||
689 | return true; | ||||||||||||||
690 | |||||||||||||||
691 | if (const IntrinsicInst *Intrinsic = dyn_cast<IntrinsicInst>(V)) | ||||||||||||||
692 | return AMDGPU::isIntrinsicSourceOfDivergence(Intrinsic->getIntrinsicID()); | ||||||||||||||
693 | |||||||||||||||
694 | // Assume all function calls are a source of divergence. | ||||||||||||||
695 | if (const CallInst *CI = dyn_cast<CallInst>(V)) { | ||||||||||||||
696 | if (isa<InlineAsm>(CI->getCalledValue())) | ||||||||||||||
697 | return isInlineAsmSourceOfDivergence(CI); | ||||||||||||||
698 | return true; | ||||||||||||||
699 | } | ||||||||||||||
700 | |||||||||||||||
701 | // Assume all function calls are a source of divergence. | ||||||||||||||
702 | if (isa<InvokeInst>(V)) | ||||||||||||||
703 | return true; | ||||||||||||||
704 | |||||||||||||||
705 | return false; | ||||||||||||||
706 | } | ||||||||||||||
707 | |||||||||||||||
708 | bool GCNTTIImpl::isAlwaysUniform(const Value *V) const { | ||||||||||||||
709 | if (const IntrinsicInst *Intrinsic = dyn_cast<IntrinsicInst>(V)) { | ||||||||||||||
710 | switch (Intrinsic->getIntrinsicID()) { | ||||||||||||||
711 | default: | ||||||||||||||
712 | return false; | ||||||||||||||
713 | case Intrinsic::amdgcn_readfirstlane: | ||||||||||||||
714 | case Intrinsic::amdgcn_readlane: | ||||||||||||||
715 | case Intrinsic::amdgcn_icmp: | ||||||||||||||
716 | case Intrinsic::amdgcn_fcmp: | ||||||||||||||
717 | case Intrinsic::amdgcn_if_break: | ||||||||||||||
718 | return true; | ||||||||||||||
719 | } | ||||||||||||||
720 | } | ||||||||||||||
721 | |||||||||||||||
722 | if (const CallInst *CI = dyn_cast<CallInst>(V)) { | ||||||||||||||
723 | if (isa<InlineAsm>(CI->getCalledValue())) | ||||||||||||||
724 | return !isInlineAsmSourceOfDivergence(CI); | ||||||||||||||
725 | return false; | ||||||||||||||
726 | } | ||||||||||||||
727 | |||||||||||||||
728 | const ExtractValueInst *ExtValue = dyn_cast<ExtractValueInst>(V); | ||||||||||||||
729 | if (!ExtValue) | ||||||||||||||
730 | return false; | ||||||||||||||
731 | |||||||||||||||
732 | const CallInst *CI = dyn_cast<CallInst>(ExtValue->getOperand(0)); | ||||||||||||||
733 | if (!CI) | ||||||||||||||
734 | return false; | ||||||||||||||
735 | |||||||||||||||
736 | if (const IntrinsicInst *Intrinsic = dyn_cast<IntrinsicInst>(CI)) { | ||||||||||||||
737 | switch (Intrinsic->getIntrinsicID()) { | ||||||||||||||
738 | default: | ||||||||||||||
739 | return false; | ||||||||||||||
740 | case Intrinsic::amdgcn_if: | ||||||||||||||
741 | case Intrinsic::amdgcn_else: { | ||||||||||||||
742 | ArrayRef<unsigned> Indices = ExtValue->getIndices(); | ||||||||||||||
743 | return Indices.size() == 1 && Indices[0] == 1; | ||||||||||||||
744 | } | ||||||||||||||
745 | } | ||||||||||||||
746 | } | ||||||||||||||
747 | |||||||||||||||
748 | // If we have inline asm returning mixed SGPR and VGPR results, we inferred | ||||||||||||||
749 | // divergent for the overall struct return. We need to override it in the | ||||||||||||||
750 | // case we're extracting an SGPR component here. | ||||||||||||||
751 | if (isa<InlineAsm>(CI->getCalledValue())) | ||||||||||||||
752 | return !isInlineAsmSourceOfDivergence(CI, ExtValue->getIndices()); | ||||||||||||||
753 | |||||||||||||||
754 | return false; | ||||||||||||||
755 | } | ||||||||||||||
756 | |||||||||||||||
757 | bool GCNTTIImpl::collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes, | ||||||||||||||
758 | Intrinsic::ID IID) const { | ||||||||||||||
759 | switch (IID) { | ||||||||||||||
760 | case Intrinsic::amdgcn_atomic_inc: | ||||||||||||||
761 | case Intrinsic::amdgcn_atomic_dec: | ||||||||||||||
762 | case Intrinsic::amdgcn_ds_fadd: | ||||||||||||||
763 | case Intrinsic::amdgcn_ds_fmin: | ||||||||||||||
764 | case Intrinsic::amdgcn_ds_fmax: | ||||||||||||||
765 | case Intrinsic::amdgcn_is_shared: | ||||||||||||||
766 | case Intrinsic::amdgcn_is_private: | ||||||||||||||
767 | OpIndexes.push_back(0); | ||||||||||||||
768 | return true; | ||||||||||||||
769 | default: | ||||||||||||||
770 | return false; | ||||||||||||||
771 | } | ||||||||||||||
772 | } | ||||||||||||||
773 | |||||||||||||||
774 | bool GCNTTIImpl::rewriteIntrinsicWithAddressSpace( | ||||||||||||||
775 | IntrinsicInst *II, Value *OldV, Value *NewV) const { | ||||||||||||||
776 | auto IntrID = II->getIntrinsicID(); | ||||||||||||||
777 | switch (IntrID) { | ||||||||||||||
778 | case Intrinsic::amdgcn_atomic_inc: | ||||||||||||||
779 | case Intrinsic::amdgcn_atomic_dec: | ||||||||||||||
780 | case Intrinsic::amdgcn_ds_fadd: | ||||||||||||||
781 | case Intrinsic::amdgcn_ds_fmin: | ||||||||||||||
782 | case Intrinsic::amdgcn_ds_fmax: { | ||||||||||||||
783 | const ConstantInt *IsVolatile = cast<ConstantInt>(II->getArgOperand(4)); | ||||||||||||||
784 | if (!IsVolatile->isZero()) | ||||||||||||||
785 | return false; | ||||||||||||||
786 | Module *M = II->getParent()->getParent()->getParent(); | ||||||||||||||
787 | Type *DestTy = II->getType(); | ||||||||||||||
788 | Type *SrcTy = NewV->getType(); | ||||||||||||||
789 | Function *NewDecl = | ||||||||||||||
790 | Intrinsic::getDeclaration(M, II->getIntrinsicID(), {DestTy, SrcTy}); | ||||||||||||||
791 | II->setArgOperand(0, NewV); | ||||||||||||||
792 | II->setCalledFunction(NewDecl); | ||||||||||||||
793 | return true; | ||||||||||||||
794 | } | ||||||||||||||
795 | case Intrinsic::amdgcn_is_shared: | ||||||||||||||
796 | case Intrinsic::amdgcn_is_private: { | ||||||||||||||
797 | unsigned TrueAS = IntrID == Intrinsic::amdgcn_is_shared ? | ||||||||||||||
798 | AMDGPUAS::LOCAL_ADDRESS : AMDGPUAS::PRIVATE_ADDRESS; | ||||||||||||||
799 | unsigned NewAS = NewV->getType()->getPointerAddressSpace(); | ||||||||||||||
800 | LLVMContext &Ctx = NewV->getType()->getContext(); | ||||||||||||||
801 | ConstantInt *NewVal = (TrueAS == NewAS) ? | ||||||||||||||
802 | ConstantInt::getTrue(Ctx) : ConstantInt::getFalse(Ctx); | ||||||||||||||
803 | II->replaceAllUsesWith(NewVal); | ||||||||||||||
804 | II->eraseFromParent(); | ||||||||||||||
805 | return true; | ||||||||||||||
806 | } | ||||||||||||||
807 | default: | ||||||||||||||
808 | return false; | ||||||||||||||
809 | } | ||||||||||||||
810 | } | ||||||||||||||
811 | |||||||||||||||
812 | unsigned GCNTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, | ||||||||||||||
813 | Type *SubTp) { | ||||||||||||||
814 | if (ST->hasVOP3PInsts()) { | ||||||||||||||
815 | VectorType *VT = cast<VectorType>(Tp); | ||||||||||||||
816 | if (VT->getNumElements() == 2 && | ||||||||||||||
817 | DL.getTypeSizeInBits(VT->getElementType()) == 16) { | ||||||||||||||
818 | // With op_sel VOP3P instructions freely can access the low half or high | ||||||||||||||
819 | // half of a register, so any swizzle is free. | ||||||||||||||
820 | |||||||||||||||
821 | switch (Kind) { | ||||||||||||||
822 | case TTI::SK_Broadcast: | ||||||||||||||
823 | case TTI::SK_Reverse: | ||||||||||||||
824 | case TTI::SK_PermuteSingleSrc: | ||||||||||||||
825 | return 0; | ||||||||||||||
826 | default: | ||||||||||||||
827 | break; | ||||||||||||||
828 | } | ||||||||||||||
829 | } | ||||||||||||||
830 | } | ||||||||||||||
831 | |||||||||||||||
832 | return BaseT::getShuffleCost(Kind, Tp, Index, SubTp); | ||||||||||||||
833 | } | ||||||||||||||
834 | |||||||||||||||
835 | bool GCNTTIImpl::areInlineCompatible(const Function *Caller, | ||||||||||||||
836 | const Function *Callee) const { | ||||||||||||||
837 | const TargetMachine &TM = getTLI()->getTargetMachine(); | ||||||||||||||
838 | const GCNSubtarget *CallerST | ||||||||||||||
839 | = static_cast<const GCNSubtarget *>(TM.getSubtargetImpl(*Caller)); | ||||||||||||||
840 | const GCNSubtarget *CalleeST | ||||||||||||||
841 | = static_cast<const GCNSubtarget *>(TM.getSubtargetImpl(*Callee)); | ||||||||||||||
842 | |||||||||||||||
843 | const FeatureBitset &CallerBits = CallerST->getFeatureBits(); | ||||||||||||||
844 | const FeatureBitset &CalleeBits = CalleeST->getFeatureBits(); | ||||||||||||||
845 | |||||||||||||||
846 | FeatureBitset RealCallerBits = CallerBits & ~InlineFeatureIgnoreList; | ||||||||||||||
847 | FeatureBitset RealCalleeBits = CalleeBits & ~InlineFeatureIgnoreList; | ||||||||||||||
848 | if ((RealCallerBits & RealCalleeBits) != RealCalleeBits) | ||||||||||||||
849 | return false; | ||||||||||||||
850 | |||||||||||||||
851 | // FIXME: dx10_clamp can just take the caller setting, but there seems to be | ||||||||||||||
852 | // no way to support merge for backend defined attributes. | ||||||||||||||
853 | AMDGPU::SIModeRegisterDefaults CallerMode(*Caller, *CallerST); | ||||||||||||||
854 | AMDGPU::SIModeRegisterDefaults CalleeMode(*Callee, *CalleeST); | ||||||||||||||
855 | return CallerMode.isInlineCompatible(CalleeMode); | ||||||||||||||
856 | } | ||||||||||||||
857 | |||||||||||||||
858 | void GCNTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE, | ||||||||||||||
859 | TTI::UnrollingPreferences &UP) { | ||||||||||||||
860 | CommonTTI.getUnrollingPreferences(L, SE, UP); | ||||||||||||||
861 | } | ||||||||||||||
862 | |||||||||||||||
863 | unsigned GCNTTIImpl::getUserCost(const User *U, | ||||||||||||||
864 | ArrayRef<const Value *> Operands) { | ||||||||||||||
865 | const Instruction *I = dyn_cast<Instruction>(U); | ||||||||||||||
| |||||||||||||||
866 | if (!I
| ||||||||||||||
867 | return BaseT::getUserCost(U, Operands); | ||||||||||||||
868 | |||||||||||||||
869 | // Estimate different operations to be optimized out | ||||||||||||||
870 | switch (I->getOpcode()) { | ||||||||||||||
871 | case Instruction::ExtractElement: { | ||||||||||||||
872 | ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1)); | ||||||||||||||
873 | unsigned Idx = -1; | ||||||||||||||
874 | if (CI) | ||||||||||||||
875 | Idx = CI->getZExtValue(); | ||||||||||||||
876 | return getVectorInstrCost(I->getOpcode(), I->getOperand(0)->getType(), Idx); | ||||||||||||||
877 | } | ||||||||||||||
878 | case Instruction::InsertElement: { | ||||||||||||||
879 | ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(2)); | ||||||||||||||
880 | unsigned Idx = -1; | ||||||||||||||
881 | if (CI) | ||||||||||||||
882 | Idx = CI->getZExtValue(); | ||||||||||||||
883 | return getVectorInstrCost(I->getOpcode(), I->getType(), Idx); | ||||||||||||||
884 | } | ||||||||||||||
885 | case Instruction::Call: { | ||||||||||||||
886 | if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) { | ||||||||||||||
887 | SmallVector<Value *, 4> Args(II->arg_operands()); | ||||||||||||||
888 | FastMathFlags FMF; | ||||||||||||||
889 | if (auto *FPMO = dyn_cast<FPMathOperator>(II)) | ||||||||||||||
890 | FMF = FPMO->getFastMathFlags(); | ||||||||||||||
891 | return getIntrinsicInstrCost(II->getIntrinsicID(), II->getType(), Args, | ||||||||||||||
892 | FMF); | ||||||||||||||
893 | } else { | ||||||||||||||
894 | return BaseT::getUserCost(U, Operands); | ||||||||||||||
895 | } | ||||||||||||||
896 | } | ||||||||||||||
897 | case Instruction::ShuffleVector: { | ||||||||||||||
898 | const ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(I); | ||||||||||||||
899 | Type *Ty = Shuffle->getType(); | ||||||||||||||
900 | Type *SrcTy = Shuffle->getOperand(0)->getType(); | ||||||||||||||
901 | |||||||||||||||
902 | // TODO: Identify and add costs for insert subvector, etc. | ||||||||||||||
903 | int SubIndex; | ||||||||||||||
904 | if (Shuffle->isExtractSubvectorMask(SubIndex)) | ||||||||||||||
905 | return getShuffleCost(TTI::SK_ExtractSubvector, SrcTy, SubIndex, Ty); | ||||||||||||||
906 | |||||||||||||||
907 | if (Shuffle->changesLength()) | ||||||||||||||
908 | return BaseT::getUserCost(U, Operands); | ||||||||||||||
909 | |||||||||||||||
910 | if (Shuffle->isIdentity()) | ||||||||||||||
911 | return 0; | ||||||||||||||
912 | |||||||||||||||
913 | if (Shuffle->isReverse()) | ||||||||||||||
914 | return getShuffleCost(TTI::SK_Reverse, Ty, 0, nullptr); | ||||||||||||||
915 | |||||||||||||||
916 | if (Shuffle->isSelect()) | ||||||||||||||
917 | return getShuffleCost(TTI::SK_Select, Ty, 0, nullptr); | ||||||||||||||
918 | |||||||||||||||
919 | if (Shuffle->isTranspose()) | ||||||||||||||
920 | return getShuffleCost(TTI::SK_Transpose, Ty, 0, nullptr); | ||||||||||||||
921 | |||||||||||||||
922 | if (Shuffle->isZeroEltSplat()) | ||||||||||||||
923 | return getShuffleCost(TTI::SK_Broadcast, Ty, 0, nullptr); | ||||||||||||||
924 | |||||||||||||||
925 | if (Shuffle->isSingleSource()) | ||||||||||||||
926 | return getShuffleCost(TTI::SK_PermuteSingleSrc, Ty, 0, nullptr); | ||||||||||||||
927 | |||||||||||||||
928 | return getShuffleCost(TTI::SK_PermuteTwoSrc, Ty, 0, nullptr); | ||||||||||||||
929 | } | ||||||||||||||
930 | case Instruction::ZExt: | ||||||||||||||
931 | case Instruction::SExt: | ||||||||||||||
932 | case Instruction::FPToUI: | ||||||||||||||
933 | case Instruction::FPToSI: | ||||||||||||||
934 | case Instruction::FPExt: | ||||||||||||||
935 | case Instruction::PtrToInt: | ||||||||||||||
936 | case Instruction::IntToPtr: | ||||||||||||||
937 | case Instruction::SIToFP: | ||||||||||||||
938 | case Instruction::UIToFP: | ||||||||||||||
939 | case Instruction::Trunc: | ||||||||||||||
940 | case Instruction::FPTrunc: | ||||||||||||||
941 | case Instruction::BitCast: | ||||||||||||||
942 | case Instruction::AddrSpaceCast: { | ||||||||||||||
943 | return getCastInstrCost(I->getOpcode(), I->getType(), | ||||||||||||||
944 | I->getOperand(0)->getType(), I); | ||||||||||||||
945 | } | ||||||||||||||
946 | case Instruction::Add: | ||||||||||||||
947 | case Instruction::FAdd: | ||||||||||||||
948 | case Instruction::Sub: | ||||||||||||||
949 | case Instruction::FSub: | ||||||||||||||
950 | case Instruction::Mul: | ||||||||||||||
951 | case Instruction::FMul: | ||||||||||||||
952 | case Instruction::UDiv: | ||||||||||||||
953 | case Instruction::SDiv: | ||||||||||||||
954 | case Instruction::FDiv: | ||||||||||||||
955 | case Instruction::URem: | ||||||||||||||
956 | case Instruction::SRem: | ||||||||||||||
957 | case Instruction::FRem: | ||||||||||||||
958 | case Instruction::Shl: | ||||||||||||||
959 | case Instruction::LShr: | ||||||||||||||
960 | case Instruction::AShr: | ||||||||||||||
961 | case Instruction::And: | ||||||||||||||
962 | case Instruction::Or: | ||||||||||||||
963 | case Instruction::Xor: | ||||||||||||||
964 | case Instruction::FNeg: { | ||||||||||||||
965 | return getArithmeticInstrCost(I->getOpcode(), I->getType(), | ||||||||||||||
966 | TTI::OK_AnyValue, TTI::OK_AnyValue, | ||||||||||||||
967 | TTI::OP_None, TTI::OP_None, Operands, I); | ||||||||||||||
968 | } | ||||||||||||||
969 | default: | ||||||||||||||
970 | break; | ||||||||||||||
971 | } | ||||||||||||||
972 | |||||||||||||||
973 | return BaseT::getUserCost(U, Operands); | ||||||||||||||
974 | } | ||||||||||||||
975 | |||||||||||||||
976 | unsigned R600TTIImpl::getHardwareNumberOfRegisters(bool Vec) const { | ||||||||||||||
977 | return 4 * 128; // XXX - 4 channels. Should these count as vector instead? | ||||||||||||||
978 | } | ||||||||||||||
979 | |||||||||||||||
980 | unsigned R600TTIImpl::getNumberOfRegisters(bool Vec) const { | ||||||||||||||
981 | return getHardwareNumberOfRegisters(Vec); | ||||||||||||||
982 | } | ||||||||||||||
983 | |||||||||||||||
984 | unsigned R600TTIImpl::getRegisterBitWidth(bool Vector) const { | ||||||||||||||
985 | return 32; | ||||||||||||||
986 | } | ||||||||||||||
987 | |||||||||||||||
988 | unsigned R600TTIImpl::getMinVectorRegisterBitWidth() const { | ||||||||||||||
989 | return 32; | ||||||||||||||
990 | } | ||||||||||||||
991 | |||||||||||||||
992 | unsigned R600TTIImpl::getLoadStoreVecRegBitWidth(unsigned AddrSpace) const { | ||||||||||||||
993 | if (AddrSpace == AMDGPUAS::GLOBAL_ADDRESS || | ||||||||||||||
994 | AddrSpace == AMDGPUAS::CONSTANT_ADDRESS) | ||||||||||||||
995 | return 128; | ||||||||||||||
996 | if (AddrSpace == AMDGPUAS::LOCAL_ADDRESS || | ||||||||||||||
997 | AddrSpace == AMDGPUAS::REGION_ADDRESS) | ||||||||||||||
998 | return 64; | ||||||||||||||
999 | if (AddrSpace == AMDGPUAS::PRIVATE_ADDRESS) | ||||||||||||||
1000 | return 32; | ||||||||||||||
1001 | |||||||||||||||
1002 | if ((AddrSpace == AMDGPUAS::PARAM_D_ADDRESS || | ||||||||||||||
1003 | AddrSpace == AMDGPUAS::PARAM_I_ADDRESS || | ||||||||||||||
1004 | (AddrSpace >= AMDGPUAS::CONSTANT_BUFFER_0 && | ||||||||||||||
1005 | AddrSpace <= AMDGPUAS::CONSTANT_BUFFER_15))) | ||||||||||||||
1006 | return 128; | ||||||||||||||
1007 | llvm_unreachable("unhandled address space")::llvm::llvm_unreachable_internal("unhandled address space", "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp" , 1007); | ||||||||||||||
1008 | } | ||||||||||||||
1009 | |||||||||||||||
1010 | bool R600TTIImpl::isLegalToVectorizeMemChain(unsigned ChainSizeInBytes, | ||||||||||||||
1011 | unsigned Alignment, | ||||||||||||||
1012 | unsigned AddrSpace) const { | ||||||||||||||
1013 | // We allow vectorization of flat stores, even though we may need to decompose | ||||||||||||||
1014 | // them later if they may access private memory. We don't have enough context | ||||||||||||||
1015 | // here, and legalization can handle it. | ||||||||||||||
1016 | return (AddrSpace != AMDGPUAS::PRIVATE_ADDRESS); | ||||||||||||||
1017 | } | ||||||||||||||
1018 | |||||||||||||||
1019 | bool R600TTIImpl::isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, | ||||||||||||||
1020 | unsigned Alignment, | ||||||||||||||
1021 | unsigned AddrSpace) const { | ||||||||||||||
1022 | return isLegalToVectorizeMemChain(ChainSizeInBytes, Alignment, AddrSpace); | ||||||||||||||
1023 | } | ||||||||||||||
1024 | |||||||||||||||
1025 | bool R600TTIImpl::isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, | ||||||||||||||
1026 | unsigned Alignment, | ||||||||||||||
1027 | unsigned AddrSpace) const { | ||||||||||||||
1028 | return isLegalToVectorizeMemChain(ChainSizeInBytes, Alignment, AddrSpace); | ||||||||||||||
1029 | } | ||||||||||||||
1030 | |||||||||||||||
1031 | unsigned R600TTIImpl::getMaxInterleaveFactor(unsigned VF) { | ||||||||||||||
1032 | // Disable unrolling if the loop is not vectorized. | ||||||||||||||
1033 | // TODO: Enable this again. | ||||||||||||||
1034 | if (VF == 1) | ||||||||||||||
1035 | return 1; | ||||||||||||||
1036 | |||||||||||||||
1037 | return 8; | ||||||||||||||
1038 | } | ||||||||||||||
1039 | |||||||||||||||
1040 | unsigned R600TTIImpl::getCFInstrCost(unsigned Opcode) { | ||||||||||||||
1041 | // XXX - For some reason this isn't called for switch. | ||||||||||||||
1042 | switch (Opcode) { | ||||||||||||||
1043 | case Instruction::Br: | ||||||||||||||
1044 | case Instruction::Ret: | ||||||||||||||
1045 | return 10; | ||||||||||||||
1046 | default: | ||||||||||||||
1047 | return BaseT::getCFInstrCost(Opcode); | ||||||||||||||
1048 | } | ||||||||||||||
1049 | } | ||||||||||||||
1050 | |||||||||||||||
1051 | int R600TTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy, | ||||||||||||||
1052 | unsigned Index) { | ||||||||||||||
1053 | switch (Opcode) { | ||||||||||||||
1054 | case Instruction::ExtractElement: | ||||||||||||||
1055 | case Instruction::InsertElement: { | ||||||||||||||
1056 | unsigned EltSize | ||||||||||||||
1057 | = DL.getTypeSizeInBits(cast<VectorType>(ValTy)->getElementType()); | ||||||||||||||
1058 | if (EltSize < 32) { | ||||||||||||||
1059 | return BaseT::getVectorInstrCost(Opcode, ValTy, Index); | ||||||||||||||
1060 | } | ||||||||||||||
1061 | |||||||||||||||
1062 | // Extracts are just reads of a subregister, so are free. Inserts are | ||||||||||||||
1063 | // considered free because we don't want to have any cost for scalarizing | ||||||||||||||
1064 | // operations, and we don't have to copy into a different register class. | ||||||||||||||
1065 | |||||||||||||||
1066 | // Dynamic indexing isn't free and is best avoided. | ||||||||||||||
1067 | return Index == ~0u ? 2 : 0; | ||||||||||||||
1068 | } | ||||||||||||||
1069 | default: | ||||||||||||||
1070 | return BaseT::getVectorInstrCost(Opcode, ValTy, Index); | ||||||||||||||
1071 | } | ||||||||||||||
1072 | } | ||||||||||||||
1073 | |||||||||||||||
1074 | void R600TTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE, | ||||||||||||||
1075 | TTI::UnrollingPreferences &UP) { | ||||||||||||||
1076 | CommonTTI.getUnrollingPreferences(L, SE, UP); | ||||||||||||||
1077 | } |
1 | //===- TargetTransformInfoImpl.h --------------------------------*- C++ -*-===// | ||||||||||||||
2 | // | ||||||||||||||
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||||||||||||||
4 | // See https://llvm.org/LICENSE.txt for license information. | ||||||||||||||
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||||||||||||||
6 | // | ||||||||||||||
7 | //===----------------------------------------------------------------------===// | ||||||||||||||
8 | /// \file | ||||||||||||||
9 | /// This file provides helpers for the implementation of | ||||||||||||||
10 | /// a TargetTransformInfo-conforming class. | ||||||||||||||
11 | /// | ||||||||||||||
12 | //===----------------------------------------------------------------------===// | ||||||||||||||
13 | |||||||||||||||
14 | #ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFOIMPL_H | ||||||||||||||
15 | #define LLVM_ANALYSIS_TARGETTRANSFORMINFOIMPL_H | ||||||||||||||
16 | |||||||||||||||
17 | #include "llvm/Analysis/ScalarEvolutionExpressions.h" | ||||||||||||||
18 | #include "llvm/Analysis/TargetTransformInfo.h" | ||||||||||||||
19 | #include "llvm/Analysis/VectorUtils.h" | ||||||||||||||
20 | #include "llvm/IR/CallSite.h" | ||||||||||||||
21 | #include "llvm/IR/DataLayout.h" | ||||||||||||||
22 | #include "llvm/IR/Function.h" | ||||||||||||||
23 | #include "llvm/IR/GetElementPtrTypeIterator.h" | ||||||||||||||
24 | #include "llvm/IR/Operator.h" | ||||||||||||||
25 | #include "llvm/IR/Type.h" | ||||||||||||||
26 | |||||||||||||||
27 | namespace llvm { | ||||||||||||||
28 | |||||||||||||||
29 | /// Base class for use as a mix-in that aids implementing | ||||||||||||||
30 | /// a TargetTransformInfo-compatible class. | ||||||||||||||
31 | class TargetTransformInfoImplBase { | ||||||||||||||
32 | protected: | ||||||||||||||
33 | typedef TargetTransformInfo TTI; | ||||||||||||||
34 | |||||||||||||||
35 | const DataLayout &DL; | ||||||||||||||
36 | |||||||||||||||
37 | explicit TargetTransformInfoImplBase(const DataLayout &DL) : DL(DL) {} | ||||||||||||||
38 | |||||||||||||||
39 | public: | ||||||||||||||
40 | // Provide value semantics. MSVC requires that we spell all of these out. | ||||||||||||||
41 | TargetTransformInfoImplBase(const TargetTransformInfoImplBase &Arg) | ||||||||||||||
42 | : DL(Arg.DL) {} | ||||||||||||||
43 | TargetTransformInfoImplBase(TargetTransformInfoImplBase &&Arg) : DL(Arg.DL) {} | ||||||||||||||
44 | |||||||||||||||
45 | const DataLayout &getDataLayout() const { return DL; } | ||||||||||||||
46 | |||||||||||||||
47 | unsigned getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) { | ||||||||||||||
48 | switch (Opcode) { | ||||||||||||||
49 | default: | ||||||||||||||
50 | // By default, just classify everything as 'basic'. | ||||||||||||||
51 | return TTI::TCC_Basic; | ||||||||||||||
52 | |||||||||||||||
53 | case Instruction::GetElementPtr: | ||||||||||||||
54 | llvm_unreachable("Use getGEPCost for GEP operations!")::llvm::llvm_unreachable_internal("Use getGEPCost for GEP operations!" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h" , 54); | ||||||||||||||
55 | |||||||||||||||
56 | case Instruction::BitCast: | ||||||||||||||
57 | assert(OpTy && "Cast instructions must provide the operand type")((OpTy && "Cast instructions must provide the operand type" ) ? static_cast<void> (0) : __assert_fail ("OpTy && \"Cast instructions must provide the operand type\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h" , 57, __PRETTY_FUNCTION__)); | ||||||||||||||
58 | if (Ty == OpTy || (Ty->isPointerTy() && OpTy->isPointerTy())) | ||||||||||||||
59 | // Identity and pointer-to-pointer casts are free. | ||||||||||||||
60 | return TTI::TCC_Free; | ||||||||||||||
61 | |||||||||||||||
62 | // Otherwise, the default basic cost is used. | ||||||||||||||
63 | return TTI::TCC_Basic; | ||||||||||||||
64 | |||||||||||||||
65 | case Instruction::Freeze: | ||||||||||||||
66 | // Freeze operation is free because it should be lowered into a register | ||||||||||||||
67 | // use without any register copy in assembly code. | ||||||||||||||
68 | return TTI::TCC_Free; | ||||||||||||||
69 | |||||||||||||||
70 | case Instruction::FDiv: | ||||||||||||||
71 | case Instruction::FRem: | ||||||||||||||
72 | case Instruction::SDiv: | ||||||||||||||
73 | case Instruction::SRem: | ||||||||||||||
74 | case Instruction::UDiv: | ||||||||||||||
75 | case Instruction::URem: | ||||||||||||||
76 | return TTI::TCC_Expensive; | ||||||||||||||
77 | |||||||||||||||
78 | case Instruction::IntToPtr: { | ||||||||||||||
79 | // An inttoptr cast is free so long as the input is a legal integer type | ||||||||||||||
80 | // which doesn't contain values outside the range of a pointer. | ||||||||||||||
81 | unsigned OpSize = OpTy->getScalarSizeInBits(); | ||||||||||||||
| |||||||||||||||
82 | if (DL.isLegalInteger(OpSize) && | ||||||||||||||
83 | OpSize <= DL.getPointerTypeSizeInBits(Ty)) | ||||||||||||||
84 | return TTI::TCC_Free; | ||||||||||||||
85 | |||||||||||||||
86 | // Otherwise it's not a no-op. | ||||||||||||||
87 | return TTI::TCC_Basic; | ||||||||||||||
88 | } | ||||||||||||||
89 | case Instruction::PtrToInt: { | ||||||||||||||
90 | // A ptrtoint cast is free so long as the result is large enough to store | ||||||||||||||
91 | // the pointer, and a legal integer type. | ||||||||||||||
92 | unsigned DestSize = Ty->getScalarSizeInBits(); | ||||||||||||||
93 | if (DL.isLegalInteger(DestSize) && | ||||||||||||||
94 | DestSize >= DL.getPointerTypeSizeInBits(OpTy)) | ||||||||||||||
95 | return TTI::TCC_Free; | ||||||||||||||
96 | |||||||||||||||
97 | // Otherwise it's not a no-op. | ||||||||||||||
98 | return TTI::TCC_Basic; | ||||||||||||||
99 | } | ||||||||||||||
100 | case Instruction::Trunc: | ||||||||||||||
101 | // trunc to a native type is free (assuming the target has compare and | ||||||||||||||
102 | // shift-right of the same width). | ||||||||||||||
103 | if (DL.isLegalInteger(DL.getTypeSizeInBits(Ty))) | ||||||||||||||
104 | return TTI::TCC_Free; | ||||||||||||||
105 | |||||||||||||||
106 | return TTI::TCC_Basic; | ||||||||||||||
107 | } | ||||||||||||||
108 | } | ||||||||||||||
109 | |||||||||||||||
110 | int getGEPCost(Type *PointeeType, const Value *Ptr, | ||||||||||||||
111 | ArrayRef<const Value *> Operands) { | ||||||||||||||
112 | // In the basic model, we just assume that all-constant GEPs will be folded | ||||||||||||||
113 | // into their uses via addressing modes. | ||||||||||||||
114 | for (unsigned Idx = 0, Size = Operands.size(); Idx != Size; ++Idx) | ||||||||||||||
115 | if (!isa<Constant>(Operands[Idx])) | ||||||||||||||
116 | return TTI::TCC_Basic; | ||||||||||||||
117 | |||||||||||||||
118 | return TTI::TCC_Free; | ||||||||||||||
119 | } | ||||||||||||||
120 | |||||||||||||||
121 | unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI, | ||||||||||||||
122 | unsigned &JTSize, | ||||||||||||||
123 | ProfileSummaryInfo *PSI, | ||||||||||||||
124 | BlockFrequencyInfo *BFI) { | ||||||||||||||
125 | (void)PSI; | ||||||||||||||
126 | (void)BFI; | ||||||||||||||
127 | JTSize = 0; | ||||||||||||||
128 | return SI.getNumCases(); | ||||||||||||||
129 | } | ||||||||||||||
130 | |||||||||||||||
131 | int getExtCost(const Instruction *I, const Value *Src) { | ||||||||||||||
132 | return TTI::TCC_Basic; | ||||||||||||||
133 | } | ||||||||||||||
134 | |||||||||||||||
135 | unsigned getCallCost(FunctionType *FTy, int NumArgs, const User *U) { | ||||||||||||||
136 | assert(FTy && "FunctionType must be provided to this routine.")((FTy && "FunctionType must be provided to this routine." ) ? static_cast<void> (0) : __assert_fail ("FTy && \"FunctionType must be provided to this routine.\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h" , 136, __PRETTY_FUNCTION__)); | ||||||||||||||
137 | |||||||||||||||
138 | // The target-independent implementation just measures the size of the | ||||||||||||||
139 | // function by approximating that each argument will take on average one | ||||||||||||||
140 | // instruction to prepare. | ||||||||||||||
141 | |||||||||||||||
142 | if (NumArgs < 0) | ||||||||||||||
143 | // Set the argument number to the number of explicit arguments in the | ||||||||||||||
144 | // function. | ||||||||||||||
145 | NumArgs = FTy->getNumParams(); | ||||||||||||||
146 | |||||||||||||||
147 | return TTI::TCC_Basic * (NumArgs + 1); | ||||||||||||||
148 | } | ||||||||||||||
149 | |||||||||||||||
/// Default inlining threshold multiplier: always 1.
unsigned getInliningThresholdMultiplier() {
  return 1;
}
151 | |||||||||||||||
/// Default inliner vector bonus, in percent: always 150.
int getInlinerVectorBonusPercent() {
  return 150;
}
153 | |||||||||||||||
154 | unsigned getMemcpyCost(const Instruction *I) { | ||||||||||||||
155 | return TTI::TCC_Expensive; | ||||||||||||||
156 | } | ||||||||||||||
157 | |||||||||||||||
/// Default: reports no branch divergence (always false).
bool hasBranchDivergence() {
  return false;
}
159 | |||||||||||||||
/// Default: the GPU divergence analysis is not requested (always false).
bool useGPUDivergenceAnalysis() {
  return false;
}
161 | |||||||||||||||
162 | bool isSourceOfDivergence(const Value *V) { return false; } | ||||||||||||||
163 | |||||||||||||||
164 | bool isAlwaysUniform(const Value *V) { return false; } | ||||||||||||||
165 | |||||||||||||||
/// Default flat address space: the all-ones unsigned value (what -1
/// converts to). NOTE(review): presumably a "no flat address space"
/// sentinel -- confirm against the TargetTransformInfo documentation.
unsigned getFlatAddressSpace() { return ~0U; }
169 | |||||||||||||||
170 | bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes, | ||||||||||||||
171 | Intrinsic::ID IID) const { | ||||||||||||||
172 | return false; | ||||||||||||||
173 | } | ||||||||||||||
174 | |||||||||||||||
175 | bool rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, | ||||||||||||||
176 | Value *OldV, Value *NewV) const { | ||||||||||||||
177 | return false; | ||||||||||||||
178 | } | ||||||||||||||
179 | |||||||||||||||
180 | bool isLoweredToCall(const Function *F) { | ||||||||||||||
181 | assert(F && "A concrete function must be provided to this routine.")((F && "A concrete function must be provided to this routine." ) ? static_cast<void> (0) : __assert_fail ("F && \"A concrete function must be provided to this routine.\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h" , 181, __PRETTY_FUNCTION__)); | ||||||||||||||
182 | |||||||||||||||
183 | // FIXME: These should almost certainly not be handled here, and instead | ||||||||||||||
184 | // handled with the help of TLI or the target itself. This was largely | ||||||||||||||
185 | // ported from existing analysis heuristics here so that such refactorings | ||||||||||||||
186 | // can take place in the future. | ||||||||||||||
187 | |||||||||||||||
188 | if (F->isIntrinsic()) | ||||||||||||||
189 | return false; | ||||||||||||||
190 | |||||||||||||||
191 | if (F->hasLocalLinkage() || !F->hasName()) | ||||||||||||||
192 | return true; | ||||||||||||||
193 | |||||||||||||||
194 | StringRef Name = F->getName(); | ||||||||||||||
195 | |||||||||||||||
196 | // These will all likely lower to a single selection DAG node. | ||||||||||||||
197 | if (Name == "copysign" || Name == "copysignf" || Name == "copysignl" || | ||||||||||||||
198 | Name == "fabs" || Name == "fabsf" || Name == "fabsl" || Name == "sin" || | ||||||||||||||
199 | Name == "fmin" || Name == "fminf" || Name == "fminl" || | ||||||||||||||
200 | Name == "fmax" || Name == "fmaxf" || Name == "fmaxl" || | ||||||||||||||
201 | Name == "sinf" || Name == "sinl" || Name == "cos" || Name == "cosf" || | ||||||||||||||
202 | Name == "cosl" || Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl") | ||||||||||||||
203 | return false; | ||||||||||||||
204 | |||||||||||||||
205 | // These are all likely to be optimized into something smaller. | ||||||||||||||
206 | if (Name == "pow" || Name == "powf" || Name == "powl" || Name == "exp2" || | ||||||||||||||
207 | Name == "exp2l" || Name == "exp2f" || Name == "floor" || | ||||||||||||||
208 | Name == "floorf" || Name == "ceil" || Name == "round" || | ||||||||||||||
209 | Name == "ffs" || Name == "ffsl" || Name == "abs" || Name == "labs" || | ||||||||||||||
210 | Name == "llabs") | ||||||||||||||
211 | return false; | ||||||||||||||
212 | |||||||||||||||
213 | return true; | ||||||||||||||
214 | } | ||||||||||||||
215 | |||||||||||||||
216 | bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE, | ||||||||||||||
217 | AssumptionCache &AC, | ||||||||||||||
218 | TargetLibraryInfo *LibInfo, | ||||||||||||||
219 | HardwareLoopInfo &HWLoopInfo) { | ||||||||||||||
220 | return false; | ||||||||||||||
221 | } | ||||||||||||||
222 | |||||||||||||||
223 | bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE, | ||||||||||||||
224 | AssumptionCache &AC, TargetLibraryInfo *TLI, | ||||||||||||||
225 | DominatorTree *DT, | ||||||||||||||
226 | const LoopAccessInfo *LAI) const { | ||||||||||||||
227 | return false; | ||||||||||||||
228 | } | ||||||||||||||
229 | |||||||||||||||
230 | void getUnrollingPreferences(Loop *, ScalarEvolution &, | ||||||||||||||
231 | TTI::UnrollingPreferences &) {} | ||||||||||||||
232 | |||||||||||||||
/// Default: no add immediate is reported legal (always false); \p Imm is
/// ignored.
bool isLegalAddImmediate(int64_t Imm) {
  return false;
}
234 | |||||||||||||||
/// Default: no icmp immediate is reported legal (always false); \p Imm is
/// ignored.
bool isLegalICmpImmediate(int64_t Imm) {
  return false;
}
236 | |||||||||||||||
237 | bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, | ||||||||||||||
238 | bool HasBaseReg, int64_t Scale, | ||||||||||||||
239 | unsigned AddrSpace, Instruction *I = nullptr) { | ||||||||||||||
240 | // Guess that only reg and reg+reg addressing is allowed. This heuristic is | ||||||||||||||
241 | // taken from the implementation of LSR. | ||||||||||||||
242 | return !BaseGV && BaseOffset == 0 && (Scale == 0 || Scale == 1); | ||||||||||||||
243 | } | ||||||||||||||
244 | |||||||||||||||
245 | bool isLSRCostLess(TTI::LSRCost &C1, TTI::LSRCost &C2) { | ||||||||||||||
246 | return std::tie(C1.NumRegs, C1.AddRecCost, C1.NumIVMuls, C1.NumBaseAdds, | ||||||||||||||
247 | C1.ScaleCost, C1.ImmCost, C1.SetupCost) < | ||||||||||||||
248 | std::tie(C2.NumRegs, C2.AddRecCost, C2.NumIVMuls, C2.NumBaseAdds, | ||||||||||||||
249 | C2.ScaleCost, C2.ImmCost, C2.SetupCost); | ||||||||||||||
250 | } | ||||||||||||||
251 | |||||||||||||||
/// Default: compare macro-fusion is not available (always false).
bool canMacroFuseCmp() {
  return false;
}
253 | |||||||||||||||
254 | bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI, | ||||||||||||||
255 | DominatorTree *DT, AssumptionCache *AC, | ||||||||||||||
256 | TargetLibraryInfo *LibInfo) { | ||||||||||||||
257 | return false; | ||||||||||||||
258 | } | ||||||||||||||
259 | |||||||||||||||
260 | bool shouldFavorPostInc() const { return false; } | ||||||||||||||
261 | |||||||||||||||
262 | bool shouldFavorBackedgeIndex(const Loop *L) const { return false; } | ||||||||||||||
263 | |||||||||||||||
264 | bool isLegalMaskedStore(Type *DataType, MaybeAlign Alignment) { return false; } | ||||||||||||||
265 | |||||||||||||||
266 | bool isLegalMaskedLoad(Type *DataType, MaybeAlign Alignment) { return false; } | ||||||||||||||
267 | |||||||||||||||
268 | bool isLegalNTStore(Type *DataType, Align Alignment) { | ||||||||||||||
269 | // By default, assume nontemporal memory stores are available for stores | ||||||||||||||
270 | // that are aligned and have a size that is a power of 2. | ||||||||||||||
271 | unsigned DataSize = DL.getTypeStoreSize(DataType); | ||||||||||||||
272 | return Alignment >= DataSize && isPowerOf2_32(DataSize); | ||||||||||||||
273 | } | ||||||||||||||
274 | |||||||||||||||
275 | bool isLegalNTLoad(Type *DataType, Align Alignment) { | ||||||||||||||
276 | // By default, assume nontemporal memory loads are available for loads that | ||||||||||||||
277 | // are aligned and have a size that is a power of 2. | ||||||||||||||
278 | unsigned DataSize = DL.getTypeStoreSize(DataType); | ||||||||||||||
279 | return Alignment >= DataSize && isPowerOf2_32(DataSize); | ||||||||||||||
280 | } | ||||||||||||||
281 | |||||||||||||||
282 | bool isLegalMaskedScatter(Type *DataType, MaybeAlign Alignment) { | ||||||||||||||
283 | return false; | ||||||||||||||
284 | } | ||||||||||||||
285 | |||||||||||||||
286 | bool isLegalMaskedGather(Type *DataType, MaybeAlign Alignment) { | ||||||||||||||
287 | return false; | ||||||||||||||
288 | } | ||||||||||||||
289 | |||||||||||||||
290 | bool isLegalMaskedCompressStore(Type *DataType) { return false; } | ||||||||||||||
291 | |||||||||||||||
292 | bool isLegalMaskedExpandLoad(Type *DataType) { return false; } | ||||||||||||||
293 | |||||||||||||||
294 | bool hasDivRemOp(Type *DataType, bool IsSigned) { return false; } | ||||||||||||||
295 | |||||||||||||||
296 | bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) { return false; } | ||||||||||||||
297 | |||||||||||||||
/// Default: vectorized addressing is preferred (always true).
bool prefersVectorizedAddressing() {
  return true;
}
299 | |||||||||||||||
300 | int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, | ||||||||||||||
301 | bool HasBaseReg, int64_t Scale, unsigned AddrSpace) { | ||||||||||||||
302 | // Guess that all legal addressing mode are free. | ||||||||||||||
303 | if (isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg, | ||||||||||||||
304 | Scale, AddrSpace)) | ||||||||||||||
305 | return 0; | ||||||||||||||
306 | return -1; | ||||||||||||||
307 | } | ||||||||||||||
308 | |||||||||||||||
/// Default: always false.
bool LSRWithInstrQueries() {
  return false;
}
310 | |||||||||||||||
311 | bool isTruncateFree(Type *Ty1, Type *Ty2) { return false; } | ||||||||||||||
312 | |||||||||||||||
313 | bool isProfitableToHoist(Instruction *I) { return true; } | ||||||||||||||
314 | |||||||||||||||
/// Default: always false.
bool useAA() {
  return false;
}
316 | |||||||||||||||
317 | bool isTypeLegal(Type *Ty) { return false; } | ||||||||||||||
318 | |||||||||||||||
/// Default: lookup tables should be built (always true).
bool shouldBuildLookupTables() {
  return true;
}
320 | bool shouldBuildLookupTablesForConstant(Constant *C) { return true; } | ||||||||||||||
321 | |||||||||||||||
322 | bool useColdCCForColdCall(Function &F) { return false; } | ||||||||||||||
323 | |||||||||||||||
324 | unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) { | ||||||||||||||
325 | return 0; | ||||||||||||||
326 | } | ||||||||||||||
327 | |||||||||||||||
328 | unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args, | ||||||||||||||
329 | unsigned VF) { return 0; } | ||||||||||||||
330 | |||||||||||||||
/// Default: always false.
bool supportsEfficientVectorElementLoadStore() {
  return false;
}
332 | |||||||||||||||
/// Default: aggressive interleaving is disabled (always false);
/// \p LoopHasReductions is ignored.
bool enableAggressiveInterleaving(bool LoopHasReductions) {
  return false;
}
334 | |||||||||||||||
335 | TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, | ||||||||||||||
336 | bool IsZeroCmp) const { | ||||||||||||||
337 | return {}; | ||||||||||||||
338 | } | ||||||||||||||
339 | |||||||||||||||
/// Default: interleaved access vectorization is disabled (always false).
bool enableInterleavedAccessVectorization() {
  return false;
}
341 | |||||||||||||||
/// Default: masked interleaved access vectorization is disabled (always
/// false).
bool enableMaskedInterleavedAccessVectorization() {
  return false;
}
343 | |||||||||||||||
/// Default: FP vectorization is not flagged as potentially unsafe (always
/// false).
bool isFPVectorizationPotentiallyUnsafe() {
  return false;
}
345 | |||||||||||||||
346 | bool allowsMisalignedMemoryAccesses(LLVMContext &Context, | ||||||||||||||
347 | unsigned BitWidth, | ||||||||||||||
348 | unsigned AddressSpace, | ||||||||||||||
349 | unsigned Alignment, | ||||||||||||||
350 | bool *Fast) { return false; } | ||||||||||||||
351 | |||||||||||||||
352 | TTI::PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) { | ||||||||||||||
353 | return TTI::PSK_Software; | ||||||||||||||
354 | } | ||||||||||||||
355 | |||||||||||||||
356 | bool haveFastSqrt(Type *Ty) { return false; } | ||||||||||||||
357 | |||||||||||||||
358 | bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) { return true; } | ||||||||||||||
359 | |||||||||||||||
360 | unsigned getFPOpCost(Type *Ty) { return TargetTransformInfo::TCC_Basic; } | ||||||||||||||
361 | |||||||||||||||
362 | int getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx, const APInt &Imm, | ||||||||||||||
363 | Type *Ty) { | ||||||||||||||
364 | return 0; | ||||||||||||||
365 | } | ||||||||||||||
366 | |||||||||||||||
367 | unsigned getIntImmCost(const APInt &Imm, Type *Ty) { return TTI::TCC_Basic; } | ||||||||||||||
368 | |||||||||||||||
369 | unsigned getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, | ||||||||||||||
370 | Type *Ty) { | ||||||||||||||
371 | return TTI::TCC_Free; | ||||||||||||||
372 | } | ||||||||||||||
373 | |||||||||||||||
374 | unsigned getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, | ||||||||||||||
375 | const APInt &Imm, Type *Ty) { | ||||||||||||||
376 | return TTI::TCC_Free; | ||||||||||||||
377 | } | ||||||||||||||
378 | |||||||||||||||
379 | unsigned getNumberOfRegisters(unsigned ClassID) const { return 8; } | ||||||||||||||
380 | |||||||||||||||
381 | unsigned getRegisterClassForType(bool Vector, Type *Ty = nullptr) const { | ||||||||||||||
382 | return Vector ? 1 : 0; | ||||||||||||||
383 | }; | ||||||||||||||
384 | |||||||||||||||
385 | const char* getRegisterClassName(unsigned ClassID) const { | ||||||||||||||
386 | switch (ClassID) { | ||||||||||||||
387 | default: | ||||||||||||||
388 | return "Generic::Unknown Register Class"; | ||||||||||||||
389 | case 0: return "Generic::ScalarRC"; | ||||||||||||||
390 | case 1: return "Generic::VectorRC"; | ||||||||||||||
391 | } | ||||||||||||||
392 | } | ||||||||||||||
393 | |||||||||||||||
394 | unsigned getRegisterBitWidth(bool Vector) const { return 32; } | ||||||||||||||
395 | |||||||||||||||
/// Default minimum vector register width: always 128 bits.
unsigned getMinVectorRegisterBitWidth() {
  return 128;
}
397 | |||||||||||||||
398 | bool shouldMaximizeVectorBandwidth(bool OptSize) const { return false; } | ||||||||||||||
399 | |||||||||||||||
400 | unsigned getMinimumVF(unsigned ElemWidth) const { return 0; } | ||||||||||||||
401 | |||||||||||||||
402 | bool | ||||||||||||||
403 | shouldConsiderAddressTypePromotion(const Instruction &I, | ||||||||||||||
404 | bool &AllowPromotionWithoutCommonHeader) { | ||||||||||||||
405 | AllowPromotionWithoutCommonHeader = false; | ||||||||||||||
406 | return false; | ||||||||||||||
407 | } | ||||||||||||||
408 | |||||||||||||||
409 | unsigned getCacheLineSize() const { return 0; } | ||||||||||||||
410 | |||||||||||||||
411 | llvm::Optional<unsigned> getCacheSize(TargetTransformInfo::CacheLevel Level) const { | ||||||||||||||
412 | switch (Level) { | ||||||||||||||
413 | case TargetTransformInfo::CacheLevel::L1D: | ||||||||||||||
414 | LLVM_FALLTHROUGH[[gnu::fallthrough]]; | ||||||||||||||
415 | case TargetTransformInfo::CacheLevel::L2D: | ||||||||||||||
416 | return llvm::Optional<unsigned>(); | ||||||||||||||
417 | } | ||||||||||||||
418 | llvm_unreachable("Unknown TargetTransformInfo::CacheLevel")::llvm::llvm_unreachable_internal("Unknown TargetTransformInfo::CacheLevel" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h" , 418); | ||||||||||||||
419 | } | ||||||||||||||
420 | |||||||||||||||
421 | llvm::Optional<unsigned> getCacheAssociativity( | ||||||||||||||
422 | TargetTransformInfo::CacheLevel Level) const { | ||||||||||||||
423 | switch (Level) { | ||||||||||||||
424 | case TargetTransformInfo::CacheLevel::L1D: | ||||||||||||||
425 | LLVM_FALLTHROUGH[[gnu::fallthrough]]; | ||||||||||||||
426 | case TargetTransformInfo::CacheLevel::L2D: | ||||||||||||||
427 | return llvm::Optional<unsigned>(); | ||||||||||||||
428 | } | ||||||||||||||
429 | |||||||||||||||
430 | llvm_unreachable("Unknown TargetTransformInfo::CacheLevel")::llvm::llvm_unreachable_internal("Unknown TargetTransformInfo::CacheLevel" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h" , 430); | ||||||||||||||
431 | } | ||||||||||||||
432 | |||||||||||||||
433 | unsigned getPrefetchDistance() const { return 0; } | ||||||||||||||
434 | unsigned getMinPrefetchStride() const { return 1; } | ||||||||||||||
435 | unsigned getMaxPrefetchIterationsAhead() const { return UINT_MAX(2147483647 *2U +1U); } | ||||||||||||||
436 | |||||||||||||||
/// Default maximum interleave factor: always 1; \p VF is ignored.
unsigned getMaxInterleaveFactor(unsigned VF) {
  return 1;
}
438 | |||||||||||||||
439 | unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty, | ||||||||||||||
440 | TTI::OperandValueKind Opd1Info, | ||||||||||||||
441 | TTI::OperandValueKind Opd2Info, | ||||||||||||||
442 | TTI::OperandValueProperties Opd1PropInfo, | ||||||||||||||
443 | TTI::OperandValueProperties Opd2PropInfo, | ||||||||||||||
444 | ArrayRef<const Value *> Args, | ||||||||||||||
445 | const Instruction *CxtI = nullptr) { | ||||||||||||||
446 | return 1; | ||||||||||||||
447 | } | ||||||||||||||
448 | |||||||||||||||
449 | unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Ty, int Index, | ||||||||||||||
450 | Type *SubTp) { | ||||||||||||||
451 | return 1; | ||||||||||||||
452 | } | ||||||||||||||
453 | |||||||||||||||
454 | unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, | ||||||||||||||
455 | const Instruction *I) { return 1; } | ||||||||||||||
456 | |||||||||||||||
457 | unsigned getExtractWithExtendCost(unsigned Opcode, Type *Dst, | ||||||||||||||
458 | VectorType *VecTy, unsigned Index) { | ||||||||||||||
459 | return 1; | ||||||||||||||
460 | } | ||||||||||||||
461 | |||||||||||||||
/// Default control-flow instruction cost: always 1; \p Opcode is ignored.
unsigned getCFInstrCost(unsigned Opcode) {
  return 1;
}
463 | |||||||||||||||
464 | unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, | ||||||||||||||
465 | const Instruction *I) { | ||||||||||||||
466 | return 1; | ||||||||||||||
467 | } | ||||||||||||||
468 | |||||||||||||||
469 | unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) { | ||||||||||||||
470 | return 1; | ||||||||||||||
471 | } | ||||||||||||||
472 | |||||||||||||||
473 | unsigned getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment, | ||||||||||||||
474 | unsigned AddressSpace, const Instruction *I) { | ||||||||||||||
475 | return 1; | ||||||||||||||
476 | } | ||||||||||||||
477 | |||||||||||||||
478 | unsigned getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, | ||||||||||||||
479 | unsigned AddressSpace) { | ||||||||||||||
480 | return 1; | ||||||||||||||
481 | } | ||||||||||||||
482 | |||||||||||||||
483 | unsigned getGatherScatterOpCost(unsigned Opcode, Type *DataTy, Value *Ptr, | ||||||||||||||
484 | bool VariableMask, | ||||||||||||||
485 | unsigned Alignment) { | ||||||||||||||
486 | return 1; | ||||||||||||||
487 | } | ||||||||||||||
488 | |||||||||||||||
489 | unsigned getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, | ||||||||||||||
490 | unsigned Factor, | ||||||||||||||
491 | ArrayRef<unsigned> Indices, | ||||||||||||||
492 | unsigned Alignment, unsigned AddressSpace, | ||||||||||||||
493 | bool UseMaskForCond = false, | ||||||||||||||
494 | bool UseMaskForGaps = false) { | ||||||||||||||
495 | return 1; | ||||||||||||||
496 | } | ||||||||||||||
497 | |||||||||||||||
498 | unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, | ||||||||||||||
499 | ArrayRef<Type *> Tys, FastMathFlags FMF, | ||||||||||||||
500 | unsigned ScalarizationCostPassed) { | ||||||||||||||
501 | return 1; | ||||||||||||||
502 | } | ||||||||||||||
503 | unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, | ||||||||||||||
504 | ArrayRef<Value *> Args, FastMathFlags FMF, unsigned VF) { | ||||||||||||||
505 | return 1; | ||||||||||||||
506 | } | ||||||||||||||
507 | |||||||||||||||
508 | unsigned getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys) { | ||||||||||||||
509 | return 1; | ||||||||||||||
510 | } | ||||||||||||||
511 | |||||||||||||||
512 | unsigned getNumberOfParts(Type *Tp) { return 0; } | ||||||||||||||
513 | |||||||||||||||
514 | unsigned getAddressComputationCost(Type *Tp, ScalarEvolution *, | ||||||||||||||
515 | const SCEV *) { | ||||||||||||||
516 | return 0; | ||||||||||||||
517 | } | ||||||||||||||
518 | |||||||||||||||
519 | unsigned getArithmeticReductionCost(unsigned, Type *, bool) { return 1; } | ||||||||||||||
520 | |||||||||||||||
521 | unsigned getMinMaxReductionCost(Type *, Type *, bool, bool) { return 1; } | ||||||||||||||
522 | |||||||||||||||
523 | unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) { return 0; } | ||||||||||||||
524 | |||||||||||||||
525 | bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) { | ||||||||||||||
526 | return false; | ||||||||||||||
527 | } | ||||||||||||||
528 | |||||||||||||||
529 | unsigned getAtomicMemIntrinsicMaxElementSize() const { | ||||||||||||||
530 | // Note for overrides: You must ensure for all element unordered-atomic | ||||||||||||||
531 | // memory intrinsics that all power-of-2 element sizes up to, and | ||||||||||||||
532 | // including, the return value of this method have a corresponding | ||||||||||||||
533 | // runtime lib call. These runtime lib call definitions can be found | ||||||||||||||
534 | // in RuntimeLibcalls.h | ||||||||||||||
535 | return 0; | ||||||||||||||
536 | } | ||||||||||||||
537 | |||||||||||||||
538 | Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, | ||||||||||||||
539 | Type *ExpectedType) { | ||||||||||||||
540 | return nullptr; | ||||||||||||||
541 | } | ||||||||||||||
542 | |||||||||||||||
543 | Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length, | ||||||||||||||
544 | unsigned SrcAlign, unsigned DestAlign) const { | ||||||||||||||
545 | return Type::getInt8Ty(Context); | ||||||||||||||
546 | } | ||||||||||||||
547 | |||||||||||||||
548 | void getMemcpyLoopResidualLoweringType(SmallVectorImpl<Type *> &OpsOut, | ||||||||||||||
549 | LLVMContext &Context, | ||||||||||||||
550 | unsigned RemainingBytes, | ||||||||||||||
551 | unsigned SrcAlign, | ||||||||||||||
552 | unsigned DestAlign) const { | ||||||||||||||
553 | for (unsigned i = 0; i != RemainingBytes; ++i) | ||||||||||||||
554 | OpsOut.push_back(Type::getInt8Ty(Context)); | ||||||||||||||
555 | } | ||||||||||||||
556 | |||||||||||||||
557 | bool areInlineCompatible(const Function *Caller, | ||||||||||||||
558 | const Function *Callee) const { | ||||||||||||||
559 | return (Caller->getFnAttribute("target-cpu") == | ||||||||||||||
560 | Callee->getFnAttribute("target-cpu")) && | ||||||||||||||
561 | (Caller->getFnAttribute("target-features") == | ||||||||||||||
562 | Callee->getFnAttribute("target-features")); | ||||||||||||||
563 | } | ||||||||||||||
564 | |||||||||||||||
565 | bool areFunctionArgsABICompatible(const Function *Caller, const Function *Callee, | ||||||||||||||
566 | SmallPtrSetImpl<Argument *> &Args) const { | ||||||||||||||
567 | return (Caller->getFnAttribute("target-cpu") == | ||||||||||||||
568 | Callee->getFnAttribute("target-cpu")) && | ||||||||||||||
569 | (Caller->getFnAttribute("target-features") == | ||||||||||||||
570 | Callee->getFnAttribute("target-features")); | ||||||||||||||
571 | } | ||||||||||||||
572 | |||||||||||||||
573 | bool isIndexedLoadLegal(TTI::MemIndexedMode Mode, Type *Ty, | ||||||||||||||
574 | const DataLayout &DL) const { | ||||||||||||||
575 | return false; | ||||||||||||||
576 | } | ||||||||||||||
577 | |||||||||||||||
578 | bool isIndexedStoreLegal(TTI::MemIndexedMode Mode, Type *Ty, | ||||||||||||||
579 | const DataLayout &DL) const { | ||||||||||||||
580 | return false; | ||||||||||||||
581 | } | ||||||||||||||
582 | |||||||||||||||
583 | unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const { return 128; } | ||||||||||||||
584 | |||||||||||||||
585 | bool isLegalToVectorizeLoad(LoadInst *LI) const { return true; } | ||||||||||||||
586 | |||||||||||||||
587 | bool isLegalToVectorizeStore(StoreInst *SI) const { return true; } | ||||||||||||||
588 | |||||||||||||||
589 | bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, | ||||||||||||||
590 | unsigned Alignment, | ||||||||||||||
591 | unsigned AddrSpace) const { | ||||||||||||||
592 | return true; | ||||||||||||||
593 | } | ||||||||||||||
594 | |||||||||||||||
595 | bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, | ||||||||||||||
596 | unsigned Alignment, | ||||||||||||||
597 | unsigned AddrSpace) const { | ||||||||||||||
598 | return true; | ||||||||||||||
599 | } | ||||||||||||||
600 | |||||||||||||||
601 | unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize, | ||||||||||||||
602 | unsigned ChainSizeInBytes, | ||||||||||||||
603 | VectorType *VecTy) const { | ||||||||||||||
604 | return VF; | ||||||||||||||
605 | } | ||||||||||||||
606 | |||||||||||||||
607 | unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize, | ||||||||||||||
608 | unsigned ChainSizeInBytes, | ||||||||||||||
609 | VectorType *VecTy) const { | ||||||||||||||
610 | return VF; | ||||||||||||||
611 | } | ||||||||||||||
612 | |||||||||||||||
613 | bool useReductionIntrinsic(unsigned Opcode, Type *Ty, | ||||||||||||||
614 | TTI::ReductionFlags Flags) const { | ||||||||||||||
615 | return false; | ||||||||||||||
616 | } | ||||||||||||||
617 | |||||||||||||||
618 | bool shouldExpandReduction(const IntrinsicInst *II) const { | ||||||||||||||
619 | return true; | ||||||||||||||
620 | } | ||||||||||||||
621 | |||||||||||||||
622 | unsigned getGISelRematGlobalCost() const { | ||||||||||||||
623 | return 1; | ||||||||||||||
624 | } | ||||||||||||||
625 | |||||||||||||||
626 | protected: | ||||||||||||||
627 | // Obtain the minimum required size to hold the value (without the sign) | ||||||||||||||
628 | // In case of a vector it returns the min required size for one element. | ||||||||||||||
629 | unsigned minRequiredElementSize(const Value* Val, bool &isSigned) { | ||||||||||||||
630 | if (isa<ConstantDataVector>(Val) || isa<ConstantVector>(Val)) { | ||||||||||||||
631 | const auto* VectorValue = cast<Constant>(Val); | ||||||||||||||
632 | |||||||||||||||
633 | // In case of a vector need to pick the max between the min | ||||||||||||||
634 | // required size for each element | ||||||||||||||
635 | auto *VT = cast<VectorType>(Val->getType()); | ||||||||||||||
636 | |||||||||||||||
637 | // Assume unsigned elements | ||||||||||||||
638 | isSigned = false; | ||||||||||||||
639 | |||||||||||||||
640 | // The max required size is the total vector width divided by num | ||||||||||||||
641 | // of elements in the vector | ||||||||||||||
642 | unsigned MaxRequiredSize = VT->getBitWidth() / VT->getNumElements(); | ||||||||||||||
643 | |||||||||||||||
644 | unsigned MinRequiredSize = 0; | ||||||||||||||
645 | for(unsigned i = 0, e = VT->getNumElements(); i < e; ++i) { | ||||||||||||||
646 | if (auto* IntElement = | ||||||||||||||
647 | dyn_cast<ConstantInt>(VectorValue->getAggregateElement(i))) { | ||||||||||||||
648 | bool signedElement = IntElement->getValue().isNegative(); | ||||||||||||||
649 | // Get the element min required size. | ||||||||||||||
650 | unsigned ElementMinRequiredSize = | ||||||||||||||
651 | IntElement->getValue().getMinSignedBits() - 1; | ||||||||||||||
652 | // In case one element is signed then all the vector is signed. | ||||||||||||||
653 | isSigned |= signedElement; | ||||||||||||||
654 | // Save the max required bit size between all the elements. | ||||||||||||||
655 | MinRequiredSize = std::max(MinRequiredSize, ElementMinRequiredSize); | ||||||||||||||
656 | } | ||||||||||||||
657 | else { | ||||||||||||||
658 | // not an int constant element | ||||||||||||||
659 | return MaxRequiredSize; | ||||||||||||||
660 | } | ||||||||||||||
661 | } | ||||||||||||||
662 | return MinRequiredSize; | ||||||||||||||
663 | } | ||||||||||||||
664 | |||||||||||||||
665 | if (const auto* CI = dyn_cast<ConstantInt>(Val)) { | ||||||||||||||
666 | isSigned = CI->getValue().isNegative(); | ||||||||||||||
667 | return CI->getValue().getMinSignedBits() - 1; | ||||||||||||||
668 | } | ||||||||||||||
669 | |||||||||||||||
670 | if (const auto* Cast = dyn_cast<SExtInst>(Val)) { | ||||||||||||||
671 | isSigned = true; | ||||||||||||||
672 | return Cast->getSrcTy()->getScalarSizeInBits() - 1; | ||||||||||||||
673 | } | ||||||||||||||
674 | |||||||||||||||
675 | if (const auto* Cast = dyn_cast<ZExtInst>(Val)) { | ||||||||||||||
676 | isSigned = false; | ||||||||||||||
677 | return Cast->getSrcTy()->getScalarSizeInBits(); | ||||||||||||||
678 | } | ||||||||||||||
679 | |||||||||||||||
680 | isSigned = false; | ||||||||||||||
681 | return Val->getType()->getScalarSizeInBits(); | ||||||||||||||
682 | } | ||||||||||||||
683 | |||||||||||||||
684 | bool isStridedAccess(const SCEV *Ptr) { | ||||||||||||||
685 | return Ptr && isa<SCEVAddRecExpr>(Ptr); | ||||||||||||||
686 | } | ||||||||||||||
687 | |||||||||||||||
688 | const SCEVConstant *getConstantStrideStep(ScalarEvolution *SE, | ||||||||||||||
689 | const SCEV *Ptr) { | ||||||||||||||
690 | if (!isStridedAccess(Ptr)) | ||||||||||||||
691 | return nullptr; | ||||||||||||||
692 | const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ptr); | ||||||||||||||
693 | return dyn_cast<SCEVConstant>(AddRec->getStepRecurrence(*SE)); | ||||||||||||||
694 | } | ||||||||||||||
695 | |||||||||||||||
696 | bool isConstantStridedAccessLessThan(ScalarEvolution *SE, const SCEV *Ptr, | ||||||||||||||
697 | int64_t MergeDistance) { | ||||||||||||||
698 | const SCEVConstant *Step = getConstantStrideStep(SE, Ptr); | ||||||||||||||
699 | if (!Step) | ||||||||||||||
700 | return false; | ||||||||||||||
701 | APInt StrideVal = Step->getAPInt(); | ||||||||||||||
702 | if (StrideVal.getBitWidth() > 64) | ||||||||||||||
703 | return false; | ||||||||||||||
704 | // FIXME: Need to take absolute value for negative stride case. | ||||||||||||||
705 | return StrideVal.getSExtValue() < MergeDistance; | ||||||||||||||
706 | } | ||||||||||||||
707 | }; | ||||||||||||||
708 | |||||||||||||||
709 | /// CRTP base class for use as a mix-in that aids implementing | ||||||||||||||
710 | /// a TargetTransformInfo-compatible class. | ||||||||||||||
711 | template <typename T> | ||||||||||||||
712 | class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase { | ||||||||||||||
713 | private: | ||||||||||||||
714 | typedef TargetTransformInfoImplBase BaseT; | ||||||||||||||
715 | |||||||||||||||
716 | protected: | ||||||||||||||
717 | explicit TargetTransformInfoImplCRTPBase(const DataLayout &DL) : BaseT(DL) {} | ||||||||||||||
718 | |||||||||||||||
719 | public: | ||||||||||||||
720 | using BaseT::getCallCost; | ||||||||||||||
721 | |||||||||||||||
722 | unsigned getCallCost(const Function *F, int NumArgs, const User *U) { | ||||||||||||||
723 | assert(F && "A concrete function must be provided to this routine.")((F && "A concrete function must be provided to this routine." ) ? static_cast<void> (0) : __assert_fail ("F && \"A concrete function must be provided to this routine.\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h" , 723, __PRETTY_FUNCTION__)); | ||||||||||||||
724 | |||||||||||||||
725 | if (NumArgs < 0) | ||||||||||||||
726 | // Set the argument number to the number of explicit arguments in the | ||||||||||||||
727 | // function. | ||||||||||||||
728 | NumArgs = F->arg_size(); | ||||||||||||||
729 | |||||||||||||||
730 | if (Intrinsic::ID IID = F->getIntrinsicID()) { | ||||||||||||||
731 | FunctionType *FTy = F->getFunctionType(); | ||||||||||||||
732 | SmallVector<Type *, 8> ParamTys(FTy->param_begin(), FTy->param_end()); | ||||||||||||||
733 | return static_cast<T *>(this) | ||||||||||||||
734 | ->getIntrinsicCost(IID, FTy->getReturnType(), ParamTys, U); | ||||||||||||||
735 | } | ||||||||||||||
736 | |||||||||||||||
737 | if (!static_cast<T *>(this)->isLoweredToCall(F)) | ||||||||||||||
738 | return TTI::TCC_Basic; // Give a basic cost if it will be lowered | ||||||||||||||
739 | // directly. | ||||||||||||||
740 | |||||||||||||||
741 | return static_cast<T *>(this)->getCallCost(F->getFunctionType(), NumArgs, U); | ||||||||||||||
742 | } | ||||||||||||||
743 | |||||||||||||||
744 | unsigned getCallCost(const Function *F, ArrayRef<const Value *> Arguments, | ||||||||||||||
745 | const User *U) { | ||||||||||||||
746 | // Simply delegate to generic handling of the call. | ||||||||||||||
747 | // FIXME: We should use instsimplify or something else to catch calls which | ||||||||||||||
748 | // will constant fold with these arguments. | ||||||||||||||
749 | return static_cast<T *>(this)->getCallCost(F, Arguments.size(), U); | ||||||||||||||
750 | } | ||||||||||||||
751 | |||||||||||||||
752 | using BaseT::getGEPCost; | ||||||||||||||
753 | |||||||||||||||
754 | int getGEPCost(Type *PointeeType, const Value *Ptr, | ||||||||||||||
755 | ArrayRef<const Value *> Operands) { | ||||||||||||||
756 | assert(PointeeType && Ptr && "can't get GEPCost of nullptr")((PointeeType && Ptr && "can't get GEPCost of nullptr" ) ? static_cast<void> (0) : __assert_fail ("PointeeType && Ptr && \"can't get GEPCost of nullptr\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h" , 756, __PRETTY_FUNCTION__)); | ||||||||||||||
757 | // TODO: will remove this when pointers have an opaque type. | ||||||||||||||
758 | assert(Ptr->getType()->getScalarType()->getPointerElementType() ==((Ptr->getType()->getScalarType()->getPointerElementType () == PointeeType && "explicit pointee type doesn't match operand's pointee type" ) ? static_cast<void> (0) : __assert_fail ("Ptr->getType()->getScalarType()->getPointerElementType() == PointeeType && \"explicit pointee type doesn't match operand's pointee type\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h" , 760, __PRETTY_FUNCTION__)) | ||||||||||||||
759 | PointeeType &&((Ptr->getType()->getScalarType()->getPointerElementType () == PointeeType && "explicit pointee type doesn't match operand's pointee type" ) ? static_cast<void> (0) : __assert_fail ("Ptr->getType()->getScalarType()->getPointerElementType() == PointeeType && \"explicit pointee type doesn't match operand's pointee type\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h" , 760, __PRETTY_FUNCTION__)) | ||||||||||||||
760 | "explicit pointee type doesn't match operand's pointee type")((Ptr->getType()->getScalarType()->getPointerElementType () == PointeeType && "explicit pointee type doesn't match operand's pointee type" ) ? static_cast<void> (0) : __assert_fail ("Ptr->getType()->getScalarType()->getPointerElementType() == PointeeType && \"explicit pointee type doesn't match operand's pointee type\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h" , 760, __PRETTY_FUNCTION__)); | ||||||||||||||
761 | auto *BaseGV = dyn_cast<GlobalValue>(Ptr->stripPointerCasts()); | ||||||||||||||
762 | bool HasBaseReg = (BaseGV == nullptr); | ||||||||||||||
763 | |||||||||||||||
764 | auto PtrSizeBits = DL.getPointerTypeSizeInBits(Ptr->getType()); | ||||||||||||||
765 | APInt BaseOffset(PtrSizeBits, 0); | ||||||||||||||
766 | int64_t Scale = 0; | ||||||||||||||
767 | |||||||||||||||
768 | auto GTI = gep_type_begin(PointeeType, Operands); | ||||||||||||||
769 | Type *TargetType = nullptr; | ||||||||||||||
770 | |||||||||||||||
771 | // Handle the case where the GEP instruction has a single operand, | ||||||||||||||
772 | // the basis, therefore TargetType is a nullptr. | ||||||||||||||
773 | if (Operands.empty()) | ||||||||||||||
774 | return !BaseGV ? TTI::TCC_Free : TTI::TCC_Basic; | ||||||||||||||
775 | |||||||||||||||
776 | for (auto I = Operands.begin(); I != Operands.end(); ++I, ++GTI) { | ||||||||||||||
777 | TargetType = GTI.getIndexedType(); | ||||||||||||||
778 | // We assume that the cost of Scalar GEP with constant index and the | ||||||||||||||
779 | // cost of Vector GEP with splat constant index are the same. | ||||||||||||||
780 | const ConstantInt *ConstIdx = dyn_cast<ConstantInt>(*I); | ||||||||||||||
781 | if (!ConstIdx) | ||||||||||||||
782 | if (auto Splat = getSplatValue(*I)) | ||||||||||||||
783 | ConstIdx = dyn_cast<ConstantInt>(Splat); | ||||||||||||||
784 | if (StructType *STy = GTI.getStructTypeOrNull()) { | ||||||||||||||
785 | // For structures the index is always splat or scalar constant | ||||||||||||||
786 | assert(ConstIdx && "Unexpected GEP index")((ConstIdx && "Unexpected GEP index") ? static_cast< void> (0) : __assert_fail ("ConstIdx && \"Unexpected GEP index\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h" , 786, __PRETTY_FUNCTION__)); | ||||||||||||||
787 | uint64_t Field = ConstIdx->getZExtValue(); | ||||||||||||||
788 | BaseOffset += DL.getStructLayout(STy)->getElementOffset(Field); | ||||||||||||||
789 | } else { | ||||||||||||||
790 | int64_t ElementSize = DL.getTypeAllocSize(GTI.getIndexedType()); | ||||||||||||||
791 | if (ConstIdx) { | ||||||||||||||
792 | BaseOffset += | ||||||||||||||
793 | ConstIdx->getValue().sextOrTrunc(PtrSizeBits) * ElementSize; | ||||||||||||||
794 | } else { | ||||||||||||||
795 | // Needs scale register. | ||||||||||||||
796 | if (Scale != 0) | ||||||||||||||
797 | // No addressing mode takes two scale registers. | ||||||||||||||
798 | return TTI::TCC_Basic; | ||||||||||||||
799 | Scale = ElementSize; | ||||||||||||||
800 | } | ||||||||||||||
801 | } | ||||||||||||||
802 | } | ||||||||||||||
803 | |||||||||||||||
804 | if (static_cast<T *>(this)->isLegalAddressingMode( | ||||||||||||||
805 | TargetType, const_cast<GlobalValue *>(BaseGV), | ||||||||||||||
806 | BaseOffset.sextOrTrunc(64).getSExtValue(), HasBaseReg, Scale, | ||||||||||||||
807 | Ptr->getType()->getPointerAddressSpace())) | ||||||||||||||
808 | return TTI::TCC_Free; | ||||||||||||||
809 | return TTI::TCC_Basic; | ||||||||||||||
810 | } | ||||||||||||||
811 | |||||||||||||||
812 | unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, | ||||||||||||||
813 | ArrayRef<Type *> ParamTys, const User *U) { | ||||||||||||||
814 | switch (IID) { | ||||||||||||||
815 | default: | ||||||||||||||
816 | // Intrinsics rarely (if ever) have normal argument setup constraints. | ||||||||||||||
817 | // Model them as having a basic instruction cost. | ||||||||||||||
818 | return TTI::TCC_Basic; | ||||||||||||||
819 | |||||||||||||||
820 | // TODO: other libc intrinsics. | ||||||||||||||
821 | case Intrinsic::memcpy: | ||||||||||||||
822 | return static_cast<T *>(this)->getMemcpyCost(dyn_cast<Instruction>(U)); | ||||||||||||||
823 | |||||||||||||||
824 | case Intrinsic::annotation: | ||||||||||||||
825 | case Intrinsic::assume: | ||||||||||||||
826 | case Intrinsic::sideeffect: | ||||||||||||||
827 | case Intrinsic::dbg_declare: | ||||||||||||||
828 | case Intrinsic::dbg_value: | ||||||||||||||
829 | case Intrinsic::dbg_label: | ||||||||||||||
830 | case Intrinsic::invariant_start: | ||||||||||||||
831 | case Intrinsic::invariant_end: | ||||||||||||||
832 | case Intrinsic::launder_invariant_group: | ||||||||||||||
833 | case Intrinsic::strip_invariant_group: | ||||||||||||||
834 | case Intrinsic::is_constant: | ||||||||||||||
835 | case Intrinsic::lifetime_start: | ||||||||||||||
836 | case Intrinsic::lifetime_end: | ||||||||||||||
837 | case Intrinsic::objectsize: | ||||||||||||||
838 | case Intrinsic::ptr_annotation: | ||||||||||||||
839 | case Intrinsic::var_annotation: | ||||||||||||||
840 | case Intrinsic::experimental_gc_result: | ||||||||||||||
841 | case Intrinsic::experimental_gc_relocate: | ||||||||||||||
842 | case Intrinsic::coro_alloc: | ||||||||||||||
843 | case Intrinsic::coro_begin: | ||||||||||||||
844 | case Intrinsic::coro_free: | ||||||||||||||
845 | case Intrinsic::coro_end: | ||||||||||||||
846 | case Intrinsic::coro_frame: | ||||||||||||||
847 | case Intrinsic::coro_size: | ||||||||||||||
848 | case Intrinsic::coro_suspend: | ||||||||||||||
849 | case Intrinsic::coro_param: | ||||||||||||||
850 | case Intrinsic::coro_subfn_addr: | ||||||||||||||
851 | // These intrinsics don't actually represent code after lowering. | ||||||||||||||
852 | return TTI::TCC_Free; | ||||||||||||||
853 | } | ||||||||||||||
854 | } | ||||||||||||||
855 | |||||||||||||||
856 | unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, | ||||||||||||||
857 | ArrayRef<const Value *> Arguments, const User *U) { | ||||||||||||||
858 | // Delegate to the generic intrinsic handling code. This mostly provides an | ||||||||||||||
859 | // opportunity for targets to (for example) special case the cost of | ||||||||||||||
860 | // certain intrinsics based on constants used as arguments. | ||||||||||||||
861 | SmallVector<Type *, 8> ParamTys; | ||||||||||||||
862 | ParamTys.reserve(Arguments.size()); | ||||||||||||||
863 | for (unsigned Idx = 0, Size = Arguments.size(); Idx != Size; ++Idx) | ||||||||||||||
864 | ParamTys.push_back(Arguments[Idx]->getType()); | ||||||||||||||
865 | return static_cast<T *>(this)->getIntrinsicCost(IID, RetTy, ParamTys, U); | ||||||||||||||
866 | } | ||||||||||||||
867 | |||||||||||||||
868 | unsigned getUserCost(const User *U, ArrayRef<const Value *> Operands) { | ||||||||||||||
869 | if (isa<PHINode>(U)) | ||||||||||||||
870 | return TTI::TCC_Free; // Model all PHI nodes as free. | ||||||||||||||
871 | |||||||||||||||
872 | if (isa<ExtractValueInst>(U)) | ||||||||||||||
873 | return TTI::TCC_Free; // Model all ExtractValue nodes as free. | ||||||||||||||
874 | |||||||||||||||
875 | if (isa<FreezeInst>(U)) | ||||||||||||||
876 | return TTI::TCC_Free; // Model all Freeze nodes as free. | ||||||||||||||
877 | |||||||||||||||
878 | // Static alloca doesn't generate target instructions. | ||||||||||||||
879 | if (auto *A = dyn_cast<AllocaInst>(U)) | ||||||||||||||
880 | if (A->isStaticAlloca()) | ||||||||||||||
881 | return TTI::TCC_Free; | ||||||||||||||
882 | |||||||||||||||
883 | if (const GEPOperator *GEP = dyn_cast<GEPOperator>(U)) { | ||||||||||||||
884 | return static_cast<T *>(this)->getGEPCost(GEP->getSourceElementType(), | ||||||||||||||
885 | GEP->getPointerOperand(), | ||||||||||||||
886 | Operands.drop_front()); | ||||||||||||||
887 | } | ||||||||||||||
888 | |||||||||||||||
889 | if (auto CS = ImmutableCallSite(U)) { | ||||||||||||||
890 | const Function *F = CS.getCalledFunction(); | ||||||||||||||
891 | if (!F) { | ||||||||||||||
892 | // Just use the called value type. | ||||||||||||||
893 | Type *FTy = CS.getCalledValue()->getType()->getPointerElementType(); | ||||||||||||||
894 | return static_cast<T *>(this) | ||||||||||||||
895 | ->getCallCost(cast<FunctionType>(FTy), CS.arg_size(), U); | ||||||||||||||
896 | } | ||||||||||||||
897 | |||||||||||||||
898 | SmallVector<const Value *, 8> Arguments(CS.arg_begin(), CS.arg_end()); | ||||||||||||||
899 | return static_cast<T *>(this)->getCallCost(F, Arguments, U); | ||||||||||||||
900 | } | ||||||||||||||
901 | |||||||||||||||
902 | if (isa<SExtInst>(U) || isa<ZExtInst>(U) || isa<FPExtInst>(U)) | ||||||||||||||
903 | // The old behaviour of generally treating extensions of icmp to be free | ||||||||||||||
904 | // has been removed. A target that needs it should override getUserCost(). | ||||||||||||||
905 | return static_cast<T *>(this)->getExtCost(cast<Instruction>(U), | ||||||||||||||
906 | Operands.back()); | ||||||||||||||
907 | |||||||||||||||
908 | return static_cast<T *>(this)->getOperationCost( | ||||||||||||||
909 | Operator::getOpcode(U), U->getType(), | ||||||||||||||
910 | U->getNumOperands() == 1 ? U->getOperand(0)->getType() : nullptr); | ||||||||||||||
911 | } | ||||||||||||||
912 | |||||||||||||||
913 | int getInstructionLatency(const Instruction *I) { | ||||||||||||||
914 | SmallVector<const Value *, 4> Operands(I->value_op_begin(), | ||||||||||||||
915 | I->value_op_end()); | ||||||||||||||
916 | if (getUserCost(I, Operands) == TTI::TCC_Free) | ||||||||||||||
917 | return 0; | ||||||||||||||
918 | |||||||||||||||
919 | if (isa<LoadInst>(I)) | ||||||||||||||
920 | return 4; | ||||||||||||||
921 | |||||||||||||||
922 | Type *DstTy = I->getType(); | ||||||||||||||
923 | |||||||||||||||
924 | // Usually an intrinsic is a simple instruction. | ||||||||||||||
925 | // A real function call is much slower. | ||||||||||||||
926 | if (auto *CI = dyn_cast<CallInst>(I)) { | ||||||||||||||
927 | const Function *F = CI->getCalledFunction(); | ||||||||||||||
928 | if (!F || static_cast<T *>(this)->isLoweredToCall(F)) | ||||||||||||||
929 | return 40; | ||||||||||||||
930 | // Some intrinsics return a value and a flag, we use the value type | ||||||||||||||
931 | // to decide its latency. | ||||||||||||||
932 | if (StructType* StructTy = dyn_cast<StructType>(DstTy)) | ||||||||||||||
933 | DstTy = StructTy->getElementType(0); | ||||||||||||||
934 | // Fall through to simple instructions. | ||||||||||||||
935 | } | ||||||||||||||
936 | |||||||||||||||
937 | if (VectorType *VectorTy = dyn_cast<VectorType>(DstTy)) | ||||||||||||||
938 | DstTy = VectorTy->getElementType(); | ||||||||||||||
939 | if (DstTy->isFloatingPointTy()) | ||||||||||||||
940 | return 3; | ||||||||||||||
941 | |||||||||||||||
942 | return 1; | ||||||||||||||
943 | } | ||||||||||||||
944 | }; | ||||||||||||||
945 | } | ||||||||||||||
946 | |||||||||||||||
947 | #endif |
1 | //===- CallSite.h - Abstract Call & Invoke instrs ---------------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file defines the CallSite class, which is a handy wrapper for code that |
10 | // wants to treat Call, Invoke and CallBr instructions in a generic way. When |
11 | // in non-mutation context (e.g. an analysis) ImmutableCallSite should be used. |
12 | // Finally, when some degree of customization is necessary between these two |
13 | // extremes, CallSiteBase<> can be supplied with fine-tuned parameters. |
14 | // |
15 | // NOTE: These classes are supposed to have "value semantics". So they should be |
16 | // passed by value, not by reference; they should not be "new"ed or "delete"d. |
17 | // They are efficiently copyable, assignable and constructable, with cost |
18 | // equivalent to copying a pointer (notice that they have only a single data |
19 | // member). The internal representation carries a flag which indicates which of |
20 | // the three variants is enclosed. This allows for cheaper checks when various |
21 | // accessors of CallSite are employed. |
22 | // |
23 | //===----------------------------------------------------------------------===// |
24 | |
25 | #ifndef LLVM_IR_CALLSITE_H |
26 | #define LLVM_IR_CALLSITE_H |
27 | |
28 | #include "llvm/ADT/Optional.h" |
29 | #include "llvm/ADT/PointerIntPair.h" |
30 | #include "llvm/ADT/iterator_range.h" |
31 | #include "llvm/IR/Attributes.h" |
32 | #include "llvm/IR/CallingConv.h" |
33 | #include "llvm/IR/Function.h" |
34 | #include "llvm/IR/InstrTypes.h" |
35 | #include "llvm/IR/Instruction.h" |
36 | #include "llvm/IR/Instructions.h" |
37 | #include "llvm/IR/Use.h" |
38 | #include "llvm/IR/User.h" |
39 | #include "llvm/IR/Value.h" |
40 | #include "llvm/Support/Casting.h" |
41 | #include <cassert> |
42 | #include <cstdint> |
43 | #include <iterator> |
44 | |
45 | namespace llvm { |
46 | |
47 | namespace Intrinsic { |
48 | typedef unsigned ID; |
49 | } |
50 | |
51 | template <typename FunTy = const Function, typename BBTy = const BasicBlock, |
52 | typename ValTy = const Value, typename UserTy = const User, |
53 | typename UseTy = const Use, typename InstrTy = const Instruction, |
54 | typename CallTy = const CallInst, |
55 | typename InvokeTy = const InvokeInst, |
56 | typename CallBrTy = const CallBrInst, |
57 | typename IterTy = User::const_op_iterator> |
58 | class CallSiteBase { |
59 | protected: |
60 | PointerIntPair<InstrTy *, 2, int> I; |
61 | |
62 | CallSiteBase() = default; |
63 | CallSiteBase(CallTy *CI) : I(CI, 1) { assert(CI)((CI) ? static_cast<void> (0) : __assert_fail ("CI", "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/IR/CallSite.h" , 63, __PRETTY_FUNCTION__)); } |
64 | CallSiteBase(InvokeTy *II) : I(II, 0) { assert(II)((II) ? static_cast<void> (0) : __assert_fail ("II", "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/IR/CallSite.h" , 64, __PRETTY_FUNCTION__)); } |
65 | CallSiteBase(CallBrTy *CBI) : I(CBI, 2) { assert(CBI)((CBI) ? static_cast<void> (0) : __assert_fail ("CBI", "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/IR/CallSite.h" , 65, __PRETTY_FUNCTION__)); } |
66 | explicit CallSiteBase(ValTy *II) { *this = get(II); } |
67 | |
68 | private: |
69 | /// This static method is like a constructor. It will create an appropriate |
70 | /// call site for a Call, Invoke or CallBr instruction, but it can also create |
71 | /// a null initialized CallSiteBase object for something which is NOT a call |
72 | /// site. |
73 | static CallSiteBase get(ValTy *V) { |
74 | if (InstrTy *II = dyn_cast<InstrTy>(V)) { |
75 | if (II->getOpcode() == Instruction::Call) |
76 | return CallSiteBase(static_cast<CallTy*>(II)); |
77 | if (II->getOpcode() == Instruction::Invoke) |
78 | return CallSiteBase(static_cast<InvokeTy*>(II)); |
79 | if (II->getOpcode() == Instruction::CallBr) |
80 | return CallSiteBase(static_cast<CallBrTy *>(II)); |
81 | } |
82 | return CallSiteBase(); |
83 | } |
84 | |
85 | public: |
86 | /// Return true if a CallInst is enclosed. |
87 | bool isCall() const { return I.getInt() == 1; } |
88 | |
89 | /// Return true if a InvokeInst is enclosed. !I.getInt() may also signify a |
90 | /// NULL instruction pointer, so check that. |
91 | bool isInvoke() const { return getInstruction() && I.getInt() == 0; } |
92 | |
93 | /// Return true if a CallBrInst is enclosed. |
94 | bool isCallBr() const { return I.getInt() == 2; } |
95 | |
96 | InstrTy *getInstruction() const { return I.getPointer(); } |
97 | InstrTy *operator->() const { return I.getPointer(); } |
98 | explicit operator bool() const { return I.getPointer(); } |
99 | |
100 | /// Get the basic block containing the call site. |
101 | BBTy* getParent() const { return getInstruction()->getParent(); } |
102 | |
103 | /// Return the pointer to function that is being called. |
104 | ValTy *getCalledValue() const { |
105 | assert(getInstruction() && "Not a call, invoke or callbr instruction!")((getInstruction() && "Not a call, invoke or callbr instruction!" ) ? static_cast<void> (0) : __assert_fail ("getInstruction() && \"Not a call, invoke or callbr instruction!\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/IR/CallSite.h" , 105, __PRETTY_FUNCTION__)); |
106 | return *getCallee(); |
107 | } |
108 | |
109 | /// Return the function being called if this is a direct call, otherwise |
110 | /// return null (if it's an indirect call). |
111 | FunTy *getCalledFunction() const { |
112 | return dyn_cast<FunTy>(getCalledValue()); |
113 | } |
114 | |
115 | /// Return true if the callsite is an indirect call. |
116 | bool isIndirectCall() const { |
117 | const Value *V = getCalledValue(); |
118 | if (!V) |
119 | return false; |
120 | if (isa<FunTy>(V) || isa<Constant>(V)) |
121 | return false; |
122 | if (const CallBase *CB = dyn_cast<CallBase>(getInstruction())) |
123 | if (CB->isInlineAsm()) |
124 | return false; |
125 | return true; |
126 | } |
127 | |
128 | /// Set the callee to the specified value. Unlike the function of the same |
129 | /// name on CallBase, does not modify the type! |
130 | void setCalledFunction(Value *V) { |
131 | assert(getInstruction() && "Not a call, callbr, or invoke instruction!")((getInstruction() && "Not a call, callbr, or invoke instruction!" ) ? static_cast<void> (0) : __assert_fail ("getInstruction() && \"Not a call, callbr, or invoke instruction!\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/IR/CallSite.h" , 131, __PRETTY_FUNCTION__)); |
132 | assert(cast<PointerType>(V->getType())->getElementType() ==((cast<PointerType>(V->getType())->getElementType () == cast<CallBase>(getInstruction())->getFunctionType () && "New callee type does not match FunctionType on call" ) ? static_cast<void> (0) : __assert_fail ("cast<PointerType>(V->getType())->getElementType() == cast<CallBase>(getInstruction())->getFunctionType() && \"New callee type does not match FunctionType on call\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/IR/CallSite.h" , 134, __PRETTY_FUNCTION__)) |
133 | cast<CallBase>(getInstruction())->getFunctionType() &&((cast<PointerType>(V->getType())->getElementType () == cast<CallBase>(getInstruction())->getFunctionType () && "New callee type does not match FunctionType on call" ) ? static_cast<void> (0) : __assert_fail ("cast<PointerType>(V->getType())->getElementType() == cast<CallBase>(getInstruction())->getFunctionType() && \"New callee type does not match FunctionType on call\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/IR/CallSite.h" , 134, __PRETTY_FUNCTION__)) |
134 | "New callee type does not match FunctionType on call")((cast<PointerType>(V->getType())->getElementType () == cast<CallBase>(getInstruction())->getFunctionType () && "New callee type does not match FunctionType on call" ) ? static_cast<void> (0) : __assert_fail ("cast<PointerType>(V->getType())->getElementType() == cast<CallBase>(getInstruction())->getFunctionType() && \"New callee type does not match FunctionType on call\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/IR/CallSite.h" , 134, __PRETTY_FUNCTION__)); |
135 | *getCallee() = V; |
136 | } |
137 | |
138 | /// Return the intrinsic ID of the intrinsic called by this CallSite, |
139 | /// or Intrinsic::not_intrinsic if the called function is not an |
140 | /// intrinsic, or if this CallSite is an indirect call. |
141 | Intrinsic::ID getIntrinsicID() const { |
142 | if (auto *F = getCalledFunction()) |
143 | return F->getIntrinsicID(); |
144 | // Don't use Intrinsic::not_intrinsic, as it will require pulling |
145 | // Intrinsics.h into every header that uses CallSite. |
146 | return static_cast<Intrinsic::ID>(0); |
147 | } |
148 | |
149 | /// Determine whether the passed iterator points to the callee operand's Use. |
150 | bool isCallee(Value::const_user_iterator UI) const { |
151 | return isCallee(&UI.getUse()); |
152 | } |
153 | |
154 | /// Determine whether this Use is the callee operand's Use. |
155 | bool isCallee(const Use *U) const { return getCallee() == U; } |
156 | |
157 | /// Determine whether the passed iterator points to an argument operand. |
158 | bool isArgOperand(Value::const_user_iterator UI) const { |
159 | return isArgOperand(&UI.getUse()); |
160 | } |
161 | |
162 | /// Determine whether the passed use points to an argument operand. |
163 | bool isArgOperand(const Use *U) const { |
164 | assert(getInstruction() == U->getUser())((getInstruction() == U->getUser()) ? static_cast<void> (0) : __assert_fail ("getInstruction() == U->getUser()", "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/IR/CallSite.h" , 164, __PRETTY_FUNCTION__)); |
165 | return arg_begin() <= U && U < arg_end(); |
166 | } |
167 | |
168 | /// Determine whether the passed iterator points to a bundle operand. |
169 | bool isBundleOperand(Value::const_user_iterator UI) const { |
170 | return isBundleOperand(&UI.getUse()); |
171 | } |
172 | |
173 | /// Determine whether the passed use points to a bundle operand. |
174 | bool isBundleOperand(const Use *U) const { |
175 | assert(getInstruction() == U->getUser())((getInstruction() == U->getUser()) ? static_cast<void> (0) : __assert_fail ("getInstruction() == U->getUser()", "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/IR/CallSite.h" , 175, __PRETTY_FUNCTION__)); |
176 | if (!hasOperandBundles()) |
177 | return false; |
178 | unsigned OperandNo = U - (*this)->op_begin(); |
179 | return getBundleOperandsStartIndex() <= OperandNo && |
180 | OperandNo < getBundleOperandsEndIndex(); |
181 | } |
182 | |
183 | /// Determine whether the passed iterator points to a data operand. |
184 | bool isDataOperand(Value::const_user_iterator UI) const { |
185 | return isDataOperand(&UI.getUse()); |
186 | } |
187 | |
188 | /// Determine whether the passed use points to a data operand. |
189 | bool isDataOperand(const Use *U) const { |
190 | return data_operands_begin() <= U && U < data_operands_end(); |
191 | } |
192 | |
193 | ValTy *getArgument(unsigned ArgNo) const { |
194 | assert(arg_begin() + ArgNo < arg_end() && "Argument # out of range!")((arg_begin() + ArgNo < arg_end() && "Argument # out of range!" ) ? static_cast<void> (0) : __assert_fail ("arg_begin() + ArgNo < arg_end() && \"Argument # out of range!\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/IR/CallSite.h" , 194, __PRETTY_FUNCTION__)); |
195 | return *(arg_begin() + ArgNo); |
196 | } |
197 | |
198 | void setArgument(unsigned ArgNo, Value* newVal) { |
199 | assert(getInstruction() && "Not a call, invoke or callbr instruction!")((getInstruction() && "Not a call, invoke or callbr instruction!" ) ? static_cast<void> (0) : __assert_fail ("getInstruction() && \"Not a call, invoke or callbr instruction!\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/IR/CallSite.h" , 199, __PRETTY_FUNCTION__)); |
200 | assert(arg_begin() + ArgNo < arg_end() && "Argument # out of range!")((arg_begin() + ArgNo < arg_end() && "Argument # out of range!" ) ? static_cast<void> (0) : __assert_fail ("arg_begin() + ArgNo < arg_end() && \"Argument # out of range!\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/IR/CallSite.h" , 200, __PRETTY_FUNCTION__)); |
201 | getInstruction()->setOperand(ArgNo, newVal); |
202 | } |
203 | |
204 | /// Given a value use iterator, returns the argument that corresponds to it. |
205 | /// Iterator must actually correspond to an argument. |
206 | unsigned getArgumentNo(Value::const_user_iterator I) const { |
207 | return getArgumentNo(&I.getUse()); |
208 | } |
209 | |
210 | /// Given a use for an argument, get the argument number that corresponds to |
211 | /// it. |
212 | unsigned getArgumentNo(const Use *U) const { |
213 | assert(getInstruction() && "Not a call, invoke or callbr instruction!")((getInstruction() && "Not a call, invoke or callbr instruction!" ) ? static_cast<void> (0) : __assert_fail ("getInstruction() && \"Not a call, invoke or callbr instruction!\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/IR/CallSite.h" , 213, __PRETTY_FUNCTION__)); |
214 | assert(isArgOperand(U) && "Argument # out of range!")((isArgOperand(U) && "Argument # out of range!") ? static_cast <void> (0) : __assert_fail ("isArgOperand(U) && \"Argument # out of range!\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/IR/CallSite.h" , 214, __PRETTY_FUNCTION__)); |
215 | return U - arg_begin(); |
216 | } |
217 | |
218 | /// The type of iterator to use when looping over actual arguments at this |
219 | /// call site. |
220 | using arg_iterator = IterTy; |
221 | |
222 | iterator_range<IterTy> args() const { |
223 | return make_range(arg_begin(), arg_end()); |
224 | } |
225 | bool arg_empty() const { return arg_end() == arg_begin(); } |
226 | unsigned arg_size() const { return unsigned(arg_end() - arg_begin()); } |
227 | |
228 | /// Given a value use iterator, return the data operand corresponding to it. |
229 | /// Iterator must actually correspond to a data operand. |
230 | unsigned getDataOperandNo(Value::const_user_iterator UI) const { |
231 | return getDataOperandNo(&UI.getUse()); |
232 | } |
233 | |
234 | /// Given a use for a data operand, get the data operand number that |
235 | /// corresponds to it. |
236 | unsigned getDataOperandNo(const Use *U) const { |
237 | assert(getInstruction() && "Not a call, invoke or callbr instruction!")((getInstruction() && "Not a call, invoke or callbr instruction!" ) ? static_cast<void> (0) : __assert_fail ("getInstruction() && \"Not a call, invoke or callbr instruction!\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/IR/CallSite.h" , 237, __PRETTY_FUNCTION__)); |
238 | assert(isDataOperand(U) && "Data operand # out of range!")((isDataOperand(U) && "Data operand # out of range!") ? static_cast<void> (0) : __assert_fail ("isDataOperand(U) && \"Data operand # out of range!\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/IR/CallSite.h" , 238, __PRETTY_FUNCTION__)); |
239 | return U - data_operands_begin(); |
240 | } |
241 | |
242 | /// Type of iterator to use when looping over data operands at this call site |
243 | /// (see below). |
244 | using data_operand_iterator = IterTy; |
245 | |
246 | /// data_operands_begin/data_operands_end - Return iterators iterating over |
247 | /// the call / invoke / callbr argument list and bundle operands. For invokes, |
248 | /// this is the set of instruction operands except the invoke target and the |
249 | /// two successor blocks; for calls this is the set of instruction operands |
250 | /// except the call target; for callbrs the number of labels to skip must be |
251 | /// determined first. |
252 | |
253 | IterTy data_operands_begin() const { |
254 | assert(getInstruction() && "Not a call or invoke instruction!")((getInstruction() && "Not a call or invoke instruction!" ) ? static_cast<void> (0) : __assert_fail ("getInstruction() && \"Not a call or invoke instruction!\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/IR/CallSite.h" , 254, __PRETTY_FUNCTION__)); |
255 | return cast<CallBase>(getInstruction())->data_operands_begin(); |
256 | } |
257 | IterTy data_operands_end() const { |
258 | assert(getInstruction() && "Not a call or invoke instruction!")((getInstruction() && "Not a call or invoke instruction!" ) ? static_cast<void> (0) : __assert_fail ("getInstruction() && \"Not a call or invoke instruction!\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/IR/CallSite.h" , 258, __PRETTY_FUNCTION__)); |
259 | return cast<CallBase>(getInstruction())->data_operands_end(); |
260 | } |
261 | iterator_range<IterTy> data_ops() const { |
262 | return make_range(data_operands_begin(), data_operands_end()); |
263 | } |
264 | bool data_operands_empty() const { |
265 | return data_operands_end() == data_operands_begin(); |
266 | } |
267 | unsigned data_operands_size() const { |
268 | return std::distance(data_operands_begin(), data_operands_end()); |
269 | } |
270 | |
271 | /// Return the type of the instruction that generated this call site. |
272 | Type *getType() const { return (*this)->getType(); } |
273 | |
274 | /// Return the caller function for this call site. |
275 | FunTy *getCaller() const { return (*this)->getParent()->getParent(); } |
276 | |
277 | /// Tests if this call site must be tail call optimized. Only a CallInst can |
278 | /// be tail call optimized. |
279 | bool isMustTailCall() const { |
280 | return isCall() && cast<CallInst>(getInstruction())->isMustTailCall(); |
281 | } |
282 | |
283 | /// Tests if this call site is marked as a tail call. |
284 | bool isTailCall() const { |
285 | return isCall() && cast<CallInst>(getInstruction())->isTailCall(); |
286 | } |
287 | |
288 | #define CALLSITE_DELEGATE_GETTER(METHOD) \ |
289 | InstrTy *II = getInstruction(); \ |
290 | return isCall() ? cast<CallInst>(II)->METHOD \ |
291 | : isCallBr() ? cast<CallBrInst>(II)->METHOD \ |
292 | : cast<InvokeInst>(II)->METHOD |
293 | |
294 | #define CALLSITE_DELEGATE_SETTER(METHOD) \ |
295 | InstrTy *II = getInstruction(); \ |
296 | if (isCall()) \ |
297 | cast<CallInst>(II)->METHOD; \ |
298 | else if (isCallBr()) \ |
299 | cast<CallBrInst>(II)->METHOD; \ |
300 | else \ |
301 | cast<InvokeInst>(II)->METHOD |
302 | |
303 | unsigned getNumArgOperands() const { |
304 | CALLSITE_DELEGATE_GETTER(getNumArgOperands()); |
305 | } |
306 | |
307 | ValTy *getArgOperand(unsigned i) const { |
308 | CALLSITE_DELEGATE_GETTER(getArgOperand(i)); |
309 | } |
310 | |
311 | ValTy *getReturnedArgOperand() const { |
312 | CALLSITE_DELEGATE_GETTER(getReturnedArgOperand()); |
313 | } |
314 | |
315 | bool isInlineAsm() const { |
316 | return cast<CallBase>(getInstruction())->isInlineAsm(); |
317 | } |
318 | |
319 | /// Get the calling convention of the call. |
320 | CallingConv::ID getCallingConv() const { |
321 | CALLSITE_DELEGATE_GETTER(getCallingConv()); |
322 | } |
323 | /// Set the calling convention of the call. |
324 | void setCallingConv(CallingConv::ID CC) { |
325 | CALLSITE_DELEGATE_SETTER(setCallingConv(CC)); |
326 | } |
327 | |
328 | FunctionType *getFunctionType() const { |
329 | CALLSITE_DELEGATE_GETTER(getFunctionType()); |
330 | } |
331 | |
332 | void mutateFunctionType(FunctionType *Ty) const { |
333 | CALLSITE_DELEGATE_SETTER(mutateFunctionType(Ty)); |
334 | } |
335 | |
336 | /// Get the parameter attributes of the call. |
337 | AttributeList getAttributes() const { |
338 | CALLSITE_DELEGATE_GETTER(getAttributes()); |
339 | } |
340 | /// Set the parameter attributes of the call. |
341 | void setAttributes(AttributeList PAL) { |
342 | CALLSITE_DELEGATE_SETTER(setAttributes(PAL)); |
343 | } |
344 | |
345 | void addAttribute(unsigned i, Attribute::AttrKind Kind) { |
346 | CALLSITE_DELEGATE_SETTER(addAttribute(i, Kind)); |
347 | } |
348 | |
349 | void addAttribute(unsigned i, Attribute Attr) { |
350 | CALLSITE_DELEGATE_SETTER(addAttribute(i, Attr)); |
351 | } |
352 | |
353 | void addParamAttr(unsigned ArgNo, Attribute::AttrKind Kind) { |
354 | CALLSITE_DELEGATE_SETTER(addParamAttr(ArgNo, Kind)); |
355 | } |
356 | |
357 | void removeAttribute(unsigned i, Attribute::AttrKind Kind) { |
358 | CALLSITE_DELEGATE_SETTER(removeAttribute(i, Kind)); |
359 | } |
360 | |
361 | void removeAttribute(unsigned i, StringRef Kind) { |
362 | CALLSITE_DELEGATE_SETTER(removeAttribute(i, Kind)); |
363 | } |
364 | |
365 | void removeParamAttr(unsigned ArgNo, Attribute::AttrKind Kind) { |
366 | CALLSITE_DELEGATE_SETTER(removeParamAttr(ArgNo, Kind)); |
367 | } |
368 | |
369 | /// Return true if this function has the given attribute. |
370 | bool hasFnAttr(Attribute::AttrKind Kind) const { |
371 | CALLSITE_DELEGATE_GETTER(hasFnAttr(Kind)); |
372 | } |
373 | |
374 | /// Return true if this function has the given attribute. |
375 | bool hasFnAttr(StringRef Kind) const { |
376 | CALLSITE_DELEGATE_GETTER(hasFnAttr(Kind)); |
377 | } |
378 | |
379 | /// Return true if this return value has the given attribute. |
380 | bool hasRetAttr(Attribute::AttrKind Kind) const { |
381 | CALLSITE_DELEGATE_GETTER(hasRetAttr(Kind)); |
382 | } |
383 | |
384 | /// Return true if the call or the callee has the given attribute. |
385 | bool paramHasAttr(unsigned ArgNo, Attribute::AttrKind Kind) const { |
386 | CALLSITE_DELEGATE_GETTER(paramHasAttr(ArgNo, Kind)); |
387 | } |
388 | |
389 | Attribute getAttribute(unsigned i, Attribute::AttrKind Kind) const { |
390 | CALLSITE_DELEGATE_GETTER(getAttribute(i, Kind)); |
391 | } |
392 | |
393 | Attribute getAttribute(unsigned i, StringRef Kind) const { |
394 | CALLSITE_DELEGATE_GETTER(getAttribute(i, Kind)); |
395 | } |
396 | |
397 | /// Return true if the data operand at index \p i directly or indirectly has |
398 | /// the attribute \p A. |
399 | /// |
400 | /// Normal call, invoke or callbr arguments have per operand attributes, as |
401 | /// specified in the attribute set attached to this instruction, while operand |
402 | /// bundle operands may have some attributes implied by the type of its |
403 | /// containing operand bundle. |
404 | bool dataOperandHasImpliedAttr(unsigned i, Attribute::AttrKind Kind) const { |
405 | CALLSITE_DELEGATE_GETTER(dataOperandHasImpliedAttr(i, Kind)); |
406 | } |
407 | |
408 | /// Extract the alignment of the return value. |
409 | unsigned getRetAlignment() const { |
410 | CALLSITE_DELEGATE_GETTER(getRetAlignment()); |
411 | } |
412 | |
413 | /// Extract the alignment for a call or parameter (0=unknown). |
414 | unsigned getParamAlignment(unsigned ArgNo) const { |
415 | CALLSITE_DELEGATE_GETTER(getParamAlignment(ArgNo)); |
416 | } |
417 | |
418 | /// Extract the byval type for a call or parameter (nullptr=unknown). |
419 | Type *getParamByValType(unsigned ArgNo) const { |
420 | CALLSITE_DELEGATE_GETTER(getParamByValType(ArgNo)); |
421 | } |
422 | |
423 | /// Extract the number of dereferenceable bytes for a call or parameter |
424 | /// (0=unknown). |
425 | uint64_t getDereferenceableBytes(unsigned i) const { |
426 | CALLSITE_DELEGATE_GETTER(getDereferenceableBytes(i)); |
427 | } |
428 | |
429 | /// Extract the number of dereferenceable_or_null bytes for a call or |
430 | /// parameter (0=unknown). |
431 | uint64_t getDereferenceableOrNullBytes(unsigned i) const { |
432 | CALLSITE_DELEGATE_GETTER(getDereferenceableOrNullBytes(i)); |
433 | } |
434 | |
435 | /// Determine if the return value is marked with NoAlias attribute. |
436 | bool returnDoesNotAlias() const { |
437 | CALLSITE_DELEGATE_GETTER(returnDoesNotAlias()); |
438 | } |
439 | |
440 | /// Return true if the call should not be treated as a call to a builtin. |
441 | bool isNoBuiltin() const { |
442 | CALLSITE_DELEGATE_GETTER(isNoBuiltin()); |
443 | } |
444 | |
445 | /// Return true if the call requires strict floating point semantics. |
446 | bool isStrictFP() const { |
447 | CALLSITE_DELEGATE_GETTER(isStrictFP()); |
448 | } |
449 | |
450 | /// Return true if the call should not be inlined. |
451 | bool isNoInline() const { |
452 | CALLSITE_DELEGATE_GETTER(isNoInline()); |
453 | } |
454 | void setIsNoInline(bool Value = true) { |
455 | CALLSITE_DELEGATE_SETTER(setIsNoInline(Value)); |
456 | } |
457 | |
458 | /// Determine if the call does not access memory. |
459 | bool doesNotAccessMemory() const { |
460 | CALLSITE_DELEGATE_GETTER(doesNotAccessMemory()); |
461 | } |
462 | void setDoesNotAccessMemory() { |
463 | CALLSITE_DELEGATE_SETTER(setDoesNotAccessMemory()); |
464 | } |
465 | |
466 | /// Determine if the call does not access or only reads memory. |
467 | bool onlyReadsMemory() const { |
468 | CALLSITE_DELEGATE_GETTER(onlyReadsMemory()); |
469 | } |
470 | void setOnlyReadsMemory() { |
471 | CALLSITE_DELEGATE_SETTER(setOnlyReadsMemory()); |
472 | } |
473 | |
474 | /// Determine if the call does not access or only writes memory. |
475 | bool doesNotReadMemory() const { |
476 | CALLSITE_DELEGATE_GETTER(doesNotReadMemory()); |
477 | } |
478 | void setDoesNotReadMemory() { |
479 | CALLSITE_DELEGATE_SETTER(setDoesNotReadMemory()); |
480 | } |
481 | |
482 | /// Determine if the call can access memory only using pointers based |
483 | /// on its arguments. |
484 | bool onlyAccessesArgMemory() const { |
485 | CALLSITE_DELEGATE_GETTER(onlyAccessesArgMemory()); |
486 | } |
487 | void setOnlyAccessesArgMemory() { |
488 | CALLSITE_DELEGATE_SETTER(setOnlyAccessesArgMemory()); |
489 | } |
490 | |
491 | /// Determine if the function may only access memory that is |
492 | /// inaccessible from the IR. |
493 | bool onlyAccessesInaccessibleMemory() const { |
494 | CALLSITE_DELEGATE_GETTER(onlyAccessesInaccessibleMemory()); |
495 | } |
496 | void setOnlyAccessesInaccessibleMemory() { |
497 | CALLSITE_DELEGATE_SETTER(setOnlyAccessesInaccessibleMemory()); |
498 | } |
499 | |
500 | /// Determine if the function may only access memory that is |
501 | /// either inaccessible from the IR or pointed to by its arguments. |
502 | bool onlyAccessesInaccessibleMemOrArgMem() const { |
503 | CALLSITE_DELEGATE_GETTER(onlyAccessesInaccessibleMemOrArgMem()); |
504 | } |
505 | void setOnlyAccessesInaccessibleMemOrArgMem() { |
506 | CALLSITE_DELEGATE_SETTER(setOnlyAccessesInaccessibleMemOrArgMem()); |
507 | } |
508 | |
509 | /// Determine if the call cannot return. |
510 | bool doesNotReturn() const { |
511 | CALLSITE_DELEGATE_GETTER(doesNotReturn()); |
512 | } |
513 | void setDoesNotReturn() { |
514 | CALLSITE_DELEGATE_SETTER(setDoesNotReturn()); |
515 | } |
516 | |
517 | /// Determine if the call cannot unwind. |
518 | bool doesNotThrow() const { |
519 | CALLSITE_DELEGATE_GETTER(doesNotThrow()); |
520 | } |
521 | void setDoesNotThrow() { |
522 | CALLSITE_DELEGATE_SETTER(setDoesNotThrow()); |
523 | } |
524 | |
525 | /// Determine if the call cannot be duplicated. |
526 | bool cannotDuplicate() const { |
527 | CALLSITE_DELEGATE_GETTER(cannotDuplicate()); |
528 | } |
529 | void setCannotDuplicate() { |
530 | CALLSITE_DELEGATE_SETTER(setCannotDuplicate()); |
531 | } |
532 | |
533 | /// Determine if the call is convergent. |
534 | bool isConvergent() const { |
535 | CALLSITE_DELEGATE_GETTER(isConvergent()); |
536 | } |
537 | void setConvergent() { |
538 | CALLSITE_DELEGATE_SETTER(setConvergent()); |
539 | } |
540 | void setNotConvergent() { |
541 | CALLSITE_DELEGATE_SETTER(setNotConvergent()); |
542 | } |
543 | |
544 | unsigned getNumOperandBundles() const { |
545 | CALLSITE_DELEGATE_GETTER(getNumOperandBundles()); |
546 | } |
547 | |
548 | bool hasOperandBundles() const { |
549 | CALLSITE_DELEGATE_GETTER(hasOperandBundles()); |
550 | } |
551 | |
552 | unsigned getBundleOperandsStartIndex() const { |
553 | CALLSITE_DELEGATE_GETTER(getBundleOperandsStartIndex()); |
554 | } |
555 | |
556 | unsigned getBundleOperandsEndIndex() const { |
557 | CALLSITE_DELEGATE_GETTER(getBundleOperandsEndIndex()); |
558 | } |
559 | |
560 | unsigned getNumTotalBundleOperands() const { |
561 | CALLSITE_DELEGATE_GETTER(getNumTotalBundleOperands()); |
562 | } |
563 | |
564 | OperandBundleUse getOperandBundleAt(unsigned Index) const { |
565 | CALLSITE_DELEGATE_GETTER(getOperandBundleAt(Index)); |
566 | } |
567 | |
568 | Optional<OperandBundleUse> getOperandBundle(StringRef Name) const { |
569 | CALLSITE_DELEGATE_GETTER(getOperandBundle(Name)); |
570 | } |
571 | |
572 | Optional<OperandBundleUse> getOperandBundle(uint32_t ID) const { |
573 | CALLSITE_DELEGATE_GETTER(getOperandBundle(ID)); |
574 | } |
575 | |
576 | unsigned countOperandBundlesOfType(uint32_t ID) const { |
577 | CALLSITE_DELEGATE_GETTER(countOperandBundlesOfType(ID)); |
578 | } |
579 | |
580 | bool isBundleOperand(unsigned Idx) const { |
581 | CALLSITE_DELEGATE_GETTER(isBundleOperand(Idx)); |
582 | } |
583 | |
584 | IterTy arg_begin() const { |
585 | CALLSITE_DELEGATE_GETTER(arg_begin()); |
586 | } |
587 | |
588 | IterTy arg_end() const { |
589 | CALLSITE_DELEGATE_GETTER(arg_end()); |
590 | } |
591 | |
592 | #undef CALLSITE_DELEGATE_GETTER |
593 | #undef CALLSITE_DELEGATE_SETTER |
594 | |
595 | void getOperandBundlesAsDefs(SmallVectorImpl<OperandBundleDef> &Defs) const { |
596 | // Since this is actually a getter that "looks like" a setter, don't use the |
597 | // above macros to avoid confusion. |
598 | cast<CallBase>(getInstruction())->getOperandBundlesAsDefs(Defs); |
599 | } |
600 | |
601 | /// Determine whether this data operand is not captured. |
602 | bool doesNotCapture(unsigned OpNo) const { |
603 | return dataOperandHasImpliedAttr(OpNo + 1, Attribute::NoCapture); |
604 | } |
605 | |
606 | /// Determine whether this argument is passed by value. |
607 | bool isByValArgument(unsigned ArgNo) const { |
608 | return paramHasAttr(ArgNo, Attribute::ByVal); |
609 | } |
610 | |
611 | /// Determine whether this argument is passed in an alloca. |
612 | bool isInAllocaArgument(unsigned ArgNo) const { |
613 | return paramHasAttr(ArgNo, Attribute::InAlloca); |
614 | } |
615 | |
616 | /// Determine whether this argument is passed by value or in an alloca. |
617 | bool isByValOrInAllocaArgument(unsigned ArgNo) const { |
618 | return paramHasAttr(ArgNo, Attribute::ByVal) || |
619 | paramHasAttr(ArgNo, Attribute::InAlloca); |
620 | } |
621 | |
622 | /// Determine if there is an inalloca argument. Only the last argument can |
623 | /// have the inalloca attribute. |
624 | bool hasInAllocaArgument() const { |
625 | return !arg_empty() && paramHasAttr(arg_size() - 1, Attribute::InAlloca); |
626 | } |
627 | |
628 | bool doesNotAccessMemory(unsigned OpNo) const { |
629 | return dataOperandHasImpliedAttr(OpNo + 1, Attribute::ReadNone); |
630 | } |
631 | |
632 | bool onlyReadsMemory(unsigned OpNo) const { |
633 | return dataOperandHasImpliedAttr(OpNo + 1, Attribute::ReadOnly) || |
634 | dataOperandHasImpliedAttr(OpNo + 1, Attribute::ReadNone); |
635 | } |
636 | |
637 | bool doesNotReadMemory(unsigned OpNo) const { |
638 | return dataOperandHasImpliedAttr(OpNo + 1, Attribute::WriteOnly) || |
639 | dataOperandHasImpliedAttr(OpNo + 1, Attribute::ReadNone); |
640 | } |
641 | |
642 | /// Return true if the return value is known to be not null. |
643 | /// This may be because it has the nonnull attribute, or because at least |
644 | /// one byte is dereferenceable and the pointer is in addrspace(0). |
645 | bool isReturnNonNull() const { |
646 | if (hasRetAttr(Attribute::NonNull)) |
647 | return true; |
648 | else if (getDereferenceableBytes(AttributeList::ReturnIndex) > 0 && |
649 | !NullPointerIsDefined(getCaller(), |
650 | getType()->getPointerAddressSpace())) |
651 | return true; |
652 | |
653 | return false; |
654 | } |
655 | |
656 | /// Returns true if this CallSite passes the given Value* as an argument to |
657 | /// the called function. |
658 | bool hasArgument(const Value *Arg) const { |
659 | for (arg_iterator AI = this->arg_begin(), E = this->arg_end(); AI != E; |
660 | ++AI) |
661 | if (AI->get() == Arg) |
662 | return true; |
663 | return false; |
664 | } |
665 | |
666 | private: |
667 | IterTy getCallee() const { |
668 | return cast<CallBase>(getInstruction())->op_end() - 1; |
669 | } |
670 | }; |
671 | |
672 | class CallSite : public CallSiteBase<Function, BasicBlock, Value, User, Use, |
673 | Instruction, CallInst, InvokeInst, |
674 | CallBrInst, User::op_iterator> { |
675 | public: |
676 | CallSite() = default; |
677 | CallSite(CallSiteBase B) : CallSiteBase(B) {} |
678 | CallSite(CallInst *CI) : CallSiteBase(CI) {} |
679 | CallSite(InvokeInst *II) : CallSiteBase(II) {} |
680 | CallSite(CallBrInst *CBI) : CallSiteBase(CBI) {} |
681 | explicit CallSite(Instruction *II) : CallSiteBase(II) {} |
682 | explicit CallSite(Value *V) : CallSiteBase(V) {} |
683 | |
684 | bool operator==(const CallSite &CS) const { return I == CS.I; } |
685 | bool operator!=(const CallSite &CS) const { return I != CS.I; } |
686 | bool operator<(const CallSite &CS) const { |
687 | return getInstruction() < CS.getInstruction(); |
688 | } |
689 | |
690 | private: |
691 | friend struct DenseMapInfo<CallSite>; |
692 | |
693 | User::op_iterator getCallee() const; |
694 | }; |
695 | |
696 | /// Establish a view to a call site for examination. |
697 | class ImmutableCallSite : public CallSiteBase<> { |
698 | public: |
699 | ImmutableCallSite() = default; |
700 | ImmutableCallSite(const CallInst *CI) : CallSiteBase(CI) {} |
701 | ImmutableCallSite(const InvokeInst *II) : CallSiteBase(II) {} |
702 | ImmutableCallSite(const CallBrInst *CBI) : CallSiteBase(CBI) {} |
703 | explicit ImmutableCallSite(const Instruction *II) : CallSiteBase(II) {} |
704 | explicit ImmutableCallSite(const Value *V) : CallSiteBase(V) {} |
705 | ImmutableCallSite(CallSite CS) : CallSiteBase(CS.getInstruction()) {} |
706 | }; |
707 | |
708 | /// AbstractCallSite |
709 | /// |
710 | /// An abstract call site is a wrapper that allows treating direct, |
711 | /// indirect, and callback calls the same. If an abstract call site |
712 | /// represents a direct or indirect call site it behaves like a stripped |
713 | /// down version of a normal call site object. The abstract call site can |
714 | /// also represent a callback call, thus the fact that the initially |
715 | /// called function (=broker) may invoke a third one (=callback callee). |
716 | /// In this case, the abstract call site hides the middle man, hence the |
717 | /// broker function. The result is a representation of the callback call, |
718 | /// inside the broker, but in the context of the original call to the broker. |
719 | /// |
720 | /// There are up to three functions involved when we talk about callback call |
721 | /// sites. The caller (1), which invokes the broker function. The broker |
722 | /// function (2), that will invoke the callee zero or more times. And finally |
723 | /// the callee (3), which is the target of the callback call. |
724 | /// |
725 | /// The abstract call site will handle the mapping from parameters to arguments |
726 | /// depending on the semantic of the broker function. However, it is important |
727 | /// to note that the mapping is often partial. Thus, some arguments of the |
728 | /// call/invoke instruction are mapped to parameters of the callee while others |
729 | /// are not. |
730 | class AbstractCallSite { |
731 | public: |
732 | |
733 | /// The encoding of a callback with regards to the underlying instruction. |
734 | struct CallbackInfo { |
735 | |
736 | /// For direct/indirect calls the parameter encoding is empty. If it is not, |
737 | /// the abstract call site represents a callback. In that case, the first |
738 | /// element of the encoding vector represents which argument of the call |
739 | /// site CS is the callback callee. The remaining elements map parameters |
740 | /// (identified by their position) to the arguments that will be passed |
741 | /// through (also identified by position but in the call site instruction). |
742 | /// |
743 | /// NOTE that we use LLVM argument numbers (starting at 0) and not |
744 | /// clang/source argument numbers (starting at 1). The -1 entries represent |
745 | /// unknown values that are passed to the callee. |
746 | using ParameterEncodingTy = SmallVector<int, 0>; |
747 | ParameterEncodingTy ParameterEncoding; |
748 | |
749 | }; |
750 | |
751 | private: |
752 | |
753 | /// The underlying call site: |
754 | /// caller -> callee, if this is a direct or indirect call site |
755 | /// caller -> broker function, if this is a callback call site |
756 | CallSite CS; |
757 | |
758 | /// The encoding of a callback with regards to the underlying instruction. |
759 | CallbackInfo CI; |
760 | |
761 | public: |
762 | /// Sole constructor for abstract call sites (ACS). |
763 | /// |
764 | /// An abstract call site can only be constructed through a llvm::Use because |
765 | /// each operand (=use) of an instruction could potentially be a different |
766 | /// abstract call site. Furthermore, even if the value of the llvm::Use is the |
767 | /// same, and the user is as well, the abstract call sites might not be. |
768 | /// |
769 | /// If a use is not associated with an abstract call site the constructed ACS |
770 | /// will evaluate to false if converted to a boolean. |
771 | /// |
772 | /// If the use is the callee use of a call or invoke instruction, the |
773 | /// constructed abstract call site will behave as a llvm::CallSite would. |
774 | /// |
775 | /// If the use is not a callee use of a call or invoke instruction, the |
776 | /// callback metadata is used to determine the argument <-> parameter mapping |
777 | /// as well as the callee of the abstract call site. |
778 | AbstractCallSite(const Use *U); |
779 | |
780 | /// Add operand uses of \p ICS that represent callback uses into \p CBUses. |
781 | /// |
782 | /// All uses added to \p CBUses can be used to create abstract call sites for |
783 | /// which AbstractCallSite::isCallbackCall() will return true. |
784 | static void getCallbackUses(ImmutableCallSite ICS, |
785 | SmallVectorImpl<const Use *> &CBUses); |
786 | |
787 | /// Conversion operator to conveniently check for a valid/initialized ACS. |
788 | explicit operator bool() const { return (bool)CS; } |
789 | |
790 | /// Return the underlying instruction. |
791 | Instruction *getInstruction() const { return CS.getInstruction(); } |
792 | |
793 | /// Return the call site abstraction for the underlying instruction. |
794 | CallSite getCallSite() const { return CS; } |
795 | |
796 | /// Return true if this ACS represents a direct call. |
797 | bool isDirectCall() const { |
798 | return !isCallbackCall() && !CS.isIndirectCall(); |
799 | } |
800 | |
801 | /// Return true if this ACS represents an indirect call. |
802 | bool isIndirectCall() const { |
803 | return !isCallbackCall() && CS.isIndirectCall(); |
804 | } |
805 | |
806 | /// Return true if this ACS represents a callback call. |
807 | bool isCallbackCall() const { |
808 | // For a callback call site the callee is ALWAYS stored first in the |
809 | // parameter encoding vector. Thus, a non-empty vector indicates a callback. |
810 | return !CI.ParameterEncoding.empty(); |
811 | } |
812 | |
813 | /// Return true if @p UI is the use that defines the callee of this ACS. |
814 | bool isCallee(Value::const_user_iterator UI) const { |
815 | return isCallee(&UI.getUse()); |
816 | } |
817 | |
818 | /// Return true if @p U is the use that defines the callee of this ACS. |
819 | bool isCallee(const Use *U) const { |
820 | if (isDirectCall()) |
821 | return CS.isCallee(U); |
822 | |
823 | assert(!CI.ParameterEncoding.empty() &&((!CI.ParameterEncoding.empty() && "Callback without parameter encoding!" ) ? static_cast<void> (0) : __assert_fail ("!CI.ParameterEncoding.empty() && \"Callback without parameter encoding!\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/IR/CallSite.h" , 824, __PRETTY_FUNCTION__)) |
824 | "Callback without parameter encoding!")((!CI.ParameterEncoding.empty() && "Callback without parameter encoding!" ) ? static_cast<void> (0) : __assert_fail ("!CI.ParameterEncoding.empty() && \"Callback without parameter encoding!\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/IR/CallSite.h" , 824, __PRETTY_FUNCTION__)); |
825 | |
826 | return (int)CS.getArgumentNo(U) == CI.ParameterEncoding[0]; |
827 | } |
828 | |
829 | /// Return the number of parameters of the callee. |
830 | unsigned getNumArgOperands() const { |
831 | if (isDirectCall()) |
832 | return CS.getNumArgOperands(); |
833 | // Subtract 1 for the callee encoding. |
834 | return CI.ParameterEncoding.size() - 1; |
835 | } |
836 | |
837 | /// Return the operand index of the underlying instruction associated with @p |
838 | /// Arg. |
839 | int getCallArgOperandNo(Argument &Arg) const { |
840 | return getCallArgOperandNo(Arg.getArgNo()); |
841 | } |
842 | |
843 | /// Return the operand index of the underlying instruction associated with |
844 | /// the function parameter number @p ArgNo or -1 if there is none. |
845 | int getCallArgOperandNo(unsigned ArgNo) const { |
846 | if (isDirectCall()) |
847 | return ArgNo; |
848 | // Add 1 for the callee encoding. |
849 | return CI.ParameterEncoding[ArgNo + 1]; |
850 | } |
851 | |
852 | /// Return the operand of the underlying instruction associated with @p Arg. |
853 | Value *getCallArgOperand(Argument &Arg) const { |
854 | return getCallArgOperand(Arg.getArgNo()); |
855 | } |
856 | |
857 | /// Return the operand of the underlying instruction associated with the |
858 | /// function parameter number @p ArgNo or nullptr if there is none. |
859 | Value *getCallArgOperand(unsigned ArgNo) const { |
860 | if (isDirectCall()) |
861 | return CS.getArgOperand(ArgNo); |
862 | // Add 1 for the callee encoding. |
863 | return CI.ParameterEncoding[ArgNo + 1] >= 0 |
864 | ? CS.getArgOperand(CI.ParameterEncoding[ArgNo + 1]) |
865 | : nullptr; |
866 | } |
867 | |
868 | /// Return the operand index of the underlying instruction associated with the |
869 | /// callee of this ACS. Only valid for callback calls! |
870 | int getCallArgOperandNoForCallee() const { |
871 | assert(isCallbackCall())((isCallbackCall()) ? static_cast<void> (0) : __assert_fail ("isCallbackCall()", "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/IR/CallSite.h" , 871, __PRETTY_FUNCTION__)); |
872 | assert(CI.ParameterEncoding.size() && CI.ParameterEncoding[0] >= 0)((CI.ParameterEncoding.size() && CI.ParameterEncoding [0] >= 0) ? static_cast<void> (0) : __assert_fail ("CI.ParameterEncoding.size() && CI.ParameterEncoding[0] >= 0" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/IR/CallSite.h" , 872, __PRETTY_FUNCTION__)); |
873 | return CI.ParameterEncoding[0]; |
874 | } |
875 | |
876 | /// Return the use of the callee value in the underlying instruction. Only |
877 | /// valid for callback calls! |
878 | const Use &getCalleeUseForCallback() const { |
879 | int CalleeArgIdx = getCallArgOperandNoForCallee(); |
880 | assert(CalleeArgIdx >= 0 &&((CalleeArgIdx >= 0 && unsigned(CalleeArgIdx) < getInstruction()->getNumOperands()) ? static_cast<void > (0) : __assert_fail ("CalleeArgIdx >= 0 && unsigned(CalleeArgIdx) < getInstruction()->getNumOperands()" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/IR/CallSite.h" , 881, __PRETTY_FUNCTION__)) |
881 | unsigned(CalleeArgIdx) < getInstruction()->getNumOperands())((CalleeArgIdx >= 0 && unsigned(CalleeArgIdx) < getInstruction()->getNumOperands()) ? static_cast<void > (0) : __assert_fail ("CalleeArgIdx >= 0 && unsigned(CalleeArgIdx) < getInstruction()->getNumOperands()" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/IR/CallSite.h" , 881, __PRETTY_FUNCTION__)); |
882 | return getInstruction()->getOperandUse(CalleeArgIdx); |
883 | } |
884 | |
885 | /// Return the pointer to function that is being called. |
886 | Value *getCalledValue() const { |
887 | if (isDirectCall()) |
888 | return CS.getCalledValue(); |
889 | return CS.getArgOperand(getCallArgOperandNoForCallee()); |
890 | } |
891 | |
892 | /// Return the function being called if this is a direct call, otherwise |
893 | /// return null (if it's an indirect call). |
894 | Function *getCalledFunction() const { |
895 | Value *V = getCalledValue(); |
896 | return V ? dyn_cast<Function>(V->stripPointerCasts()) : nullptr; |
897 | } |
898 | }; |
899 | |
900 | template <> struct DenseMapInfo<CallSite> { |
901 | using BaseInfo = DenseMapInfo<decltype(CallSite::I)>; |
902 | |
903 | static CallSite getEmptyKey() { |
904 | CallSite CS; |
905 | CS.I = BaseInfo::getEmptyKey(); |
906 | return CS; |
907 | } |
908 | |
909 | static CallSite getTombstoneKey() { |
910 | CallSite CS; |
911 | CS.I = BaseInfo::getTombstoneKey(); |
912 | return CS; |
913 | } |
914 | |
915 | static unsigned getHashValue(const CallSite &CS) { |
916 | return BaseInfo::getHashValue(CS.I); |
917 | } |
918 | |
919 | static bool isEqual(const CallSite &LHS, const CallSite &RHS) { |
920 | return LHS == RHS; |
921 | } |
922 | }; |
923 | |
924 | } // end namespace llvm |
925 | |
926 | #endif // LLVM_IR_CALLSITE_H |
1 | //===- llvm/ADT/PointerIntPair.h - Pair for pointer and int -----*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file defines the PointerIntPair class. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #ifndef LLVM_ADT_POINTERINTPAIR_H |
14 | #define LLVM_ADT_POINTERINTPAIR_H |
15 | |
16 | #include "llvm/Support/Compiler.h" |
17 | #include "llvm/Support/PointerLikeTypeTraits.h" |
18 | #include "llvm/Support/type_traits.h" |
19 | #include <cassert> |
20 | #include <cstdint> |
21 | #include <limits> |
22 | |
23 | namespace llvm { |
24 | |
25 | template <typename T> struct DenseMapInfo; |
26 | template <typename PointerT, unsigned IntBits, typename PtrTraits> |
27 | struct PointerIntPairInfo; |
28 | |
29 | /// PointerIntPair - This class implements a pair of a pointer and small |
30 | /// integer. It is designed to represent this in the space required by one |
31 | /// pointer by bitmangling the integer into the low part of the pointer. This |
32 | /// can only be done for small integers: typically up to 3 bits, but it depends |
33 | /// on the number of bits available according to PointerLikeTypeTraits for the |
34 | /// type. |
35 | /// |
36 | /// Note that PointerIntPair always puts the IntVal part in the highest bits |
37 | /// possible. For example, PointerIntPair<void*, 1, bool> will put the bit for |
38 | /// the bool into bit #2, not bit #0, which allows the low two bits to be used |
39 | /// for something else. For example, this allows: |
40 | /// PointerIntPair<PointerIntPair<void*, 1, bool>, 1, bool> |
41 | /// ... and the two bools will land in different bits. |
42 | template <typename PointerTy, unsigned IntBits, typename IntType = unsigned, |
43 | typename PtrTraits = PointerLikeTypeTraits<PointerTy>, |
44 | typename Info = PointerIntPairInfo<PointerTy, IntBits, PtrTraits>> |
45 | class PointerIntPair { |
46 | // Used by MSVC visualizer and generally helpful for debugging/visualizing. |
47 | using InfoTy = Info; |
48 | intptr_t Value = 0; |
49 | |
50 | public: |
51 | constexpr PointerIntPair() = default; |
52 | |
53 | PointerIntPair(PointerTy PtrVal, IntType IntVal) { |
54 | setPointerAndInt(PtrVal, IntVal); |
55 | } |
56 | |
57 | explicit PointerIntPair(PointerTy PtrVal) { initWithPointer(PtrVal); } |
58 | |
59 | PointerTy getPointer() const { return Info::getPointer(Value); } |
60 | |
61 | IntType getInt() const { return (IntType)Info::getInt(Value); } |
62 | |
63 | void setPointer(PointerTy PtrVal) LLVM_LVALUE_FUNCTION& { |
64 | Value = Info::updatePointer(Value, PtrVal); |
65 | } |
66 | |
67 | void setInt(IntType IntVal) LLVM_LVALUE_FUNCTION& { |
68 | Value = Info::updateInt(Value, static_cast<intptr_t>(IntVal)); |
69 | } |
70 | |
71 | void initWithPointer(PointerTy PtrVal) LLVM_LVALUE_FUNCTION& { |
72 | Value = Info::updatePointer(0, PtrVal); |
73 | } |
74 | |
75 | void setPointerAndInt(PointerTy PtrVal, IntType IntVal) LLVM_LVALUE_FUNCTION& { |
76 | Value = Info::updateInt(Info::updatePointer(0, PtrVal), |
77 | static_cast<intptr_t>(IntVal)); |
78 | } |
79 | |
80 | PointerTy const *getAddrOfPointer() const { |
81 | return const_cast<PointerIntPair *>(this)->getAddrOfPointer(); |
82 | } |
83 | |
84 | PointerTy *getAddrOfPointer() { |
85 | assert(Value == reinterpret_cast<intptr_t>(getPointer()) &&((Value == reinterpret_cast<intptr_t>(getPointer()) && "Can only return the address if IntBits is cleared and " "PtrTraits doesn't change the pointer" ) ? static_cast<void> (0) : __assert_fail ("Value == reinterpret_cast<intptr_t>(getPointer()) && \"Can only return the address if IntBits is cleared and \" \"PtrTraits doesn't change the pointer\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/ADT/PointerIntPair.h" , 87, __PRETTY_FUNCTION__)) |
86 | "Can only return the address if IntBits is cleared and "((Value == reinterpret_cast<intptr_t>(getPointer()) && "Can only return the address if IntBits is cleared and " "PtrTraits doesn't change the pointer" ) ? static_cast<void> (0) : __assert_fail ("Value == reinterpret_cast<intptr_t>(getPointer()) && \"Can only return the address if IntBits is cleared and \" \"PtrTraits doesn't change the pointer\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/ADT/PointerIntPair.h" , 87, __PRETTY_FUNCTION__)) |
87 | "PtrTraits doesn't change the pointer")((Value == reinterpret_cast<intptr_t>(getPointer()) && "Can only return the address if IntBits is cleared and " "PtrTraits doesn't change the pointer" ) ? static_cast<void> (0) : __assert_fail ("Value == reinterpret_cast<intptr_t>(getPointer()) && \"Can only return the address if IntBits is cleared and \" \"PtrTraits doesn't change the pointer\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/ADT/PointerIntPair.h" , 87, __PRETTY_FUNCTION__)); |
88 | return reinterpret_cast<PointerTy *>(&Value); |
89 | } |
90 | |
91 | void *getOpaqueValue() const { return reinterpret_cast<void *>(Value); } |
92 | |
93 | void setFromOpaqueValue(void *Val) LLVM_LVALUE_FUNCTION& { |
94 | Value = reinterpret_cast<intptr_t>(Val); |
95 | } |
96 | |
97 | static PointerIntPair getFromOpaqueValue(void *V) { |
98 | PointerIntPair P; |
99 | P.setFromOpaqueValue(V); |
100 | return P; |
101 | } |
102 | |
103 | // Allow PointerIntPairs to be created from const void * if and only if the |
104 | // pointer type could be created from a const void *. |
105 | static PointerIntPair getFromOpaqueValue(const void *V) { |
106 | (void)PtrTraits::getFromVoidPointer(V); |
107 | return getFromOpaqueValue(const_cast<void *>(V)); |
108 | } |
109 | |
110 | bool operator==(const PointerIntPair &RHS) const { |
111 | return Value == RHS.Value; |
112 | } |
113 | |
114 | bool operator!=(const PointerIntPair &RHS) const { |
115 | return Value != RHS.Value; |
116 | } |
117 | |
118 | bool operator<(const PointerIntPair &RHS) const { return Value < RHS.Value; } |
119 | bool operator>(const PointerIntPair &RHS) const { return Value > RHS.Value; } |
120 | |
121 | bool operator<=(const PointerIntPair &RHS) const { |
122 | return Value <= RHS.Value; |
123 | } |
124 | |
125 | bool operator>=(const PointerIntPair &RHS) const { |
126 | return Value >= RHS.Value; |
127 | } |
128 | }; |
129 | |
130 | // Specialize is_trivially_copyable to avoid limitation of llvm::is_trivially_copyable |
131 | // when compiled with gcc 4.9. |
132 | template <typename PointerTy, unsigned IntBits, typename IntType, |
133 | typename PtrTraits, |
134 | typename Info> |
135 | struct is_trivially_copyable<PointerIntPair<PointerTy, IntBits, IntType, PtrTraits, Info>> : std::true_type { |
136 | #ifdef HAVE_STD_IS_TRIVIALLY_COPYABLE |
137 | static_assert(std::is_trivially_copyable<PointerIntPair<PointerTy, IntBits, IntType, PtrTraits, Info>>::value, |
138 | "inconsistent behavior between llvm:: and std:: implementation of is_trivially_copyable"); |
139 | #endif |
140 | }; |
141 | |
142 | |
/// Policy class that performs the bit packing for PointerIntPair.
///
/// A combined word is laid out, from high bits to low bits, as
///   [ pointer bits | IntBits integer bits | IntShift untouched low bits ]
/// where the integer occupies the topmost of the PtrTraits low bits and the
/// IntShift bits below it are left alone.
template <typename PointerT, unsigned IntBits, typename PtrTraits>
struct PointerIntPairInfo {
  static_assert(PtrTraits::NumLowBitsAvailable <
                    std::numeric_limits<uintptr_t>::digits,
                "cannot use a pointer type that has all bits free");
  static_assert(IntBits <= PtrTraits::NumLowBitsAvailable,
                "PointerIntPair with integer size too large for pointer");
  enum MaskAndShiftConstants : uintptr_t {
    /// PointerBitMask - The bits that come from the pointer.
    PointerBitMask =
        ~(uintptr_t)(((intptr_t)1 << PtrTraits::NumLowBitsAvailable) - 1),

    /// IntShift - The number of low bits that we reserve for other uses, and
    /// keep zero.
    IntShift = (uintptr_t)PtrTraits::NumLowBitsAvailable - IntBits,

    /// IntMask - This is the unshifted mask for valid bits of the int type.
    IntMask = (uintptr_t)(((intptr_t)1 << IntBits) - 1),

    // ShiftedIntMask - This is the bits for the integer shifted in place.
    ShiftedIntMask = (uintptr_t)(IntMask << IntShift)
  };

  /// Extract the pointer from \p Value by masking off all low bits and
  /// converting the result through PtrTraits.
  static PointerT getPointer(intptr_t Value) {
    return PtrTraits::getFromVoidPointer(
        reinterpret_cast<void *>(Value & PointerBitMask));
  }

  /// Extract the (unshifted) integer field from \p Value.
  static intptr_t getInt(intptr_t Value) {
    return (Value >> IntShift) & IntMask;
  }

  /// Return \p OrigValue with its pointer bits replaced by \p Ptr.
  /// Asserts that \p Ptr has none of the low (non-pointer) bits set.
  static intptr_t updatePointer(intptr_t OrigValue, PointerT Ptr) {
    intptr_t PtrWord =
        reinterpret_cast<intptr_t>(PtrTraits::getAsVoidPointer(Ptr));
    assert((PtrWord & ~PointerBitMask) == 0 &&
           "Pointer is not sufficiently aligned");
    // Preserve all low bits, just update the pointer.
    return PtrWord | (OrigValue & ~PointerBitMask);
  }

  /// Return \p OrigValue with its integer field replaced by \p Int.
  /// Asserts that \p Int fits in IntBits bits.
  static intptr_t updateInt(intptr_t OrigValue, intptr_t Int) {
    intptr_t IntWord = static_cast<intptr_t>(Int);
    assert((IntWord & ~IntMask) == 0 && "Integer too large for field");

    // Preserve all bits other than the ones we are updating.
    return (OrigValue & ~ShiftedIntMask) | IntWord << IntShift;
  }
};
192 | |
193 | // Provide specialization of DenseMapInfo for PointerIntPair. |
194 | template <typename PointerTy, unsigned IntBits, typename IntType> |
195 | struct DenseMapInfo<PointerIntPair<PointerTy, IntBits, IntType>> { |
196 | using Ty = PointerIntPair<PointerTy, IntBits, IntType>; |
197 | |
198 | static Ty getEmptyKey() { |
199 | uintptr_t Val = static_cast<uintptr_t>(-1); |
200 | Val <<= PointerLikeTypeTraits<Ty>::NumLowBitsAvailable; |
201 | return Ty::getFromOpaqueValue(reinterpret_cast<void *>(Val)); |
202 | } |
203 | |
204 | static Ty getTombstoneKey() { |
205 | uintptr_t Val = static_cast<uintptr_t>(-2); |
206 | Val <<= PointerLikeTypeTraits<PointerTy>::NumLowBitsAvailable; |
207 | return Ty::getFromOpaqueValue(reinterpret_cast<void *>(Val)); |
208 | } |
209 | |
210 | static unsigned getHashValue(Ty V) { |
211 | uintptr_t IV = reinterpret_cast<uintptr_t>(V.getOpaqueValue()); |
212 | return unsigned(IV) ^ unsigned(IV >> 9); |
213 | } |
214 | |
215 | static bool isEqual(const Ty &LHS, const Ty &RHS) { return LHS == RHS; } |
216 | }; |
217 | |
218 | // Teach SmallPtrSet that PointerIntPair is "basically a pointer". |
219 | template <typename PointerTy, unsigned IntBits, typename IntType, |
220 | typename PtrTraits> |
221 | struct PointerLikeTypeTraits< |
222 | PointerIntPair<PointerTy, IntBits, IntType, PtrTraits>> { |
223 | static inline void * |
224 | getAsVoidPointer(const PointerIntPair<PointerTy, IntBits, IntType> &P) { |
225 | return P.getOpaqueValue(); |
226 | } |
227 | |
228 | static inline PointerIntPair<PointerTy, IntBits, IntType> |
229 | getFromVoidPointer(void *P) { |
230 | return PointerIntPair<PointerTy, IntBits, IntType>::getFromOpaqueValue(P); |
231 | } |
232 | |
233 | static inline PointerIntPair<PointerTy, IntBits, IntType> |
234 | getFromVoidPointer(const void *P) { |
235 | return PointerIntPair<PointerTy, IntBits, IntType>::getFromOpaqueValue(P); |
236 | } |
237 | |
238 | static constexpr int NumLowBitsAvailable = |
239 | PtrTraits::NumLowBitsAvailable - IntBits; |
240 | }; |
241 | |
242 | } // end namespace llvm |
243 | |
244 | #endif // LLVM_ADT_POINTERINTPAIR_H |
1 | //===- llvm/Support/PointerLikeTypeTraits.h - Pointer Traits ----*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file defines the PointerLikeTypeTraits class. This allows data |
10 | // structures to reason about pointers and other things that are pointer sized. |
11 | // |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #ifndef LLVM_SUPPORT_POINTERLIKETYPETRAITS_H |
15 | #define LLVM_SUPPORT_POINTERLIKETYPETRAITS_H |
16 | |
17 | #include "llvm/Support/DataTypes.h" |
18 | #include <assert.h> |
19 | #include <type_traits> |
20 | |
21 | namespace llvm { |
22 | |
/// Traits used to treat real pointers, and wrappers that are merely pointers
/// in disguise, as one uniform kind of entity.  Only specializations are
/// defined; the primary template is declared but never defined.
template <typename T> struct PointerLikeTypeTraits;

namespace detail {

/// Compile-time log2 of the constant N, computed by halving N until the
/// N == 1 base case is reached.
template <size_t N>
struct ConstantLog2
    : std::integral_constant<size_t, 1 + ConstantLog2<N / 2>::value> {};

/// Base case of the recursion: log2(1) == 0.
template <> struct ConstantLog2<1> : std::integral_constant<size_t, 0> {};

/// Detects whether PointerLikeTypeTraits<T> is usable.  This primary template
/// is the fallback and answers "no".
template <typename T, typename U = void> struct HasPointerLikeTypeTraits {
  static const bool value = false;
};

/// Selected only when both PointerLikeTypeTraits<T> and T are complete types,
/// since sizeof(...) is valid only for a complete type.
template <typename T>
struct HasPointerLikeTypeTraits<
    T, decltype((sizeof(PointerLikeTypeTraits<T>) + sizeof(T)), void())> {
  static const bool value = true;
};

/// A type is pointer-like if it has usable PointerLikeTypeTraits ...
template <typename T> struct IsPointerLike {
  static const bool value = HasPointerLikeTypeTraits<T>::value;
};

/// ... and every raw pointer type is pointer-like unconditionally.
template <typename T> struct IsPointerLike<T *> {
  static const bool value = true;
};

} // namespace detail
53 | |
54 | // Provide PointerLikeTypeTraits for non-cvr pointers. |
55 | template <typename T> struct PointerLikeTypeTraits<T *> { |
56 | static inline void *getAsVoidPointer(T *P) { return P; } |
57 | static inline T *getFromVoidPointer(void *P) { return static_cast<T *>(P); } |
58 | |
59 | static constexpr int NumLowBitsAvailable = |
60 | detail::ConstantLog2<alignof(T)>::value; |
61 | }; |
62 | |
63 | template <> struct PointerLikeTypeTraits<void *> { |
64 | static inline void *getAsVoidPointer(void *P) { return P; } |
65 | static inline void *getFromVoidPointer(void *P) { return P; } |
66 | |
67 | /// Note, we assume here that void* is related to raw malloc'ed memory and |
68 | /// that malloc returns objects at least 4-byte aligned. However, this may be |
69 | /// wrong, or pointers may be from something other than malloc. In this case, |
70 | /// you should specify a real typed pointer or avoid this template. |
71 | /// |
72 | /// All clients should use assertions to do a run-time check to ensure that |
73 | /// this is actually true. |
74 | static constexpr int NumLowBitsAvailable = 2; |
75 | }; |
76 | |
77 | // Provide PointerLikeTypeTraits for const things. |
78 | template <typename T> struct PointerLikeTypeTraits<const T> { |
79 | typedef PointerLikeTypeTraits<T> NonConst; |
80 | |
81 | static inline const void *getAsVoidPointer(const T P) { |
82 | return NonConst::getAsVoidPointer(P); |
83 | } |
84 | static inline const T getFromVoidPointer(const void *P) { |
85 | return NonConst::getFromVoidPointer(const_cast<void *>(P)); |
86 | } |
87 | static constexpr int NumLowBitsAvailable = NonConst::NumLowBitsAvailable; |
88 | }; |
89 | |
90 | // Provide PointerLikeTypeTraits for const pointers. |
91 | template <typename T> struct PointerLikeTypeTraits<const T *> { |
92 | typedef PointerLikeTypeTraits<T *> NonConst; |
93 | |
94 | static inline const void *getAsVoidPointer(const T *P) { |
95 | return NonConst::getAsVoidPointer(const_cast<T *>(P)); |
96 | } |
97 | static inline const T *getFromVoidPointer(const void *P) { |
98 | return NonConst::getFromVoidPointer(const_cast<void *>(P)); |
99 | } |
100 | static constexpr int NumLowBitsAvailable = NonConst::NumLowBitsAvailable; |
101 | }; |
102 | |
103 | // Provide PointerLikeTypeTraits for uintptr_t. |
104 | template <> struct PointerLikeTypeTraits<uintptr_t> { |
105 | static inline void *getAsVoidPointer(uintptr_t P) { |
106 | return reinterpret_cast<void *>(P); |
107 | } |
108 | static inline uintptr_t getFromVoidPointer(void *P) { |
109 | return reinterpret_cast<uintptr_t>(P); |
110 | } |
111 | // No bits are available! |
112 | static constexpr int NumLowBitsAvailable = 0; |
113 | }; |
114 | |
115 | /// Provide suitable custom traits struct for function pointers. |
116 | /// |
117 | /// Function pointers can't be directly given these traits as functions can't |
118 | /// have their alignment computed with `alignof` and we need different casting. |
119 | /// |
120 | /// To rely on higher alignment for a specialized use, you can provide a |
121 | /// customized form of this template explicitly with higher alignment, and |
122 | /// potentially use alignment attributes on functions to satisfy that. |
123 | template <int Alignment, typename FunctionPointerT> |
124 | struct FunctionPointerLikeTypeTraits { |
125 | static constexpr int NumLowBitsAvailable = |
126 | detail::ConstantLog2<Alignment>::value; |
127 | static inline void *getAsVoidPointer(FunctionPointerT P) { |
128 | assert((reinterpret_cast<uintptr_t>(P) &(((reinterpret_cast<uintptr_t>(P) & ~((uintptr_t)-1 << NumLowBitsAvailable)) == 0 && "Alignment not satisfied for an actual function pointer!" ) ? static_cast<void> (0) : __assert_fail ("(reinterpret_cast<uintptr_t>(P) & ~((uintptr_t)-1 << NumLowBitsAvailable)) == 0 && \"Alignment not satisfied for an actual function pointer!\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/Support/PointerLikeTypeTraits.h" , 130, __PRETTY_FUNCTION__)) |
129 | ~((uintptr_t)-1 << NumLowBitsAvailable)) == 0 &&(((reinterpret_cast<uintptr_t>(P) & ~((uintptr_t)-1 << NumLowBitsAvailable)) == 0 && "Alignment not satisfied for an actual function pointer!" ) ? static_cast<void> (0) : __assert_fail ("(reinterpret_cast<uintptr_t>(P) & ~((uintptr_t)-1 << NumLowBitsAvailable)) == 0 && \"Alignment not satisfied for an actual function pointer!\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/Support/PointerLikeTypeTraits.h" , 130, __PRETTY_FUNCTION__)) |
130 | "Alignment not satisfied for an actual function pointer!")(((reinterpret_cast<uintptr_t>(P) & ~((uintptr_t)-1 << NumLowBitsAvailable)) == 0 && "Alignment not satisfied for an actual function pointer!" ) ? static_cast<void> (0) : __assert_fail ("(reinterpret_cast<uintptr_t>(P) & ~((uintptr_t)-1 << NumLowBitsAvailable)) == 0 && \"Alignment not satisfied for an actual function pointer!\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/Support/PointerLikeTypeTraits.h" , 130, __PRETTY_FUNCTION__)); |
131 | return reinterpret_cast<void *>(P); |
132 | } |
133 | static inline FunctionPointerT getFromVoidPointer(void *P) { |
134 | return reinterpret_cast<FunctionPointerT>(P); |
135 | } |
136 | }; |
137 | |
138 | /// Provide a default specialization for function pointers that assumes 4-byte |
139 | /// alignment. |
140 | /// |
141 | /// We assume here that functions used with this are always at least 4-byte |
142 | /// aligned. This means that, for example, thumb functions won't work or systems |
143 | /// with weird unaligned function pointers won't work. But all practical systems |
144 | /// we support satisfy this requirement. |
145 | template <typename ReturnT, typename... ParamTs> |
146 | struct PointerLikeTypeTraits<ReturnT (*)(ParamTs...)> |
147 | : FunctionPointerLikeTypeTraits<4, ReturnT (*)(ParamTs...)> {}; |
148 | |
149 | } // end namespace llvm |
150 | |
151 | #endif |
1 | //===-- llvm/Operator.h - Operator utility subclass -------------*- C++ -*-===// | ||||||||||||||
2 | // | ||||||||||||||
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||||||||||||||
4 | // See https://llvm.org/LICENSE.txt for license information. | ||||||||||||||
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||||||||||||||
6 | // | ||||||||||||||
7 | //===----------------------------------------------------------------------===// | ||||||||||||||
8 | // | ||||||||||||||
9 | // This file defines various classes for working with Instructions and | ||||||||||||||
10 | // ConstantExprs. | ||||||||||||||
11 | // | ||||||||||||||
12 | //===----------------------------------------------------------------------===// | ||||||||||||||
13 | |||||||||||||||
14 | #ifndef LLVM_IR_OPERATOR_H | ||||||||||||||
15 | #define LLVM_IR_OPERATOR_H | ||||||||||||||
16 | |||||||||||||||
17 | #include "llvm/ADT/None.h" | ||||||||||||||
18 | #include "llvm/ADT/Optional.h" | ||||||||||||||
19 | #include "llvm/IR/Constants.h" | ||||||||||||||
20 | #include "llvm/IR/Instruction.h" | ||||||||||||||
21 | #include "llvm/IR/Type.h" | ||||||||||||||
22 | #include "llvm/IR/Value.h" | ||||||||||||||
23 | #include "llvm/Support/Casting.h" | ||||||||||||||
24 | #include <cstddef> | ||||||||||||||
25 | |||||||||||||||
26 | namespace llvm { | ||||||||||||||
27 | |||||||||||||||
28 | /// This is a utility class that provides an abstraction for the common | ||||||||||||||
29 | /// functionality between Instructions and ConstantExprs. | ||||||||||||||
30 | class Operator : public User { | ||||||||||||||
31 | public: | ||||||||||||||
32 | // The Operator class is intended to be used as a utility, and is never itself | ||||||||||||||
33 | // instantiated. | ||||||||||||||
34 | Operator() = delete; | ||||||||||||||
35 | ~Operator() = delete; | ||||||||||||||
36 | |||||||||||||||
37 | void *operator new(size_t s) = delete; | ||||||||||||||
38 | |||||||||||||||
39 | /// Return the opcode for this Instruction or ConstantExpr. | ||||||||||||||
40 | unsigned getOpcode() const { | ||||||||||||||
41 | if (const Instruction *I = dyn_cast<Instruction>(this)) | ||||||||||||||
42 | return I->getOpcode(); | ||||||||||||||
43 | return cast<ConstantExpr>(this)->getOpcode(); | ||||||||||||||
44 | } | ||||||||||||||
45 | |||||||||||||||
46 | /// If V is an Instruction or ConstantExpr, return its opcode. | ||||||||||||||
47 | /// Otherwise return UserOp1. | ||||||||||||||
48 | static unsigned getOpcode(const Value *V) { | ||||||||||||||
49 | if (const Instruction *I
| ||||||||||||||
50 | return I->getOpcode(); | ||||||||||||||
51 | if (const ConstantExpr *CE
| ||||||||||||||
52 | return CE->getOpcode(); | ||||||||||||||
53 | return Instruction::UserOp1; | ||||||||||||||
54 | } | ||||||||||||||
55 | |||||||||||||||
56 | static bool classof(const Instruction *) { return true; } | ||||||||||||||
57 | static bool classof(const ConstantExpr *) { return true; } | ||||||||||||||
58 | static bool classof(const Value *V) { | ||||||||||||||
59 | return isa<Instruction>(V) || isa<ConstantExpr>(V); | ||||||||||||||
60 | } | ||||||||||||||
61 | }; | ||||||||||||||
62 | |||||||||||||||
63 | /// Utility class for integer operators which may exhibit overflow - Add, Sub, | ||||||||||||||
64 | /// Mul, and Shl. It does not include SDiv, despite that operator having the | ||||||||||||||
65 | /// potential for overflow. | ||||||||||||||
66 | class OverflowingBinaryOperator : public Operator { | ||||||||||||||
67 | public: | ||||||||||||||
68 | enum { | ||||||||||||||
69 | AnyWrap = 0, | ||||||||||||||
70 | NoUnsignedWrap = (1 << 0), | ||||||||||||||
71 | NoSignedWrap = (1 << 1) | ||||||||||||||
72 | }; | ||||||||||||||
73 | |||||||||||||||
74 | private: | ||||||||||||||
75 | friend class Instruction; | ||||||||||||||
76 | friend class ConstantExpr; | ||||||||||||||
77 | |||||||||||||||
78 | void setHasNoUnsignedWrap(bool B) { | ||||||||||||||
79 | SubclassOptionalData = | ||||||||||||||
80 | (SubclassOptionalData & ~NoUnsignedWrap) | (B * NoUnsignedWrap); | ||||||||||||||
81 | } | ||||||||||||||
82 | void setHasNoSignedWrap(bool B) { | ||||||||||||||
83 | SubclassOptionalData = | ||||||||||||||
84 | (SubclassOptionalData & ~NoSignedWrap) | (B * NoSignedWrap); | ||||||||||||||
85 | } | ||||||||||||||
86 | |||||||||||||||
87 | public: | ||||||||||||||
88 | /// Test whether this operation is known to never | ||||||||||||||
89 | /// undergo unsigned overflow, aka the nuw property. | ||||||||||||||
90 | bool hasNoUnsignedWrap() const { | ||||||||||||||
91 | return SubclassOptionalData & NoUnsignedWrap; | ||||||||||||||
92 | } | ||||||||||||||
93 | |||||||||||||||
94 | /// Test whether this operation is known to never | ||||||||||||||
95 | /// undergo signed overflow, aka the nsw property. | ||||||||||||||
96 | bool hasNoSignedWrap() const { | ||||||||||||||
97 | return (SubclassOptionalData & NoSignedWrap) != 0; | ||||||||||||||
98 | } | ||||||||||||||
99 | |||||||||||||||
100 | static bool classof(const Instruction *I) { | ||||||||||||||
101 | return I->getOpcode() == Instruction::Add || | ||||||||||||||
102 | I->getOpcode() == Instruction::Sub || | ||||||||||||||
103 | I->getOpcode() == Instruction::Mul || | ||||||||||||||
104 | I->getOpcode() == Instruction::Shl; | ||||||||||||||
105 | } | ||||||||||||||
106 | static bool classof(const ConstantExpr *CE) { | ||||||||||||||
107 | return CE->getOpcode() == Instruction::Add || | ||||||||||||||
108 | CE->getOpcode() == Instruction::Sub || | ||||||||||||||
109 | CE->getOpcode() == Instruction::Mul || | ||||||||||||||
110 | CE->getOpcode() == Instruction::Shl; | ||||||||||||||
111 | } | ||||||||||||||
112 | static bool classof(const Value *V) { | ||||||||||||||
113 | return (isa<Instruction>(V) && classof(cast<Instruction>(V))) || | ||||||||||||||
114 | (isa<ConstantExpr>(V) && classof(cast<ConstantExpr>(V))); | ||||||||||||||
115 | } | ||||||||||||||
116 | }; | ||||||||||||||
117 | |||||||||||||||
118 | /// A udiv or sdiv instruction, which can be marked as "exact", | ||||||||||||||
119 | /// indicating that no bits are destroyed. | ||||||||||||||
120 | class PossiblyExactOperator : public Operator { | ||||||||||||||
121 | public: | ||||||||||||||
122 | enum { | ||||||||||||||
123 | IsExact = (1 << 0) | ||||||||||||||
124 | }; | ||||||||||||||
125 | |||||||||||||||
126 | private: | ||||||||||||||
127 | friend class Instruction; | ||||||||||||||
128 | friend class ConstantExpr; | ||||||||||||||
129 | |||||||||||||||
130 | void setIsExact(bool B) { | ||||||||||||||
131 | SubclassOptionalData = (SubclassOptionalData & ~IsExact) | (B * IsExact); | ||||||||||||||
132 | } | ||||||||||||||
133 | |||||||||||||||
134 | public: | ||||||||||||||
135 | /// Test whether this division is known to be exact, with zero remainder. | ||||||||||||||
136 | bool isExact() const { | ||||||||||||||
137 | return SubclassOptionalData & IsExact; | ||||||||||||||
138 | } | ||||||||||||||
139 | |||||||||||||||
140 | static bool isPossiblyExactOpcode(unsigned OpC) { | ||||||||||||||
141 | return OpC == Instruction::SDiv || | ||||||||||||||
142 | OpC == Instruction::UDiv || | ||||||||||||||
143 | OpC == Instruction::AShr || | ||||||||||||||
144 | OpC == Instruction::LShr; | ||||||||||||||
145 | } | ||||||||||||||
146 | |||||||||||||||
147 | static bool classof(const ConstantExpr *CE) { | ||||||||||||||
148 | return isPossiblyExactOpcode(CE->getOpcode()); | ||||||||||||||
149 | } | ||||||||||||||
150 | static bool classof(const Instruction *I) { | ||||||||||||||
151 | return isPossiblyExactOpcode(I->getOpcode()); | ||||||||||||||
152 | } | ||||||||||||||
153 | static bool classof(const Value *V) { | ||||||||||||||
154 | return (isa<Instruction>(V) && classof(cast<Instruction>(V))) || | ||||||||||||||
155 | (isa<ConstantExpr>(V) && classof(cast<ConstantExpr>(V))); | ||||||||||||||
156 | } | ||||||||||||||
157 | }; | ||||||||||||||
158 | |||||||||||||||
/// Convenience type for building up and querying a set of fast-math flags.
class FastMathFlags {
private:
  friend class FPMathOperator;

  /// Bitwise OR of the enumerators below; all bits set means 'fast'.
  unsigned Flags = 0;

  // When all 7 flag bits are set the value is widened to -1 (all bits set).
  // If the number of bits grows, this must be updated.  The widening is meant
  // as some forward binary compatibility insurance for the meaning of 'fast'
  // in case bits are added.
  FastMathFlags(unsigned F) : Flags(F == 0x7F ? ~0U : F) {}

  /// True if any bit of \p Mask is set.
  bool isSet(unsigned Mask) const { return (Flags & Mask) != 0; }

  /// Turn the bits of \p Mask on or off according to \p B.
  void assign(unsigned Mask, bool B) {
    if (B)
      Flags |= Mask;
    else
      Flags &= ~Mask;
  }

public:
  // These bit positions are the same ones used in Value::SubclassOptionalData,
  // so they have to fit there as well.
  // WARNING: We're out of space. SubclassOptionalData only has 7 bits. New
  // functionality will require a change in how this information is stored.
  enum {
    AllowReassoc    = (1 << 0),
    NoNaNs          = (1 << 1),
    NoInfs          = (1 << 2),
    NoSignedZeros   = (1 << 3),
    AllowReciprocal = (1 << 4),
    AllowContract   = (1 << 5),
    ApproxFunc      = (1 << 6)
  };

  FastMathFlags() = default;

  /// Return a flag set with every bit turned on.
  static FastMathFlags getFast() {
    FastMathFlags Result;
    Result.set();
    return Result;
  }

  bool any() const { return Flags != 0; }
  bool none() const { return Flags == 0; }
  bool all() const { return Flags == ~0U; }

  void clear() { Flags = 0; }
  void set() { Flags = ~0U; }

  /// Flag queries
  bool allowReassoc() const { return isSet(AllowReassoc); }
  bool noNaNs() const { return isSet(NoNaNs); }
  bool noInfs() const { return isSet(NoInfs); }
  bool noSignedZeros() const { return isSet(NoSignedZeros); }
  bool allowReciprocal() const { return isSet(AllowReciprocal); }
  bool allowContract() const { return isSet(AllowContract); }
  bool approxFunc() const { return isSet(ApproxFunc); }
  /// 'Fast' means all bits are set.
  bool isFast() const { return all(); }

  /// Flag setters
  void setAllowReassoc(bool B = true) { assign(AllowReassoc, B); }
  void setNoNaNs(bool B = true) { assign(NoNaNs, B); }
  void setNoInfs(bool B = true) { assign(NoInfs, B); }
  void setNoSignedZeros(bool B = true) { assign(NoSignedZeros, B); }
  void setAllowReciprocal(bool B = true) { assign(AllowReciprocal, B); }
  void setAllowContract(bool B = true) { assign(AllowContract, B); }
  void setApproxFunc(bool B = true) { assign(ApproxFunc, B); }
  void setFast(bool B = true) {
    if (B)
      set();
    else
      clear();
  }

  /// Keep only the flags that are also present in \p OtherFlags.
  void operator&=(const FastMathFlags &OtherFlags) {
    Flags &= OtherFlags.Flags;
  }
};
243 | |||||||||||||||
244 | /// Utility class for floating point operations which can have | ||||||||||||||
245 | /// information about relaxed accuracy requirements attached to them. | ||||||||||||||
246 | class FPMathOperator : public Operator { | ||||||||||||||
247 | private: | ||||||||||||||
248 | friend class Instruction; | ||||||||||||||
249 | |||||||||||||||
250 | /// 'Fast' means all bits are set. | ||||||||||||||
251 | void setFast(bool B) { | ||||||||||||||
252 | setHasAllowReassoc(B); | ||||||||||||||
253 | setHasNoNaNs(B); | ||||||||||||||
254 | setHasNoInfs(B); | ||||||||||||||
255 | setHasNoSignedZeros(B); | ||||||||||||||
256 | setHasAllowReciprocal(B); | ||||||||||||||
257 | setHasAllowContract(B); | ||||||||||||||
258 | setHasApproxFunc(B); | ||||||||||||||
259 | } | ||||||||||||||
260 | |||||||||||||||
261 | void setHasAllowReassoc(bool B) { | ||||||||||||||
262 | SubclassOptionalData = | ||||||||||||||
263 | (SubclassOptionalData & ~FastMathFlags::AllowReassoc) | | ||||||||||||||
264 | (B * FastMathFlags::AllowReassoc); | ||||||||||||||
265 | } | ||||||||||||||
266 | |||||||||||||||
267 | void setHasNoNaNs(bool B) { | ||||||||||||||
268 | SubclassOptionalData = | ||||||||||||||
269 | (SubclassOptionalData & ~FastMathFlags::NoNaNs) | | ||||||||||||||
270 | (B * FastMathFlags::NoNaNs); | ||||||||||||||
271 | } | ||||||||||||||
272 | |||||||||||||||
273 | void setHasNoInfs(bool B) { | ||||||||||||||
274 | SubclassOptionalData = | ||||||||||||||
275 | (SubclassOptionalData & ~FastMathFlags::NoInfs) | | ||||||||||||||
276 | (B * FastMathFlags::NoInfs); | ||||||||||||||
277 | } | ||||||||||||||
278 | |||||||||||||||
279 | void setHasNoSignedZeros(bool B) { | ||||||||||||||
280 | SubclassOptionalData = | ||||||||||||||
281 | (SubclassOptionalData & ~FastMathFlags::NoSignedZeros) | | ||||||||||||||
282 | (B * FastMathFlags::NoSignedZeros); | ||||||||||||||
283 | } | ||||||||||||||
284 | |||||||||||||||
285 | void setHasAllowReciprocal(bool B) { | ||||||||||||||
286 | SubclassOptionalData = | ||||||||||||||
287 | (SubclassOptionalData & ~FastMathFlags::AllowReciprocal) | | ||||||||||||||
288 | (B * FastMathFlags::AllowReciprocal); | ||||||||||||||
289 | } | ||||||||||||||
290 | |||||||||||||||
291 | void setHasAllowContract(bool B) { | ||||||||||||||
292 | SubclassOptionalData = | ||||||||||||||
293 | (SubclassOptionalData & ~FastMathFlags::AllowContract) | | ||||||||||||||
294 | (B * FastMathFlags::AllowContract); | ||||||||||||||
295 | } | ||||||||||||||
296 | |||||||||||||||
297 | void setHasApproxFunc(bool B) { | ||||||||||||||
298 | SubclassOptionalData = | ||||||||||||||
299 | (SubclassOptionalData & ~FastMathFlags::ApproxFunc) | | ||||||||||||||
300 | (B * FastMathFlags::ApproxFunc); | ||||||||||||||
301 | } | ||||||||||||||
302 | |||||||||||||||
303 | /// Convenience function for setting multiple fast-math flags. | ||||||||||||||
304 | /// FMF is a mask of the bits to set. | ||||||||||||||
305 | void setFastMathFlags(FastMathFlags FMF) { | ||||||||||||||
306 | SubclassOptionalData |= FMF.Flags; | ||||||||||||||
307 | } | ||||||||||||||
308 | |||||||||||||||
309 | /// Convenience function for copying all fast-math flags. | ||||||||||||||
310 | /// All values in FMF are transferred to this operator. | ||||||||||||||
311 | void copyFastMathFlags(FastMathFlags FMF) { | ||||||||||||||
312 | SubclassOptionalData = FMF.Flags; | ||||||||||||||
313 | } | ||||||||||||||
314 | |||||||||||||||
315 | public: | ||||||||||||||
316 | /// Test if this operation allows all non-strict floating-point transforms. | ||||||||||||||
317 | bool isFast() const { | ||||||||||||||
318 | return ((SubclassOptionalData & FastMathFlags::AllowReassoc) != 0 && | ||||||||||||||
319 | (SubclassOptionalData & FastMathFlags::NoNaNs) != 0 && | ||||||||||||||
320 | (SubclassOptionalData & FastMathFlags::NoInfs) != 0 && | ||||||||||||||
321 | (SubclassOptionalData & FastMathFlags::NoSignedZeros) != 0 && | ||||||||||||||
322 | (SubclassOptionalData & FastMathFlags::AllowReciprocal) != 0 && | ||||||||||||||
323 | (SubclassOptionalData & FastMathFlags::AllowContract) != 0 && | ||||||||||||||
324 | (SubclassOptionalData & FastMathFlags::ApproxFunc) != 0); | ||||||||||||||
325 | } | ||||||||||||||
326 | |||||||||||||||
327 | /// Test if this operation may be simplified with reassociative transforms. | ||||||||||||||
328 | bool hasAllowReassoc() const { | ||||||||||||||
329 | return (SubclassOptionalData & FastMathFlags::AllowReassoc) != 0; | ||||||||||||||
330 | } | ||||||||||||||
331 | |||||||||||||||
332 | /// Test if this operation's arguments and results are assumed not-NaN. | ||||||||||||||
333 | bool hasNoNaNs() const { | ||||||||||||||
334 | return (SubclassOptionalData & FastMathFlags::NoNaNs) != 0; | ||||||||||||||
335 | } | ||||||||||||||
336 | |||||||||||||||
337 | /// Test if this operation's arguments and results are assumed not-infinite. | ||||||||||||||
338 | bool hasNoInfs() const { | ||||||||||||||
339 | return (SubclassOptionalData & FastMathFlags::NoInfs) != 0; | ||||||||||||||
340 | } | ||||||||||||||
341 | |||||||||||||||
342 | /// Test if this operation can ignore the sign of zero. | ||||||||||||||
343 | bool hasNoSignedZeros() const { | ||||||||||||||
344 | return (SubclassOptionalData & FastMathFlags::NoSignedZeros) != 0; | ||||||||||||||
345 | } | ||||||||||||||
346 | |||||||||||||||
347 | /// Test if this operation can use reciprocal multiply instead of division. | ||||||||||||||
348 | bool hasAllowReciprocal() const { | ||||||||||||||
349 | return (SubclassOptionalData & FastMathFlags::AllowReciprocal) != 0; | ||||||||||||||
350 | } | ||||||||||||||
351 | |||||||||||||||
352 | /// Test if this operation can be floating-point contracted (FMA). | ||||||||||||||
353 | bool hasAllowContract() const { | ||||||||||||||
354 | return (SubclassOptionalData & FastMathFlags::AllowContract) != 0; | ||||||||||||||
355 | } | ||||||||||||||
356 | |||||||||||||||
357 | /// Test if this operation allows approximations of math library functions or | ||||||||||||||
358 | /// intrinsics. | ||||||||||||||
359 | bool hasApproxFunc() const { | ||||||||||||||
360 | return (SubclassOptionalData & FastMathFlags::ApproxFunc) != 0; | ||||||||||||||
361 | } | ||||||||||||||
362 | |||||||||||||||
363 | /// Convenience function for getting all the fast-math flags | ||||||||||||||
364 | FastMathFlags getFastMathFlags() const { | ||||||||||||||
365 | return FastMathFlags(SubclassOptionalData); | ||||||||||||||
366 | } | ||||||||||||||
367 | |||||||||||||||
368 | /// Get the maximum error permitted by this operation in ULPs. An accuracy of | ||||||||||||||
369 | /// 0.0 means that the operation should be performed with the default | ||||||||||||||
370 | /// precision. | ||||||||||||||
371 | float getFPAccuracy() const; | ||||||||||||||
372 | |||||||||||||||
373 | static bool classof(const Value *V) { | ||||||||||||||
374 | unsigned Opcode; | ||||||||||||||
375 | if (auto *I = dyn_cast<Instruction>(V)) | ||||||||||||||
376 | Opcode = I->getOpcode(); | ||||||||||||||
377 | else if (auto *CE = dyn_cast<ConstantExpr>(V)) | ||||||||||||||
378 | Opcode = CE->getOpcode(); | ||||||||||||||
379 | else | ||||||||||||||
380 | return false; | ||||||||||||||
381 | |||||||||||||||
382 | switch (Opcode) { | ||||||||||||||
383 | case Instruction::FNeg: | ||||||||||||||
384 | case Instruction::FAdd: | ||||||||||||||
385 | case Instruction::FSub: | ||||||||||||||
386 | case Instruction::FMul: | ||||||||||||||
387 | case Instruction::FDiv: | ||||||||||||||
388 | case Instruction::FRem: | ||||||||||||||
389 | // FIXME: To clean up and correct the semantics of fast-math-flags, FCmp | ||||||||||||||
390 | // should not be treated as a math op, but the other opcodes should. | ||||||||||||||
391 | // This would make things consistent with Select/PHI (FP value type | ||||||||||||||
392 | // determines whether they are math ops and, therefore, capable of | ||||||||||||||
393 | // having fast-math-flags). | ||||||||||||||
394 | case Instruction::FCmp: | ||||||||||||||
395 | return true; | ||||||||||||||
396 | case Instruction::PHI: | ||||||||||||||
397 | case Instruction::Select: | ||||||||||||||
398 | case Instruction::Call: { | ||||||||||||||
399 | Type *Ty = V->getType(); | ||||||||||||||
400 | while (ArrayType *ArrTy = dyn_cast<ArrayType>(Ty)) | ||||||||||||||
401 | Ty = ArrTy->getElementType(); | ||||||||||||||
402 | return Ty->isFPOrFPVectorTy(); | ||||||||||||||
403 | } | ||||||||||||||
404 | default: | ||||||||||||||
405 | return false; | ||||||||||||||
406 | } | ||||||||||||||
407 | } | ||||||||||||||
408 | }; | ||||||||||||||
409 | |||||||||||||||
410 | /// A helper template for defining operators for individual opcodes. | ||||||||||||||
411 | template<typename SuperClass, unsigned Opc> | ||||||||||||||
412 | class ConcreteOperator : public SuperClass { | ||||||||||||||
413 | public: | ||||||||||||||
414 | static bool classof(const Instruction *I) { | ||||||||||||||
415 | return I->getOpcode() == Opc; | ||||||||||||||
416 | } | ||||||||||||||
417 | static bool classof(const ConstantExpr *CE) { | ||||||||||||||
418 | return CE->getOpcode() == Opc; | ||||||||||||||
419 | } | ||||||||||||||
420 | static bool classof(const Value *V) { | ||||||||||||||
421 | return (isa<Instruction>(V) && classof(cast<Instruction>(V))) || | ||||||||||||||
422 | (isa<ConstantExpr>(V) && classof(cast<ConstantExpr>(V))); | ||||||||||||||
423 | } | ||||||||||||||
424 | }; | ||||||||||||||
425 | |||||||||||||||
426 | class AddOperator | ||||||||||||||
427 | : public ConcreteOperator<OverflowingBinaryOperator, Instruction::Add> { | ||||||||||||||
428 | }; | ||||||||||||||
429 | class SubOperator | ||||||||||||||
430 | : public ConcreteOperator<OverflowingBinaryOperator, Instruction::Sub> { | ||||||||||||||
431 | }; | ||||||||||||||
432 | class MulOperator | ||||||||||||||
433 | : public ConcreteOperator<OverflowingBinaryOperator, Instruction::Mul> { | ||||||||||||||
434 | }; | ||||||||||||||
435 | class ShlOperator | ||||||||||||||
436 | : public ConcreteOperator<OverflowingBinaryOperator, Instruction::Shl> { | ||||||||||||||
437 | }; | ||||||||||||||
438 | |||||||||||||||
439 | class SDivOperator | ||||||||||||||
440 | : public ConcreteOperator<PossiblyExactOperator, Instruction::SDiv> { | ||||||||||||||
441 | }; | ||||||||||||||
442 | class UDivOperator | ||||||||||||||
443 | : public ConcreteOperator<PossiblyExactOperator, Instruction::UDiv> { | ||||||||||||||
444 | }; | ||||||||||||||
445 | class AShrOperator | ||||||||||||||
446 | : public ConcreteOperator<PossiblyExactOperator, Instruction::AShr> { | ||||||||||||||
447 | }; | ||||||||||||||
448 | class LShrOperator | ||||||||||||||
449 | : public ConcreteOperator<PossiblyExactOperator, Instruction::LShr> { | ||||||||||||||
450 | }; | ||||||||||||||
451 | |||||||||||||||
452 | class ZExtOperator : public ConcreteOperator<Operator, Instruction::ZExt> {}; | ||||||||||||||
453 | |||||||||||||||
454 | class GEPOperator | ||||||||||||||
455 | : public ConcreteOperator<Operator, Instruction::GetElementPtr> { | ||||||||||||||
456 | friend class GetElementPtrInst; | ||||||||||||||
457 | friend class ConstantExpr; | ||||||||||||||
458 | |||||||||||||||
459 | enum { | ||||||||||||||
460 | IsInBounds = (1 << 0), | ||||||||||||||
461 | // InRangeIndex: bits 1-6 | ||||||||||||||
462 | }; | ||||||||||||||
463 | |||||||||||||||
464 | void setIsInBounds(bool B) { | ||||||||||||||
465 | SubclassOptionalData = | ||||||||||||||
466 | (SubclassOptionalData & ~IsInBounds) | (B * IsInBounds); | ||||||||||||||
467 | } | ||||||||||||||
468 | |||||||||||||||
469 | public: | ||||||||||||||
470 | /// Test whether this is an inbounds GEP, as defined by LangRef.html. | ||||||||||||||
471 | bool isInBounds() const { | ||||||||||||||
472 | return SubclassOptionalData & IsInBounds; | ||||||||||||||
473 | } | ||||||||||||||
474 | |||||||||||||||
475 | /// Returns the offset of the index with an inrange attachment, or None if | ||||||||||||||
476 | /// none. | ||||||||||||||
477 | Optional<unsigned> getInRangeIndex() const { | ||||||||||||||
478 | if (SubclassOptionalData >> 1 == 0) return None; | ||||||||||||||
479 | return (SubclassOptionalData >> 1) - 1; | ||||||||||||||
480 | } | ||||||||||||||
481 | |||||||||||||||
482 | inline op_iterator idx_begin() { return op_begin()+1; } | ||||||||||||||
483 | inline const_op_iterator idx_begin() const { return op_begin()+1; } | ||||||||||||||
484 | inline op_iterator idx_end() { return op_end(); } | ||||||||||||||
485 | inline const_op_iterator idx_end() const { return op_end(); } | ||||||||||||||
486 | |||||||||||||||
487 | Value *getPointerOperand() { | ||||||||||||||
488 | return getOperand(0); | ||||||||||||||
489 | } | ||||||||||||||
490 | const Value *getPointerOperand() const { | ||||||||||||||
491 | return getOperand(0); | ||||||||||||||
492 | } | ||||||||||||||
493 | static unsigned getPointerOperandIndex() { | ||||||||||||||
494 | return 0U; // get index for modifying correct operand | ||||||||||||||
495 | } | ||||||||||||||
496 | |||||||||||||||
497 | /// Method to return the pointer operand as a PointerType. | ||||||||||||||
498 | Type *getPointerOperandType() const { | ||||||||||||||
499 | return getPointerOperand()->getType(); | ||||||||||||||
500 | } | ||||||||||||||
501 | |||||||||||||||
502 | Type *getSourceElementType() const; | ||||||||||||||
503 | Type *getResultElementType() const; | ||||||||||||||
504 | |||||||||||||||
505 | /// Method to return the address space of the pointer operand. | ||||||||||||||
506 | unsigned getPointerAddressSpace() const { | ||||||||||||||
507 | return getPointerOperandType()->getPointerAddressSpace(); | ||||||||||||||
508 | } | ||||||||||||||
509 | |||||||||||||||
510 | unsigned getNumIndices() const { // Note: always non-negative | ||||||||||||||
511 | return getNumOperands() - 1; | ||||||||||||||
512 | } | ||||||||||||||
513 | |||||||||||||||
514 | bool hasIndices() const { | ||||||||||||||
515 | return getNumOperands() > 1; | ||||||||||||||
516 | } | ||||||||||||||
517 | |||||||||||||||
518 | /// Return true if all of the indices of this GEP are zeros. | ||||||||||||||
519 | /// If so, the result pointer and the first operand have the same | ||||||||||||||
520 | /// value, just potentially different types. | ||||||||||||||
521 | bool hasAllZeroIndices() const { | ||||||||||||||
522 | for (const_op_iterator I = idx_begin(), E = idx_end(); I != E; ++I) { | ||||||||||||||
523 | if (ConstantInt *C = dyn_cast<ConstantInt>(I)) | ||||||||||||||
524 | if (C->isZero()) | ||||||||||||||
525 | continue; | ||||||||||||||
526 | return false; | ||||||||||||||
527 | } | ||||||||||||||
528 | return true; | ||||||||||||||
529 | } | ||||||||||||||
530 | |||||||||||||||
531 | /// Return true if all of the indices of this GEP are constant integers. | ||||||||||||||
532 | /// If so, the result pointer and the first operand have | ||||||||||||||
533 | /// a constant offset between them. | ||||||||||||||
534 | bool hasAllConstantIndices() const { | ||||||||||||||
535 | for (const_op_iterator I = idx_begin(), E = idx_end(); I != E; ++I) { | ||||||||||||||
536 | if (!isa<ConstantInt>(I)) | ||||||||||||||
537 | return false; | ||||||||||||||
538 | } | ||||||||||||||
539 | return true; | ||||||||||||||
540 | } | ||||||||||||||
541 | |||||||||||||||
542 | unsigned countNonConstantIndices() const { | ||||||||||||||
543 | return count_if(make_range(idx_begin(), idx_end()), [](const Use& use) { | ||||||||||||||
544 | return !isa<ConstantInt>(*use); | ||||||||||||||
545 | }); | ||||||||||||||
546 | } | ||||||||||||||
547 | |||||||||||||||
548 | /// Accumulate the constant address offset of this GEP if possible. | ||||||||||||||
549 | /// | ||||||||||||||
550 | /// This routine accepts an APInt into which it will accumulate the constant | ||||||||||||||
551 | /// offset of this GEP if the GEP is in fact constant. If the GEP is not | ||||||||||||||
552 | /// all-constant, it returns false and the value of the offset APInt is | ||||||||||||||
553 | /// undefined (it is *not* preserved!). The APInt passed into this routine | ||||||||||||||
554 | /// must be at exactly as wide as the IntPtr type for the address space of the | ||||||||||||||
555 | /// base GEP pointer. | ||||||||||||||
556 | bool accumulateConstantOffset(const DataLayout &DL, APInt &Offset) const; | ||||||||||||||
557 | }; | ||||||||||||||
558 | |||||||||||||||
559 | class PtrToIntOperator | ||||||||||||||
560 | : public ConcreteOperator<Operator, Instruction::PtrToInt> { | ||||||||||||||
561 | friend class PtrToInt; | ||||||||||||||
562 | friend class ConstantExpr; | ||||||||||||||
563 | |||||||||||||||
564 | public: | ||||||||||||||
565 | Value *getPointerOperand() { | ||||||||||||||
566 | return getOperand(0); | ||||||||||||||
567 | } | ||||||||||||||
568 | const Value *getPointerOperand() const { | ||||||||||||||
569 | return getOperand(0); | ||||||||||||||
570 | } | ||||||||||||||
571 | |||||||||||||||
572 | static unsigned getPointerOperandIndex() { | ||||||||||||||
573 | return 0U; // get index for modifying correct operand | ||||||||||||||
574 | } | ||||||||||||||
575 | |||||||||||||||
576 | /// Method to return the pointer operand as a PointerType. | ||||||||||||||
577 | Type *getPointerOperandType() const { | ||||||||||||||
578 | return getPointerOperand()->getType(); | ||||||||||||||
579 | } | ||||||||||||||
580 | |||||||||||||||
581 | /// Method to return the address space of the pointer operand. | ||||||||||||||
582 | unsigned getPointerAddressSpace() const { | ||||||||||||||
583 | return cast<PointerType>(getPointerOperandType())->getAddressSpace(); | ||||||||||||||
584 | } | ||||||||||||||
585 | }; | ||||||||||||||
586 | |||||||||||||||
587 | class BitCastOperator | ||||||||||||||
588 | : public ConcreteOperator<Operator, Instruction::BitCast> { | ||||||||||||||
589 | friend class BitCastInst; | ||||||||||||||
590 | friend class ConstantExpr; | ||||||||||||||
591 | |||||||||||||||
592 | public: | ||||||||||||||
593 | Type *getSrcTy() const { | ||||||||||||||
594 | return getOperand(0)->getType(); | ||||||||||||||
595 | } | ||||||||||||||
596 | |||||||||||||||
597 | Type *getDestTy() const { | ||||||||||||||
598 | return getType(); | ||||||||||||||
599 | } | ||||||||||||||
600 | }; | ||||||||||||||
601 | |||||||||||||||
602 | } // end namespace llvm | ||||||||||||||
603 | |||||||||||||||
604 | #endif // LLVM_IR_OPERATOR_H |
1 | //===- BasicTTIImpl.h -------------------------------------------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | /// \file |
10 | /// This file provides a helper that implements much of the TTI interface in |
11 | /// terms of the target-independent code generator and TargetLowering |
12 | /// interfaces. |
13 | // |
14 | //===----------------------------------------------------------------------===// |
15 | |
16 | #ifndef LLVM_CODEGEN_BASICTTIIMPL_H |
17 | #define LLVM_CODEGEN_BASICTTIIMPL_H |
18 | |
19 | #include "llvm/ADT/APInt.h" |
20 | #include "llvm/ADT/ArrayRef.h" |
21 | #include "llvm/ADT/BitVector.h" |
22 | #include "llvm/ADT/SmallPtrSet.h" |
23 | #include "llvm/ADT/SmallVector.h" |
24 | #include "llvm/Analysis/LoopInfo.h" |
25 | #include "llvm/Analysis/TargetTransformInfo.h" |
26 | #include "llvm/Analysis/TargetTransformInfoImpl.h" |
27 | #include "llvm/CodeGen/ISDOpcodes.h" |
28 | #include "llvm/CodeGen/TargetLowering.h" |
29 | #include "llvm/CodeGen/TargetSubtargetInfo.h" |
30 | #include "llvm/CodeGen/ValueTypes.h" |
31 | #include "llvm/IR/BasicBlock.h" |
32 | #include "llvm/IR/CallSite.h" |
33 | #include "llvm/IR/Constant.h" |
34 | #include "llvm/IR/Constants.h" |
35 | #include "llvm/IR/DataLayout.h" |
36 | #include "llvm/IR/DerivedTypes.h" |
37 | #include "llvm/IR/InstrTypes.h" |
38 | #include "llvm/IR/Instruction.h" |
39 | #include "llvm/IR/Instructions.h" |
40 | #include "llvm/IR/Intrinsics.h" |
41 | #include "llvm/IR/Operator.h" |
42 | #include "llvm/IR/Type.h" |
43 | #include "llvm/IR/Value.h" |
44 | #include "llvm/MC/MCSchedule.h" |
45 | #include "llvm/Support/Casting.h" |
46 | #include "llvm/Support/CommandLine.h" |
47 | #include "llvm/Support/ErrorHandling.h" |
48 | #include "llvm/Support/MachineValueType.h" |
49 | #include "llvm/Support/MathExtras.h" |
50 | #include <algorithm> |
51 | #include <cassert> |
52 | #include <cstdint> |
53 | #include <limits> |
54 | #include <utility> |
55 | |
56 | namespace llvm { |
57 | |
58 | class Function; |
59 | class GlobalValue; |
60 | class LLVMContext; |
61 | class ScalarEvolution; |
62 | class SCEV; |
63 | class TargetMachine; |
64 | |
65 | extern cl::opt<unsigned> PartialUnrollingThreshold; |
66 | |
67 | /// Base class which can be used to help build a TTI implementation. |
68 | /// |
69 | /// This class provides as much implementation of the TTI interface as is |
70 | /// possible using the target independent parts of the code generator. |
71 | /// |
72 | /// In order to subclass it, your class must implement a getST() method to |
73 | /// return the subtarget, and a getTLI() method to return the target lowering. |
74 | /// We need these methods implemented in the derived class so that this class |
75 | /// doesn't have to duplicate storage for them. |
76 | template <typename T> |
77 | class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> { |
78 | private: |
79 | using BaseT = TargetTransformInfoImplCRTPBase<T>; |
80 | using TTI = TargetTransformInfo; |
81 | |
82 | /// Estimate a cost of Broadcast as an extract and sequence of insert |
83 | /// operations. |
84 | unsigned getBroadcastShuffleOverhead(Type *Ty) { |
85 | assert(Ty->isVectorTy() && "Can only shuffle vectors")((Ty->isVectorTy() && "Can only shuffle vectors") ? static_cast<void> (0) : __assert_fail ("Ty->isVectorTy() && \"Can only shuffle vectors\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/CodeGen/BasicTTIImpl.h" , 85, __PRETTY_FUNCTION__)); |
86 | unsigned Cost = 0; |
87 | // Broadcast cost is equal to the cost of extracting the zero'th element |
88 | // plus the cost of inserting it into every element of the result vector. |
89 | Cost += static_cast<T *>(this)->getVectorInstrCost( |
90 | Instruction::ExtractElement, Ty, 0); |
91 | |
92 | for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) { |
93 | Cost += static_cast<T *>(this)->getVectorInstrCost( |
94 | Instruction::InsertElement, Ty, i); |
95 | } |
96 | return Cost; |
97 | } |
98 | |
99 | /// Estimate a cost of shuffle as a sequence of extract and insert |
100 | /// operations. |
101 | unsigned getPermuteShuffleOverhead(Type *Ty) { |
102 | assert(Ty->isVectorTy() && "Can only shuffle vectors")((Ty->isVectorTy() && "Can only shuffle vectors") ? static_cast<void> (0) : __assert_fail ("Ty->isVectorTy() && \"Can only shuffle vectors\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/CodeGen/BasicTTIImpl.h" , 102, __PRETTY_FUNCTION__)); |
103 | unsigned Cost = 0; |
104 | // Shuffle cost is equal to the cost of extracting element from its argument |
105 | // plus the cost of inserting them onto the result vector. |
106 | |
107 | // e.g. <4 x float> has a mask of <0,5,2,7> i.e we need to extract from |
108 | // index 0 of first vector, index 1 of second vector,index 2 of first |
109 | // vector and finally index 3 of second vector and insert them at index |
110 | // <0,1,2,3> of result vector. |
111 | for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) { |
112 | Cost += static_cast<T *>(this) |
113 | ->getVectorInstrCost(Instruction::InsertElement, Ty, i); |
114 | Cost += static_cast<T *>(this) |
115 | ->getVectorInstrCost(Instruction::ExtractElement, Ty, i); |
116 | } |
117 | return Cost; |
118 | } |
119 | |
120 | /// Estimate a cost of subvector extraction as a sequence of extract and |
121 | /// insert operations. |
122 | unsigned getExtractSubvectorOverhead(Type *Ty, int Index, Type *SubTy) { |
123 | assert(Ty && Ty->isVectorTy() && SubTy && SubTy->isVectorTy() &&((Ty && Ty->isVectorTy() && SubTy && SubTy->isVectorTy() && "Can only extract subvectors from vectors" ) ? static_cast<void> (0) : __assert_fail ("Ty && Ty->isVectorTy() && SubTy && SubTy->isVectorTy() && \"Can only extract subvectors from vectors\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/CodeGen/BasicTTIImpl.h" , 124, __PRETTY_FUNCTION__)) |
124 | "Can only extract subvectors from vectors")((Ty && Ty->isVectorTy() && SubTy && SubTy->isVectorTy() && "Can only extract subvectors from vectors" ) ? static_cast<void> (0) : __assert_fail ("Ty && Ty->isVectorTy() && SubTy && SubTy->isVectorTy() && \"Can only extract subvectors from vectors\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/CodeGen/BasicTTIImpl.h" , 124, __PRETTY_FUNCTION__)); |
125 | int NumSubElts = SubTy->getVectorNumElements(); |
126 | assert((Index + NumSubElts) <= (int)Ty->getVectorNumElements() &&(((Index + NumSubElts) <= (int)Ty->getVectorNumElements () && "SK_ExtractSubvector index out of range") ? static_cast <void> (0) : __assert_fail ("(Index + NumSubElts) <= (int)Ty->getVectorNumElements() && \"SK_ExtractSubvector index out of range\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/CodeGen/BasicTTIImpl.h" , 127, __PRETTY_FUNCTION__)) |
127 | "SK_ExtractSubvector index out of range")(((Index + NumSubElts) <= (int)Ty->getVectorNumElements () && "SK_ExtractSubvector index out of range") ? static_cast <void> (0) : __assert_fail ("(Index + NumSubElts) <= (int)Ty->getVectorNumElements() && \"SK_ExtractSubvector index out of range\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/CodeGen/BasicTTIImpl.h" , 127, __PRETTY_FUNCTION__)); |
128 | |
129 | unsigned Cost = 0; |
130 | // Subvector extraction cost is equal to the cost of extracting element from |
131 | // the source type plus the cost of inserting them into the result vector |
132 | // type. |
133 | for (int i = 0; i != NumSubElts; ++i) { |
134 | Cost += static_cast<T *>(this)->getVectorInstrCost( |
135 | Instruction::ExtractElement, Ty, i + Index); |
136 | Cost += static_cast<T *>(this)->getVectorInstrCost( |
137 | Instruction::InsertElement, SubTy, i); |
138 | } |
139 | return Cost; |
140 | } |
141 | |
142 | /// Estimate a cost of subvector insertion as a sequence of extract and |
143 | /// insert operations. |
144 | unsigned getInsertSubvectorOverhead(Type *Ty, int Index, Type *SubTy) { |
145 | assert(Ty && Ty->isVectorTy() && SubTy && SubTy->isVectorTy() &&((Ty && Ty->isVectorTy() && SubTy && SubTy->isVectorTy() && "Can only insert subvectors into vectors" ) ? static_cast<void> (0) : __assert_fail ("Ty && Ty->isVectorTy() && SubTy && SubTy->isVectorTy() && \"Can only insert subvectors into vectors\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/CodeGen/BasicTTIImpl.h" , 146, __PRETTY_FUNCTION__)) |
146 | "Can only insert subvectors into vectors")((Ty && Ty->isVectorTy() && SubTy && SubTy->isVectorTy() && "Can only insert subvectors into vectors" ) ? static_cast<void> (0) : __assert_fail ("Ty && Ty->isVectorTy() && SubTy && SubTy->isVectorTy() && \"Can only insert subvectors into vectors\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/CodeGen/BasicTTIImpl.h" , 146, __PRETTY_FUNCTION__)); |
147 | int NumSubElts = SubTy->getVectorNumElements(); |
148 | assert((Index + NumSubElts) <= (int)Ty->getVectorNumElements() &&(((Index + NumSubElts) <= (int)Ty->getVectorNumElements () && "SK_InsertSubvector index out of range") ? static_cast <void> (0) : __assert_fail ("(Index + NumSubElts) <= (int)Ty->getVectorNumElements() && \"SK_InsertSubvector index out of range\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/CodeGen/BasicTTIImpl.h" , 149, __PRETTY_FUNCTION__)) |
149 | "SK_InsertSubvector index out of range")(((Index + NumSubElts) <= (int)Ty->getVectorNumElements () && "SK_InsertSubvector index out of range") ? static_cast <void> (0) : __assert_fail ("(Index + NumSubElts) <= (int)Ty->getVectorNumElements() && \"SK_InsertSubvector index out of range\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/CodeGen/BasicTTIImpl.h" , 149, __PRETTY_FUNCTION__)); |
150 | |
151 | unsigned Cost = 0; |
152 | // Subvector insertion cost is equal to the cost of extracting element from |
153 | // the source type plus the cost of inserting them into the result vector |
154 | // type. |
155 | for (int i = 0; i != NumSubElts; ++i) { |
156 | Cost += static_cast<T *>(this)->getVectorInstrCost( |
157 | Instruction::ExtractElement, SubTy, i); |
158 | Cost += static_cast<T *>(this)->getVectorInstrCost( |
159 | Instruction::InsertElement, Ty, i + Index); |
160 | } |
161 | return Cost; |
162 | } |
163 | |
164 | /// Local query method delegates up to T which *must* implement this! |
165 | const TargetSubtargetInfo *getST() const { |
166 | return static_cast<const T *>(this)->getST(); |
167 | } |
168 | |
169 | /// Local query method delegates up to T which *must* implement this! |
170 | const TargetLoweringBase *getTLI() const { |
171 | return static_cast<const T *>(this)->getTLI(); |
172 | } |
173 | |
174 | static ISD::MemIndexedMode getISDIndexedMode(TTI::MemIndexedMode M) { |
175 | switch (M) { |
176 | case TTI::MIM_Unindexed: |
177 | return ISD::UNINDEXED; |
178 | case TTI::MIM_PreInc: |
179 | return ISD::PRE_INC; |
180 | case TTI::MIM_PreDec: |
181 | return ISD::PRE_DEC; |
182 | case TTI::MIM_PostInc: |
183 | return ISD::POST_INC; |
184 | case TTI::MIM_PostDec: |
185 | return ISD::POST_DEC; |
186 | } |
187 | llvm_unreachable("Unexpected MemIndexedMode")::llvm::llvm_unreachable_internal("Unexpected MemIndexedMode" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/CodeGen/BasicTTIImpl.h" , 187); |
188 | } |
189 | |
190 | protected: |
191 | explicit BasicTTIImplBase(const TargetMachine *TM, const DataLayout &DL) |
192 | : BaseT(DL) {} |
193 | virtual ~BasicTTIImplBase() = default; |
194 | |
195 | using TargetTransformInfoImplBase::DL; |
196 | |
197 | public: |
198 | /// \name Scalar TTI Implementations |
199 | /// @{ |
200 | bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth, |
201 | unsigned AddressSpace, unsigned Alignment, |
202 | bool *Fast) const { |
203 | EVT E = EVT::getIntegerVT(Context, BitWidth); |
204 | return getTLI()->allowsMisalignedMemoryAccesses( |
205 | E, AddressSpace, Alignment, MachineMemOperand::MONone, Fast); |
206 | } |
207 | |
/// Always returns false in this base implementation.
bool hasBranchDivergence() {
  return false;
}
209 | |
/// Always returns false in this base implementation.
bool useGPUDivergenceAnalysis() {
  return false;
}
211 | |
212 | bool isSourceOfDivergence(const Value *V) { return false; } |
213 | |
214 | bool isAlwaysUniform(const Value *V) { return false; } |
215 | |
/// Return an invalid address space: -1 converted to unsigned.
unsigned getFlatAddressSpace() {
  const unsigned InvalidAddressSpace = static_cast<unsigned>(-1);
  return InvalidAddressSpace;
}
220 | |
221 | bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes, |
222 | Intrinsic::ID IID) const { |
223 | return false; |
224 | } |
225 | |
226 | bool rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, |
227 | Value *OldV, Value *NewV) const { |
228 | return false; |
229 | } |
230 | |
231 | bool isLegalAddImmediate(int64_t imm) { |
232 | return getTLI()->isLegalAddImmediate(imm); |
233 | } |
234 | |
235 | bool isLegalICmpImmediate(int64_t imm) { |
236 | return getTLI()->isLegalICmpImmediate(imm); |
237 | } |
238 | |
239 | bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, |
240 | bool HasBaseReg, int64_t Scale, |
241 | unsigned AddrSpace, Instruction *I = nullptr) { |
242 | TargetLoweringBase::AddrMode AM; |
243 | AM.BaseGV = BaseGV; |
244 | AM.BaseOffs = BaseOffset; |
245 | AM.HasBaseReg = HasBaseReg; |
246 | AM.Scale = Scale; |
247 | return getTLI()->isLegalAddressingMode(DL, AM, Ty, AddrSpace, I); |
248 | } |
249 | |
250 | bool isIndexedLoadLegal(TTI::MemIndexedMode M, Type *Ty, |
251 | const DataLayout &DL) const { |
252 | EVT VT = getTLI()->getValueType(DL, Ty); |
253 | return getTLI()->isIndexedLoadLegal(getISDIndexedMode(M), VT); |
254 | } |
255 | |
256 | bool isIndexedStoreLegal(TTI::MemIndexedMode M, Type *Ty, |
257 | const DataLayout &DL) const { |
258 | EVT VT = getTLI()->getValueType(DL, Ty); |
259 | return getTLI()->isIndexedStoreLegal(getISDIndexedMode(M), VT); |
260 | } |
261 | |
262 | bool isLSRCostLess(TTI::LSRCost C1, TTI::LSRCost C2) { |
263 | return TargetTransformInfoImplBase::isLSRCostLess(C1, C2); |
264 | } |
265 | |
266 | int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, |
267 | bool HasBaseReg, int64_t Scale, unsigned AddrSpace) { |
268 | TargetLoweringBase::AddrMode AM; |
269 | AM.BaseGV = BaseGV; |
270 | AM.BaseOffs = BaseOffset; |
271 | AM.HasBaseReg = HasBaseReg; |
272 | AM.Scale = Scale; |
273 | return getTLI()->getScalingFactorCost(DL, AM, Ty, AddrSpace); |
274 | } |
275 | |
276 | bool isTruncateFree(Type *Ty1, Type *Ty2) { |
277 | return getTLI()->isTruncateFree(Ty1, Ty2); |
278 | } |
279 | |
280 | bool isProfitableToHoist(Instruction *I) { |
281 | return getTLI()->isProfitableToHoist(I); |
282 | } |
283 | |
284 | bool useAA() const { return getST()->useAA(); } |
285 | |
286 | bool isTypeLegal(Type *Ty) { |
287 | EVT VT = getTLI()->getValueType(DL, Ty); |
288 | return getTLI()->isTypeLegal(VT); |
289 | } |
290 | |
291 | int getGEPCost(Type *PointeeType, const Value *Ptr, |
292 | ArrayRef<const Value *> Operands) { |
293 | return BaseT::getGEPCost(PointeeType, Ptr, Operands); |
294 | } |
295 | |
296 | int getExtCost(const Instruction *I, const Value *Src) { |
297 | if (getTLI()->isExtFree(I)) |
298 | return TargetTransformInfo::TCC_Free; |
299 | |
300 | if (isa<ZExtInst>(I) || isa<SExtInst>(I)) |
301 | if (const LoadInst *LI = dyn_cast<LoadInst>(Src)) |
302 | if (getTLI()->isExtLoad(LI, I, DL)) |
303 | return TargetTransformInfo::TCC_Free; |
304 | |
305 | return TargetTransformInfo::TCC_Basic; |
306 | } |
307 | |
308 | unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, |
309 | ArrayRef<const Value *> Arguments, const User *U) { |
310 | return BaseT::getIntrinsicCost(IID, RetTy, Arguments, U); |
311 | } |
312 | |
313 | unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, |
314 | ArrayRef<Type *> ParamTys, const User *U) { |
315 | if (IID == Intrinsic::cttz) { |
316 | if (getTLI()->isCheapToSpeculateCttz()) |
317 | return TargetTransformInfo::TCC_Basic; |
318 | return TargetTransformInfo::TCC_Expensive; |
319 | } |
320 | |
321 | if (IID == Intrinsic::ctlz) { |
322 | if (getTLI()->isCheapToSpeculateCtlz()) |
323 | return TargetTransformInfo::TCC_Basic; |
324 | return TargetTransformInfo::TCC_Expensive; |
325 | } |
326 | |
327 | return BaseT::getIntrinsicCost(IID, RetTy, ParamTys, U); |
328 | } |
329 | |
330 | unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI, |
331 | unsigned &JumpTableSize, |
332 | ProfileSummaryInfo *PSI, |
333 | BlockFrequencyInfo *BFI) { |
334 | /// Try to find the estimated number of clusters. Note that the number of |
335 | /// clusters identified in this function could be different from the actual |
336 | /// numbers found in lowering. This function ignore switches that are |
337 | /// lowered with a mix of jump table / bit test / BTree. This function was |
338 | /// initially intended to be used when estimating the cost of switch in |
339 | /// inline cost heuristic, but it's a generic cost model to be used in other |
340 | /// places (e.g., in loop unrolling). |
341 | unsigned N = SI.getNumCases(); |
342 | const TargetLoweringBase *TLI = getTLI(); |
343 | const DataLayout &DL = this->getDataLayout(); |
344 | |
345 | JumpTableSize = 0; |
346 | bool IsJTAllowed = TLI->areJTsAllowed(SI.getParent()->getParent()); |
347 | |
348 | // Early exit if both a jump table and bit test are not allowed. |
349 | if (N < 1 || (!IsJTAllowed && DL.getIndexSizeInBits(0u) < N)) |
350 | return N; |
351 | |
352 | APInt MaxCaseVal = SI.case_begin()->getCaseValue()->getValue(); |
353 | APInt MinCaseVal = MaxCaseVal; |
354 | for (auto CI : SI.cases()) { |
355 | const APInt &CaseVal = CI.getCaseValue()->getValue(); |
356 | if (CaseVal.sgt(MaxCaseVal)) |
357 | MaxCaseVal = CaseVal; |
358 | if (CaseVal.slt(MinCaseVal)) |
359 | MinCaseVal = CaseVal; |
360 | } |
361 | |
362 | // Check if suitable for a bit test |
363 | if (N <= DL.getIndexSizeInBits(0u)) { |
364 | SmallPtrSet<const BasicBlock *, 4> Dests; |
365 | for (auto I : SI.cases()) |
366 | Dests.insert(I.getCaseSuccessor()); |
367 | |
368 | if (TLI->isSuitableForBitTests(Dests.size(), N, MinCaseVal, MaxCaseVal, |
369 | DL)) |
370 | return 1; |
371 | } |
372 | |
373 | // Check if suitable for a jump table. |
374 | if (IsJTAllowed) { |
375 | if (N < 2 || N < TLI->getMinimumJumpTableEntries()) |
376 | return N; |
377 | uint64_t Range = |
378 | (MaxCaseVal - MinCaseVal) |
379 | .getLimitedValue(std::numeric_limits<uint64_t>::max() - 1) + 1; |
380 | // Check whether a range of clusters is dense enough for a jump table |
381 | if (TLI->isSuitableForJumpTable(&SI, N, Range, PSI, BFI)) { |
382 | JumpTableSize = Range; |
383 | return 1; |
384 | } |
385 | } |
386 | return N; |
387 | } |
388 | |
389 | bool shouldBuildLookupTables() { |
390 | const TargetLoweringBase *TLI = getTLI(); |
391 | return TLI->isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) || |
392 | TLI->isOperationLegalOrCustom(ISD::BRIND, MVT::Other); |
393 | } |
394 | |
395 | bool haveFastSqrt(Type *Ty) { |
396 | const TargetLoweringBase *TLI = getTLI(); |
397 | EVT VT = TLI->getValueType(DL, Ty); |
398 | return TLI->isTypeLegal(VT) && |
399 | TLI->isOperationLegalOrCustom(ISD::FSQRT, VT); |
400 | } |
401 | |
402 | bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) { |
403 | return true; |
404 | } |
405 | |
406 | unsigned getFPOpCost(Type *Ty) { |
407 | // Check whether FADD is available, as a proxy for floating-point in |
408 | // general. |
409 | const TargetLoweringBase *TLI = getTLI(); |
410 | EVT VT = TLI->getValueType(DL, Ty); |
411 | if (TLI->isOperationLegalOrCustomOrPromote(ISD::FADD, VT)) |
412 | return TargetTransformInfo::TCC_Basic; |
413 | return TargetTransformInfo::TCC_Expensive; |
414 | } |
415 | |
416 | unsigned getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) { |
417 | const TargetLoweringBase *TLI = getTLI(); |
418 | switch (Opcode) { |
419 | default: break; |
420 | case Instruction::Trunc: |
421 | if (TLI->isTruncateFree(OpTy, Ty)) |
422 | return TargetTransformInfo::TCC_Free; |
423 | return TargetTransformInfo::TCC_Basic; |
424 | case Instruction::ZExt: |
425 | if (TLI->isZExtFree(OpTy, Ty)) |
426 | return TargetTransformInfo::TCC_Free; |
427 | return TargetTransformInfo::TCC_Basic; |
428 | |
429 | case Instruction::AddrSpaceCast: |
430 | if (TLI->isFreeAddrSpaceCast(OpTy->getPointerAddressSpace(), |
431 | Ty->getPointerAddressSpace())) |
432 | return TargetTransformInfo::TCC_Free; |
433 | return TargetTransformInfo::TCC_Basic; |
434 | } |
435 | |
436 | return BaseT::getOperationCost(Opcode, Ty, OpTy); |
437 | } |
438 | |
/// Default inlining threshold multiplier: 1.
unsigned getInliningThresholdMultiplier() {
  return 1;
}
440 | |
/// Default inliner vector bonus, in percent: 150.
int getInlinerVectorBonusPercent() {
  return 150;
}
442 | |
443 | void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, |
444 | TTI::UnrollingPreferences &UP) { |
445 | // This unrolling functionality is target independent, but to provide some |
446 | // motivation for its intended use, for x86: |
447 | |
448 | // According to the Intel 64 and IA-32 Architectures Optimization Reference |
449 | // Manual, Intel Core models and later have a loop stream detector (and |
450 | // associated uop queue) that can benefit from partial unrolling. |
451 | // The relevant requirements are: |
452 | // - The loop must have no more than 4 (8 for Nehalem and later) branches |
453 | // taken, and none of them may be calls. |
454 | // - The loop can have no more than 18 (28 for Nehalem and later) uops. |
455 | |
456 | // According to the Software Optimization Guide for AMD Family 15h |
457 | // Processors, models 30h-4fh (Steamroller and later) have a loop predictor |
458 | // and loop buffer which can benefit from partial unrolling. |
459 | // The relevant requirements are: |
460 | // - The loop must have fewer than 16 branches |
461 | // - The loop must have less than 40 uops in all executed loop branches |
462 | |
463 | // The number of taken branches in a loop is hard to estimate here, and |
464 | // benchmarking has revealed that it is better not to be conservative when |
465 | // estimating the branch count. As a result, we'll ignore the branch limits |
466 | // until someone finds a case where it matters in practice. |
467 | |
468 | unsigned MaxOps; |
469 | const TargetSubtargetInfo *ST = getST(); |
470 | if (PartialUnrollingThreshold.getNumOccurrences() > 0) |
471 | MaxOps = PartialUnrollingThreshold; |
472 | else if (ST->getSchedModel().LoopMicroOpBufferSize > 0) |
473 | MaxOps = ST->getSchedModel().LoopMicroOpBufferSize; |
474 | else |
475 | return; |
476 | |
477 | // Scan the loop: don't unroll loops with calls. |
478 | for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); I != E; |
479 | ++I) { |
480 | BasicBlock *BB = *I; |
481 | |
482 | for (BasicBlock::iterator J = BB->begin(), JE = BB->end(); J != JE; ++J) |
483 | if (isa<CallInst>(J) || isa<InvokeInst>(J)) { |
484 | ImmutableCallSite CS(&*J); |
485 | if (const Function *F = CS.getCalledFunction()) { |
486 | if (!static_cast<T *>(this)->isLoweredToCall(F)) |
487 | continue; |
488 | } |
489 | |
490 | return; |
491 | } |
492 | } |
493 | |
494 | // Enable runtime and partial unrolling up to the specified size. |
495 | // Enable using trip count upper bound to unroll loops. |
496 | UP.Partial = UP.Runtime = UP.UpperBound = true; |
497 | UP.PartialThreshold = MaxOps; |
498 | |
499 | // Avoid unrolling when optimizing for size. |
500 | UP.OptSizeThreshold = 0; |
501 | UP.PartialOptSizeThreshold = 0; |
502 | |
503 | // Set number of instructions optimized when "back edge" |
504 | // becomes "fall through" to default value of 2. |
505 | UP.BEInsns = 2; |
506 | } |
507 | |
508 | bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE, |
509 | AssumptionCache &AC, |
510 | TargetLibraryInfo *LibInfo, |
511 | HardwareLoopInfo &HWLoopInfo) { |
512 | return BaseT::isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo); |
513 | } |
514 | |
515 | bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE, |
516 | AssumptionCache &AC, TargetLibraryInfo *TLI, |
517 | DominatorTree *DT, |
518 | const LoopAccessInfo *LAI) { |
519 | return BaseT::preferPredicateOverEpilogue(L, LI, SE, AC, TLI, DT, LAI); |
520 | } |
521 | |
522 | int getInstructionLatency(const Instruction *I) { |
523 | if (isa<LoadInst>(I)) |
524 | return getST()->getSchedModel().DefaultLoadLatency; |
525 | |
526 | return BaseT::getInstructionLatency(I); |
527 | } |
528 | |
529 | virtual Optional<unsigned> |
530 | getCacheSize(TargetTransformInfo::CacheLevel Level) const { |
531 | return Optional<unsigned>( |
532 | getST()->getCacheSize(static_cast<unsigned>(Level))); |
533 | } |
534 | |
535 | virtual Optional<unsigned> |
536 | getCacheAssociativity(TargetTransformInfo::CacheLevel Level) const { |
537 | Optional<unsigned> TargetResult = |
538 | getST()->getCacheAssociativity(static_cast<unsigned>(Level)); |
539 | |
540 | if (TargetResult) |
541 | return TargetResult; |
542 | |
543 | return BaseT::getCacheAssociativity(Level); |
544 | } |
545 | |
546 | virtual unsigned getCacheLineSize() const { |
547 | return getST()->getCacheLineSize(); |
548 | } |
549 | |
550 | virtual unsigned getPrefetchDistance() const { |
551 | return getST()->getPrefetchDistance(); |
552 | } |
553 | |
554 | virtual unsigned getMinPrefetchStride() const { |
555 | return getST()->getMinPrefetchStride(); |
556 | } |
557 | |
558 | virtual unsigned getMaxPrefetchIterationsAhead() const { |
559 | return getST()->getMaxPrefetchIterationsAhead(); |
560 | } |
561 | |
562 | /// @} |
563 | |
564 | /// \name Vector TTI Implementations |
565 | /// @{ |
566 | |
567 | unsigned getRegisterBitWidth(bool Vector) const { return 32; } |
568 | |
569 | /// Estimate the overhead of scalarizing an instruction. Insert and Extract |
570 | /// are set if the result needs to be inserted and/or extracted from vectors. |
571 | unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) { |
572 | assert(Ty->isVectorTy() && "Can only scalarize vectors")((Ty->isVectorTy() && "Can only scalarize vectors" ) ? static_cast<void> (0) : __assert_fail ("Ty->isVectorTy() && \"Can only scalarize vectors\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/CodeGen/BasicTTIImpl.h" , 572, __PRETTY_FUNCTION__)); |
573 | unsigned Cost = 0; |
574 | |
575 | for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) { |
576 | if (Insert) |
577 | Cost += static_cast<T *>(this) |
578 | ->getVectorInstrCost(Instruction::InsertElement, Ty, i); |
579 | if (Extract) |
580 | Cost += static_cast<T *>(this) |
581 | ->getVectorInstrCost(Instruction::ExtractElement, Ty, i); |
582 | } |
583 | |
584 | return Cost; |
585 | } |
586 | |
587 | /// Estimate the overhead of scalarizing an instructions unique |
588 | /// non-constant operands. The types of the arguments are ordinarily |
589 | /// scalar, in which case the costs are multiplied with VF. |
590 | unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args, |
591 | unsigned VF) { |
592 | unsigned Cost = 0; |
593 | SmallPtrSet<const Value*, 4> UniqueOperands; |
594 | for (const Value *A : Args) { |
595 | if (!isa<Constant>(A) && UniqueOperands.insert(A).second) { |
596 | Type *VecTy = nullptr; |
597 | if (A->getType()->isVectorTy()) { |
598 | VecTy = A->getType(); |
599 | // If A is a vector operand, VF should be 1 or correspond to A. |
600 | assert((VF == 1 || VF == VecTy->getVectorNumElements()) &&(((VF == 1 || VF == VecTy->getVectorNumElements()) && "Vector argument does not match VF") ? static_cast<void> (0) : __assert_fail ("(VF == 1 || VF == VecTy->getVectorNumElements()) && \"Vector argument does not match VF\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/CodeGen/BasicTTIImpl.h" , 601, __PRETTY_FUNCTION__)) |
601 | "Vector argument does not match VF")(((VF == 1 || VF == VecTy->getVectorNumElements()) && "Vector argument does not match VF") ? static_cast<void> (0) : __assert_fail ("(VF == 1 || VF == VecTy->getVectorNumElements()) && \"Vector argument does not match VF\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/CodeGen/BasicTTIImpl.h" , 601, __PRETTY_FUNCTION__)); |
602 | } |
603 | else |
604 | VecTy = VectorType::get(A->getType(), VF); |
605 | |
606 | Cost += getScalarizationOverhead(VecTy, false, true); |
607 | } |
608 | } |
609 | |
610 | return Cost; |
611 | } |
612 | |
613 | unsigned getScalarizationOverhead(Type *VecTy, ArrayRef<const Value *> Args) { |
614 | assert(VecTy->isVectorTy())((VecTy->isVectorTy()) ? static_cast<void> (0) : __assert_fail ("VecTy->isVectorTy()", "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/CodeGen/BasicTTIImpl.h" , 614, __PRETTY_FUNCTION__)); |
615 | |
616 | unsigned Cost = 0; |
617 | |
618 | Cost += getScalarizationOverhead(VecTy, true, false); |
619 | if (!Args.empty()) |
620 | Cost += getOperandsScalarizationOverhead(Args, |
621 | VecTy->getVectorNumElements()); |
622 | else |
623 | // When no information on arguments is provided, we add the cost |
624 | // associated with one argument as a heuristic. |
625 | Cost += getScalarizationOverhead(VecTy, false, true); |
626 | |
627 | return Cost; |
628 | } |
629 | |
/// Default maximum interleave factor: 1, independent of \p VF.
unsigned getMaxInterleaveFactor(unsigned VF) {
  return 1;
}
631 | |
632 | unsigned getArithmeticInstrCost( |
633 | unsigned Opcode, Type *Ty, |
634 | TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue, |
635 | TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue, |
636 | TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None, |
637 | TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None, |
638 | ArrayRef<const Value *> Args = ArrayRef<const Value *>(), |
639 | const Instruction *CxtI = nullptr) { |
640 | // Check if any of the operands are vector operands. |
641 | const TargetLoweringBase *TLI = getTLI(); |
642 | int ISD = TLI->InstructionOpcodeToISD(Opcode); |
643 | assert(ISD && "Invalid opcode")((ISD && "Invalid opcode") ? static_cast<void> ( 0) : __assert_fail ("ISD && \"Invalid opcode\"", "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/CodeGen/BasicTTIImpl.h" , 643, __PRETTY_FUNCTION__)); |
644 | |
645 | std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty); |
646 | |
647 | bool IsFloat = Ty->isFPOrFPVectorTy(); |
648 | // Assume that floating point arithmetic operations cost twice as much as |
649 | // integer operations. |
650 | unsigned OpCost = (IsFloat ? 2 : 1); |
651 | |
652 | if (TLI->isOperationLegalOrPromote(ISD, LT.second)) { |
653 | // The operation is legal. Assume it costs 1. |
654 | // TODO: Once we have extract/insert subvector cost we need to use them. |
655 | return LT.first * OpCost; |
656 | } |
657 | |
658 | if (!TLI->isOperationExpand(ISD, LT.second)) { |
659 | // If the operation is custom lowered, then assume that the code is twice |
660 | // as expensive. |
661 | return LT.first * 2 * OpCost; |
662 | } |
663 | |
664 | // Else, assume that we need to scalarize this op. |
665 | // TODO: If one of the types get legalized by splitting, handle this |
666 | // similarly to what getCastInstrCost() does. |
667 | if (Ty->isVectorTy()) { |
668 | unsigned Num = Ty->getVectorNumElements(); |
669 | unsigned Cost = static_cast<T *>(this) |
670 | ->getArithmeticInstrCost(Opcode, Ty->getScalarType()); |
671 | // Return the cost of multiple scalar invocation plus the cost of |
672 | // inserting and extracting the values. |
673 | return getScalarizationOverhead(Ty, Args) + Num * Cost; |
674 | } |
675 | |
676 | // We don't know anything about this scalar instruction. |
677 | return OpCost; |
678 | } |
679 | |
680 | unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, |
681 | Type *SubTp) { |
682 | switch (Kind) { |
683 | case TTI::SK_Broadcast: |
684 | return getBroadcastShuffleOverhead(Tp); |
685 | case TTI::SK_Select: |
686 | case TTI::SK_Reverse: |
687 | case TTI::SK_Transpose: |
688 | case TTI::SK_PermuteSingleSrc: |
689 | case TTI::SK_PermuteTwoSrc: |
690 | return getPermuteShuffleOverhead(Tp); |
691 | case TTI::SK_ExtractSubvector: |
692 | return getExtractSubvectorOverhead(Tp, Index, SubTp); |
693 | case TTI::SK_InsertSubvector: |
694 | return getInsertSubvectorOverhead(Tp, Index, SubTp); |
695 | } |
696 | llvm_unreachable("Unknown TTI::ShuffleKind")::llvm::llvm_unreachable_internal("Unknown TTI::ShuffleKind", "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/CodeGen/BasicTTIImpl.h" , 696); |
697 | } |
698 | |
699 | unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, |
700 | const Instruction *I = nullptr) { |
701 | const TargetLoweringBase *TLI = getTLI(); |
702 | int ISD = TLI->InstructionOpcodeToISD(Opcode); |
703 | assert(ISD && "Invalid opcode")((ISD && "Invalid opcode") ? static_cast<void> ( 0) : __assert_fail ("ISD && \"Invalid opcode\"", "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/CodeGen/BasicTTIImpl.h" , 703, __PRETTY_FUNCTION__)); |
704 | std::pair<unsigned, MVT> SrcLT = TLI->getTypeLegalizationCost(DL, Src); |
705 | std::pair<unsigned, MVT> DstLT = TLI->getTypeLegalizationCost(DL, Dst); |
706 | |
707 | // Check for NOOP conversions. |
708 | if (SrcLT.first == DstLT.first && |
709 | SrcLT.second.getSizeInBits() == DstLT.second.getSizeInBits()) { |
710 | |
711 | // Bitcast between types that are legalized to the same type are free. |
712 | if (Opcode == Instruction::BitCast || Opcode == Instruction::Trunc) |
713 | return 0; |
714 | } |
715 | |
716 | if (Opcode == Instruction::Trunc && |
717 | TLI->isTruncateFree(SrcLT.second, DstLT.second)) |
718 | return 0; |
719 | |
720 | if (Opcode == Instruction::ZExt && |
721 | TLI->isZExtFree(SrcLT.second, DstLT.second)) |
722 | return 0; |
723 | |
724 | if (Opcode == Instruction::AddrSpaceCast && |
725 | TLI->isFreeAddrSpaceCast(Src->getPointerAddressSpace(), |
726 | Dst->getPointerAddressSpace())) |
727 | return 0; |
728 | |
729 | // If this is a zext/sext of a load, return 0 if the corresponding |
730 | // extending load exists on target. |
731 | if ((Opcode == Instruction::ZExt || Opcode == Instruction::SExt) && |
732 | I && isa<LoadInst>(I->getOperand(0))) { |
733 | EVT ExtVT = EVT::getEVT(Dst); |
734 | EVT LoadVT = EVT::getEVT(Src); |
735 | unsigned LType = |
736 | ((Opcode == Instruction::ZExt) ? ISD::ZEXTLOAD : ISD::SEXTLOAD); |
737 | if (TLI->isLoadExtLegal(LType, ExtVT, LoadVT)) |
738 | return 0; |
739 | } |
740 | |
741 | // If the cast is marked as legal (or promote) then assume low cost. |
742 | if (SrcLT.first == DstLT.first && |
743 | TLI->isOperationLegalOrPromote(ISD, DstLT.second)) |
744 | return 1; |
745 | |
746 | // Handle scalar conversions. |
747 | if (!Src->isVectorTy() && !Dst->isVectorTy()) { |
748 | // Scalar bitcasts are usually free. |
749 | if (Opcode == Instruction::BitCast) |
750 | return 0; |
751 | |
752 | // Just check the op cost. If the operation is legal then assume it costs |
753 | // 1. |
754 | if (!TLI->isOperationExpand(ISD, DstLT.second)) |
755 | return 1; |
756 | |
757 | // Assume that illegal scalar instruction are expensive. |
758 | return 4; |
759 | } |
760 | |
761 | // Check vector-to-vector casts. |
762 | if (Dst->isVectorTy() && Src->isVectorTy()) { |
763 | // If the cast is between same-sized registers, then the check is simple. |
764 | if (SrcLT.first == DstLT.first && |
765 | SrcLT.second.getSizeInBits() == DstLT.second.getSizeInBits()) { |
766 | |
767 | // Assume that Zext is done using AND. |
768 | if (Opcode == Instruction::ZExt) |
769 | return 1; |
770 | |
771 | // Assume that sext is done using SHL and SRA. |
772 | if (Opcode == Instruction::SExt) |
773 | return 2; |
774 | |
775 | // Just check the op cost. If the operation is legal then assume it |
776 | // costs |
777 | // 1 and multiply by the type-legalization overhead. |
778 | if (!TLI->isOperationExpand(ISD, DstLT.second)) |
779 | return SrcLT.first * 1; |
780 | } |
781 | |
782 | // If we are legalizing by splitting, query the concrete TTI for the cost |
783 | // of casting the original vector twice. We also need to factor in the |
784 | // cost of the split itself. Count that as 1, to be consistent with |
785 | // TLI->getTypeLegalizationCost(). |
786 | if ((TLI->getTypeAction(Src->getContext(), TLI->getValueType(DL, Src)) == |
787 | TargetLowering::TypeSplitVector || |
788 | TLI->getTypeAction(Dst->getContext(), TLI->getValueType(DL, Dst)) == |
789 | TargetLowering::TypeSplitVector) && |
790 | Src->getVectorNumElements() > 1 && Dst->getVectorNumElements() > 1) { |
791 | Type *SplitDst = VectorType::get(Dst->getVectorElementType(), |
792 | Dst->getVectorNumElements() / 2); |
793 | Type *SplitSrc = VectorType::get(Src->getVectorElementType(), |
794 | Src->getVectorNumElements() / 2); |
795 | T *TTI = static_cast<T *>(this); |
796 | return TTI->getVectorSplitCost() + |
797 | (2 * TTI->getCastInstrCost(Opcode, SplitDst, SplitSrc, I)); |
798 | } |
799 | |
800 | // In other cases where the source or destination are illegal, assume |
801 | // the operation will get scalarized. |
802 | unsigned Num = Dst->getVectorNumElements(); |
803 | unsigned Cost = static_cast<T *>(this)->getCastInstrCost( |
804 | Opcode, Dst->getScalarType(), Src->getScalarType(), I); |
805 | |
806 | // Return the cost of multiple scalar invocation plus the cost of |
807 | // inserting and extracting the values. |
808 | return getScalarizationOverhead(Dst, true, true) + Num * Cost; |
809 | } |
810 | |
811 | // We already handled vector-to-vector and scalar-to-scalar conversions. |
812 | // This |
813 | // is where we handle bitcast between vectors and scalars. We need to assume |
814 | // that the conversion is scalarized in one way or another. |
815 | if (Opcode == Instruction::BitCast) |
816 | // Illegal bitcasts are done by storing and loading from a stack slot. |
817 | return (Src->isVectorTy() ? getScalarizationOverhead(Src, false, true) |
818 | : 0) + |
819 | (Dst->isVectorTy() ? getScalarizationOverhead(Dst, true, false) |
820 | : 0); |
821 | |
822 | llvm_unreachable("Unhandled cast")::llvm::llvm_unreachable_internal("Unhandled cast", "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/CodeGen/BasicTTIImpl.h" , 822); |
823 | } |
824 | |
825 | unsigned getExtractWithExtendCost(unsigned Opcode, Type *Dst, |
826 | VectorType *VecTy, unsigned Index) { |
827 | return static_cast<T *>(this)->getVectorInstrCost( |
828 | Instruction::ExtractElement, VecTy, Index) + |
829 | static_cast<T *>(this)->getCastInstrCost(Opcode, Dst, |
830 | VecTy->getElementType()); |
831 | } |
832 | |
/// Cost of a control-flow instruction: always 0, because branches are
/// assumed to be predicted. \p Opcode is not inspected.
unsigned getCFInstrCost(unsigned Opcode) {
  const unsigned PredictedBranchCost = 0;
  return PredictedBranchCost;
}
837 | |
838 | unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, |
839 | const Instruction *I) { |
840 | const TargetLoweringBase *TLI = getTLI(); |
841 | int ISD = TLI->InstructionOpcodeToISD(Opcode); |
842 | assert(ISD && "Invalid opcode")((ISD && "Invalid opcode") ? static_cast<void> ( 0) : __assert_fail ("ISD && \"Invalid opcode\"", "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/CodeGen/BasicTTIImpl.h" , 842, __PRETTY_FUNCTION__)); |
843 | |
844 | // Selects on vectors are actually vector selects. |
845 | if (ISD == ISD::SELECT) { |
846 | assert(CondTy && "CondTy must exist")((CondTy && "CondTy must exist") ? static_cast<void > (0) : __assert_fail ("CondTy && \"CondTy must exist\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/CodeGen/BasicTTIImpl.h" , 846, __PRETTY_FUNCTION__)); |
847 | if (CondTy->isVectorTy()) |
848 | ISD = ISD::VSELECT; |
849 | } |
850 | std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy); |
851 | |
852 | if (!(ValTy->isVectorTy() && !LT.second.isVector()) && |
853 | !TLI->isOperationExpand(ISD, LT.second)) { |
854 | // The operation is legal. Assume it costs 1. Multiply |
855 | // by the type-legalization overhead. |
856 | return LT.first * 1; |
857 | } |
858 | |
859 | // Otherwise, assume that the cast is scalarized. |
860 | // TODO: If one of the types get legalized by splitting, handle this |
861 | // similarly to what getCastInstrCost() does. |
862 | if (ValTy->isVectorTy()) { |
863 | unsigned Num = ValTy->getVectorNumElements(); |
864 | if (CondTy) |
865 | CondTy = CondTy->getScalarType(); |
866 | unsigned Cost = static_cast<T *>(this)->getCmpSelInstrCost( |
867 | Opcode, ValTy->getScalarType(), CondTy, I); |
868 | |
869 | // Return the cost of multiple scalar invocation plus the cost of |
870 | // inserting and extracting the values. |
871 | return getScalarizationOverhead(ValTy, true, false) + Num * Cost; |
872 | } |
873 | |
874 | // Unknown scalar opcode. |
875 | return 1; |
876 | } |
877 | |
878 | unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) { |
879 | std::pair<unsigned, MVT> LT = |
880 | getTLI()->getTypeLegalizationCost(DL, Val->getScalarType()); |
881 | |
882 | return LT.first; |
883 | } |
884 | |
885 | unsigned getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment, |
886 | unsigned AddressSpace, |
887 | const Instruction *I = nullptr) { |
888 | assert(!Src->isVoidTy() && "Invalid type")((!Src->isVoidTy() && "Invalid type") ? static_cast <void> (0) : __assert_fail ("!Src->isVoidTy() && \"Invalid type\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/CodeGen/BasicTTIImpl.h" , 888, __PRETTY_FUNCTION__)); |
889 | std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(DL, Src); |
890 | |
891 | // Assuming that all loads of legal types cost 1. |
892 | unsigned Cost = LT.first; |
893 | |
894 | if (Src->isVectorTy() && |
895 | Src->getPrimitiveSizeInBits() < LT.second.getSizeInBits()) { |
896 | // This is a vector load that legalizes to a larger type than the vector |
897 | // itself. Unless the corresponding extending load or truncating store is |
898 | // legal, then this will scalarize. |
899 | TargetLowering::LegalizeAction LA = TargetLowering::Expand; |
900 | EVT MemVT = getTLI()->getValueType(DL, Src); |
901 | if (Opcode == Instruction::Store) |
902 | LA = getTLI()->getTruncStoreAction(LT.second, MemVT); |
903 | else |
904 | LA = getTLI()->getLoadExtAction(ISD::EXTLOAD, LT.second, MemVT); |
905 | |
906 | if (LA != TargetLowering::Legal && LA != TargetLowering::Custom) { |
907 | // This is a vector load/store for some illegal type that is scalarized. |
908 | // We must account for the cost of building or decomposing the vector. |
909 | Cost += getScalarizationOverhead(Src, Opcode != Instruction::Store, |
910 | Opcode == Instruction::Store); |
911 | } |
912 | } |
913 | |
914 | return Cost; |
915 | } |
916 | |
917 | unsigned getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, |
918 | unsigned Factor, |
919 | ArrayRef<unsigned> Indices, |
920 | unsigned Alignment, unsigned AddressSpace, |
921 | bool UseMaskForCond = false, |
922 | bool UseMaskForGaps = false) { |
923 | VectorType *VT = dyn_cast<VectorType>(VecTy); |
924 | assert(VT && "Expect a vector type for interleaved memory op")((VT && "Expect a vector type for interleaved memory op" ) ? static_cast<void> (0) : __assert_fail ("VT && \"Expect a vector type for interleaved memory op\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/CodeGen/BasicTTIImpl.h" , 924, __PRETTY_FUNCTION__)); |
925 | |
926 | unsigned NumElts = VT->getNumElements(); |
927 | assert(Factor > 1 && NumElts % Factor == 0 && "Invalid interleave factor")((Factor > 1 && NumElts % Factor == 0 && "Invalid interleave factor" ) ? static_cast<void> (0) : __assert_fail ("Factor > 1 && NumElts % Factor == 0 && \"Invalid interleave factor\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/CodeGen/BasicTTIImpl.h" , 927, __PRETTY_FUNCTION__)); |
928 | |
929 | unsigned NumSubElts = NumElts / Factor; |
930 | VectorType *SubVT = VectorType::get(VT->getElementType(), NumSubElts); |
931 | |
932 | // Firstly, the cost of load/store operation. |
933 | unsigned Cost; |
934 | if (UseMaskForCond || UseMaskForGaps) |
935 | Cost = static_cast<T *>(this)->getMaskedMemoryOpCost( |
936 | Opcode, VecTy, Alignment, AddressSpace); |
937 | else |
938 | Cost = static_cast<T *>(this)->getMemoryOpCost( |
939 | Opcode, VecTy, MaybeAlign(Alignment), AddressSpace); |
940 | |
941 | // Legalize the vector type, and get the legalized and unlegalized type |
942 | // sizes. |
943 | MVT VecTyLT = getTLI()->getTypeLegalizationCost(DL, VecTy).second; |
944 | unsigned VecTySize = |
945 | static_cast<T *>(this)->getDataLayout().getTypeStoreSize(VecTy); |
946 | unsigned VecTyLTSize = VecTyLT.getStoreSize(); |
947 | |
948 | // Return the ceiling of dividing A by B. |
949 | auto ceil = [](unsigned A, unsigned B) { return (A + B - 1) / B; }; |
950 | |
951 | // Scale the cost of the memory operation by the fraction of legalized |
952 | // instructions that will actually be used. We shouldn't account for the |
953 | // cost of dead instructions since they will be removed. |
954 | // |
955 | // E.g., An interleaved load of factor 8: |
956 | // %vec = load <16 x i64>, <16 x i64>* %ptr |
957 | // %v0 = shufflevector %vec, undef, <0, 8> |
958 | // |
959 | // If <16 x i64> is legalized to 8 v2i64 loads, only 2 of the loads will be |
960 | // used (those corresponding to elements [0:1] and [8:9] of the unlegalized |
961 | // type). The other loads are unused. |
962 | // |
963 | // We only scale the cost of loads since interleaved store groups aren't |
964 | // allowed to have gaps. |
965 | if (Opcode == Instruction::Load && VecTySize > VecTyLTSize) { |
966 | // The number of loads of a legal type it will take to represent a load |
967 | // of the unlegalized vector type. |
968 | unsigned NumLegalInsts = ceil(VecTySize, VecTyLTSize); |
969 | |
970 | // The number of elements of the unlegalized type that correspond to a |
971 | // single legal instruction. |
972 | unsigned NumEltsPerLegalInst = ceil(NumElts, NumLegalInsts); |
973 | |
974 | // Determine which legal instructions will be used. |
975 | BitVector UsedInsts(NumLegalInsts, false); |
976 | for (unsigned Index : Indices) |
977 | for (unsigned Elt = 0; Elt < NumSubElts; ++Elt) |
978 | UsedInsts.set((Index + Elt * Factor) / NumEltsPerLegalInst); |
979 | |
980 | // Scale the cost of the load by the fraction of legal instructions that |
981 | // will be used. |
982 | Cost *= UsedInsts.count() / NumLegalInsts; |
983 | } |
984 | |
985 | // Then plus the cost of interleave operation. |
986 | if (Opcode == Instruction::Load) { |
987 | // The interleave cost is similar to extract sub vectors' elements |
988 | // from the wide vector, and insert them into sub vectors. |
989 | // |
990 | // E.g. An interleaved load of factor 2 (with one member of index 0): |
991 | // %vec = load <8 x i32>, <8 x i32>* %ptr |
992 | // %v0 = shuffle %vec, undef, <0, 2, 4, 6> ; Index 0 |
993 | // The cost is estimated as extract elements at 0, 2, 4, 6 from the |
994 | // <8 x i32> vector and insert them into a <4 x i32> vector. |
995 | |
996 | assert(Indices.size() <= Factor &&((Indices.size() <= Factor && "Interleaved memory op has too many members" ) ? static_cast<void> (0) : __assert_fail ("Indices.size() <= Factor && \"Interleaved memory op has too many members\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/CodeGen/BasicTTIImpl.h" , 997, __PRETTY_FUNCTION__)) |
997 | "Interleaved memory op has too many members")((Indices.size() <= Factor && "Interleaved memory op has too many members" ) ? static_cast<void> (0) : __assert_fail ("Indices.size() <= Factor && \"Interleaved memory op has too many members\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/CodeGen/BasicTTIImpl.h" , 997, __PRETTY_FUNCTION__)); |
998 | |
999 | for (unsigned Index : Indices) { |
1000 | assert(Index < Factor && "Invalid index for interleaved memory op")((Index < Factor && "Invalid index for interleaved memory op" ) ? static_cast<void> (0) : __assert_fail ("Index < Factor && \"Invalid index for interleaved memory op\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/CodeGen/BasicTTIImpl.h" , 1000, __PRETTY_FUNCTION__)); |
1001 | |
1002 | // Extract elements from loaded vector for each sub vector. |
1003 | for (unsigned i = 0; i < NumSubElts; i++) |
1004 | Cost += static_cast<T *>(this)->getVectorInstrCost( |
1005 | Instruction::ExtractElement, VT, Index + i * Factor); |
1006 | } |
1007 | |
1008 | unsigned InsSubCost = 0; |
1009 | for (unsigned i = 0; i < NumSubElts; i++) |
1010 | InsSubCost += static_cast<T *>(this)->getVectorInstrCost( |
1011 | Instruction::InsertElement, SubVT, i); |
1012 | |
1013 | Cost += Indices.size() * InsSubCost; |
1014 | } else { |
1015 | // The interleave cost is extract all elements from sub vectors, and |
1016 | // insert them into the wide vector. |
1017 | // |
1018 | // E.g. An interleaved store of factor 2: |
1019 | // %v0_v1 = shuffle %v0, %v1, <0, 4, 1, 5, 2, 6, 3, 7> |
1020 | // store <8 x i32> %interleaved.vec, <8 x i32>* %ptr |
1021 | // The cost is estimated as extract all elements from both <4 x i32> |
1022 | // vectors and insert into the <8 x i32> vector. |
1023 | |
1024 | unsigned ExtSubCost = 0; |
1025 | for (unsigned i = 0; i < NumSubElts; i++) |
1026 | ExtSubCost += static_cast<T *>(this)->getVectorInstrCost( |
1027 | Instruction::ExtractElement, SubVT, i); |
1028 | Cost += ExtSubCost * Factor; |
1029 | |
1030 | for (unsigned i = 0; i < NumElts; i++) |
1031 | Cost += static_cast<T *>(this) |
1032 | ->getVectorInstrCost(Instruction::InsertElement, VT, i); |
1033 | } |
1034 | |
1035 | if (!UseMaskForCond) |
1036 | return Cost; |
1037 | |
1038 | Type *I8Type = Type::getInt8Ty(VT->getContext()); |
1039 | VectorType *MaskVT = VectorType::get(I8Type, NumElts); |
1040 | SubVT = VectorType::get(I8Type, NumSubElts); |
1041 | |
1042 | // The Mask shuffling cost is extract all the elements of the Mask |
1043 | // and insert each of them Factor times into the wide vector: |
1044 | // |
1045 | // E.g. an interleaved group with factor 3: |
1046 | // %mask = icmp ult <8 x i32> %vec1, %vec2 |
1047 | // %interleaved.mask = shufflevector <8 x i1> %mask, <8 x i1> undef, |
1048 | // <24 x i32> <0,0,0,1,1,1,2,2,2,3,3,3,4,4,4,5,5,5,6,6,6,7,7,7> |
1049 | // The cost is estimated as extract all mask elements from the <8xi1> mask |
1050 | // vector and insert them factor times into the <24xi1> shuffled mask |
1051 | // vector. |
1052 | for (unsigned i = 0; i < NumSubElts; i++) |
1053 | Cost += static_cast<T *>(this)->getVectorInstrCost( |
1054 | Instruction::ExtractElement, SubVT, i); |
1055 | |
1056 | for (unsigned i = 0; i < NumElts; i++) |
1057 | Cost += static_cast<T *>(this)->getVectorInstrCost( |
1058 | Instruction::InsertElement, MaskVT, i); |
1059 | |
1060 | // The Gaps mask is invariant and created outside the loop, therefore the |
1061 | // cost of creating it is not accounted for here. However if we have both |
1062 | // a MaskForGaps and some other mask that guards the execution of the |
1063 | // memory access, we need to account for the cost of And-ing the two masks |
1064 | // inside the loop. |
1065 | if (UseMaskForGaps) |
1066 | Cost += static_cast<T *>(this)->getArithmeticInstrCost( |
1067 | BinaryOperator::And, MaskVT); |
1068 | |
1069 | return Cost; |
1070 | } |
1071 | |
1072 | /// Get intrinsic cost based on arguments. |
1073 | unsigned getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy, |
1074 | ArrayRef<Value *> Args, FastMathFlags FMF, |
1075 | unsigned VF = 1) { |
1076 | unsigned RetVF = (RetTy->isVectorTy() ? RetTy->getVectorNumElements() : 1); |
1077 | assert((RetVF == 1 || VF == 1) && "VF > 1 and RetVF is a vector type")(((RetVF == 1 || VF == 1) && "VF > 1 and RetVF is a vector type" ) ? static_cast<void> (0) : __assert_fail ("(RetVF == 1 || VF == 1) && \"VF > 1 and RetVF is a vector type\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/CodeGen/BasicTTIImpl.h" , 1077, __PRETTY_FUNCTION__)); |
1078 | auto *ConcreteTTI = static_cast<T *>(this); |
1079 | |
1080 | switch (IID) { |
1081 | default: { |
1082 | // Assume that we need to scalarize this intrinsic. |
1083 | SmallVector<Type *, 4> Types; |
1084 | for (Value *Op : Args) { |
1085 | Type *OpTy = Op->getType(); |
1086 | assert(VF == 1 || !OpTy->isVectorTy())((VF == 1 || !OpTy->isVectorTy()) ? static_cast<void> (0) : __assert_fail ("VF == 1 || !OpTy->isVectorTy()", "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/CodeGen/BasicTTIImpl.h" , 1086, __PRETTY_FUNCTION__)); |
1087 | Types.push_back(VF == 1 ? OpTy : VectorType::get(OpTy, VF)); |
1088 | } |
1089 | |
1090 | if (VF > 1 && !RetTy->isVoidTy()) |
1091 | RetTy = VectorType::get(RetTy, VF); |
1092 | |
1093 | // Compute the scalarization overhead based on Args for a vector |
1094 | // intrinsic. A vectorizer will pass a scalar RetTy and VF > 1, while |
1095 | // CostModel will pass a vector RetTy and VF is 1. |
1096 | unsigned ScalarizationCost = std::numeric_limits<unsigned>::max(); |
1097 | if (RetVF > 1 || VF > 1) { |
1098 | ScalarizationCost = 0; |
1099 | if (!RetTy->isVoidTy()) |
1100 | ScalarizationCost += getScalarizationOverhead(RetTy, true, false); |
1101 | ScalarizationCost += getOperandsScalarizationOverhead(Args, VF); |
1102 | } |
1103 | |
1104 | return ConcreteTTI->getIntrinsicInstrCost(IID, RetTy, Types, FMF, |
1105 | ScalarizationCost); |
1106 | } |
1107 | case Intrinsic::masked_scatter: { |
1108 | assert(VF == 1 && "Can't vectorize types here.")((VF == 1 && "Can't vectorize types here.") ? static_cast <void> (0) : __assert_fail ("VF == 1 && \"Can't vectorize types here.\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/CodeGen/BasicTTIImpl.h" , 1108, __PRETTY_FUNCTION__)); |
1109 | Value *Mask = Args[3]; |
1110 | bool VarMask = !isa<Constant>(Mask); |
1111 | unsigned Alignment = cast<ConstantInt>(Args[2])->getZExtValue(); |
1112 | return ConcreteTTI->getGatherScatterOpCost( |
1113 | Instruction::Store, Args[0]->getType(), Args[1], VarMask, Alignment); |
1114 | } |
1115 | case Intrinsic::masked_gather: { |
1116 | assert(VF == 1 && "Can't vectorize types here.")((VF == 1 && "Can't vectorize types here.") ? static_cast <void> (0) : __assert_fail ("VF == 1 && \"Can't vectorize types here.\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/CodeGen/BasicTTIImpl.h" , 1116, __PRETTY_FUNCTION__)); |
1117 | Value *Mask = Args[2]; |
1118 | bool VarMask = !isa<Constant>(Mask); |
1119 | unsigned Alignment = cast<ConstantInt>(Args[1])->getZExtValue(); |
1120 | return ConcreteTTI->getGatherScatterOpCost(Instruction::Load, RetTy, |
1121 | Args[0], VarMask, Alignment); |
1122 | } |
1123 | case Intrinsic::experimental_vector_reduce_add: |
1124 | case Intrinsic::experimental_vector_reduce_mul: |
1125 | case Intrinsic::experimental_vector_reduce_and: |
1126 | case Intrinsic::experimental_vector_reduce_or: |
1127 | case Intrinsic::experimental_vector_reduce_xor: |
1128 | case Intrinsic::experimental_vector_reduce_v2_fadd: |
1129 | case Intrinsic::experimental_vector_reduce_v2_fmul: |
1130 | case Intrinsic::experimental_vector_reduce_smax: |
1131 | case Intrinsic::experimental_vector_reduce_smin: |
1132 | case Intrinsic::experimental_vector_reduce_fmax: |
1133 | case Intrinsic::experimental_vector_reduce_fmin: |
1134 | case Intrinsic::experimental_vector_reduce_umax: |
1135 | case Intrinsic::experimental_vector_reduce_umin: |
1136 | return getIntrinsicInstrCost(IID, RetTy, Args[0]->getType(), FMF); |
1137 | case Intrinsic::fshl: |
1138 | case Intrinsic::fshr: { |
1139 | Value *X = Args[0]; |
1140 | Value *Y = Args[1]; |
1141 | Value *Z = Args[2]; |
1142 | TTI::OperandValueProperties OpPropsX, OpPropsY, OpPropsZ, OpPropsBW; |
1143 | TTI::OperandValueKind OpKindX = TTI::getOperandInfo(X, OpPropsX); |
1144 | TTI::OperandValueKind OpKindY = TTI::getOperandInfo(Y, OpPropsY); |
1145 | TTI::OperandValueKind OpKindZ = TTI::getOperandInfo(Z, OpPropsZ); |
1146 | TTI::OperandValueKind OpKindBW = TTI::OK_UniformConstantValue; |
1147 | OpPropsBW = isPowerOf2_32(RetTy->getScalarSizeInBits()) ? TTI::OP_PowerOf2 |
1148 | : TTI::OP_None; |
1149 | // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW))) |
1150 | // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW)) |
1151 | unsigned Cost = 0; |
1152 | Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::Or, RetTy); |
1153 | Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::Sub, RetTy); |
1154 | Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::Shl, RetTy, |
1155 | OpKindX, OpKindZ, OpPropsX); |
1156 | Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::LShr, RetTy, |
1157 | OpKindY, OpKindZ, OpPropsY); |
1158 | // Non-constant shift amounts requires a modulo. |
1159 | if (OpKindZ != TTI::OK_UniformConstantValue && |
1160 | OpKindZ != TTI::OK_NonUniformConstantValue) |
1161 | Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::URem, RetTy, |
1162 | OpKindZ, OpKindBW, OpPropsZ, |
1163 | OpPropsBW); |
1164 | // For non-rotates (X != Y) we must add shift-by-zero handling costs. |
1165 | if (X != Y) { |
1166 | Type *CondTy = RetTy->getWithNewBitWidth(1); |
1167 | Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, |
1168 | CondTy, nullptr); |
1169 | Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::Select, RetTy, |
1170 | CondTy, nullptr); |
1171 | } |
1172 | return Cost; |
1173 | } |
1174 | } |
1175 | } |
1176 | |
1177 | /// Get intrinsic cost based on argument types. |
1178 | /// If ScalarizationCostPassed is std::numeric_limits<unsigned>::max(), the |
1179 | /// cost of scalarizing the arguments and the return value will be computed |
1180 | /// based on types. |
1181 | unsigned getIntrinsicInstrCost( |
1182 | Intrinsic::ID IID, Type *RetTy, ArrayRef<Type *> Tys, FastMathFlags FMF, |
1183 | unsigned ScalarizationCostPassed = std::numeric_limits<unsigned>::max()) { |
1184 | auto *ConcreteTTI = static_cast<T *>(this); |
1185 | |
1186 | SmallVector<unsigned, 2> ISDs; |
1187 | unsigned SingleCallCost = 10; // Library call cost. Make it expensive. |
1188 | switch (IID) { |
1189 | default: { |
1190 | // Assume that we need to scalarize this intrinsic. |
1191 | unsigned ScalarizationCost = ScalarizationCostPassed; |
1192 | unsigned ScalarCalls = 1; |
1193 | Type *ScalarRetTy = RetTy; |
1194 | if (RetTy->isVectorTy()) { |
1195 | if (ScalarizationCostPassed == std::numeric_limits<unsigned>::max()) |
1196 | ScalarizationCost = getScalarizationOverhead(RetTy, true, false); |
1197 | ScalarCalls = std::max(ScalarCalls, RetTy->getVectorNumElements()); |
1198 | ScalarRetTy = RetTy->getScalarType(); |
1199 | } |
1200 | SmallVector<Type *, 4> ScalarTys; |
1201 | for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) { |
1202 | Type *Ty = Tys[i]; |
1203 | if (Ty->isVectorTy()) { |
1204 | if (ScalarizationCostPassed == std::numeric_limits<unsigned>::max()) |
1205 | ScalarizationCost += getScalarizationOverhead(Ty, false, true); |
1206 | ScalarCalls = std::max(ScalarCalls, Ty->getVectorNumElements()); |
1207 | Ty = Ty->getScalarType(); |
1208 | } |
1209 | ScalarTys.push_back(Ty); |
1210 | } |
1211 | if (ScalarCalls == 1) |
1212 | return 1; // Return cost of a scalar intrinsic. Assume it to be cheap. |
1213 | |
1214 | unsigned ScalarCost = |
1215 | ConcreteTTI->getIntrinsicInstrCost(IID, ScalarRetTy, ScalarTys, FMF); |
1216 | |
1217 | return ScalarCalls * ScalarCost + ScalarizationCost; |
1218 | } |
1219 | // Look for intrinsics that can be lowered directly or turned into a scalar |
1220 | // intrinsic call. |
1221 | case Intrinsic::sqrt: |
1222 | ISDs.push_back(ISD::FSQRT); |
1223 | break; |
1224 | case Intrinsic::sin: |
1225 | ISDs.push_back(ISD::FSIN); |
1226 | break; |
1227 | case Intrinsic::cos: |
1228 | ISDs.push_back(ISD::FCOS); |
1229 | break; |
1230 | case Intrinsic::exp: |
1231 | ISDs.push_back(ISD::FEXP); |
1232 | break; |
1233 | case Intrinsic::exp2: |
1234 | ISDs.push_back(ISD::FEXP2); |
1235 | break; |
1236 | case Intrinsic::log: |
1237 | ISDs.push_back(ISD::FLOG); |
1238 | break; |
1239 | case Intrinsic::log10: |
1240 | ISDs.push_back(ISD::FLOG10); |
1241 | break; |
1242 | case Intrinsic::log2: |
1243 | ISDs.push_back(ISD::FLOG2); |
1244 | break; |
1245 | case Intrinsic::fabs: |
1246 | ISDs.push_back(ISD::FABS); |
1247 | break; |
1248 | case Intrinsic::canonicalize: |
1249 | ISDs.push_back(ISD::FCANONICALIZE); |
1250 | break; |
1251 | case Intrinsic::minnum: |
1252 | ISDs.push_back(ISD::FMINNUM); |
1253 | if (FMF.noNaNs()) |
1254 | ISDs.push_back(ISD::FMINIMUM); |
1255 | break; |
1256 | case Intrinsic::maxnum: |
1257 | ISDs.push_back(ISD::FMAXNUM); |
1258 | if (FMF.noNaNs()) |
1259 | ISDs.push_back(ISD::FMAXIMUM); |
1260 | break; |
1261 | case Intrinsic::copysign: |
1262 | ISDs.push_back(ISD::FCOPYSIGN); |
1263 | break; |
1264 | case Intrinsic::floor: |
1265 | ISDs.push_back(ISD::FFLOOR); |
1266 | break; |
1267 | case Intrinsic::ceil: |
1268 | ISDs.push_back(ISD::FCEIL); |
1269 | break; |
1270 | case Intrinsic::trunc: |
1271 | ISDs.push_back(ISD::FTRUNC); |
1272 | break; |
1273 | case Intrinsic::nearbyint: |
1274 | ISDs.push_back(ISD::FNEARBYINT); |
1275 | break; |
1276 | case Intrinsic::rint: |
1277 | ISDs.push_back(ISD::FRINT); |
1278 | break; |
1279 | case Intrinsic::round: |
1280 | ISDs.push_back(ISD::FROUND); |
1281 | break; |
1282 | case Intrinsic::pow: |
1283 | ISDs.push_back(ISD::FPOW); |
1284 | break; |
1285 | case Intrinsic::fma: |
1286 | ISDs.push_back(ISD::FMA); |
1287 | break; |
1288 | case Intrinsic::fmuladd: |
1289 | ISDs.push_back(ISD::FMA); |
1290 | break; |
1291 | case Intrinsic::experimental_constrained_fmuladd: |
1292 | ISDs.push_back(ISD::STRICT_FMA); |
1293 | break; |
1294 | // FIXME: We should return 0 whenever getIntrinsicCost == TCC_Free. |
1295 | case Intrinsic::lifetime_start: |
1296 | case Intrinsic::lifetime_end: |
1297 | case Intrinsic::sideeffect: |
1298 | return 0; |
1299 | case Intrinsic::masked_store: |
1300 | return ConcreteTTI->getMaskedMemoryOpCost(Instruction::Store, Tys[0], 0, |
1301 | 0); |
1302 | case Intrinsic::masked_load: |
1303 | return ConcreteTTI->getMaskedMemoryOpCost(Instruction::Load, RetTy, 0, 0); |
1304 | case Intrinsic::experimental_vector_reduce_add: |
1305 | return ConcreteTTI->getArithmeticReductionCost(Instruction::Add, Tys[0], |
1306 | /*IsPairwiseForm=*/false); |
1307 | case Intrinsic::experimental_vector_reduce_mul: |
1308 | return ConcreteTTI->getArithmeticReductionCost(Instruction::Mul, Tys[0], |
1309 | /*IsPairwiseForm=*/false); |
1310 | case Intrinsic::experimental_vector_reduce_and: |
1311 | return ConcreteTTI->getArithmeticReductionCost(Instruction::And, Tys[0], |
1312 | /*IsPairwiseForm=*/false); |
1313 | case Intrinsic::experimental_vector_reduce_or: |
1314 | return ConcreteTTI->getArithmeticReductionCost(Instruction::Or, Tys[0], |
1315 | /*IsPairwiseForm=*/false); |
1316 | case Intrinsic::experimental_vector_reduce_xor: |
1317 | return ConcreteTTI->getArithmeticReductionCost(Instruction::Xor, Tys[0], |
1318 | /*IsPairwiseForm=*/false); |
1319 | case Intrinsic::experimental_vector_reduce_v2_fadd: |
1320 | return ConcreteTTI->getArithmeticReductionCost( |
1321 | Instruction::FAdd, Tys[0], |
1322 | /*IsPairwiseForm=*/false); // FIXME: Add new flag for cost of strict |
1323 | // reductions. |
1324 | case Intrinsic::experimental_vector_reduce_v2_fmul: |
1325 | return ConcreteTTI->getArithmeticReductionCost( |
1326 | Instruction::FMul, Tys[0], |
1327 | /*IsPairwiseForm=*/false); // FIXME: Add new flag for cost of strict |
1328 | // reductions. |
1329 | case Intrinsic::experimental_vector_reduce_smax: |
1330 | case Intrinsic::experimental_vector_reduce_smin: |
1331 | case Intrinsic::experimental_vector_reduce_fmax: |
1332 | case Intrinsic::experimental_vector_reduce_fmin: |
1333 | return ConcreteTTI->getMinMaxReductionCost( |
1334 | Tys[0], CmpInst::makeCmpResultType(Tys[0]), /*IsPairwiseForm=*/false, |
1335 | /*IsUnsigned=*/true); |
1336 | case Intrinsic::experimental_vector_reduce_umax: |
1337 | case Intrinsic::experimental_vector_reduce_umin: |
1338 | return ConcreteTTI->getMinMaxReductionCost( |
1339 | Tys[0], CmpInst::makeCmpResultType(Tys[0]), /*IsPairwiseForm=*/false, |
1340 | /*IsUnsigned=*/false); |
1341 | case Intrinsic::sadd_sat: |
1342 | case Intrinsic::ssub_sat: { |
1343 | Type *CondTy = RetTy->getWithNewBitWidth(1); |
1344 | |
1345 | Type *OpTy = StructType::create({RetTy, CondTy}); |
1346 | Intrinsic::ID OverflowOp = IID == Intrinsic::sadd_sat |
1347 | ? Intrinsic::sadd_with_overflow |
1348 | : Intrinsic::ssub_with_overflow; |
1349 | |
1350 | // SatMax -> Overflow && SumDiff < 0 |
1351 | // SatMin -> Overflow && SumDiff >= 0 |
1352 | unsigned Cost = 0; |
1353 | Cost += ConcreteTTI->getIntrinsicInstrCost( |
1354 | OverflowOp, OpTy, {RetTy, RetTy}, FMF, ScalarizationCostPassed); |
1355 | Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, |
1356 | CondTy, nullptr); |
1357 | Cost += 2 * ConcreteTTI->getCmpSelInstrCost(BinaryOperator::Select, RetTy, |
1358 | CondTy, nullptr); |
1359 | return Cost; |
1360 | } |
1361 | case Intrinsic::uadd_sat: |
1362 | case Intrinsic::usub_sat: { |
1363 | Type *CondTy = RetTy->getWithNewBitWidth(1); |
1364 | |
1365 | Type *OpTy = StructType::create({RetTy, CondTy}); |
1366 | Intrinsic::ID OverflowOp = IID == Intrinsic::uadd_sat |
1367 | ? Intrinsic::uadd_with_overflow |
1368 | : Intrinsic::usub_with_overflow; |
1369 | |
1370 | unsigned Cost = 0; |
1371 | Cost += ConcreteTTI->getIntrinsicInstrCost( |
1372 | OverflowOp, OpTy, {RetTy, RetTy}, FMF, ScalarizationCostPassed); |
1373 | Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::Select, RetTy, |
1374 | CondTy, nullptr); |
1375 | return Cost; |
1376 | } |
1377 | case Intrinsic::smul_fix: |
1378 | case Intrinsic::umul_fix: { |
1379 | unsigned ExtSize = RetTy->getScalarSizeInBits() * 2; |
1380 | Type *ExtTy = RetTy->getWithNewBitWidth(ExtSize); |
1381 | |
1382 | unsigned ExtOp = |
1383 | IID == Intrinsic::smul_fix ? Instruction::SExt : Instruction::ZExt; |
1384 | |
1385 | unsigned Cost = 0; |
1386 | Cost += 2 * ConcreteTTI->getCastInstrCost(ExtOp, ExtTy, RetTy); |
1387 | Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Mul, ExtTy); |
1388 | Cost += |
1389 | 2 * ConcreteTTI->getCastInstrCost(Instruction::Trunc, RetTy, ExtTy); |
1390 | Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::LShr, RetTy, |
1391 | TTI::OK_AnyValue, |
1392 | TTI::OK_UniformConstantValue); |
1393 | Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Shl, RetTy, |
1394 | TTI::OK_AnyValue, |
1395 | TTI::OK_UniformConstantValue); |
1396 | Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Or, RetTy); |
1397 | return Cost; |
1398 | } |
1399 | case Intrinsic::sadd_with_overflow: |
1400 | case Intrinsic::ssub_with_overflow: { |
1401 | Type *SumTy = RetTy->getContainedType(0); |
1402 | Type *OverflowTy = RetTy->getContainedType(1); |
1403 | unsigned Opcode = IID == Intrinsic::sadd_with_overflow |
1404 | ? BinaryOperator::Add |
1405 | : BinaryOperator::Sub; |
1406 | |
1407 | // LHSSign -> LHS >= 0 |
1408 | // RHSSign -> RHS >= 0 |
1409 | // SumSign -> Sum >= 0 |
1410 | // |
1411 | // Add: |
1412 | // Overflow -> (LHSSign == RHSSign) && (LHSSign != SumSign) |
1413 | // Sub: |
1414 | // Overflow -> (LHSSign != RHSSign) && (LHSSign != SumSign) |
1415 | unsigned Cost = 0; |
1416 | Cost += ConcreteTTI->getArithmeticInstrCost(Opcode, SumTy); |
1417 | Cost += 3 * ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, SumTy, |
1418 | OverflowTy, nullptr); |
1419 | Cost += 2 * ConcreteTTI->getCmpSelInstrCost( |
1420 | BinaryOperator::ICmp, OverflowTy, OverflowTy, nullptr); |
1421 | Cost += |
1422 | ConcreteTTI->getArithmeticInstrCost(BinaryOperator::And, OverflowTy); |
1423 | return Cost; |
1424 | } |
1425 | case Intrinsic::uadd_with_overflow: |
1426 | case Intrinsic::usub_with_overflow: { |
1427 | Type *SumTy = RetTy->getContainedType(0); |
1428 | Type *OverflowTy = RetTy->getContainedType(1); |
1429 | unsigned Opcode = IID == Intrinsic::uadd_with_overflow |
1430 | ? BinaryOperator::Add |
1431 | : BinaryOperator::Sub; |
1432 | |
1433 | unsigned Cost = 0; |
1434 | Cost += ConcreteTTI->getArithmeticInstrCost(Opcode, SumTy); |
1435 | Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, SumTy, |
1436 | OverflowTy, nullptr); |
1437 | return Cost; |
1438 | } |
1439 | case Intrinsic::smul_with_overflow: |
1440 | case Intrinsic::umul_with_overflow: { |
1441 | Type *MulTy = RetTy->getContainedType(0); |
1442 | Type *OverflowTy = RetTy->getContainedType(1); |
1443 | unsigned ExtSize = MulTy->getScalarSizeInBits() * 2; |
1444 | Type *ExtTy = MulTy->getWithNewBitWidth(ExtSize); |
1445 | |
1446 | unsigned ExtOp = |
1447 | IID == Intrinsic::smul_fix ? Instruction::SExt : Instruction::ZExt; |
1448 | |
1449 | unsigned Cost = 0; |
1450 | Cost += 2 * ConcreteTTI->getCastInstrCost(ExtOp, ExtTy, MulTy); |
1451 | Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Mul, ExtTy); |
1452 | Cost += |
1453 | 2 * ConcreteTTI->getCastInstrCost(Instruction::Trunc, MulTy, ExtTy); |
1454 | Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::LShr, MulTy, |
1455 | TTI::OK_AnyValue, |
1456 | TTI::OK_UniformConstantValue); |
1457 | |
1458 | if (IID == Intrinsic::smul_with_overflow) |
1459 | Cost += ConcreteTTI->getArithmeticInstrCost( |
1460 | Instruction::AShr, MulTy, TTI::OK_AnyValue, |
1461 | TTI::OK_UniformConstantValue); |
1462 | |
1463 | Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, MulTy, |
1464 | OverflowTy, nullptr); |
1465 | return Cost; |
1466 | } |
1467 | case Intrinsic::ctpop: |
1468 | ISDs.push_back(ISD::CTPOP); |
1469 | // In case of legalization use TCC_Expensive. This is cheaper than a |
1470 | // library call but still not a cheap instruction. |
1471 | SingleCallCost = TargetTransformInfo::TCC_Expensive; |
1472 | break; |
1473 | // FIXME: ctlz, cttz, ... |
1474 | case Intrinsic::bswap: |
1475 | ISDs.push_back(ISD::BSWAP); |
1476 | break; |
1477 | case Intrinsic::bitreverse: |
1478 | ISDs.push_back(ISD::BITREVERSE); |
1479 | break; |
1480 | } |
1481 | |
1482 | const TargetLoweringBase *TLI = getTLI(); |
1483 | std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, RetTy); |
1484 | |
1485 | SmallVector<unsigned, 2> LegalCost; |
1486 | SmallVector<unsigned, 2> CustomCost; |
1487 | for (unsigned ISD : ISDs) { |
1488 | if (TLI->isOperationLegalOrPromote(ISD, LT.second)) { |
1489 | if (IID == Intrinsic::fabs && LT.second.isFloatingPoint() && |
1490 | TLI->isFAbsFree(LT.second)) { |
1491 | return 0; |
1492 | } |
1493 | |
1494 | // The operation is legal. Assume it costs 1. |
1495 | // If the type is split to multiple registers, assume that there is some |
1496 | // overhead to this. |
1497 | // TODO: Once we have extract/insert subvector cost we need to use them. |
1498 | if (LT.first > 1) |
1499 | LegalCost.push_back(LT.first * 2); |
1500 | else |
1501 | LegalCost.push_back(LT.first * 1); |
1502 | } else if (!TLI->isOperationExpand(ISD, LT.second)) { |
1503 | // If the operation is custom lowered then assume |
1504 | // that the code is twice as expensive. |
1505 | CustomCost.push_back(LT.first * 2); |
1506 | } |
1507 | } |
1508 | |
1509 | auto MinLegalCostI = std::min_element(LegalCost.begin(), LegalCost.end()); |
1510 | if (MinLegalCostI != LegalCost.end()) |
1511 | return *MinLegalCostI; |
1512 | |
1513 | auto MinCustomCostI = |
1514 | std::min_element(CustomCost.begin(), CustomCost.end()); |
1515 | if (MinCustomCostI != CustomCost.end()) |
1516 | return *MinCustomCostI; |
1517 | |
1518 | // If we can't lower fmuladd into an FMA estimate the cost as a floating |
1519 | // point mul followed by an add. |
1520 | if (IID == Intrinsic::fmuladd) |
1521 | return ConcreteTTI->getArithmeticInstrCost(BinaryOperator::FMul, RetTy) + |
1522 | ConcreteTTI->getArithmeticInstrCost(BinaryOperator::FAdd, RetTy); |
1523 | if (IID == Intrinsic::experimental_constrained_fmuladd) |
1524 | return ConcreteTTI->getIntrinsicCost( |
1525 | Intrinsic::experimental_constrained_fmul, RetTy, Tys, |
1526 | nullptr) + |
1527 | ConcreteTTI->getIntrinsicCost( |
1528 | Intrinsic::experimental_constrained_fadd, RetTy, Tys, nullptr); |
1529 | |
1530 | // Else, assume that we need to scalarize this intrinsic. For math builtins |
1531 | // this will emit a costly libcall, adding call overhead and spills. Make it |
1532 | // very expensive. |
1533 | if (RetTy->isVectorTy()) { |
1534 | unsigned ScalarizationCost = |
1535 | ((ScalarizationCostPassed != std::numeric_limits<unsigned>::max()) |
1536 | ? ScalarizationCostPassed |
1537 | : getScalarizationOverhead(RetTy, true, false)); |
1538 | unsigned ScalarCalls = RetTy->getVectorNumElements(); |
1539 | SmallVector<Type *, 4> ScalarTys; |
1540 | for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) { |
1541 | Type *Ty = Tys[i]; |
1542 | if (Ty->isVectorTy()) |
1543 | Ty = Ty->getScalarType(); |
1544 | ScalarTys.push_back(Ty); |
1545 | } |
1546 | unsigned ScalarCost = ConcreteTTI->getIntrinsicInstrCost( |
1547 | IID, RetTy->getScalarType(), ScalarTys, FMF); |
1548 | for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) { |
1549 | if (Tys[i]->isVectorTy()) { |
1550 | if (ScalarizationCostPassed == std::numeric_limits<unsigned>::max()) |
1551 | ScalarizationCost += getScalarizationOverhead(Tys[i], false, true); |
1552 | ScalarCalls = std::max(ScalarCalls, Tys[i]->getVectorNumElements()); |
1553 | } |
1554 | } |
1555 | |
1556 | return ScalarCalls * ScalarCost + ScalarizationCost; |
1557 | } |
1558 | |
1559 | // This is going to be turned into a library call, make it expensive. |
1560 | return SingleCallCost; |
1561 | } |
1562 | |
1563 | /// Compute a cost of the given call instruction. |
1564 | /// |
1565 | /// Compute the cost of calling function F with return type RetTy and |
1566 | /// argument types Tys. F might be nullptr, in this case the cost of an |
1567 | /// arbitrary call with the specified signature will be returned. |
1568 | /// This is used, for instance, when we estimate call of a vector |
1569 | /// counterpart of the given function. |
1570 | /// \param F Called function, might be nullptr. |
1571 | /// \param RetTy Return value types. |
1572 | /// \param Tys Argument types. |
1573 | /// \returns The cost of Call instruction. |
1574 | unsigned getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys) { |
1575 | return 10; |
1576 | } |
1577 | |
1578 | unsigned getNumberOfParts(Type *Tp) { |
1579 | std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(DL, Tp); |
1580 | return LT.first; |
1581 | } |
1582 | |
1583 | unsigned getAddressComputationCost(Type *Ty, ScalarEvolution *, |
1584 | const SCEV *) { |
1585 | return 0; |
1586 | } |
1587 | |
1588 | /// Try to calculate arithmetic and shuffle op costs for reduction operations. |
1589 | /// We're assuming that reduction operation are performing the following way: |
1590 | /// 1. Non-pairwise reduction |
1591 | /// %val1 = shufflevector<n x t> %val, <n x t> %undef, |
1592 | /// <n x i32> <i32 n/2, i32 n/2 + 1, ..., i32 n, i32 undef, ..., i32 undef> |
1593 | /// \----------------v-------------/ \----------v------------/ |
1594 | /// n/2 elements n/2 elements |
1595 | /// %red1 = op <n x t> %val, <n x t> val1 |
1596 | /// After this operation we have a vector %red1 where only the first n/2 |
1597 | /// elements are meaningful, the second n/2 elements are undefined and can be |
1598 | /// dropped. All other operations are actually working with the vector of |
1599 | /// length n/2, not n, though the real vector length is still n. |
1600 | /// %val2 = shufflevector<n x t> %red1, <n x t> %undef, |
1601 | /// <n x i32> <i32 n/4, i32 n/4 + 1, ..., i32 n/2, i32 undef, ..., i32 undef> |
1602 | /// \----------------v-------------/ \----------v------------/ |
1603 | /// n/4 elements 3*n/4 elements |
1604 | /// %red2 = op <n x t> %red1, <n x t> val2 - working with the vector of |
1605 | /// length n/2, the resulting vector has length n/4 etc. |
1606 | /// 2. Pairwise reduction: |
1607 | /// Everything is the same except for an additional shuffle operation which |
1608 | /// is used to produce operands for pairwise kind of reductions. |
1609 | /// %val1 = shufflevector<n x t> %val, <n x t> %undef, |
1610 | /// <n x i32> <i32 0, i32 2, ..., i32 n-2, i32 undef, ..., i32 undef> |
1611 | /// \-------------v----------/ \----------v------------/ |
1612 | /// n/2 elements n/2 elements |
1613 | /// %val2 = shufflevector<n x t> %val, <n x t> %undef, |
1614 | /// <n x i32> <i32 1, i32 3, ..., i32 n-1, i32 undef, ..., i32 undef> |
1615 | /// \-------------v----------/ \----------v------------/ |
1616 | /// n/2 elements n/2 elements |
1617 | /// %red1 = op <n x t> %val1, <n x t> val2 |
1618 | /// Again, the operation is performed on <n x t> vector, but the resulting |
1619 | /// vector %red1 is <n/2 x t> vector. |
1620 | /// |
1621 | /// The cost model should take into account that the actual length of the |
1622 | /// vector is reduced on each iteration. |
1623 | unsigned getArithmeticReductionCost(unsigned Opcode, Type *Ty, |
1624 | bool IsPairwise) { |
1625 | assert(Ty->isVectorTy() && "Expect a vector type")((Ty->isVectorTy() && "Expect a vector type") ? static_cast <void> (0) : __assert_fail ("Ty->isVectorTy() && \"Expect a vector type\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/CodeGen/BasicTTIImpl.h" , 1625, __PRETTY_FUNCTION__)); |
1626 | Type *ScalarTy = Ty->getVectorElementType(); |
1627 | unsigned NumVecElts = Ty->getVectorNumElements(); |
1628 | unsigned NumReduxLevels = Log2_32(NumVecElts); |
1629 | unsigned ArithCost = 0; |
1630 | unsigned ShuffleCost = 0; |
1631 | auto *ConcreteTTI = static_cast<T *>(this); |
1632 | std::pair<unsigned, MVT> LT = |
1633 | ConcreteTTI->getTLI()->getTypeLegalizationCost(DL, Ty); |
1634 | unsigned LongVectorCount = 0; |
1635 | unsigned MVTLen = |
1636 | LT.second.isVector() ? LT.second.getVectorNumElements() : 1; |
1637 | while (NumVecElts > MVTLen) { |
1638 | NumVecElts /= 2; |
1639 | Type *SubTy = VectorType::get(ScalarTy, NumVecElts); |
1640 | // Each halving step is charged one SK_ExtractSubvector shuffle at offset NumVecElts; assume the pairwise shuffles add a cost, hence the (IsPairwise + 1) factor. |
1641 | ShuffleCost += (IsPairwise + 1) * |
1642 | ConcreteTTI->getShuffleCost(TTI::SK_ExtractSubvector, Ty, |
1643 | NumVecElts, SubTy); |
1644 | ArithCost += ConcreteTTI->getArithmeticInstrCost(Opcode, SubTy); |
1645 | Ty = SubTy; |
1646 | ++LongVectorCount; |
1647 | } |
1648 | |
1649 | NumReduxLevels -= LongVectorCount; |
1650 | |
1651 | // The loop above stops once NumVecElts is no larger than MVTLen, the element |
1652 | // count of the legalized type (1 if that type is not a vector). The remaining |
1653 | // NumReduxLevels reduction levels are therefore all costed on the same |
1654 | // architecture-dependent type, which is now held in Ty. |
1655 | |
1656 | // Non pairwise reductions need one shuffle per reduction level. Pairwise |
1657 | // reductions need two shuffles on every level, but the last one. On that |
1658 | // level one of the shuffles is <0, u, u, ...> which is identity. The NumReduxLevels >= 1 check keeps the unsigned NumReduxLevels - 1 from wrapping. |
1659 | unsigned NumShuffles = NumReduxLevels; |
1660 | if (IsPairwise && NumReduxLevels >= 1) |
1661 | NumShuffles += NumReduxLevels - 1; |
1662 | ShuffleCost += NumShuffles * |
1663 | ConcreteTTI->getShuffleCost(TTI::SK_PermuteSingleSrc, Ty, |
1664 | 0, Ty); |
1665 | ArithCost += NumReduxLevels * |
1666 | ConcreteTTI->getArithmeticInstrCost(Opcode, Ty); |
1667 | return ShuffleCost + ArithCost + |
1668 | ConcreteTTI->getVectorInstrCost(Instruction::ExtractElement, Ty, 0); |
1669 | } |
1670 | |
1671 | /// Try to calculate op costs for min/max reduction operations. Each reduction level is modelled as one compare (FCmp when Ty is floating point, ICmp otherwise) plus one Select. |
1672 | /// \param CondTy Conditional type for the Select instruction. Its element type is read via getVectorElementType(), so a vector type is expected; it is rebuilt with the halved element count on every splitting step. The trailing unnamed bool parameter is ignored. |
1673 | unsigned getMinMaxReductionCost(Type *Ty, Type *CondTy, bool IsPairwise, |
1674 | bool) { |
1675 | assert(Ty->isVectorTy() && "Expect a vector type")((Ty->isVectorTy() && "Expect a vector type") ? static_cast <void> (0) : __assert_fail ("Ty->isVectorTy() && \"Expect a vector type\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/CodeGen/BasicTTIImpl.h" , 1675, __PRETTY_FUNCTION__)); |
1676 | Type *ScalarTy = Ty->getVectorElementType(); |
1677 | Type *ScalarCondTy = CondTy->getVectorElementType(); |
1678 | unsigned NumVecElts = Ty->getVectorNumElements(); |
1679 | unsigned NumReduxLevels = Log2_32(NumVecElts); |
1680 | unsigned CmpOpcode; |
1681 | if (Ty->isFPOrFPVectorTy()) { |
1682 | CmpOpcode = Instruction::FCmp; |
1683 | } else { |
1684 | assert(Ty->isIntOrIntVectorTy() &&((Ty->isIntOrIntVectorTy() && "expecting floating point or integer type for min/max reduction" ) ? static_cast<void> (0) : __assert_fail ("Ty->isIntOrIntVectorTy() && \"expecting floating point or integer type for min/max reduction\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/CodeGen/BasicTTIImpl.h" , 1685, __PRETTY_FUNCTION__)) |
1685 | "expecting floating point or integer type for min/max reduction")((Ty->isIntOrIntVectorTy() && "expecting floating point or integer type for min/max reduction" ) ? static_cast<void> (0) : __assert_fail ("Ty->isIntOrIntVectorTy() && \"expecting floating point or integer type for min/max reduction\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/CodeGen/BasicTTIImpl.h" , 1685, __PRETTY_FUNCTION__)); |
1686 | CmpOpcode = Instruction::ICmp; |
1687 | } |
1688 | unsigned MinMaxCost = 0; |
1689 | unsigned ShuffleCost = 0; |
1690 | auto *ConcreteTTI = static_cast<T *>(this); |
1691 | std::pair<unsigned, MVT> LT = |
1692 | ConcreteTTI->getTLI()->getTypeLegalizationCost(DL, Ty); |
1693 | unsigned LongVectorCount = 0; |
1694 | unsigned MVTLen = |
1695 | LT.second.isVector() ? LT.second.getVectorNumElements() : 1; |
1696 | while (NumVecElts > MVTLen) { |
1697 | NumVecElts /= 2; |
1698 | Type *SubTy = VectorType::get(ScalarTy, NumVecElts); |
1699 | CondTy = VectorType::get(ScalarCondTy, NumVecElts); |
1700 | |
1701 | // Each halving step is charged one SK_ExtractSubvector shuffle at offset NumVecElts; assume the pairwise shuffles add a cost, hence the (IsPairwise + 1) factor. |
1702 | ShuffleCost += (IsPairwise + 1) * |
1703 | ConcreteTTI->getShuffleCost(TTI::SK_ExtractSubvector, Ty, |
1704 | NumVecElts, SubTy); |
1705 | MinMaxCost += |
1706 | ConcreteTTI->getCmpSelInstrCost(CmpOpcode, SubTy, CondTy, nullptr) + |
1707 | ConcreteTTI->getCmpSelInstrCost(Instruction::Select, SubTy, CondTy, |
1708 | nullptr); |
1709 | Ty = SubTy; |
1710 | ++LongVectorCount; |
1711 | } |
1712 | |
1713 | NumReduxLevels -= LongVectorCount; |
1714 | |
1715 | // The minimal length of the vector is limited by the real length of vector |
1716 | // operations performed on the current platform. That's why several final |
1717 | // reduction operations are performed on the vectors with the same |
1718 | // architecture-dependent length (the Ty and CondTy left by the loop above). |
1719 | |
1720 | // Non pairwise reductions need one shuffle per reduction level. Pairwise |
1721 | // reductions need two shuffles on every level, but the last one. On that |
1722 | // level one of the shuffles is <0, u, u, ...> which is identity. The NumReduxLevels >= 1 check keeps the unsigned NumReduxLevels - 1 from wrapping. |
1723 | unsigned NumShuffles = NumReduxLevels; |
1724 | if (IsPairwise && NumReduxLevels >= 1) |
1725 | NumShuffles += NumReduxLevels - 1; |
1726 | ShuffleCost += NumShuffles * |
1727 | ConcreteTTI->getShuffleCost(TTI::SK_PermuteSingleSrc, Ty, |
1728 | 0, Ty); |
1729 | MinMaxCost += |
1730 | NumReduxLevels * |
1731 | (ConcreteTTI->getCmpSelInstrCost(CmpOpcode, Ty, CondTy, nullptr) + |
1732 | ConcreteTTI->getCmpSelInstrCost(Instruction::Select, Ty, CondTy, |
1733 | nullptr)); |
1734 | // The last min/max should be in vector registers and we counted it above. |
1735 | // So just need a single extractelement (of element 0) to obtain the scalar result. |
1736 | return ShuffleCost + MinMaxCost + |
1737 | ConcreteTTI->getVectorInstrCost(Instruction::ExtractElement, Ty, 0); |
1738 | } |
1739 | |
1740 | unsigned getVectorSplitCost() { return 1; } | // Always returns the constant 1. NOTE(review): presumably the unit cost charged for splitting a vector - confirm against callers.
1741 | |
1742 | /// @} |
1743 | }; |
1744 | |
1745 | /// Concrete BasicTTIImpl that can be used if no further customization |
1746 | /// is needed. It instantiates BasicTTIImplBase on itself and befriends that base so the base can reach the private getST()/getTLI() accessors, which return the stored ST and TLI pointers. |
1747 | class BasicTTIImpl : public BasicTTIImplBase<BasicTTIImpl> { |
1748 | using BaseT = BasicTTIImplBase<BasicTTIImpl>; |
1749 | |
1750 | friend class BasicTTIImplBase<BasicTTIImpl>; |
1751 | |
1752 | const TargetSubtargetInfo *ST; |
1753 | const TargetLoweringBase *TLI; |
1754 | |
1755 | const TargetSubtargetInfo *getST() const { return ST; } |
1756 | const TargetLoweringBase *getTLI() const { return TLI; } |
1757 | |
1758 | public: |
1759 | explicit BasicTTIImpl(const TargetMachine *TM, const Function &F); |
1760 | }; |
1761 | |
1762 | } // end namespace llvm |
1763 | |
1764 | #endif // LLVM_CODEGEN_BASICTTIIMPL_H |