Bug Summary

File: llvm/include/llvm/CodeGen/BasicTTIImpl.h
Warning: line 430, column 36
Called C++ object pointer is null
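
For orientation, the sketch below (not the actual LLVM code path) shows the shape of the defect the analyzer is tracking: a dyn_cast-style conversion fails and yields a null pointer, and a later call dereferences that pointer. The types and helper names (asInstruction, describe) are hypothetical stand-ins, not functions from the report; the final if (I) shows the guard that would keep the null pointer from being dereferenced.

#include <iostream>

// Hypothetical stand-ins for an LLVM-style class hierarchy.
struct User {
  virtual ~User() = default;
};
struct Instruction : User {
  const char *name() const { return "instruction"; }
};

// Simplified analogue of llvm::dyn_cast<Instruction>: returns nullptr when
// the User is not actually an Instruction.
const Instruction *asInstruction(const User *U) {
  return dynamic_cast<const Instruction *>(U);
}

// Dereferences I unconditionally -- the kind of call the analyzer reports as
// "Called C++ object pointer is null" when a null I can reach it.
void describe(const Instruction *I) {
  std::cout << I->name() << '\n'; // null dereference if I == nullptr
}

int main() {
  User PlainUser;                                   // not an Instruction
  const Instruction *I = asInstruction(&PlainUser); // I is null here
  if (I)                                            // guard preventing the bug
    describe(I);
  else
    std::cout << "not an instruction\n";
  return 0;
}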

Annotated Source Code


clang -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name AMDGPUTargetTransformInfo.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mthread-model posix -mframe-pointer=none -fmath-errno -fno-rounding-math -masm-verbose -mconstructor-aliases -munwind-tables -target-cpu x86-64 -dwarf-column-info -fno-split-dwarf-inlining -debugger-tuning=gdb -ffunction-sections -fdata-sections -resource-dir /usr/lib/llvm-11/lib/clang/11.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I /build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/build-llvm/lib/Target/AMDGPU -I /build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/AMDGPU -I /build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/build-llvm/include -I /build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0/backward -internal-isystem /usr/local/include -internal-isystem /usr/lib/llvm-11/lib/clang/11.0.0/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir /build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/build-llvm/lib/Target/AMDGPU -fdebug-prefix-map=/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347=. -ferror-limit 19 -fmessage-length 0 -fvisibility hidden -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -fobjc-runtime=gcc -fdiagnostics-show-option -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -o /tmp/scan-build-2020-03-09-184146-41876-1 -x c++ /build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp

/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp

1//===- AMDGPUTargetTransformInfo.cpp - AMDGPU specific TTI pass -----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// \file
10// This file implements a TargetTransformInfo analysis pass specific to the
11// AMDGPU target machine. It uses the target's detailed information to provide
12// more precise answers to certain TTI queries, while letting the target
13// independent and default TTI implementations handle the rest.
14//
15//===----------------------------------------------------------------------===//
16
17#include "AMDGPUTargetTransformInfo.h"
18#include "AMDGPUSubtarget.h"
19#include "Utils/AMDGPUBaseInfo.h"
20#include "llvm/ADT/STLExtras.h"
21#include "llvm/Analysis/LoopInfo.h"
22#include "llvm/Analysis/TargetTransformInfo.h"
23#include "llvm/Analysis/ValueTracking.h"
24#include "llvm/CodeGen/ISDOpcodes.h"
25#include "llvm/CodeGen/ValueTypes.h"
26#include "llvm/IR/Argument.h"
27#include "llvm/IR/Attributes.h"
28#include "llvm/IR/BasicBlock.h"
29#include "llvm/IR/CallingConv.h"
30#include "llvm/IR/DataLayout.h"
31#include "llvm/IR/DerivedTypes.h"
32#include "llvm/IR/Function.h"
33#include "llvm/IR/Instruction.h"
34#include "llvm/IR/Instructions.h"
35#include "llvm/IR/IntrinsicInst.h"
36#include "llvm/IR/Module.h"
37#include "llvm/IR/PatternMatch.h"
38#include "llvm/IR/Type.h"
39#include "llvm/IR/Value.h"
40#include "llvm/MC/SubtargetFeature.h"
41#include "llvm/Support/Casting.h"
42#include "llvm/Support/CommandLine.h"
43#include "llvm/Support/Debug.h"
44#include "llvm/Support/ErrorHandling.h"
45#include "llvm/Support/MachineValueType.h"
46#include "llvm/Support/raw_ostream.h"
47#include "llvm/Target/TargetMachine.h"
48#include <algorithm>
49#include <cassert>
50#include <limits>
51#include <utility>
52
53using namespace llvm;
54
55#define DEBUG_TYPE "AMDGPUtti"
56
57static cl::opt<unsigned> UnrollThresholdPrivate(
58 "amdgpu-unroll-threshold-private",
59 cl::desc("Unroll threshold for AMDGPU if private memory used in a loop"),
60 cl::init(2700), cl::Hidden);
61
62static cl::opt<unsigned> UnrollThresholdLocal(
63 "amdgpu-unroll-threshold-local",
64 cl::desc("Unroll threshold for AMDGPU if local memory used in a loop"),
65 cl::init(1000), cl::Hidden);
66
67static cl::opt<unsigned> UnrollThresholdIf(
68 "amdgpu-unroll-threshold-if",
69 cl::desc("Unroll threshold increment for AMDGPU for each if statement inside loop"),
70 cl::init(150), cl::Hidden);
71
72static cl::opt<bool> UnrollRuntimeLocal(
73 "amdgpu-unroll-runtime-local",
74 cl::desc("Allow runtime unroll for AMDGPU if local memory used in a loop"),
75 cl::init(true), cl::Hidden);
76
77static cl::opt<bool> UseLegacyDA(
78 "amdgpu-use-legacy-divergence-analysis",
79 cl::desc("Enable legacy divergence analysis for AMDGPU"),
80 cl::init(false), cl::Hidden);
81
82static bool dependsOnLocalPhi(const Loop *L, const Value *Cond,
83 unsigned Depth = 0) {
84 const Instruction *I = dyn_cast<Instruction>(Cond);
85 if (!I)
86 return false;
87
88 for (const Value *V : I->operand_values()) {
89 if (!L->contains(I))
90 continue;
91 if (const PHINode *PHI = dyn_cast<PHINode>(V)) {
92 if (llvm::none_of(L->getSubLoops(), [PHI](const Loop* SubLoop) {
93 return SubLoop->contains(PHI); }))
94 return true;
95 } else if (Depth < 10 && dependsOnLocalPhi(L, V, Depth+1))
96 return true;
97 }
98 return false;
99}
100
101void AMDGPUTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
102 TTI::UnrollingPreferences &UP) {
103 const Function &F = *L->getHeader()->getParent();
104 UP.Threshold = AMDGPU::getIntegerAttribute(F, "amdgpu-unroll-threshold", 300);
105 UP.MaxCount = std::numeric_limits<unsigned>::max();
106 UP.Partial = true;
107
108 // TODO: Do we want runtime unrolling?
109
110 // Maximum alloca size that can fit in registers. Reserve 16 registers.
111 const unsigned MaxAlloca = (256 - 16) * 4;
112 unsigned ThresholdPrivate = UnrollThresholdPrivate;
113 unsigned ThresholdLocal = UnrollThresholdLocal;
114 unsigned MaxBoost = std::max(ThresholdPrivate, ThresholdLocal);
115 for (const BasicBlock *BB : L->getBlocks()) {
116 const DataLayout &DL = BB->getModule()->getDataLayout();
117 unsigned LocalGEPsSeen = 0;
118
119 if (llvm::any_of(L->getSubLoops(), [BB](const Loop* SubLoop) {
120 return SubLoop->contains(BB); }))
121 continue; // Block belongs to an inner loop.
122
123 for (const Instruction &I : *BB) {
124 // Unroll a loop which contains an "if" statement whose condition is
125 // defined by a PHI belonging to the loop. This may help to eliminate the
126 // if region and potentially even the PHI itself, saving on both divergence
127 // and registers used for the PHI.
128 // Add a small bonus for each such "if" statement.
129 if (const BranchInst *Br = dyn_cast<BranchInst>(&I)) {
130 if (UP.Threshold < MaxBoost && Br->isConditional()) {
131 BasicBlock *Succ0 = Br->getSuccessor(0);
132 BasicBlock *Succ1 = Br->getSuccessor(1);
133 if ((L->contains(Succ0) && L->isLoopExiting(Succ0)) ||
134 (L->contains(Succ1) && L->isLoopExiting(Succ1)))
135 continue;
136 if (dependsOnLocalPhi(L, Br->getCondition())) {
137 UP.Threshold += UnrollThresholdIf;
138 LLVM_DEBUG(dbgs() << "Set unroll threshold " << UP.Threshold
139 << " for loop:\n"
140 << *L << " due to " << *Br << '\n');
141 if (UP.Threshold >= MaxBoost)
142 return;
143 }
144 }
145 continue;
146 }
147
148 const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&I);
149 if (!GEP)
150 continue;
151
152 unsigned AS = GEP->getAddressSpace();
153 unsigned Threshold = 0;
154 if (AS == AMDGPUAS::PRIVATE_ADDRESS)
155 Threshold = ThresholdPrivate;
156 else if (AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS)
157 Threshold = ThresholdLocal;
158 else
159 continue;
160
161 if (UP.Threshold >= Threshold)
162 continue;
163
164 if (AS == AMDGPUAS::PRIVATE_ADDRESS) {
165 const Value *Ptr = GEP->getPointerOperand();
166 const AllocaInst *Alloca =
167 dyn_cast<AllocaInst>(GetUnderlyingObject(Ptr, DL));
168 if (!Alloca || !Alloca->isStaticAlloca())
169 continue;
170 Type *Ty = Alloca->getAllocatedType();
171 unsigned AllocaSize = Ty->isSized() ? DL.getTypeAllocSize(Ty) : 0;
172 if (AllocaSize > MaxAlloca)
173 continue;
174 } else if (AS == AMDGPUAS::LOCAL_ADDRESS ||
175 AS == AMDGPUAS::REGION_ADDRESS) {
176 LocalGEPsSeen++;
177 // Inhibit unroll for local memory if we have seen addressing not to
178 // a variable; most likely we will be unable to combine it.
179 // Do not unroll too-deep inner loops for local memory, to give a chance
180 // to unroll an outer loop for a more important reason.
181 if (LocalGEPsSeen > 1 || L->getLoopDepth() > 2 ||
182 (!isa<GlobalVariable>(GEP->getPointerOperand()) &&
183 !isa<Argument>(GEP->getPointerOperand())))
184 continue;
185 LLVM_DEBUG(dbgs() << "Allow unroll runtime for loop:\n"
186 << *L << " due to LDS use.\n");
187 UP.Runtime = UnrollRuntimeLocal;
188 }
189
190 // Check if GEP depends on a value defined by this loop itself.
191 bool HasLoopDef = false;
192 for (const Value *Op : GEP->operands()) {
193 const Instruction *Inst = dyn_cast<Instruction>(Op);
194 if (!Inst || L->isLoopInvariant(Op))
195 continue;
196
197 if (llvm::any_of(L->getSubLoops(), [Inst](const Loop* SubLoop) {
198 return SubLoop->contains(Inst); }))
199 continue;
200 HasLoopDef = true;
201 break;
202 }
203 if (!HasLoopDef)
204 continue;
205
206 // We want to do whatever we can to limit the number of alloca
207 // instructions that make it through to the code generator. allocas
208 // require us to use indirect addressing, which is slow and prone to
209 // compiler bugs. If this loop does an address calculation on an
210 // alloca ptr, then we want to use a higher than normal loop unroll
211 // threshold. This will give SROA a better chance to eliminate these
212 // allocas.
213 //
214 // We also want to have more unrolling for local memory to let ds
215 // instructions with different offsets combine.
216 //
217 // Don't use the maximum allowed value here as it will make some
218 // programs way too big.
219 UP.Threshold = Threshold;
220 LLVM_DEBUG(dbgs() << "Set unroll threshold " << Threshold
221 << " for loop:\n"
222 << *L << " due to " << *GEP << '\n');
223 if (UP.Threshold >= MaxBoost)
224 return;
225 }
226 }
227}
228
229unsigned GCNTTIImpl::getHardwareNumberOfRegisters(bool Vec) const {
230 // The concept of vector registers doesn't really exist. Some packed vector
231 // operations operate on the normal 32-bit registers.
232 return 256;
233}
234
235unsigned GCNTTIImpl::getNumberOfRegisters(bool Vec) const {
236 // This is really the number of registers to fill when vectorizing /
237 // interleaving loops, so we lie to avoid trying to use all registers.
238 return getHardwareNumberOfRegisters(Vec) >> 3;
239}
240
241unsigned GCNTTIImpl::getRegisterBitWidth(bool Vector) const {
242 return 32;
243}
244
245unsigned GCNTTIImpl::getMinVectorRegisterBitWidth() const {
246 return 32;
247}
248
249unsigned GCNTTIImpl::getLoadVectorFactor(unsigned VF, unsigned LoadSize,
250 unsigned ChainSizeInBytes,
251 VectorType *VecTy) const {
252 unsigned VecRegBitWidth = VF * LoadSize;
253 if (VecRegBitWidth > 128 && VecTy->getScalarSizeInBits() < 32)
254 // TODO: Support element-size less than 32bit?
255 return 128 / LoadSize;
256
257 return VF;
258}
259
260unsigned GCNTTIImpl::getStoreVectorFactor(unsigned VF, unsigned StoreSize,
261 unsigned ChainSizeInBytes,
262 VectorType *VecTy) const {
263 unsigned VecRegBitWidth = VF * StoreSize;
264 if (VecRegBitWidth > 128)
265 return 128 / StoreSize;
266
267 return VF;
268}
269
270unsigned GCNTTIImpl::getLoadStoreVecRegBitWidth(unsigned AddrSpace) const {
271 if (AddrSpace == AMDGPUAS::GLOBAL_ADDRESS ||
272 AddrSpace == AMDGPUAS::CONSTANT_ADDRESS ||
273 AddrSpace == AMDGPUAS::CONSTANT_ADDRESS_32BIT ||
274 AddrSpace == AMDGPUAS::BUFFER_FAT_POINTER) {
275 return 512;
276 }
277
278 if (AddrSpace == AMDGPUAS::FLAT_ADDRESS ||
279 AddrSpace == AMDGPUAS::LOCAL_ADDRESS ||
280 AddrSpace == AMDGPUAS::REGION_ADDRESS)
281 return 128;
282
283 if (AddrSpace == AMDGPUAS::PRIVATE_ADDRESS)
284 return 8 * ST->getMaxPrivateElementSize();
285
286 llvm_unreachable("unhandled address space");
287}
288
289bool GCNTTIImpl::isLegalToVectorizeMemChain(unsigned ChainSizeInBytes,
290 unsigned Alignment,
291 unsigned AddrSpace) const {
292 // We allow vectorization of flat stores, even though we may need to decompose
293 // them later if they may access private memory. We don't have enough context
294 // here, and legalization can handle it.
295 if (AddrSpace == AMDGPUAS::PRIVATE_ADDRESS) {
296 return (Alignment >= 4 || ST->hasUnalignedScratchAccess()) &&
297 ChainSizeInBytes <= ST->getMaxPrivateElementSize();
298 }
299 return true;
300}
301
302bool GCNTTIImpl::isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
303 unsigned Alignment,
304 unsigned AddrSpace) const {
305 return isLegalToVectorizeMemChain(ChainSizeInBytes, Alignment, AddrSpace);
306}
307
308bool GCNTTIImpl::isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
309 unsigned Alignment,
310 unsigned AddrSpace) const {
311 return isLegalToVectorizeMemChain(ChainSizeInBytes, Alignment, AddrSpace);
312}
313
314unsigned GCNTTIImpl::getMaxInterleaveFactor(unsigned VF) {
315 // Disable unrolling if the loop is not vectorized.
316 // TODO: Enable this again.
317 if (VF == 1)
318 return 1;
319
320 return 8;
321}
322
323bool GCNTTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst,
324 MemIntrinsicInfo &Info) const {
325 switch (Inst->getIntrinsicID()) {
326 case Intrinsic::amdgcn_atomic_inc:
327 case Intrinsic::amdgcn_atomic_dec:
328 case Intrinsic::amdgcn_ds_ordered_add:
329 case Intrinsic::amdgcn_ds_ordered_swap:
330 case Intrinsic::amdgcn_ds_fadd:
331 case Intrinsic::amdgcn_ds_fmin:
332 case Intrinsic::amdgcn_ds_fmax: {
333 auto *Ordering = dyn_cast<ConstantInt>(Inst->getArgOperand(2));
334 auto *Volatile = dyn_cast<ConstantInt>(Inst->getArgOperand(4));
335 if (!Ordering || !Volatile)
336 return false; // Invalid.
337
338 unsigned OrderingVal = Ordering->getZExtValue();
339 if (OrderingVal > static_cast<unsigned>(AtomicOrdering::SequentiallyConsistent))
340 return false;
341
342 Info.PtrVal = Inst->getArgOperand(0);
343 Info.Ordering = static_cast<AtomicOrdering>(OrderingVal);
344 Info.ReadMem = true;
345 Info.WriteMem = true;
346 Info.IsVolatile = !Volatile->isNullValue();
347 return true;
348 }
349 default:
350 return false;
351 }
352}
353
354int GCNTTIImpl::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
355 TTI::OperandValueKind Opd1Info,
356 TTI::OperandValueKind Opd2Info,
357 TTI::OperandValueProperties Opd1PropInfo,
358 TTI::OperandValueProperties Opd2PropInfo,
359 ArrayRef<const Value *> Args,
360 const Instruction *CxtI) {
361 EVT OrigTy = TLI->getValueType(DL, Ty);
362 if (!OrigTy.isSimple()) {
363 return BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
364 Opd1PropInfo, Opd2PropInfo);
365 }
366
367 // Legalize the type.
368 std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
369 int ISD = TLI->InstructionOpcodeToISD(Opcode);
370
371 // Because we don't have any legal vector operations, only legal vector
372 // types, we need to account for split vectors.
373 unsigned NElts = LT.second.isVector() ?
374 LT.second.getVectorNumElements() : 1;
375
376 MVT::SimpleValueType SLT = LT.second.getScalarType().SimpleTy;
377
378 switch (ISD) {
379 case ISD::SHL:
380 case ISD::SRL:
381 case ISD::SRA:
382 if (SLT == MVT::i64)
383 return get64BitInstrCost() * LT.first * NElts;
384
385 if (ST->has16BitInsts() && SLT == MVT::i16)
386 NElts = (NElts + 1) / 2;
387
388 // i32
389 return getFullRateInstrCost() * LT.first * NElts;
390 case ISD::ADD:
391 case ISD::SUB:
392 case ISD::AND:
393 case ISD::OR:
394 case ISD::XOR:
395 if (SLT == MVT::i64) {
396 // and, or and xor are typically split into 2 VALU instructions.
397 return 2 * getFullRateInstrCost() * LT.first * NElts;
398 }
399
400 if (ST->has16BitInsts() && SLT == MVT::i16)
401 NElts = (NElts + 1) / 2;
402
403 return LT.first * NElts * getFullRateInstrCost();
404 case ISD::MUL: {
405 const int QuarterRateCost = getQuarterRateInstrCost();
406 if (SLT == MVT::i64) {
407 const int FullRateCost = getFullRateInstrCost();
408 return (4 * QuarterRateCost + (2 * 2) * FullRateCost) * LT.first * NElts;
409 }
410
411 if (ST->has16BitInsts() && SLT == MVT::i16)
412 NElts = (NElts + 1) / 2;
413
414 // i32
415 return QuarterRateCost * NElts * LT.first;
416 }
417 case ISD::FADD:
418 case ISD::FSUB:
419 case ISD::FMUL:
420 if (SLT == MVT::f64)
421 return LT.first * NElts * get64BitInstrCost();
422
423 if (ST->has16BitInsts() && SLT == MVT::f16)
424 NElts = (NElts + 1) / 2;
425
426 if (SLT == MVT::f32 || SLT == MVT::f16)
427 return LT.first * NElts * getFullRateInstrCost();
428 break;
429 case ISD::FDIV:
430 case ISD::FREM:
431 // FIXME: frem should be handled separately. The fdiv in it is most of it,
432 // but the current lowering is also not entirely correct.
433 if (SLT == MVT::f64) {
434 int Cost = 4 * get64BitInstrCost() + 7 * getQuarterRateInstrCost();
435 // Add cost of workaround.
436 if (!ST->hasUsableDivScaleConditionOutput())
437 Cost += 3 * getFullRateInstrCost();
438
439 return LT.first * Cost * NElts;
440 }
441
442 if (!Args.empty() && match(Args[0], PatternMatch::m_FPOne())) {
443 // TODO: This is more complicated, unsafe flags etc.
444 if ((SLT == MVT::f32 && !HasFP32Denormals) ||
445 (SLT == MVT::f16 && ST->has16BitInsts())) {
446 return LT.first * getQuarterRateInstrCost() * NElts;
447 }
448 }
449
450 if (SLT == MVT::f16 && ST->has16BitInsts()) {
451 // 2 x v_cvt_f32_f16
452 // f32 rcp
453 // f32 fmul
454 // v_cvt_f16_f32
455 // f16 div_fixup
456 int Cost = 4 * getFullRateInstrCost() + 2 * getQuarterRateInstrCost();
457 return LT.first * Cost * NElts;
458 }
459
460 if (SLT == MVT::f32 || SLT == MVT::f16) {
461 int Cost = 7 * getFullRateInstrCost() + 1 * getQuarterRateInstrCost();
462
463 if (!HasFP32Denormals) {
464 // FP mode switches.
465 Cost += 2 * getFullRateInstrCost();
466 }
467
468 return LT.first * NElts * Cost;
469 }
470 break;
471 default:
472 break;
473 }
474
475 return BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
476 Opd1PropInfo, Opd2PropInfo);
477}
478
479template <typename T>
480int GCNTTIImpl::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
481 ArrayRef<T *> Args,
482 FastMathFlags FMF, unsigned VF) {
483 if (ID != Intrinsic::fma)
484 return BaseT::getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF);
485
486 EVT OrigTy = TLI->getValueType(DL, RetTy);
487 if (!OrigTy.isSimple()) {
488 return BaseT::getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF);
489 }
490
491 // Legalize the type.
492 std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, RetTy);
493
494 unsigned NElts = LT.second.isVector() ?
495 LT.second.getVectorNumElements() : 1;
496
497 MVT::SimpleValueType SLT = LT.second.getScalarType().SimpleTy;
498
499 if (SLT == MVT::f64)
500 return LT.first * NElts * get64BitInstrCost();
501
502 if (ST->has16BitInsts() && SLT == MVT::f16)
503 NElts = (NElts + 1) / 2;
504
505 return LT.first * NElts * (ST->hasFastFMAF32() ? getHalfRateInstrCost()
506 : getQuarterRateInstrCost());
507}
508
509int GCNTTIImpl::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
510 ArrayRef<Value*> Args, FastMathFlags FMF,
511 unsigned VF) {
512 return getIntrinsicInstrCost<Value>(ID, RetTy, Args, FMF, VF);
513}
514
515int GCNTTIImpl::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
516 ArrayRef<Type *> Tys, FastMathFlags FMF,
517 unsigned ScalarizationCostPassed) {
518 return getIntrinsicInstrCost<Type>(ID, RetTy, Tys, FMF,
519 ScalarizationCostPassed);
520}
521
522unsigned GCNTTIImpl::getCFInstrCost(unsigned Opcode) {
523 // XXX - For some reason this isn't called for switch.
524 switch (Opcode) {
525 case Instruction::Br:
526 case Instruction::Ret:
527 return 10;
528 default:
529 return BaseT::getCFInstrCost(Opcode);
530 }
531}
532
533int GCNTTIImpl::getArithmeticReductionCost(unsigned Opcode, Type *Ty,
534 bool IsPairwise) {
535 EVT OrigTy = TLI->getValueType(DL, Ty);
536
537 // Computes cost on targets that have packed math instructions (which support
538 // 16-bit types only).
539 if (IsPairwise ||
540 !ST->hasVOP3PInsts() ||
541 OrigTy.getScalarSizeInBits() != 16)
542 return BaseT::getArithmeticReductionCost(Opcode, Ty, IsPairwise);
543
544 std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
545 return LT.first * getFullRateInstrCost();
546}
547
548int GCNTTIImpl::getMinMaxReductionCost(Type *Ty, Type *CondTy,
549 bool IsPairwise,
550 bool IsUnsigned) {
551 EVT OrigTy = TLI->getValueType(DL, Ty);
552
553 // Computes cost on targets that have packed math instructions (which support
554 // 16-bit types only).
555 if (IsPairwise ||
556 !ST->hasVOP3PInsts() ||
557 OrigTy.getScalarSizeInBits() != 16)
558 return BaseT::getMinMaxReductionCost(Ty, CondTy, IsPairwise, IsUnsigned);
559
560 std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
561 return LT.first * getHalfRateInstrCost();
562}
563
564int GCNTTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy,
565 unsigned Index) {
566 switch (Opcode) {
567 case Instruction::ExtractElement:
568 case Instruction::InsertElement: {
569 unsigned EltSize
570 = DL.getTypeSizeInBits(cast<VectorType>(ValTy)->getElementType());
571 if (EltSize < 32) {
572 if (EltSize == 16 && Index == 0 && ST->has16BitInsts())
573 return 0;
574 return BaseT::getVectorInstrCost(Opcode, ValTy, Index);
575 }
576
577 // Extracts are just reads of a subregister, so are free. Inserts are
578 // considered free because we don't want to have any cost for scalarizing
579 // operations, and we don't have to copy into a different register class.
580
581 // Dynamic indexing isn't free and is best avoided.
582 return Index == ~0u ? 2 : 0;
583 }
584 default:
585 return BaseT::getVectorInstrCost(Opcode, ValTy, Index);
586 }
587}
588
589static bool isArgPassedInSGPR(const Argument *A) {
590 const Function *F = A->getParent();
591
592 // Arguments to compute shaders are never a source of divergence.
593 CallingConv::ID CC = F->getCallingConv();
594 switch (CC) {
595 case CallingConv::AMDGPU_KERNEL:
596 case CallingConv::SPIR_KERNEL:
597 return true;
598 case CallingConv::AMDGPU_VS:
599 case CallingConv::AMDGPU_LS:
600 case CallingConv::AMDGPU_HS:
601 case CallingConv::AMDGPU_ES:
602 case CallingConv::AMDGPU_GS:
603 case CallingConv::AMDGPU_PS:
604 case CallingConv::AMDGPU_CS:
605 // For non-compute shaders, SGPR inputs are marked with either inreg or byval.
606 // Everything else is in VGPRs.
607 return F->getAttributes().hasParamAttribute(A->getArgNo(), Attribute::InReg) ||
608 F->getAttributes().hasParamAttribute(A->getArgNo(), Attribute::ByVal);
609 default:
610 // TODO: Should calls support inreg for SGPR inputs?
611 return false;
612 }
613}
614
615/// Analyze if the results of inline asm are divergent. If \p Indices is empty,
616/// this is analyzing the collective result of all output registers. Otherwise,
617/// this is only querying a specific result index if this returns multiple
618/// registers in a struct.
619bool GCNTTIImpl::isInlineAsmSourceOfDivergence(
620 const CallInst *CI, ArrayRef<unsigned> Indices) const {
621 // TODO: Handle complex extract indices
622 if (Indices.size() > 1)
623 return true;
624
625 const DataLayout &DL = CI->getModule()->getDataLayout();
626 const SIRegisterInfo *TRI = ST->getRegisterInfo();
627 ImmutableCallSite CS(CI);
628 TargetLowering::AsmOperandInfoVector TargetConstraints
629 = TLI->ParseConstraints(DL, ST->getRegisterInfo(), CS);
630
631 const int TargetOutputIdx = Indices.empty() ? -1 : Indices[0];
632
633 int OutputIdx = 0;
634 for (auto &TC : TargetConstraints) {
635 if (TC.Type != InlineAsm::isOutput)
636 continue;
637
638 // Skip outputs we don't care about.
639 if (TargetOutputIdx != -1 && TargetOutputIdx != OutputIdx++)
640 continue;
641
642 TLI->ComputeConstraintToUse(TC, SDValue());
643
644 Register AssignedReg;
645 const TargetRegisterClass *RC;
646 std::tie(AssignedReg, RC) = TLI->getRegForInlineAsmConstraint(
647 TRI, TC.ConstraintCode, TC.ConstraintVT);
648 if (AssignedReg) {
649 // FIXME: This is a workaround for getRegForInlineAsmConstraint
650 // returning VS_32
651 RC = TRI->getPhysRegClass(AssignedReg);
652 }
653
654 // For AGPR constraints null is returned on subtargets without AGPRs, so
655 // assume divergent for null.
656 if (!RC || !TRI->isSGPRClass(RC))
657 return true;
658 }
659
660 return false;
661}
662
663/// \returns true if the new GPU divergence analysis is enabled.
664bool GCNTTIImpl::useGPUDivergenceAnalysis() const {
665 return !UseLegacyDA;
666}
667
668/// \returns true if the result of the value could potentially be
669/// different across workitems in a wavefront.
670bool GCNTTIImpl::isSourceOfDivergence(const Value *V) const {
671 if (const Argument *A = dyn_cast<Argument>(V))
672 return !isArgPassedInSGPR(A);
673
674 // Loads from the private and flat address spaces are divergent, because
675 // threads can execute the load instruction with the same inputs and get
676 // different results.
677 //
678 // All other loads are not divergent, because if threads issue loads with the
679 // same arguments, they will always get the same result.
680 if (const LoadInst *Load = dyn_cast<LoadInst>(V))
681 return Load->getPointerAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS ||
682 Load->getPointerAddressSpace() == AMDGPUAS::FLAT_ADDRESS;
683
684 // Atomics are divergent because they are executed sequentially: when an
685 // atomic operation refers to the same address in each thread, then each
686 // thread after the first sees the value written by the previous thread as
687 // the original value.
688 if (isa<AtomicRMWInst>(V) || isa<AtomicCmpXchgInst>(V))
689 return true;
690
691 if (const IntrinsicInst *Intrinsic = dyn_cast<IntrinsicInst>(V))
692 return AMDGPU::isIntrinsicSourceOfDivergence(Intrinsic->getIntrinsicID());
693
694 // Assume all function calls are a source of divergence.
695 if (const CallInst *CI = dyn_cast<CallInst>(V)) {
696 if (isa<InlineAsm>(CI->getCalledValue()))
697 return isInlineAsmSourceOfDivergence(CI);
698 return true;
699 }
700
701 // Assume all function calls are a source of divergence.
702 if (isa<InvokeInst>(V))
703 return true;
704
705 return false;
706}
707
708bool GCNTTIImpl::isAlwaysUniform(const Value *V) const {
709 if (const IntrinsicInst *Intrinsic = dyn_cast<IntrinsicInst>(V)) {
710 switch (Intrinsic->getIntrinsicID()) {
711 default:
712 return false;
713 case Intrinsic::amdgcn_readfirstlane:
714 case Intrinsic::amdgcn_readlane:
715 case Intrinsic::amdgcn_icmp:
716 case Intrinsic::amdgcn_fcmp:
717 case Intrinsic::amdgcn_if_break:
718 return true;
719 }
720 }
721
722 if (const CallInst *CI = dyn_cast<CallInst>(V)) {
723 if (isa<InlineAsm>(CI->getCalledValue()))
724 return !isInlineAsmSourceOfDivergence(CI);
725 return false;
726 }
727
728 const ExtractValueInst *ExtValue = dyn_cast<ExtractValueInst>(V);
729 if (!ExtValue)
730 return false;
731
732 const CallInst *CI = dyn_cast<CallInst>(ExtValue->getOperand(0));
733 if (!CI)
734 return false;
735
736 if (const IntrinsicInst *Intrinsic = dyn_cast<IntrinsicInst>(CI)) {
737 switch (Intrinsic->getIntrinsicID()) {
738 default:
739 return false;
740 case Intrinsic::amdgcn_if:
741 case Intrinsic::amdgcn_else: {
742 ArrayRef<unsigned> Indices = ExtValue->getIndices();
743 return Indices.size() == 1 && Indices[0] == 1;
744 }
745 }
746 }
747
748 // If we have inline asm returning mixed SGPR and VGPR results, we inferred
749 // divergent for the overall struct return. We need to override it in the
750 // case we're extracting an SGPR component here.
751 if (isa<InlineAsm>(CI->getCalledValue()))
752 return !isInlineAsmSourceOfDivergence(CI, ExtValue->getIndices());
753
754 return false;
755}
756
757bool GCNTTIImpl::collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
758 Intrinsic::ID IID) const {
759 switch (IID) {
760 case Intrinsic::amdgcn_atomic_inc:
761 case Intrinsic::amdgcn_atomic_dec:
762 case Intrinsic::amdgcn_ds_fadd:
763 case Intrinsic::amdgcn_ds_fmin:
764 case Intrinsic::amdgcn_ds_fmax:
765 case Intrinsic::amdgcn_is_shared:
766 case Intrinsic::amdgcn_is_private:
767 OpIndexes.push_back(0);
768 return true;
769 default:
770 return false;
771 }
772}
773
774bool GCNTTIImpl::rewriteIntrinsicWithAddressSpace(
775 IntrinsicInst *II, Value *OldV, Value *NewV) const {
776 auto IntrID = II->getIntrinsicID();
777 switch (IntrID) {
778 case Intrinsic::amdgcn_atomic_inc:
779 case Intrinsic::amdgcn_atomic_dec:
780 case Intrinsic::amdgcn_ds_fadd:
781 case Intrinsic::amdgcn_ds_fmin:
782 case Intrinsic::amdgcn_ds_fmax: {
783 const ConstantInt *IsVolatile = cast<ConstantInt>(II->getArgOperand(4));
784 if (!IsVolatile->isZero())
785 return false;
786 Module *M = II->getParent()->getParent()->getParent();
787 Type *DestTy = II->getType();
788 Type *SrcTy = NewV->getType();
789 Function *NewDecl =
790 Intrinsic::getDeclaration(M, II->getIntrinsicID(), {DestTy, SrcTy});
791 II->setArgOperand(0, NewV);
792 II->setCalledFunction(NewDecl);
793 return true;
794 }
795 case Intrinsic::amdgcn_is_shared:
796 case Intrinsic::amdgcn_is_private: {
797 unsigned TrueAS = IntrID == Intrinsic::amdgcn_is_shared ?
798 AMDGPUAS::LOCAL_ADDRESS : AMDGPUAS::PRIVATE_ADDRESS;
799 unsigned NewAS = NewV->getType()->getPointerAddressSpace();
800 LLVMContext &Ctx = NewV->getType()->getContext();
801 ConstantInt *NewVal = (TrueAS == NewAS) ?
802 ConstantInt::getTrue(Ctx) : ConstantInt::getFalse(Ctx);
803 II->replaceAllUsesWith(NewVal);
804 II->eraseFromParent();
805 return true;
806 }
807 default:
808 return false;
809 }
810}
811
812unsigned GCNTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
813 Type *SubTp) {
814 if (ST->hasVOP3PInsts()) {
815 VectorType *VT = cast<VectorType>(Tp);
816 if (VT->getNumElements() == 2 &&
817 DL.getTypeSizeInBits(VT->getElementType()) == 16) {
818 // With op_sel VOP3P instructions freely can access the low half or high
819 // half of a register, so any swizzle is free.
820
821 switch (Kind) {
822 case TTI::SK_Broadcast:
823 case TTI::SK_Reverse:
824 case TTI::SK_PermuteSingleSrc:
825 return 0;
826 default:
827 break;
828 }
829 }
830 }
831
832 return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
833}
834
835bool GCNTTIImpl::areInlineCompatible(const Function *Caller,
836 const Function *Callee) const {
837 const TargetMachine &TM = getTLI()->getTargetMachine();
838 const GCNSubtarget *CallerST
839 = static_cast<const GCNSubtarget *>(TM.getSubtargetImpl(*Caller));
840 const GCNSubtarget *CalleeST
841 = static_cast<const GCNSubtarget *>(TM.getSubtargetImpl(*Callee));
842
843 const FeatureBitset &CallerBits = CallerST->getFeatureBits();
844 const FeatureBitset &CalleeBits = CalleeST->getFeatureBits();
845
846 FeatureBitset RealCallerBits = CallerBits & ~InlineFeatureIgnoreList;
847 FeatureBitset RealCalleeBits = CalleeBits & ~InlineFeatureIgnoreList;
848 if ((RealCallerBits & RealCalleeBits) != RealCalleeBits)
849 return false;
850
851 // FIXME: dx10_clamp can just take the caller setting, but there seems to be
852 // no way to support merge for backend defined attributes.
853 AMDGPU::SIModeRegisterDefaults CallerMode(*Caller, *CallerST);
854 AMDGPU::SIModeRegisterDefaults CalleeMode(*Callee, *CalleeST);
855 return CallerMode.isInlineCompatible(CalleeMode);
856}
857
858void GCNTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
859 TTI::UnrollingPreferences &UP) {
860 CommonTTI.getUnrollingPreferences(L, SE, UP);
861}
862
863unsigned GCNTTIImpl::getUserCost(const User *U,
864 ArrayRef<const Value *> Operands) {
865 const Instruction *I = dyn_cast<Instruction>(U);
1
Assuming 'U' is not a 'Instruction'
866 if (!I)
1.1
'I' is null
2
Taking true branch
867 return BaseT::getUserCost(U, Operands);
3
Calling 'TargetTransformInfoImplCRTPBase::getUserCost'
868
869 // Estimate different operations to be optimized out
870 switch (I->getOpcode()) {
871 case Instruction::ExtractElement: {
872 ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1));
873 unsigned Idx = -1;
874 if (CI)
875 Idx = CI->getZExtValue();
876 return getVectorInstrCost(I->getOpcode(), I->getOperand(0)->getType(), Idx);
877 }
878 case Instruction::InsertElement: {
879 ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(2));
880 unsigned Idx = -1;
881 if (CI)
882 Idx = CI->getZExtValue();
883 return getVectorInstrCost(I->getOpcode(), I->getType(), Idx);
884 }
885 case Instruction::Call: {
886 if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) {
887 SmallVector<Value *, 4> Args(II->arg_operands());
888 FastMathFlags FMF;
889 if (auto *FPMO = dyn_cast<FPMathOperator>(II))
890 FMF = FPMO->getFastMathFlags();
891 return getIntrinsicInstrCost(II->getIntrinsicID(), II->getType(), Args,
892 FMF);
893 } else {
894 return BaseT::getUserCost(U, Operands);
895 }
896 }
897 case Instruction::ShuffleVector: {
898 const ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(I);
899 Type *Ty = Shuffle->getType();
900 Type *SrcTy = Shuffle->getOperand(0)->getType();
901
902 // TODO: Identify and add costs for insert subvector, etc.
903 int SubIndex;
904 if (Shuffle->isExtractSubvectorMask(SubIndex))
905 return getShuffleCost(TTI::SK_ExtractSubvector, SrcTy, SubIndex, Ty);
906
907 if (Shuffle->changesLength())
908 return BaseT::getUserCost(U, Operands);
909
910 if (Shuffle->isIdentity())
911 return 0;
912
913 if (Shuffle->isReverse())
914 return getShuffleCost(TTI::SK_Reverse, Ty, 0, nullptr);
915
916 if (Shuffle->isSelect())
917 return getShuffleCost(TTI::SK_Select, Ty, 0, nullptr);
918
919 if (Shuffle->isTranspose())
920 return getShuffleCost(TTI::SK_Transpose, Ty, 0, nullptr);
921
922 if (Shuffle->isZeroEltSplat())
923 return getShuffleCost(TTI::SK_Broadcast, Ty, 0, nullptr);
924
925 if (Shuffle->isSingleSource())
926 return getShuffleCost(TTI::SK_PermuteSingleSrc, Ty, 0, nullptr);
927
928 return getShuffleCost(TTI::SK_PermuteTwoSrc, Ty, 0, nullptr);
929 }
930 case Instruction::ZExt:
931 case Instruction::SExt:
932 case Instruction::FPToUI:
933 case Instruction::FPToSI:
934 case Instruction::FPExt:
935 case Instruction::PtrToInt:
936 case Instruction::IntToPtr:
937 case Instruction::SIToFP:
938 case Instruction::UIToFP:
939 case Instruction::Trunc:
940 case Instruction::FPTrunc:
941 case Instruction::BitCast:
942 case Instruction::AddrSpaceCast: {
943 return getCastInstrCost(I->getOpcode(), I->getType(),
944 I->getOperand(0)->getType(), I);
945 }
946 case Instruction::Add:
947 case Instruction::FAdd:
948 case Instruction::Sub:
949 case Instruction::FSub:
950 case Instruction::Mul:
951 case Instruction::FMul:
952 case Instruction::UDiv:
953 case Instruction::SDiv:
954 case Instruction::FDiv:
955 case Instruction::URem:
956 case Instruction::SRem:
957 case Instruction::FRem:
958 case Instruction::Shl:
959 case Instruction::LShr:
960 case Instruction::AShr:
961 case Instruction::And:
962 case Instruction::Or:
963 case Instruction::Xor:
964 case Instruction::FNeg: {
965 return getArithmeticInstrCost(I->getOpcode(), I->getType(),
966 TTI::OK_AnyValue, TTI::OK_AnyValue,
967 TTI::OP_None, TTI::OP_None, Operands, I);
968 }
969 default:
970 break;
971 }
972
973 return BaseT::getUserCost(U, Operands);
974}
975
976unsigned R600TTIImpl::getHardwareNumberOfRegisters(bool Vec) const {
977 return 4 * 128; // XXX - 4 channels. Should these count as vector instead?
978}
979
980unsigned R600TTIImpl::getNumberOfRegisters(bool Vec) const {
981 return getHardwareNumberOfRegisters(Vec);
982}
983
984unsigned R600TTIImpl::getRegisterBitWidth(bool Vector) const {
985 return 32;
986}
987
988unsigned R600TTIImpl::getMinVectorRegisterBitWidth() const {
989 return 32;
990}
991
992unsigned R600TTIImpl::getLoadStoreVecRegBitWidth(unsigned AddrSpace) const {
993 if (AddrSpace == AMDGPUAS::GLOBAL_ADDRESS ||
994 AddrSpace == AMDGPUAS::CONSTANT_ADDRESS)
995 return 128;
996 if (AddrSpace == AMDGPUAS::LOCAL_ADDRESS ||
997 AddrSpace == AMDGPUAS::REGION_ADDRESS)
998 return 64;
999 if (AddrSpace == AMDGPUAS::PRIVATE_ADDRESS)
1000 return 32;
1001
1002 if ((AddrSpace == AMDGPUAS::PARAM_D_ADDRESS ||
1003 AddrSpace == AMDGPUAS::PARAM_I_ADDRESS ||
1004 (AddrSpace >= AMDGPUAS::CONSTANT_BUFFER_0 &&
1005 AddrSpace <= AMDGPUAS::CONSTANT_BUFFER_15)))
1006 return 128;
1007 llvm_unreachable("unhandled address space");
1008}
1009
1010bool R600TTIImpl::isLegalToVectorizeMemChain(unsigned ChainSizeInBytes,
1011 unsigned Alignment,
1012 unsigned AddrSpace) const {
1013 // We allow vectorization of flat stores, even though we may need to decompose
1014 // them later if they may access private memory. We don't have enough context
1015 // here, and legalization can handle it.
1016 return (AddrSpace != AMDGPUAS::PRIVATE_ADDRESS);
1017}
1018
1019bool R600TTIImpl::isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
1020 unsigned Alignment,
1021 unsigned AddrSpace) const {
1022 return isLegalToVectorizeMemChain(ChainSizeInBytes, Alignment, AddrSpace);
1023}
1024
1025bool R600TTIImpl::isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
1026 unsigned Alignment,
1027 unsigned AddrSpace) const {
1028 return isLegalToVectorizeMemChain(ChainSizeInBytes, Alignment, AddrSpace);
1029}
1030
1031unsigned R600TTIImpl::getMaxInterleaveFactor(unsigned VF) {
1032 // Disable unrolling if the loop is not vectorized.
1033 // TODO: Enable this again.
1034 if (VF == 1)
1035 return 1;
1036
1037 return 8;
1038}
1039
1040unsigned R600TTIImpl::getCFInstrCost(unsigned Opcode) {
1041 // XXX - For some reason this isn't called for switch.
1042 switch (Opcode) {
1043 case Instruction::Br:
1044 case Instruction::Ret:
1045 return 10;
1046 default:
1047 return BaseT::getCFInstrCost(Opcode);
1048 }
1049}
1050
1051int R600TTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy,
1052 unsigned Index) {
1053 switch (Opcode) {
1054 case Instruction::ExtractElement:
1055 case Instruction::InsertElement: {
1056 unsigned EltSize
1057 = DL.getTypeSizeInBits(cast<VectorType>(ValTy)->getElementType());
1058 if (EltSize < 32) {
1059 return BaseT::getVectorInstrCost(Opcode, ValTy, Index);
1060 }
1061
1062 // Extracts are just reads of a subregister, so are free. Inserts are
1063 // considered free because we don't want to have any cost for scalarizing
1064 // operations, and we don't have to copy into a different register class.
1065
1066 // Dynamic indexing isn't free and is best avoided.
1067 return Index == ~0u ? 2 : 0;
1068 }
1069 default:
1070 return BaseT::getVectorInstrCost(Opcode, ValTy, Index);
1071 }
1072}
1073
1074void R600TTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
1075 TTI::UnrollingPreferences &UP) {
1076 CommonTTI.getUnrollingPreferences(L, SE, UP);
1077}

/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h

1//===- TargetTransformInfoImpl.h --------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9/// This file provides helpers for the implementation of
10/// a TargetTransformInfo-conforming class.
11///
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFOIMPL_H
15#define LLVM_ANALYSIS_TARGETTRANSFORMINFOIMPL_H
16
17#include "llvm/Analysis/ScalarEvolutionExpressions.h"
18#include "llvm/Analysis/TargetTransformInfo.h"
19#include "llvm/Analysis/VectorUtils.h"
20#include "llvm/IR/CallSite.h"
21#include "llvm/IR/DataLayout.h"
22#include "llvm/IR/Function.h"
23#include "llvm/IR/GetElementPtrTypeIterator.h"
24#include "llvm/IR/Operator.h"
25#include "llvm/IR/Type.h"
26
27namespace llvm {
28
29/// Base class for use as a mix-in that aids implementing
30/// a TargetTransformInfo-compatible class.
31class TargetTransformInfoImplBase {
32protected:
33 typedef TargetTransformInfo TTI;
34
35 const DataLayout &DL;
36
37 explicit TargetTransformInfoImplBase(const DataLayout &DL) : DL(DL) {}
38
39public:
40 // Provide value semantics. MSVC requires that we spell all of these out.
41 TargetTransformInfoImplBase(const TargetTransformInfoImplBase &Arg)
42 : DL(Arg.DL) {}
43 TargetTransformInfoImplBase(TargetTransformInfoImplBase &&Arg) : DL(Arg.DL) {}
44
45 const DataLayout &getDataLayout() const { return DL; }
46
47 unsigned getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) {
48 switch (Opcode) {
49 default:
50 // By default, just classify everything as 'basic'.
51 return TTI::TCC_Basic;
52
53 case Instruction::GetElementPtr:
54 llvm_unreachable("Use getGEPCost for GEP operations!");
55
56 case Instruction::BitCast:
57 assert(OpTy && "Cast instructions must provide the operand type");
58 if (Ty == OpTy || (Ty->isPointerTy() && OpTy->isPointerTy()))
59 // Identity and pointer-to-pointer casts are free.
60 return TTI::TCC_Free;
61
62 // Otherwise, the default basic cost is used.
63 return TTI::TCC_Basic;
64
65 case Instruction::Freeze:
66 // Freeze operation is free because it should be lowered into a register
67 // use without any register copy in assembly code.
68 return TTI::TCC_Free;
69
70 case Instruction::FDiv:
71 case Instruction::FRem:
72 case Instruction::SDiv:
73 case Instruction::SRem:
74 case Instruction::UDiv:
75 case Instruction::URem:
76 return TTI::TCC_Expensive;
77
78 case Instruction::IntToPtr: {
79 // An inttoptr cast is free so long as the input is a legal integer type
80 // which doesn't contain values outside the range of a pointer.
81 unsigned OpSize = OpTy->getScalarSizeInBits();
82 if (DL.isLegalInteger(OpSize) &&
83 OpSize <= DL.getPointerTypeSizeInBits(Ty))
84 return TTI::TCC_Free;
85
86 // Otherwise it's not a no-op.
87 return TTI::TCC_Basic;
88 }
89 case Instruction::PtrToInt: {
90 // A ptrtoint cast is free so long as the result is large enough to store
91 // the pointer, and a legal integer type.
92 unsigned DestSize = Ty->getScalarSizeInBits();
93 if (DL.isLegalInteger(DestSize) &&
94 DestSize >= DL.getPointerTypeSizeInBits(OpTy))
95 return TTI::TCC_Free;
96
97 // Otherwise it's not a no-op.
98 return TTI::TCC_Basic;
99 }
100 case Instruction::Trunc:
101 // trunc to a native type is free (assuming the target has compare and
102 // shift-right of the same width).
103 if (DL.isLegalInteger(DL.getTypeSizeInBits(Ty)))
104 return TTI::TCC_Free;
105
106 return TTI::TCC_Basic;
107 }
108 }
109
110 int getGEPCost(Type *PointeeType, const Value *Ptr,
111 ArrayRef<const Value *> Operands) {
112 // In the basic model, we just assume that all-constant GEPs will be folded
113 // into their uses via addressing modes.
114 for (unsigned Idx = 0, Size = Operands.size(); Idx != Size; ++Idx)
115 if (!isa<Constant>(Operands[Idx]))
116 return TTI::TCC_Basic;
117
118 return TTI::TCC_Free;
119 }
120
121 unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
122 unsigned &JTSize,
123 ProfileSummaryInfo *PSI,
124 BlockFrequencyInfo *BFI) {
125 (void)PSI;
126 (void)BFI;
127 JTSize = 0;
128 return SI.getNumCases();
129 }
130
131 int getExtCost(const Instruction *I, const Value *Src) {
132 return TTI::TCC_Basic;
133 }
134
135 unsigned getCallCost(FunctionType *FTy, int NumArgs, const User *U) {
136 assert(FTy && "FunctionType must be provided to this routine.");
137
138 // The target-independent implementation just measures the size of the
139 // function by approximating that each argument will take on average one
140 // instruction to prepare.
141
142 if (NumArgs < 0)
143 // Set the argument number to the number of explicit arguments in the
144 // function.
145 NumArgs = FTy->getNumParams();
146
147 return TTI::TCC_Basic * (NumArgs + 1);
148 }
149
150 unsigned getInliningThresholdMultiplier() { return 1; }
151
152 int getInlinerVectorBonusPercent() { return 150; }
153
154 unsigned getMemcpyCost(const Instruction *I) {
155 return TTI::TCC_Expensive;
156 }
157
158 bool hasBranchDivergence() { return false; }
159
160 bool useGPUDivergenceAnalysis() { return false; }
161
162 bool isSourceOfDivergence(const Value *V) { return false; }
163
164 bool isAlwaysUniform(const Value *V) { return false; }
165
166 unsigned getFlatAddressSpace () {
167 return -1;
168 }
169
170 bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
171 Intrinsic::ID IID) const {
172 return false;
173 }
174
175 bool rewriteIntrinsicWithAddressSpace(IntrinsicInst *II,
176 Value *OldV, Value *NewV) const {
177 return false;
178 }
179
180 bool isLoweredToCall(const Function *F) {
181 assert(F && "A concrete function must be provided to this routine.");
182
183 // FIXME: These should almost certainly not be handled here, and instead
184 // handled with the help of TLI or the target itself. This was largely
185 // ported from existing analysis heuristics here so that such refactorings
186 // can take place in the future.
187
188 if (F->isIntrinsic())
189 return false;
190
191 if (F->hasLocalLinkage() || !F->hasName())
192 return true;
193
194 StringRef Name = F->getName();
195
196 // These will all likely lower to a single selection DAG node.
197 if (Name == "copysign" || Name == "copysignf" || Name == "copysignl" ||
198 Name == "fabs" || Name == "fabsf" || Name == "fabsl" || Name == "sin" ||
199 Name == "fmin" || Name == "fminf" || Name == "fminl" ||
200 Name == "fmax" || Name == "fmaxf" || Name == "fmaxl" ||
201 Name == "sinf" || Name == "sinl" || Name == "cos" || Name == "cosf" ||
202 Name == "cosl" || Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl")
203 return false;
204
205 // These are all likely to be optimized into something smaller.
206 if (Name == "pow" || Name == "powf" || Name == "powl" || Name == "exp2" ||
207 Name == "exp2l" || Name == "exp2f" || Name == "floor" ||
208 Name == "floorf" || Name == "ceil" || Name == "round" ||
209 Name == "ffs" || Name == "ffsl" || Name == "abs" || Name == "labs" ||
210 Name == "llabs")
211 return false;
212
213 return true;
214 }
215
216 bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
217 AssumptionCache &AC,
218 TargetLibraryInfo *LibInfo,
219 HardwareLoopInfo &HWLoopInfo) {
220 return false;
221 }
222
223 bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE,
224 AssumptionCache &AC, TargetLibraryInfo *TLI,
225 DominatorTree *DT,
226 const LoopAccessInfo *LAI) const {
227 return false;
228 }
229
230 void getUnrollingPreferences(Loop *, ScalarEvolution &,
231 TTI::UnrollingPreferences &) {}
232
233 bool isLegalAddImmediate(int64_t Imm) { return false; }
234
235 bool isLegalICmpImmediate(int64_t Imm) { return false; }
236
237 bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
238 bool HasBaseReg, int64_t Scale,
239 unsigned AddrSpace, Instruction *I = nullptr) {
240 // Guess that only reg and reg+reg addressing is allowed. This heuristic is
241 // taken from the implementation of LSR.
242 return !BaseGV && BaseOffset == 0 && (Scale == 0 || Scale == 1);
243 }
244
245 bool isLSRCostLess(TTI::LSRCost &C1, TTI::LSRCost &C2) {
246 return std::tie(C1.NumRegs, C1.AddRecCost, C1.NumIVMuls, C1.NumBaseAdds,
247 C1.ScaleCost, C1.ImmCost, C1.SetupCost) <
248 std::tie(C2.NumRegs, C2.AddRecCost, C2.NumIVMuls, C2.NumBaseAdds,
249 C2.ScaleCost, C2.ImmCost, C2.SetupCost);
250 }
251
252 bool canMacroFuseCmp() { return false; }
253
254 bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI,
255 DominatorTree *DT, AssumptionCache *AC,
256 TargetLibraryInfo *LibInfo) {
257 return false;
258 }
259
260 bool shouldFavorPostInc() const { return false; }
261
262 bool shouldFavorBackedgeIndex(const Loop *L) const { return false; }
263
264 bool isLegalMaskedStore(Type *DataType, MaybeAlign Alignment) { return false; }
265
266 bool isLegalMaskedLoad(Type *DataType, MaybeAlign Alignment) { return false; }
267
268 bool isLegalNTStore(Type *DataType, Align Alignment) {
269 // By default, assume nontemporal memory stores are available for stores
270 // that are aligned and have a size that is a power of 2.
271 unsigned DataSize = DL.getTypeStoreSize(DataType);
272 return Alignment >= DataSize && isPowerOf2_32(DataSize);
273 }
274
275 bool isLegalNTLoad(Type *DataType, Align Alignment) {
276 // By default, assume nontemporal memory loads are available for loads that
277 // are aligned and have a size that is a power of 2.
278 unsigned DataSize = DL.getTypeStoreSize(DataType);
279 return Alignment >= DataSize && isPowerOf2_32(DataSize);
280 }
281
282 bool isLegalMaskedScatter(Type *DataType, MaybeAlign Alignment) {
283 return false;
284 }
285
286 bool isLegalMaskedGather(Type *DataType, MaybeAlign Alignment) {
287 return false;
288 }
289
290 bool isLegalMaskedCompressStore(Type *DataType) { return false; }
291
292 bool isLegalMaskedExpandLoad(Type *DataType) { return false; }
293
294 bool hasDivRemOp(Type *DataType, bool IsSigned) { return false; }
295
296 bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) { return false; }
297
298 bool prefersVectorizedAddressing() { return true; }
299
300 int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
301 bool HasBaseReg, int64_t Scale, unsigned AddrSpace) {
302 // Guess that all legal addressing mode are free.
303 if (isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg,
304 Scale, AddrSpace))
305 return 0;
306 return -1;
307 }
308
309 bool LSRWithInstrQueries() { return false; }
310
311 bool isTruncateFree(Type *Ty1, Type *Ty2) { return false; }
312
313 bool isProfitableToHoist(Instruction *I) { return true; }
314
315 bool useAA() { return false; }
316
317 bool isTypeLegal(Type *Ty) { return false; }
318
319 bool shouldBuildLookupTables() { return true; }
320 bool shouldBuildLookupTablesForConstant(Constant *C) { return true; }
321
322 bool useColdCCForColdCall(Function &F) { return false; }
323
324 unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) {
325 return 0;
326 }
327
328 unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
329 unsigned VF) { return 0; }
330
331 bool supportsEfficientVectorElementLoadStore() { return false; }
332
333 bool enableAggressiveInterleaving(bool LoopHasReductions) { return false; }
334
335 TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
336 bool IsZeroCmp) const {
337 return {};
338 }
339
340 bool enableInterleavedAccessVectorization() { return false; }
341
342 bool enableMaskedInterleavedAccessVectorization() { return false; }
343
344 bool isFPVectorizationPotentiallyUnsafe() { return false; }
345
346 bool allowsMisalignedMemoryAccesses(LLVMContext &Context,
347 unsigned BitWidth,
348 unsigned AddressSpace,
349 unsigned Alignment,
350 bool *Fast) { return false; }
351
352 TTI::PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) {
353 return TTI::PSK_Software;
354 }
355
356 bool haveFastSqrt(Type *Ty) { return false; }
357
358 bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) { return true; }
359
360 unsigned getFPOpCost(Type *Ty) { return TargetTransformInfo::TCC_Basic; }
361
362 int getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
363 Type *Ty) {
364 return 0;
365 }
366
367 unsigned getIntImmCost(const APInt &Imm, Type *Ty) { return TTI::TCC_Basic; }
368
369 unsigned getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm,
370 Type *Ty) {
371 return TTI::TCC_Free;
372 }
373
374 unsigned getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
375 const APInt &Imm, Type *Ty) {
376 return TTI::TCC_Free;
377 }
378
379 unsigned getNumberOfRegisters(unsigned ClassID) const { return 8; }
380
381 unsigned getRegisterClassForType(bool Vector, Type *Ty = nullptr) const {
382 return Vector ? 1 : 0;
383 };
384
385 const char* getRegisterClassName(unsigned ClassID) const {
386 switch (ClassID) {
387 default:
388 return "Generic::Unknown Register Class";
389 case 0: return "Generic::ScalarRC";
390 case 1: return "Generic::VectorRC";
391 }
392 }
393
394 unsigned getRegisterBitWidth(bool Vector) const { return 32; }
395
396 unsigned getMinVectorRegisterBitWidth() { return 128; }
397
398 bool shouldMaximizeVectorBandwidth(bool OptSize) const { return false; }
399
400 unsigned getMinimumVF(unsigned ElemWidth) const { return 0; }
401
402 bool
403 shouldConsiderAddressTypePromotion(const Instruction &I,
404 bool &AllowPromotionWithoutCommonHeader) {
405 AllowPromotionWithoutCommonHeader = false;
406 return false;
407 }
408
409 unsigned getCacheLineSize() const { return 0; }
410
411 llvm::Optional<unsigned> getCacheSize(TargetTransformInfo::CacheLevel Level) const {
412 switch (Level) {
413 case TargetTransformInfo::CacheLevel::L1D:
414 LLVM_FALLTHROUGH;
415 case TargetTransformInfo::CacheLevel::L2D:
416 return llvm::Optional<unsigned>();
417 }
418 llvm_unreachable("Unknown TargetTransformInfo::CacheLevel");
419 }
420
421 llvm::Optional<unsigned> getCacheAssociativity(
422 TargetTransformInfo::CacheLevel Level) const {
423 switch (Level) {
424 case TargetTransformInfo::CacheLevel::L1D:
425 LLVM_FALLTHROUGH;
426 case TargetTransformInfo::CacheLevel::L2D:
427 return llvm::Optional<unsigned>();
428 }
429
430 llvm_unreachable("Unknown TargetTransformInfo::CacheLevel");
431 }
432
433 unsigned getPrefetchDistance() const { return 0; }
434 unsigned getMinPrefetchStride() const { return 1; }
435 unsigned getMaxPrefetchIterationsAhead() const { return UINT_MAX; }
436
437 unsigned getMaxInterleaveFactor(unsigned VF) { return 1; }
438
439 unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty,
440 TTI::OperandValueKind Opd1Info,
441 TTI::OperandValueKind Opd2Info,
442 TTI::OperandValueProperties Opd1PropInfo,
443 TTI::OperandValueProperties Opd2PropInfo,
444 ArrayRef<const Value *> Args,
445 const Instruction *CxtI = nullptr) {
446 return 1;
447 }
448
449 unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Ty, int Index,
450 Type *SubTp) {
451 return 1;
452 }
453
454 unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
455 const Instruction *I) { return 1; }
456
457 unsigned getExtractWithExtendCost(unsigned Opcode, Type *Dst,
458 VectorType *VecTy, unsigned Index) {
459 return 1;
460 }
461
462 unsigned getCFInstrCost(unsigned Opcode) { return 1; }
463
464 unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
465 const Instruction *I) {
466 return 1;
467 }
468
469 unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
470 return 1;
471 }
472
473 unsigned getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
474 unsigned AddressSpace, const Instruction *I) {
475 return 1;
476 }
477
478 unsigned getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
479 unsigned AddressSpace) {
480 return 1;
481 }
482
483 unsigned getGatherScatterOpCost(unsigned Opcode, Type *DataTy, Value *Ptr,
484 bool VariableMask,
485 unsigned Alignment) {
486 return 1;
487 }
488
489 unsigned getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
490 unsigned Factor,
491 ArrayRef<unsigned> Indices,
492 unsigned Alignment, unsigned AddressSpace,
493 bool UseMaskForCond = false,
494 bool UseMaskForGaps = false) {
495 return 1;
496 }
497
498 unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
499 ArrayRef<Type *> Tys, FastMathFlags FMF,
500 unsigned ScalarizationCostPassed) {
501 return 1;
502 }
503 unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
504 ArrayRef<Value *> Args, FastMathFlags FMF, unsigned VF) {
505 return 1;
506 }
507
508 unsigned getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys) {
509 return 1;
510 }
511
512 unsigned getNumberOfParts(Type *Tp) { return 0; }
513
514 unsigned getAddressComputationCost(Type *Tp, ScalarEvolution *,
515 const SCEV *) {
516 return 0;
517 }
518
519 unsigned getArithmeticReductionCost(unsigned, Type *, bool) { return 1; }
520
521 unsigned getMinMaxReductionCost(Type *, Type *, bool, bool) { return 1; }
522
523 unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) { return 0; }
524
525 bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) {
526 return false;
527 }
528
529 unsigned getAtomicMemIntrinsicMaxElementSize() const {
530 // Note for overrides: You must ensure for all element unordered-atomic
531 // memory intrinsics that all power-of-2 element sizes up to, and
532 // including, the return value of this method have a corresponding
533 // runtime lib call. These runtime lib call definitions can be found
534 // in RuntimeLibcalls.h
535 return 0;
536 }
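
The note above spells out the override contract: every power-of-2 element size up to the returned value must have a matching element-unordered-atomic runtime lib call. A minimal sketch of such an override, assuming a hypothetical MyTargetTTIImpl whose runtime supports element sizes 1 through 8 bytes (illustrative only, not in-tree code):

// Hypothetical override (illustrative): returning 8 promises that the
// 1-, 2-, 4- and 8-byte element-unordered-atomic lib calls declared in
// RuntimeLibcalls.h are all available for this target at run time.
unsigned MyTargetTTIImpl::getAtomicMemIntrinsicMaxElementSize() const {
  return 8;
}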
537
538 Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
539 Type *ExpectedType) {
540 return nullptr;
541 }
542
543 Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
544 unsigned SrcAlign, unsigned DestAlign) const {
545 return Type::getInt8Ty(Context);
546 }
547
548 void getMemcpyLoopResidualLoweringType(SmallVectorImpl<Type *> &OpsOut,
549 LLVMContext &Context,
550 unsigned RemainingBytes,
551 unsigned SrcAlign,
552 unsigned DestAlign) const {
553 for (unsigned i = 0; i != RemainingBytes; ++i)
554 OpsOut.push_back(Type::getInt8Ty(Context));
555 }
556
557 bool areInlineCompatible(const Function *Caller,
558 const Function *Callee) const {
559 return (Caller->getFnAttribute("target-cpu") ==
560 Callee->getFnAttribute("target-cpu")) &&
561 (Caller->getFnAttribute("target-features") ==
562 Callee->getFnAttribute("target-features"));
563 }
564
565 bool areFunctionArgsABICompatible(const Function *Caller, const Function *Callee,
566 SmallPtrSetImpl<Argument *> &Args) const {
567 return (Caller->getFnAttribute("target-cpu") ==
568 Callee->getFnAttribute("target-cpu")) &&
569 (Caller->getFnAttribute("target-features") ==
570 Callee->getFnAttribute("target-features"));
571 }
572
573 bool isIndexedLoadLegal(TTI::MemIndexedMode Mode, Type *Ty,
574 const DataLayout &DL) const {
575 return false;
576 }
577
578 bool isIndexedStoreLegal(TTI::MemIndexedMode Mode, Type *Ty,
579 const DataLayout &DL) const {
580 return false;
581 }
582
583 unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const { return 128; }
584
585 bool isLegalToVectorizeLoad(LoadInst *LI) const { return true; }
586
587 bool isLegalToVectorizeStore(StoreInst *SI) const { return true; }
588
589 bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
590 unsigned Alignment,
591 unsigned AddrSpace) const {
592 return true;
593 }
594
595 bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
596 unsigned Alignment,
597 unsigned AddrSpace) const {
598 return true;
599 }
600
601 unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
602 unsigned ChainSizeInBytes,
603 VectorType *VecTy) const {
604 return VF;
605 }
606
607 unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
608 unsigned ChainSizeInBytes,
609 VectorType *VecTy) const {
610 return VF;
611 }
612
613 bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
614 TTI::ReductionFlags Flags) const {
615 return false;
616 }
617
618 bool shouldExpandReduction(const IntrinsicInst *II) const {
619 return true;
620 }
621
622 unsigned getGISelRematGlobalCost() const {
623 return 1;
624 }
625
626protected:
627 // Obtain the minimum required size to hold the value (without the sign)
628 // In case of a vector it returns the min required size for one element.
629 unsigned minRequiredElementSize(const Value* Val, bool &isSigned) {
630 if (isa<ConstantDataVector>(Val) || isa<ConstantVector>(Val)) {
631 const auto* VectorValue = cast<Constant>(Val);
632
633 // In case of a vector need to pick the max between the min
634 // required size for each element
635 auto *VT = cast<VectorType>(Val->getType());
636
637 // Assume unsigned elements
638 isSigned = false;
639
640 // The max required size is the total vector width divided by num
641 // of elements in the vector
642 unsigned MaxRequiredSize = VT->getBitWidth() / VT->getNumElements();
643
644 unsigned MinRequiredSize = 0;
645 for(unsigned i = 0, e = VT->getNumElements(); i < e; ++i) {
646 if (auto* IntElement =
647 dyn_cast<ConstantInt>(VectorValue->getAggregateElement(i))) {
648 bool signedElement = IntElement->getValue().isNegative();
649 // Get the element min required size.
650 unsigned ElementMinRequiredSize =
651 IntElement->getValue().getMinSignedBits() - 1;
652 // In case one element is signed then all the vector is signed.
653 isSigned |= signedElement;
654 // Save the max required bit size between all the elements.
655 MinRequiredSize = std::max(MinRequiredSize, ElementMinRequiredSize);
656 }
657 else {
658 // not an int constant element
659 return MaxRequiredSize;
660 }
661 }
662 return MinRequiredSize;
663 }
664
665 if (const auto* CI = dyn_cast<ConstantInt>(Val)) {
666 isSigned = CI->getValue().isNegative();
667 return CI->getValue().getMinSignedBits() - 1;
668 }
669
670 if (const auto* Cast = dyn_cast<SExtInst>(Val)) {
671 isSigned = true;
672 return Cast->getSrcTy()->getScalarSizeInBits() - 1;
673 }
674
675 if (const auto* Cast = dyn_cast<ZExtInst>(Val)) {
676 isSigned = false;
677 return Cast->getSrcTy()->getScalarSizeInBits();
678 }
679
680 isSigned = false;
681 return Val->getType()->getScalarSizeInBits();
682 }
683
684 bool isStridedAccess(const SCEV *Ptr) {
685 return Ptr && isa<SCEVAddRecExpr>(Ptr);
686 }
687
688 const SCEVConstant *getConstantStrideStep(ScalarEvolution *SE,
689 const SCEV *Ptr) {
690 if (!isStridedAccess(Ptr))
691 return nullptr;
692 const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ptr);
693 return dyn_cast<SCEVConstant>(AddRec->getStepRecurrence(*SE));
694 }
695
696 bool isConstantStridedAccessLessThan(ScalarEvolution *SE, const SCEV *Ptr,
697 int64_t MergeDistance) {
698 const SCEVConstant *Step = getConstantStrideStep(SE, Ptr);
699 if (!Step)
700 return false;
701 APInt StrideVal = Step->getAPInt();
702 if (StrideVal.getBitWidth() > 64)
703 return false;
704 // FIXME: Need to take absolute value for negative stride case.
705 return StrideVal.getSExtValue() < MergeDistance;
706 }
707};
708
709/// CRTP base class for use as a mix-in that aids implementing
710/// a TargetTransformInfo-compatible class.
711template <typename T>
712class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase {
713private:
714 typedef TargetTransformInfoImplBase BaseT;
715
716protected:
717 explicit TargetTransformInfoImplCRTPBase(const DataLayout &DL) : BaseT(DL) {}
718
719public:
720 using BaseT::getCallCost;
721
722 unsigned getCallCost(const Function *F, int NumArgs, const User *U) {
723 assert(F && "A concrete function must be provided to this routine.");
724
725 if (NumArgs < 0)
726 // Set the argument number to the number of explicit arguments in the
727 // function.
728 NumArgs = F->arg_size();
729
730 if (Intrinsic::ID IID = F->getIntrinsicID()) {
731 FunctionType *FTy = F->getFunctionType();
732 SmallVector<Type *, 8> ParamTys(FTy->param_begin(), FTy->param_end());
733 return static_cast<T *>(this)
734 ->getIntrinsicCost(IID, FTy->getReturnType(), ParamTys, U);
735 }
736
737 if (!static_cast<T *>(this)->isLoweredToCall(F))
738 return TTI::TCC_Basic; // Give a basic cost if it will be lowered
739 // directly.
740
741 return static_cast<T *>(this)->getCallCost(F->getFunctionType(), NumArgs, U);
742 }
743
744 unsigned getCallCost(const Function *F, ArrayRef<const Value *> Arguments,
745 const User *U) {
746 // Simply delegate to generic handling of the call.
747 // FIXME: We should use instsimplify or something else to catch calls which
748 // will constant fold with these arguments.
749 return static_cast<T *>(this)->getCallCost(F, Arguments.size(), U);
750 }
751
752 using BaseT::getGEPCost;
753
754 int getGEPCost(Type *PointeeType, const Value *Ptr,
755 ArrayRef<const Value *> Operands) {
756 assert(PointeeType && Ptr && "can't get GEPCost of nullptr");
757 // TODO: will remove this when pointers have an opaque type.
758 assert(Ptr->getType()->getScalarType()->getPointerElementType() ==
759                PointeeType &&
760            "explicit pointee type doesn't match operand's pointee type");
761 auto *BaseGV = dyn_cast<GlobalValue>(Ptr->stripPointerCasts());
762 bool HasBaseReg = (BaseGV == nullptr);
763
764 auto PtrSizeBits = DL.getPointerTypeSizeInBits(Ptr->getType());
765 APInt BaseOffset(PtrSizeBits, 0);
766 int64_t Scale = 0;
767
768 auto GTI = gep_type_begin(PointeeType, Operands);
769 Type *TargetType = nullptr;
770
771 // Handle the case where the GEP instruction has a single operand,
772 // the basis, therefore TargetType is a nullptr.
773 if (Operands.empty())
774 return !BaseGV ? TTI::TCC_Free : TTI::TCC_Basic;
775
776 for (auto I = Operands.begin(); I != Operands.end(); ++I, ++GTI) {
777 TargetType = GTI.getIndexedType();
778 // We assume that the cost of Scalar GEP with constant index and the
779 // cost of Vector GEP with splat constant index are the same.
780 const ConstantInt *ConstIdx = dyn_cast<ConstantInt>(*I);
781 if (!ConstIdx)
782 if (auto Splat = getSplatValue(*I))
783 ConstIdx = dyn_cast<ConstantInt>(Splat);
784 if (StructType *STy = GTI.getStructTypeOrNull()) {
785 // For structures the index is always splat or scalar constant
786 assert(ConstIdx && "Unexpected GEP index");
787 uint64_t Field = ConstIdx->getZExtValue();
788 BaseOffset += DL.getStructLayout(STy)->getElementOffset(Field);
789 } else {
790 int64_t ElementSize = DL.getTypeAllocSize(GTI.getIndexedType());
791 if (ConstIdx) {
792 BaseOffset +=
793 ConstIdx->getValue().sextOrTrunc(PtrSizeBits) * ElementSize;
794 } else {
795 // Needs scale register.
796 if (Scale != 0)
797 // No addressing mode takes two scale registers.
798 return TTI::TCC_Basic;
799 Scale = ElementSize;
800 }
801 }
802 }
803
804 if (static_cast<T *>(this)->isLegalAddressingMode(
805 TargetType, const_cast<GlobalValue *>(BaseGV),
806 BaseOffset.sextOrTrunc(64).getSExtValue(), HasBaseReg, Scale,
807 Ptr->getType()->getPointerAddressSpace()))
808 return TTI::TCC_Free;
809 return TTI::TCC_Basic;
810 }
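
A self-contained sketch of the BaseOffset/Scale accumulation the loop above performs, worked for an assumed struct layout and GEP index list that are not taken from the analyzed code:

#include <cstdint>
#include <cstdio>

// Assumed layout: struct S { int a; double b; int c[4]; };
// element offsets a=0, b=8, c=16, sizeof(S)=24 (illustrative values).
// GEP indices over an S*: 0, 2, %i  where %i is not a compile-time constant.
int main() {
  int64_t BaseOffset = 0;
  int64_t Scale = 0;

  BaseOffset += 0 * 24;   // constant index 0 over S: 0 * sizeof(S)
  BaseOffset += 16;       // struct field index 2: StructLayout element offset
  Scale = 4;              // variable array index: needs a scale register of
                          // the element alloc size, sizeof(int)

  // getGEPCost now asks isLegalAddressingMode(BaseReg + 16 + 4 * %i, ...)
  // and returns TCC_Free if the target supports that mode, TCC_Basic if not.
  std::printf("BaseOffset=%lld Scale=%lld\n", (long long)BaseOffset,
              (long long)Scale);
  return 0;
}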
811
812 unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
813 ArrayRef<Type *> ParamTys, const User *U) {
814 switch (IID) {
815 default:
816 // Intrinsics rarely (if ever) have normal argument setup constraints.
817 // Model them as having a basic instruction cost.
818 return TTI::TCC_Basic;
819
820 // TODO: other libc intrinsics.
821 case Intrinsic::memcpy:
822 return static_cast<T *>(this)->getMemcpyCost(dyn_cast<Instruction>(U));
823
824 case Intrinsic::annotation:
825 case Intrinsic::assume:
826 case Intrinsic::sideeffect:
827 case Intrinsic::dbg_declare:
828 case Intrinsic::dbg_value:
829 case Intrinsic::dbg_label:
830 case Intrinsic::invariant_start:
831 case Intrinsic::invariant_end:
832 case Intrinsic::launder_invariant_group:
833 case Intrinsic::strip_invariant_group:
834 case Intrinsic::is_constant:
835 case Intrinsic::lifetime_start:
836 case Intrinsic::lifetime_end:
837 case Intrinsic::objectsize:
838 case Intrinsic::ptr_annotation:
839 case Intrinsic::var_annotation:
840 case Intrinsic::experimental_gc_result:
841 case Intrinsic::experimental_gc_relocate:
842 case Intrinsic::coro_alloc:
843 case Intrinsic::coro_begin:
844 case Intrinsic::coro_free:
845 case Intrinsic::coro_end:
846 case Intrinsic::coro_frame:
847 case Intrinsic::coro_size:
848 case Intrinsic::coro_suspend:
849 case Intrinsic::coro_param:
850 case Intrinsic::coro_subfn_addr:
851 // These intrinsics don't actually represent code after lowering.
852 return TTI::TCC_Free;
853 }
854 }
855
856 unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
857 ArrayRef<const Value *> Arguments, const User *U) {
858 // Delegate to the generic intrinsic handling code. This mostly provides an
859 // opportunity for targets to (for example) special case the cost of
860 // certain intrinsics based on constants used as arguments.
861 SmallVector<Type *, 8> ParamTys;
862 ParamTys.reserve(Arguments.size());
863 for (unsigned Idx = 0, Size = Arguments.size(); Idx != Size; ++Idx)
864 ParamTys.push_back(Arguments[Idx]->getType());
865 return static_cast<T *>(this)->getIntrinsicCost(IID, RetTy, ParamTys, U);
866 }
867
868 unsigned getUserCost(const User *U, ArrayRef<const Value *> Operands) {
869 if (isa<PHINode>(U))
4: Assuming 'U' is not a 'PHINode'
5: Taking false branch
870 return TTI::TCC_Free; // Model all PHI nodes as free.
871
872 if (isa<ExtractValueInst>(U))
6: Assuming 'U' is not a 'ExtractValueInst'
7: Taking false branch
873 return TTI::TCC_Free; // Model all ExtractValue nodes as free.
874
875 if (isa<FreezeInst>(U))
8: Assuming 'U' is not a 'FreezeInst'
9: Taking false branch
876 return TTI::TCC_Free; // Model all Freeze nodes as free.
877
878 // Static alloca doesn't generate target instructions.
879 if (auto *A = dyn_cast<AllocaInst>(U))
10: Assuming 'U' is not a 'AllocaInst'
10.1: 'A' is null
11: Taking false branch
880 if (A->isStaticAlloca())
881 return TTI::TCC_Free;
882
883 if (const GEPOperator *GEP = dyn_cast<GEPOperator>(U)) {
12: Assuming 'U' is not a 'GEPOperator'
12.1: 'GEP' is null
13: Taking false branch
884 return static_cast<T *>(this)->getGEPCost(GEP->getSourceElementType(),
885 GEP->getPointerOperand(),
886 Operands.drop_front());
887 }
888
889 if (auto CS = ImmutableCallSite(U)) {
14: Calling 'CallSiteBase::operator bool'
28: Returning from 'CallSiteBase::operator bool'
29: Taking false branch
890 const Function *F = CS.getCalledFunction();
891 if (!F) {
892 // Just use the called value type.
893 Type *FTy = CS.getCalledValue()->getType()->getPointerElementType();
894 return static_cast<T *>(this)
895 ->getCallCost(cast<FunctionType>(FTy), CS.arg_size(), U);
896 }
897
898 SmallVector<const Value *, 8> Arguments(CS.arg_begin(), CS.arg_end());
899 return static_cast<T *>(this)->getCallCost(F, Arguments, U);
900 }
901
902 if (isa<SExtInst>(U) || isa<ZExtInst>(U) || isa<FPExtInst>(U))
30: Assuming 'U' is not a 'SExtInst'
31: Assuming 'U' is not a 'ZExtInst'
32: Assuming 'U' is not a 'FPExtInst'
33: Taking false branch
903 // The old behaviour of generally treating extensions of icmp to be free
904 // has been removed. A target that needs it should override getUserCost().
905 return static_cast<T *>(this)->getExtCost(cast<Instruction>(U),
906 Operands.back());
907
908 return static_cast<T *>(this)->getOperationCost(
44: Calling 'BasicTTIImplBase::getOperationCost'
909 Operator::getOpcode(U), U->getType(),
34: Calling 'Operator::getOpcode'
40: Returning from 'Operator::getOpcode'
910 U->getNumOperands() == 1 ? U->getOperand(0)->getType() : nullptr);
41: Assuming the condition is false
42: '?' condition is false
43: Passing null pointer value via 3rd parameter 'OpTy'
911 }
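
The path notes above (events 41-43) show the condition this report is about: when U does not have exactly one operand, getUserCost passes nullptr as the third argument, and the analyzer reports that BasicTTIImplBase::getOperationCost dereferences that pointer at BasicTTIImpl.h:430. A minimal sketch of the flagged pattern and of the kind of guard that would avoid it; the function body below is illustrative and is not the actual code in BasicTTIImpl.h:

#include "llvm/Analysis/TargetTransformInfo.h"
using namespace llvm;

// Illustrative only: a callee that touches OpTy unconditionally reproduces
// the warning, since the caller above can pass OpTy == nullptr.
static unsigned getOperationCostSketch(unsigned /*Opcode*/, Type * /*Ty*/,
                                        Type *OpTy) {
  if (!OpTy)                       // guard for the nullptr path (event 43)
    return TargetTransformInfo::TCC_Basic;
  return OpTy->isVectorTy() ? 2 * TargetTransformInfo::TCC_Basic
                            : TargetTransformInfo::TCC_Basic;
}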
912
913 int getInstructionLatency(const Instruction *I) {
914 SmallVector<const Value *, 4> Operands(I->value_op_begin(),
915 I->value_op_end());
916 if (getUserCost(I, Operands) == TTI::TCC_Free)
917 return 0;
918
919 if (isa<LoadInst>(I))
920 return 4;
921
922 Type *DstTy = I->getType();
923
924 // Usually an intrinsic is a simple instruction.
925 // A real function call is much slower.
926 if (auto *CI = dyn_cast<CallInst>(I)) {
927 const Function *F = CI->getCalledFunction();
928 if (!F || static_cast<T *>(this)->isLoweredToCall(F))
929 return 40;
930 // Some intrinsics return a value and a flag, we use the value type
931 // to decide its latency.
932 if (StructType* StructTy = dyn_cast<StructType>(DstTy))
933 DstTy = StructTy->getElementType(0);
934 // Fall through to simple instructions.
935 }
936
937 if (VectorType *VectorTy = dyn_cast<VectorType>(DstTy))
938 DstTy = VectorTy->getElementType();
939 if (DstTy->isFloatingPointTy())
940 return 3;
941
942 return 1;
943 }
944};
945}
946
947#endif
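
For orientation, a hedged sketch of how a target-specific TTI builds on the CRTP base defined above, using an invented target name rather than the AMDGPU implementation this report was generated from; only the hooks a target cares about are overridden, everything else keeps the conservative defaults:

#include "llvm/Analysis/TargetTransformInfoImpl.h"
using namespace llvm;

// Hypothetical target TTI (illustrative). Real targets typically layer more
// functionality on top, e.g. via BasicTTIImplBase, but the override pattern
// is the same: shadow a default hook with a target-specific answer.
class MyTargetTTIImpl final
    : public TargetTransformInfoImplCRTPBase<MyTargetTTIImpl> {
  using BaseT = TargetTransformInfoImplCRTPBase<MyTargetTTIImpl>;

public:
  explicit MyTargetTTIImpl(const DataLayout &DL) : BaseT(DL) {}

  // Overridden defaults (the values are made up for the sketch).
  unsigned getRegisterBitWidth(bool Vector) const { return Vector ? 256 : 64; }
  unsigned getMaxInterleaveFactor(unsigned VF) { return 4; }
  bool enableInterleavedAccessVectorization() { return true; }
};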

/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/IR/CallSite.h

1//===- CallSite.h - Abstract Call & Invoke instrs ---------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the CallSite class, which is a handy wrapper for code that
10// wants to treat Call, Invoke and CallBr instructions in a generic way. When
11// in non-mutation context (e.g. an analysis) ImmutableCallSite should be used.
12// Finally, when some degree of customization is necessary between these two
13// extremes, CallSiteBase<> can be supplied with fine-tuned parameters.
14//
15// NOTE: These classes are supposed to have "value semantics". So they should be
16// passed by value, not by reference; they should not be "new"ed or "delete"d.
17// They are efficiently copyable, assignable and constructable, with cost
18// equivalent to copying a pointer (notice that they have only a single data
19// member). The internal representation carries a flag which indicates which of
20// the three variants is enclosed. This allows for cheaper checks when various
21// accessors of CallSite are employed.
22//
23//===----------------------------------------------------------------------===//
24
25#ifndef LLVM_IR_CALLSITE_H
26#define LLVM_IR_CALLSITE_H
27
28#include "llvm/ADT/Optional.h"
29#include "llvm/ADT/PointerIntPair.h"
30#include "llvm/ADT/iterator_range.h"
31#include "llvm/IR/Attributes.h"
32#include "llvm/IR/CallingConv.h"
33#include "llvm/IR/Function.h"
34#include "llvm/IR/InstrTypes.h"
35#include "llvm/IR/Instruction.h"
36#include "llvm/IR/Instructions.h"
37#include "llvm/IR/Use.h"
38#include "llvm/IR/User.h"
39#include "llvm/IR/Value.h"
40#include "llvm/Support/Casting.h"
41#include <cassert>
42#include <cstdint>
43#include <iterator>
44
45namespace llvm {
46
47namespace Intrinsic {
48typedef unsigned ID;
49}
50
51template <typename FunTy = const Function, typename BBTy = const BasicBlock,
52 typename ValTy = const Value, typename UserTy = const User,
53 typename UseTy = const Use, typename InstrTy = const Instruction,
54 typename CallTy = const CallInst,
55 typename InvokeTy = const InvokeInst,
56 typename CallBrTy = const CallBrInst,
57 typename IterTy = User::const_op_iterator>
58class CallSiteBase {
59protected:
60 PointerIntPair<InstrTy *, 2, int> I;
61
62 CallSiteBase() = default;
63 CallSiteBase(CallTy *CI) : I(CI, 1) { assert(CI); }
64 CallSiteBase(InvokeTy *II) : I(II, 0) { assert(II); }
65 CallSiteBase(CallBrTy *CBI) : I(CBI, 2) { assert(CBI); }
66 explicit CallSiteBase(ValTy *II) { *this = get(II); }
67
68private:
69 /// This static method is like a constructor. It will create an appropriate
70 /// call site for a Call, Invoke or CallBr instruction, but it can also create
71 /// a null initialized CallSiteBase object for something which is NOT a call
72 /// site.
73 static CallSiteBase get(ValTy *V) {
74 if (InstrTy *II = dyn_cast<InstrTy>(V)) {
75 if (II->getOpcode() == Instruction::Call)
76 return CallSiteBase(static_cast<CallTy*>(II));
77 if (II->getOpcode() == Instruction::Invoke)
78 return CallSiteBase(static_cast<InvokeTy*>(II));
79 if (II->getOpcode() == Instruction::CallBr)
80 return CallSiteBase(static_cast<CallBrTy *>(II));
81 }
82 return CallSiteBase();
83 }
84
85public:
86 /// Return true if a CallInst is enclosed.
87 bool isCall() const { return I.getInt() == 1; }
88
89 /// Return true if a InvokeInst is enclosed. !I.getInt() may also signify a
90 /// NULL instruction pointer, so check that.
91 bool isInvoke() const { return getInstruction() && I.getInt() == 0; }
92
93 /// Return true if a CallBrInst is enclosed.
94 bool isCallBr() const { return I.getInt() == 2; }
95
96 InstrTy *getInstruction() const { return I.getPointer(); }
97 InstrTy *operator->() const { return I.getPointer(); }
98 explicit operator bool() const { return I.getPointer(); }
15: Calling 'PointerIntPair::getPointer'
26: Returning from 'PointerIntPair::getPointer'
27: Returning zero, which participates in a condition later
99
100 /// Get the basic block containing the call site.
101 BBTy* getParent() const { return getInstruction()->getParent(); }
102
103 /// Return the pointer to function that is being called.
104 ValTy *getCalledValue() const {
105 assert(getInstruction() && "Not a call, invoke or callbr instruction!");
106 return *getCallee();
107 }
108
109 /// Return the function being called if this is a direct call, otherwise
110 /// return null (if it's an indirect call).
111 FunTy *getCalledFunction() const {
112 return dyn_cast<FunTy>(getCalledValue());
113 }
114
115 /// Return true if the callsite is an indirect call.
116 bool isIndirectCall() const {
117 const Value *V = getCalledValue();
118 if (!V)
119 return false;
120 if (isa<FunTy>(V) || isa<Constant>(V))
121 return false;
122 if (const CallBase *CB = dyn_cast<CallBase>(getInstruction()))
123 if (CB->isInlineAsm())
124 return false;
125 return true;
126 }
127
128 /// Set the callee to the specified value. Unlike the function of the same
129 /// name on CallBase, does not modify the type!
130 void setCalledFunction(Value *V) {
131 assert(getInstruction() && "Not a call, callbr, or invoke instruction!");
132 assert(cast<PointerType>(V->getType())->getElementType() ==
133                cast<CallBase>(getInstruction())->getFunctionType() &&
134            "New callee type does not match FunctionType on call");
135 *getCallee() = V;
136 }
137
138 /// Return the intrinsic ID of the intrinsic called by this CallSite,
139 /// or Intrinsic::not_intrinsic if the called function is not an
140 /// intrinsic, or if this CallSite is an indirect call.
141 Intrinsic::ID getIntrinsicID() const {
142 if (auto *F = getCalledFunction())
143 return F->getIntrinsicID();
144 // Don't use Intrinsic::not_intrinsic, as it will require pulling
145 // Intrinsics.h into every header that uses CallSite.
146 return static_cast<Intrinsic::ID>(0);
147 }
148
149 /// Determine whether the passed iterator points to the callee operand's Use.
150 bool isCallee(Value::const_user_iterator UI) const {
151 return isCallee(&UI.getUse());
152 }
153
154 /// Determine whether this Use is the callee operand's Use.
155 bool isCallee(const Use *U) const { return getCallee() == U; }
156
157 /// Determine whether the passed iterator points to an argument operand.
158 bool isArgOperand(Value::const_user_iterator UI) const {
159 return isArgOperand(&UI.getUse());
160 }
161
162 /// Determine whether the passed use points to an argument operand.
163 bool isArgOperand(const Use *U) const {
164 assert(getInstruction() == U->getUser());
165 return arg_begin() <= U && U < arg_end();
166 }
167
168 /// Determine whether the passed iterator points to a bundle operand.
169 bool isBundleOperand(Value::const_user_iterator UI) const {
170 return isBundleOperand(&UI.getUse());
171 }
172
173 /// Determine whether the passed use points to a bundle operand.
174 bool isBundleOperand(const Use *U) const {
175 assert(getInstruction() == U->getUser());
176 if (!hasOperandBundles())
177 return false;
178 unsigned OperandNo = U - (*this)->op_begin();
179 return getBundleOperandsStartIndex() <= OperandNo &&
180 OperandNo < getBundleOperandsEndIndex();
181 }
182
183 /// Determine whether the passed iterator points to a data operand.
184 bool isDataOperand(Value::const_user_iterator UI) const {
185 return isDataOperand(&UI.getUse());
186 }
187
188 /// Determine whether the passed use points to a data operand.
189 bool isDataOperand(const Use *U) const {
190 return data_operands_begin() <= U && U < data_operands_end();
191 }
192
193 ValTy *getArgument(unsigned ArgNo) const {
194 assert(arg_begin() + ArgNo < arg_end() && "Argument # out of range!");
195 return *(arg_begin() + ArgNo);
196 }
197
198 void setArgument(unsigned ArgNo, Value* newVal) {
199 assert(getInstruction() && "Not a call, invoke or callbr instruction!");
200 assert(arg_begin() + ArgNo < arg_end() && "Argument # out of range!");
201 getInstruction()->setOperand(ArgNo, newVal);
202 }
203
204 /// Given a value use iterator, returns the argument that corresponds to it.
205 /// Iterator must actually correspond to an argument.
206 unsigned getArgumentNo(Value::const_user_iterator I) const {
207 return getArgumentNo(&I.getUse());
208 }
209
210 /// Given a use for an argument, get the argument number that corresponds to
211 /// it.
212 unsigned getArgumentNo(const Use *U) const {
213 assert(getInstruction() && "Not a call, invoke or callbr instruction!");
214 assert(isArgOperand(U) && "Argument # out of range!");
215 return U - arg_begin();
216 }
217
218 /// The type of iterator to use when looping over actual arguments at this
219 /// call site.
220 using arg_iterator = IterTy;
221
222 iterator_range<IterTy> args() const {
223 return make_range(arg_begin(), arg_end());
224 }
225 bool arg_empty() const { return arg_end() == arg_begin(); }
226 unsigned arg_size() const { return unsigned(arg_end() - arg_begin()); }
227
228 /// Given a value use iterator, return the data operand corresponding to it.
229 /// Iterator must actually correspond to a data operand.
230 unsigned getDataOperandNo(Value::const_user_iterator UI) const {
231 return getDataOperandNo(&UI.getUse());
232 }
233
234 /// Given a use for a data operand, get the data operand number that
235 /// corresponds to it.
236 unsigned getDataOperandNo(const Use *U) const {
237 assert(getInstruction() && "Not a call, invoke or callbr instruction!");
238 assert(isDataOperand(U) && "Data operand # out of range!");
239 return U - data_operands_begin();
240 }
241
242 /// Type of iterator to use when looping over data operands at this call site
243 /// (see below).
244 using data_operand_iterator = IterTy;
245
246 /// data_operands_begin/data_operands_end - Return iterators iterating over
247 /// the call / invoke / callbr argument list and bundle operands. For invokes,
248 /// this is the set of instruction operands except the invoke target and the
249 /// two successor blocks; for calls this is the set of instruction operands
250 /// except the call target; for callbrs the number of labels to skip must be
251 /// determined first.
252
253 IterTy data_operands_begin() const {
254 assert(getInstruction() && "Not a call or invoke instruction!");
255 return cast<CallBase>(getInstruction())->data_operands_begin();
256 }
257 IterTy data_operands_end() const {
258 assert(getInstruction() && "Not a call or invoke instruction!");
259 return cast<CallBase>(getInstruction())->data_operands_end();
260 }
261 iterator_range<IterTy> data_ops() const {
262 return make_range(data_operands_begin(), data_operands_end());
263 }
264 bool data_operands_empty() const {
265 return data_operands_end() == data_operands_begin();
266 }
267 unsigned data_operands_size() const {
268 return std::distance(data_operands_begin(), data_operands_end());
269 }
270
271 /// Return the type of the instruction that generated this call site.
272 Type *getType() const { return (*this)->getType(); }
273
274 /// Return the caller function for this call site.
275 FunTy *getCaller() const { return (*this)->getParent()->getParent(); }
276
277 /// Tests if this call site must be tail call optimized. Only a CallInst can
278 /// be tail call optimized.
279 bool isMustTailCall() const {
280 return isCall() && cast<CallInst>(getInstruction())->isMustTailCall();
281 }
282
283 /// Tests if this call site is marked as a tail call.
284 bool isTailCall() const {
285 return isCall() && cast<CallInst>(getInstruction())->isTailCall();
286 }
287
288#define CALLSITE_DELEGATE_GETTER(METHOD) \
289 InstrTy *II = getInstruction(); \
290 return isCall() ? cast<CallInst>(II)->METHOD \
291 : isCallBr() ? cast<CallBrInst>(II)->METHOD \
292 : cast<InvokeInst>(II)->METHOD
293
294#define CALLSITE_DELEGATE_SETTER(METHOD) \
295 InstrTy *II = getInstruction(); \
296 if (isCall()) \
297 cast<CallInst>(II)->METHOD; \
298 else if (isCallBr()) \
299 cast<CallBrInst>(II)->METHOD; \
300 else \
301 cast<InvokeInst>(II)->METHOD
302
303 unsigned getNumArgOperands() const {
304 CALLSITE_DELEGATE_GETTER(getNumArgOperands());
305 }
306
307 ValTy *getArgOperand(unsigned i) const {
308 CALLSITE_DELEGATE_GETTER(getArgOperand(i));
309 }
310
311 ValTy *getReturnedArgOperand() const {
312 CALLSITE_DELEGATE_GETTER(getReturnedArgOperand());
313 }
314
315 bool isInlineAsm() const {
316 return cast<CallBase>(getInstruction())->isInlineAsm();
317 }
318
319 /// Get the calling convention of the call.
320 CallingConv::ID getCallingConv() const {
321 CALLSITE_DELEGATE_GETTER(getCallingConv());
322 }
323 /// Set the calling convention of the call.
324 void setCallingConv(CallingConv::ID CC) {
325 CALLSITE_DELEGATE_SETTER(setCallingConv(CC));
326 }
327
328 FunctionType *getFunctionType() const {
329 CALLSITE_DELEGATE_GETTER(getFunctionType());
330 }
331
332 void mutateFunctionType(FunctionType *Ty) const {
333 CALLSITE_DELEGATE_SETTER(mutateFunctionType(Ty));
334 }
335
336 /// Get the parameter attributes of the call.
337 AttributeList getAttributes() const {
338 CALLSITE_DELEGATE_GETTER(getAttributes());
339 }
340 /// Set the parameter attributes of the call.
341 void setAttributes(AttributeList PAL) {
342 CALLSITE_DELEGATE_SETTER(setAttributes(PAL));
343 }
344
345 void addAttribute(unsigned i, Attribute::AttrKind Kind) {
346 CALLSITE_DELEGATE_SETTER(addAttribute(i, Kind));
347 }
348
349 void addAttribute(unsigned i, Attribute Attr) {
350 CALLSITE_DELEGATE_SETTER(addAttribute(i, Attr));
351 }
352
353 void addParamAttr(unsigned ArgNo, Attribute::AttrKind Kind) {
354 CALLSITE_DELEGATE_SETTER(addParamAttr(ArgNo, Kind));
355 }
356
357 void removeAttribute(unsigned i, Attribute::AttrKind Kind) {
358 CALLSITE_DELEGATE_SETTER(removeAttribute(i, Kind));
359 }
360
361 void removeAttribute(unsigned i, StringRef Kind) {
362 CALLSITE_DELEGATE_SETTER(removeAttribute(i, Kind));
363 }
364
365 void removeParamAttr(unsigned ArgNo, Attribute::AttrKind Kind) {
366 CALLSITE_DELEGATE_SETTER(removeParamAttr(ArgNo, Kind));
367 }
368
369 /// Return true if this function has the given attribute.
370 bool hasFnAttr(Attribute::AttrKind Kind) const {
371 CALLSITE_DELEGATE_GETTER(hasFnAttr(Kind));
372 }
373
374 /// Return true if this function has the given attribute.
375 bool hasFnAttr(StringRef Kind) const {
376 CALLSITE_DELEGATE_GETTER(hasFnAttr(Kind));
377 }
378
379 /// Return true if this return value has the given attribute.
380 bool hasRetAttr(Attribute::AttrKind Kind) const {
381 CALLSITE_DELEGATE_GETTER(hasRetAttr(Kind));
382 }
383
384 /// Return true if the call or the callee has the given attribute.
385 bool paramHasAttr(unsigned ArgNo, Attribute::AttrKind Kind) const {
386 CALLSITE_DELEGATE_GETTER(paramHasAttr(ArgNo, Kind));
387 }
388
389 Attribute getAttribute(unsigned i, Attribute::AttrKind Kind) const {
390 CALLSITE_DELEGATE_GETTER(getAttribute(i, Kind));
391 }
392
393 Attribute getAttribute(unsigned i, StringRef Kind) const {
394 CALLSITE_DELEGATE_GETTER(getAttribute(i, Kind));
395 }
396
397 /// Return true if the data operand at index \p i directly or indirectly has
398 /// the attribute \p A.
399 ///
400 /// Normal call, invoke or callbr arguments have per operand attributes, as
401 /// specified in the attribute set attached to this instruction, while operand
402 /// bundle operands may have some attributes implied by the type of its
403 /// containing operand bundle.
404 bool dataOperandHasImpliedAttr(unsigned i, Attribute::AttrKind Kind) const {
405 CALLSITE_DELEGATE_GETTER(dataOperandHasImpliedAttr(i, Kind));
406 }
407
408 /// Extract the alignment of the return value.
409 unsigned getRetAlignment() const {
410 CALLSITE_DELEGATE_GETTER(getRetAlignment());
411 }
412
413 /// Extract the alignment for a call or parameter (0=unknown).
414 unsigned getParamAlignment(unsigned ArgNo) const {
415 CALLSITE_DELEGATE_GETTER(getParamAlignment(ArgNo));
416 }
417
418 /// Extract the byval type for a call or parameter (nullptr=unknown).
419 Type *getParamByValType(unsigned ArgNo) const {
420 CALLSITE_DELEGATE_GETTER(getParamByValType(ArgNo));
421 }
422
423 /// Extract the number of dereferenceable bytes for a call or parameter
424 /// (0=unknown).
425 uint64_t getDereferenceableBytes(unsigned i) const {
426 CALLSITE_DELEGATE_GETTER(getDereferenceableBytes(i));
427 }
428
429 /// Extract the number of dereferenceable_or_null bytes for a call or
430 /// parameter (0=unknown).
431 uint64_t getDereferenceableOrNullBytes(unsigned i) const {
432 CALLSITE_DELEGATE_GETTER(getDereferenceableOrNullBytes(i));
433 }
434
435 /// Determine if the return value is marked with NoAlias attribute.
436 bool returnDoesNotAlias() const {
437 CALLSITE_DELEGATE_GETTER(returnDoesNotAlias());
438 }
439
440 /// Return true if the call should not be treated as a call to a builtin.
441 bool isNoBuiltin() const {
442 CALLSITE_DELEGATE_GETTER(isNoBuiltin());
443 }
444
445 /// Return true if the call requires strict floating point semantics.
446 bool isStrictFP() const {
447 CALLSITE_DELEGATE_GETTER(isStrictFP());
448 }
449
450 /// Return true if the call should not be inlined.
451 bool isNoInline() const {
452 CALLSITE_DELEGATE_GETTER(isNoInline());
453 }
454 void setIsNoInline(bool Value = true) {
455 CALLSITE_DELEGATE_SETTER(setIsNoInline(Value));
456 }
457
458 /// Determine if the call does not access memory.
459 bool doesNotAccessMemory() const {
460 CALLSITE_DELEGATE_GETTER(doesNotAccessMemory());
461 }
462 void setDoesNotAccessMemory() {
463 CALLSITE_DELEGATE_SETTER(setDoesNotAccessMemory());
464 }
465
466 /// Determine if the call does not access or only reads memory.
467 bool onlyReadsMemory() const {
468 CALLSITE_DELEGATE_GETTER(onlyReadsMemory());
469 }
470 void setOnlyReadsMemory() {
471 CALLSITE_DELEGATE_SETTER(setOnlyReadsMemory());
472 }
473
474 /// Determine if the call does not access or only writes memory.
475 bool doesNotReadMemory() const {
476 CALLSITE_DELEGATE_GETTER(doesNotReadMemory());
477 }
478 void setDoesNotReadMemory() {
479 CALLSITE_DELEGATE_SETTER(setDoesNotReadMemory());
480 }
481
482 /// Determine if the call can access memory only using pointers based
483 /// on its arguments.
484 bool onlyAccessesArgMemory() const {
485 CALLSITE_DELEGATE_GETTER(onlyAccessesArgMemory());
486 }
487 void setOnlyAccessesArgMemory() {
488 CALLSITE_DELEGATE_SETTER(setOnlyAccessesArgMemory());
489 }
490
491 /// Determine if the function may only access memory that is
492 /// inaccessible from the IR.
493 bool onlyAccessesInaccessibleMemory() const {
494 CALLSITE_DELEGATE_GETTER(onlyAccessesInaccessibleMemory());
495 }
496 void setOnlyAccessesInaccessibleMemory() {
497 CALLSITE_DELEGATE_SETTER(setOnlyAccessesInaccessibleMemory());
498 }
499
500 /// Determine if the function may only access memory that is
501 /// either inaccessible from the IR or pointed to by its arguments.
502 bool onlyAccessesInaccessibleMemOrArgMem() const {
503 CALLSITE_DELEGATE_GETTER(onlyAccessesInaccessibleMemOrArgMem());
504 }
505 void setOnlyAccessesInaccessibleMemOrArgMem() {
506 CALLSITE_DELEGATE_SETTER(setOnlyAccessesInaccessibleMemOrArgMem());
507 }
508
509 /// Determine if the call cannot return.
510 bool doesNotReturn() const {
511 CALLSITE_DELEGATE_GETTER(doesNotReturn());
512 }
513 void setDoesNotReturn() {
514 CALLSITE_DELEGATE_SETTER(setDoesNotReturn());
515 }
516
517 /// Determine if the call cannot unwind.
518 bool doesNotThrow() const {
519 CALLSITE_DELEGATE_GETTER(doesNotThrow());
520 }
521 void setDoesNotThrow() {
522 CALLSITE_DELEGATE_SETTER(setDoesNotThrow());
523 }
524
525 /// Determine if the call can be duplicated.
526 bool cannotDuplicate() const {
527 CALLSITE_DELEGATE_GETTER(cannotDuplicate());
528 }
529 void setCannotDuplicate() {
530 CALLSITE_DELEGATE_SETTER(setCannotDuplicate());
531 }
532
533 /// Determine if the call is convergent.
534 bool isConvergent() const {
535 CALLSITE_DELEGATE_GETTER(isConvergent());
536 }
537 void setConvergent() {
538 CALLSITE_DELEGATE_SETTER(setConvergent());
539 }
540 void setNotConvergent() {
541 CALLSITE_DELEGATE_SETTER(setNotConvergent());
542 }
543
544 unsigned getNumOperandBundles() const {
545 CALLSITE_DELEGATE_GETTER(getNumOperandBundles());
546 }
547
548 bool hasOperandBundles() const {
549 CALLSITE_DELEGATE_GETTER(hasOperandBundles());
550 }
551
552 unsigned getBundleOperandsStartIndex() const {
553 CALLSITE_DELEGATE_GETTER(getBundleOperandsStartIndex());
554 }
555
556 unsigned getBundleOperandsEndIndex() const {
557 CALLSITE_DELEGATE_GETTER(getBundleOperandsEndIndex());
558 }
559
560 unsigned getNumTotalBundleOperands() const {
561 CALLSITE_DELEGATE_GETTER(getNumTotalBundleOperands());
562 }
563
564 OperandBundleUse getOperandBundleAt(unsigned Index) const {
565 CALLSITE_DELEGATE_GETTER(getOperandBundleAt(Index));
566 }
567
568 Optional<OperandBundleUse> getOperandBundle(StringRef Name) const {
569 CALLSITE_DELEGATE_GETTER(getOperandBundle(Name));
570 }
571
572 Optional<OperandBundleUse> getOperandBundle(uint32_t ID) const {
573 CALLSITE_DELEGATE_GETTER(getOperandBundle(ID));
574 }
575
576 unsigned countOperandBundlesOfType(uint32_t ID) const {
577 CALLSITE_DELEGATE_GETTER(countOperandBundlesOfType(ID));
578 }
579
580 bool isBundleOperand(unsigned Idx) const {
581 CALLSITE_DELEGATE_GETTER(isBundleOperand(Idx));
582 }
583
584 IterTy arg_begin() const {
585 CALLSITE_DELEGATE_GETTER(arg_begin());
586 }
587
588 IterTy arg_end() const {
589 CALLSITE_DELEGATE_GETTER(arg_end());
590 }
591
592#undef CALLSITE_DELEGATE_GETTER
593#undef CALLSITE_DELEGATE_SETTER
594
595 void getOperandBundlesAsDefs(SmallVectorImpl<OperandBundleDef> &Defs) const {
596 // Since this is actually a getter that "looks like" a setter, don't use the
597 // above macros to avoid confusion.
598 cast<CallBase>(getInstruction())->getOperandBundlesAsDefs(Defs);
599 }
600
601 /// Determine whether this data operand is not captured.
602 bool doesNotCapture(unsigned OpNo) const {
603 return dataOperandHasImpliedAttr(OpNo + 1, Attribute::NoCapture);
604 }
605
606 /// Determine whether this argument is passed by value.
607 bool isByValArgument(unsigned ArgNo) const {
608 return paramHasAttr(ArgNo, Attribute::ByVal);
609 }
610
611 /// Determine whether this argument is passed in an alloca.
612 bool isInAllocaArgument(unsigned ArgNo) const {
613 return paramHasAttr(ArgNo, Attribute::InAlloca);
614 }
615
616 /// Determine whether this argument is passed by value or in an alloca.
617 bool isByValOrInAllocaArgument(unsigned ArgNo) const {
618 return paramHasAttr(ArgNo, Attribute::ByVal) ||
619 paramHasAttr(ArgNo, Attribute::InAlloca);
620 }
621
622 /// Determine if there is an inalloca argument. Only the last argument can
623 /// have the inalloca attribute.
624 bool hasInAllocaArgument() const {
625 return !arg_empty() && paramHasAttr(arg_size() - 1, Attribute::InAlloca);
626 }
627
628 bool doesNotAccessMemory(unsigned OpNo) const {
629 return dataOperandHasImpliedAttr(OpNo + 1, Attribute::ReadNone);
630 }
631
632 bool onlyReadsMemory(unsigned OpNo) const {
633 return dataOperandHasImpliedAttr(OpNo + 1, Attribute::ReadOnly) ||
634 dataOperandHasImpliedAttr(OpNo + 1, Attribute::ReadNone);
635 }
636
637 bool doesNotReadMemory(unsigned OpNo) const {
638 return dataOperandHasImpliedAttr(OpNo + 1, Attribute::WriteOnly) ||
639 dataOperandHasImpliedAttr(OpNo + 1, Attribute::ReadNone);
640 }
641
642 /// Return true if the return value is known to be not null.
643 /// This may be because it has the nonnull attribute, or because at least
644 /// one byte is dereferenceable and the pointer is in addrspace(0).
645 bool isReturnNonNull() const {
646 if (hasRetAttr(Attribute::NonNull))
647 return true;
648 else if (getDereferenceableBytes(AttributeList::ReturnIndex) > 0 &&
649 !NullPointerIsDefined(getCaller(),
650 getType()->getPointerAddressSpace()))
651 return true;
652
653 return false;
654 }
655
656 /// Returns true if this CallSite passes the given Value* as an argument to
657 /// the called function.
658 bool hasArgument(const Value *Arg) const {
659 for (arg_iterator AI = this->arg_begin(), E = this->arg_end(); AI != E;
660 ++AI)
661 if (AI->get() == Arg)
662 return true;
663 return false;
664 }
665
666private:
667 IterTy getCallee() const {
668 return cast<CallBase>(getInstruction())->op_end() - 1;
669 }
670};
671
672class CallSite : public CallSiteBase<Function, BasicBlock, Value, User, Use,
673 Instruction, CallInst, InvokeInst,
674 CallBrInst, User::op_iterator> {
675public:
676 CallSite() = default;
677 CallSite(CallSiteBase B) : CallSiteBase(B) {}
678 CallSite(CallInst *CI) : CallSiteBase(CI) {}
679 CallSite(InvokeInst *II) : CallSiteBase(II) {}
680 CallSite(CallBrInst *CBI) : CallSiteBase(CBI) {}
681 explicit CallSite(Instruction *II) : CallSiteBase(II) {}
682 explicit CallSite(Value *V) : CallSiteBase(V) {}
683
684 bool operator==(const CallSite &CS) const { return I == CS.I; }
685 bool operator!=(const CallSite &CS) const { return I != CS.I; }
686 bool operator<(const CallSite &CS) const {
687 return getInstruction() < CS.getInstruction();
688 }
689
690private:
691 friend struct DenseMapInfo<CallSite>;
692
693 User::op_iterator getCallee() const;
694};
695
696/// Establish a view to a call site for examination.
697class ImmutableCallSite : public CallSiteBase<> {
698public:
699 ImmutableCallSite() = default;
700 ImmutableCallSite(const CallInst *CI) : CallSiteBase(CI) {}
701 ImmutableCallSite(const InvokeInst *II) : CallSiteBase(II) {}
702 ImmutableCallSite(const CallBrInst *CBI) : CallSiteBase(CBI) {}
703 explicit ImmutableCallSite(const Instruction *II) : CallSiteBase(II) {}
704 explicit ImmutableCallSite(const Value *V) : CallSiteBase(V) {}
705 ImmutableCallSite(CallSite CS) : CallSiteBase(CS.getInstruction()) {}
706};
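
A short usage sketch of the wrapper, mirroring the check in getUserCost earlier in this report; classifyUser is an invented helper, not part of the LLVM API:

#include "llvm/IR/CallSite.h"
using namespace llvm;

// Illustrative: the explicit operator bool tests whether U actually wraps a
// call, invoke or callbr instruction before any accessor is used.
static unsigned classifyUser(const User *U) {
  if (auto CS = ImmutableCallSite(U)) {
    if (CS.getCalledFunction())
      return 1;   // direct call: the callee is known statically
    return 2;     // indirect call through a function pointer
  }
  return 0;       // not a call site at all
}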
707
708/// AbstractCallSite
709///
710/// An abstract call site is a wrapper that allows to treat direct,
711/// indirect, and callback calls the same. If an abstract call site
712/// represents a direct or indirect call site it behaves like a stripped
713/// down version of a normal call site object. The abstract call site can
714/// also represent a callback call, thus the fact that the initially
715/// called function (=broker) may invoke a third one (=callback callee).
716/// In this case, the abstract call site hides the middle man, hence the
717/// broker function. The result is a representation of the callback call,
718/// inside the broker, but in the context of the original call to the broker.
719///
720/// There are up to three functions involved when we talk about callback call
721/// sites. The caller (1), which invokes the broker function. The broker
722/// function (2), that will invoke the callee zero or more times. And finally
723/// the callee (3), which is the target of the callback call.
724///
725/// The abstract call site will handle the mapping from parameters to arguments
726/// depending on the semantic of the broker function. However, it is important
727/// to note that the mapping is often partial. Thus, some arguments of the
728/// call/invoke instruction are mapped to parameters of the callee while others
729/// are not.
730class AbstractCallSite {
731public:
732
733 /// The encoding of a callback with regards to the underlying instruction.
734 struct CallbackInfo {
735
736 /// For direct/indirect calls the parameter encoding is empty. If it is not,
737 /// the abstract call site represents a callback. In that case, the first
738 /// element of the encoding vector represents which argument of the call
739 /// site CS is the callback callee. The remaining elements map parameters
740 /// (identified by their position) to the arguments that will be passed
741 /// through (also identified by position but in the call site instruction).
742 ///
743 /// NOTE that we use LLVM argument numbers (starting at 0) and not
744 /// clang/source argument numbers (starting at 1). The -1 entries represent
745 /// unknown values that are passed to the callee.
746 using ParameterEncodingTy = SmallVector<int, 0>;
747 ParameterEncodingTy ParameterEncoding;
748
749 };
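
// To make the encoding above concrete, an assumed example (the broker
// signature and values are illustrative, not taken from any in-tree
// metadata). For a broker declared as
//   void @broker(void (i8*, i32)* %cb, i8* %payload)
// that calls %cb(%payload, <unknown i32>), the callback abstract call site
// for  call void @broker(@fn, %p)  would carry:
//   ParameterEncoding[0] =  0   // call-site argument 0 (@fn) is the callee
//   ParameterEncoding[1] =  1   // callback parameter 0 <- call argument 1 (%p)
//   ParameterEncoding[2] = -1   // callback parameter 1 is unknown at this site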
750
751private:
752
753 /// The underlying call site:
754 /// caller -> callee, if this is a direct or indirect call site
755 /// caller -> broker function, if this is a callback call site
756 CallSite CS;
757
758 /// The encoding of a callback with regards to the underlying instruction.
759 CallbackInfo CI;
760
761public:
762 /// Sole constructor for abstract call sites (ACS).
763 ///
764 /// An abstract call site can only be constructed through a llvm::Use because
765 /// each operand (=use) of an instruction could potentially be a different
766 /// abstract call site. Furthermore, even if the value of the llvm::Use is the
767 /// same, and the user is as well, the abstract call sites might not be.
768 ///
769 /// If a use is not associated with an abstract call site the constructed ACS
770 /// will evaluate to false if converted to a boolean.
771 ///
772 /// If the use is the callee use of a call or invoke instruction, the
773 /// constructed abstract call site will behave as a llvm::CallSite would.
774 ///
775 /// If the use is not a callee use of a call or invoke instruction, the
776 /// callback metadata is used to determine the argument <-> parameter mapping
777 /// as well as the callee of the abstract call site.
778 AbstractCallSite(const Use *U);
779
780 /// Add operand uses of \p ICS that represent callback uses into \p CBUses.
781 ///
782 /// All uses added to \p CBUses can be used to create abstract call sites for
783 /// which AbstractCallSite::isCallbackCall() will return true.
784 static void getCallbackUses(ImmutableCallSite ICS,
785 SmallVectorImpl<const Use *> &CBUses);
786
787 /// Conversion operator to conveniently check for a valid/initialized ACS.
788 explicit operator bool() const { return (bool)CS; }
789
790 /// Return the underlying instruction.
791 Instruction *getInstruction() const { return CS.getInstruction(); }
792
793 /// Return the call site abstraction for the underlying instruction.
794 CallSite getCallSite() const { return CS; }
795
796 /// Return true if this ACS represents a direct call.
797 bool isDirectCall() const {
798 return !isCallbackCall() && !CS.isIndirectCall();
799 }
800
801 /// Return true if this ACS represents an indirect call.
802 bool isIndirectCall() const {
803 return !isCallbackCall() && CS.isIndirectCall();
804 }
805
806 /// Return true if this ACS represents a callback call.
807 bool isCallbackCall() const {
808 // For a callback call site the callee is ALWAYS stored first in the
809 // transitive values vector. Thus, a non-empty vector indicates a callback.
810 return !CI.ParameterEncoding.empty();
811 }
812
813 /// Return true if @p UI is the use that defines the callee of this ACS.
814 bool isCallee(Value::const_user_iterator UI) const {
815 return isCallee(&UI.getUse());
816 }
817
818 /// Return true if @p U is the use that defines the callee of this ACS.
819 bool isCallee(const Use *U) const {
820 if (isDirectCall())
821 return CS.isCallee(U);
822
823 assert(!CI.ParameterEncoding.empty() &&
824        "Callback without parameter encoding!");
825
826 return (int)CS.getArgumentNo(U) == CI.ParameterEncoding[0];
827 }
828
829 /// Return the number of parameters of the callee.
830 unsigned getNumArgOperands() const {
831 if (isDirectCall())
832 return CS.getNumArgOperands();
833 // Subtract 1 for the callee encoding.
834 return CI.ParameterEncoding.size() - 1;
835 }
836
837 /// Return the operand index of the underlying instruction associated with @p
838 /// Arg.
839 int getCallArgOperandNo(Argument &Arg) const {
840 return getCallArgOperandNo(Arg.getArgNo());
841 }
842
843 /// Return the operand index of the underlying instruction associated with
844 /// the function parameter number @p ArgNo or -1 if there is none.
845 int getCallArgOperandNo(unsigned ArgNo) const {
846 if (isDirectCall())
847 return ArgNo;
848 // Add 1 for the callee encoding.
849 return CI.ParameterEncoding[ArgNo + 1];
850 }
851
852 /// Return the operand of the underlying instruction associated with @p Arg.
853 Value *getCallArgOperand(Argument &Arg) const {
854 return getCallArgOperand(Arg.getArgNo());
855 }
856
857 /// Return the operand of the underlying instruction associated with the
858 /// function parameter number @p ArgNo or nullptr if there is none.
859 Value *getCallArgOperand(unsigned ArgNo) const {
860 if (isDirectCall())
861 return CS.getArgOperand(ArgNo);
862 // Add 1 for the callee encoding.
863 return CI.ParameterEncoding[ArgNo + 1] >= 0
864 ? CS.getArgOperand(CI.ParameterEncoding[ArgNo + 1])
865 : nullptr;
866 }
867
868 /// Return the operand index of the underlying instruction associated with the
869 /// callee of this ACS. Only valid for callback calls!
870 int getCallArgOperandNoForCallee() const {
871 assert(isCallbackCall());
872 assert(CI.ParameterEncoding.size() && CI.ParameterEncoding[0] >= 0);
873 return CI.ParameterEncoding[0];
874 }
875
876 /// Return the use of the callee value in the underlying instruction. Only
877 /// valid for callback calls!
878 const Use &getCalleeUseForCallback() const {
879 int CalleeArgIdx = getCallArgOperandNoForCallee();
880 assert(CalleeArgIdx >= 0 &&
881        unsigned(CalleeArgIdx) < getInstruction()->getNumOperands());
882 return getInstruction()->getOperandUse(CalleeArgIdx);
883 }
884
885 /// Return the pointer to function that is being called.
886 Value *getCalledValue() const {
887 if (isDirectCall())
888 return CS.getCalledValue();
889 return CS.getArgOperand(getCallArgOperandNoForCallee());
890 }
891
892 /// Return the function being called if this is a direct call, otherwise
893 /// return null (if it's an indirect call).
894 Function *getCalledFunction() const {
895 Value *V = getCalledValue();
896 return V ? dyn_cast<Function>(V->stripPointerCasts()) : nullptr;
897 }
898};
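// Editor's sketch (not part of the original header; names are illustrative):
// how a client might inspect a callback operand through the API above. `U` is
// assumed to be one of the uses collected by getCallbackUses().
static inline void inspectCallbackUse(const Use *U) {
  AbstractCallSite ACS(U);
  if (!ACS || !ACS.isCallbackCall())
    return;
  // Which operand of the broker call holds the callback callee.
  int CalleeOpIdx = ACS.getCallArgOperandNoForCallee();
  (void)CalleeOpIdx;
  // Visit the callee parameters that actually receive an argument through the
  // broker; unmapped parameters yield a nullptr operand.
  for (unsigned i = 0, e = ACS.getNumArgOperands(); i != e; ++i)
    if (Value *Op = ACS.getCallArgOperand(i))
      (void)Op;
}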
899
900template <> struct DenseMapInfo<CallSite> {
901 using BaseInfo = DenseMapInfo<decltype(CallSite::I)>;
902
903 static CallSite getEmptyKey() {
904 CallSite CS;
905 CS.I = BaseInfo::getEmptyKey();
906 return CS;
907 }
908
909 static CallSite getTombstoneKey() {
910 CallSite CS;
911 CS.I = BaseInfo::getTombstoneKey();
912 return CS;
913 }
914
915 static unsigned getHashValue(const CallSite &CS) {
916 return BaseInfo::getHashValue(CS.I);
917 }
918
919 static bool isEqual(const CallSite &LHS, const CallSite &RHS) {
920 return LHS == RHS;
921 }
922};
923
924} // end namespace llvm
925
926#endif // LLVM_IR_CALLSITE_H

/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/ADT/PointerIntPair.h

1//===- llvm/ADT/PointerIntPair.h - Pair for pointer and int -----*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the PointerIntPair class.
10//
11//===----------------------------------------------------------------------===//
12
13#ifndef LLVM_ADT_POINTERINTPAIR_H
14#define LLVM_ADT_POINTERINTPAIR_H
15
16#include "llvm/Support/Compiler.h"
17#include "llvm/Support/PointerLikeTypeTraits.h"
18#include "llvm/Support/type_traits.h"
19#include <cassert>
20#include <cstdint>
21#include <limits>
22
23namespace llvm {
24
25template <typename T> struct DenseMapInfo;
26template <typename PointerT, unsigned IntBits, typename PtrTraits>
27struct PointerIntPairInfo;
28
29/// PointerIntPair - This class implements a pair of a pointer and small
30/// integer. It is designed to represent this in the space required by one
31/// pointer by bitmangling the integer into the low part of the pointer. This
32/// can only be done for small integers: typically up to 3 bits, but it depends
33/// on the number of bits available according to PointerLikeTypeTraits for the
34/// type.
35///
36/// Note that PointerIntPair always puts the IntVal part in the highest bits
37/// possible. For example, PointerIntPair<void*, 1, bool> will put the bit for
38/// the bool into bit #2, not bit #0, which allows the low two bits to be used
39/// for something else. For example, this allows:
40/// PointerIntPair<PointerIntPair<void*, 1, bool>, 1, bool>
41/// ... and the two bools will land in different bits.
42template <typename PointerTy, unsigned IntBits, typename IntType = unsigned,
43 typename PtrTraits = PointerLikeTypeTraits<PointerTy>,
44 typename Info = PointerIntPairInfo<PointerTy, IntBits, PtrTraits>>
45class PointerIntPair {
46 // Used by MSVC visualizer and generally helpful for debugging/visualizing.
47 using InfoTy = Info;
48 intptr_t Value = 0;
49
50public:
51 constexpr PointerIntPair() = default;
52
53 PointerIntPair(PointerTy PtrVal, IntType IntVal) {
54 setPointerAndInt(PtrVal, IntVal);
55 }
56
57 explicit PointerIntPair(PointerTy PtrVal) { initWithPointer(PtrVal); }
58
59 PointerTy getPointer() const { return Info::getPointer(Value); }
Step 16: Calling 'PointerIntPairInfo::getPointer'
Step 24: Returning from 'PointerIntPairInfo::getPointer'
Step 25: Returning null pointer, which participates in a condition later
60
61 IntType getInt() const { return (IntType)Info::getInt(Value); }
62
63 void setPointer(PointerTy PtrVal) LLVM_LVALUE_FUNCTION& {
64 Value = Info::updatePointer(Value, PtrVal);
65 }
66
67 void setInt(IntType IntVal) LLVM_LVALUE_FUNCTION& {
68 Value = Info::updateInt(Value, static_cast<intptr_t>(IntVal));
69 }
70
71 void initWithPointer(PointerTy PtrVal) LLVM_LVALUE_FUNCTION& {
72 Value = Info::updatePointer(0, PtrVal);
73 }
74
75 void setPointerAndInt(PointerTy PtrVal, IntType IntVal) LLVM_LVALUE_FUNCTION& {
76 Value = Info::updateInt(Info::updatePointer(0, PtrVal),
77 static_cast<intptr_t>(IntVal));
78 }
79
80 PointerTy const *getAddrOfPointer() const {
81 return const_cast<PointerIntPair *>(this)->getAddrOfPointer();
82 }
83
84 PointerTy *getAddrOfPointer() {
85 assert(Value == reinterpret_cast<intptr_t>(getPointer()) &&
86        "Can only return the address if IntBits is cleared and "
87        "PtrTraits doesn't change the pointer");
88 return reinterpret_cast<PointerTy *>(&Value);
89 }
90
91 void *getOpaqueValue() const { return reinterpret_cast<void *>(Value); }
92
93 void setFromOpaqueValue(void *Val) LLVM_LVALUE_FUNCTION& {
94 Value = reinterpret_cast<intptr_t>(Val);
95 }
96
97 static PointerIntPair getFromOpaqueValue(void *V) {
98 PointerIntPair P;
99 P.setFromOpaqueValue(V);
100 return P;
101 }
102
103 // Allow PointerIntPairs to be created from const void * if and only if the
104 // pointer type could be created from a const void *.
105 static PointerIntPair getFromOpaqueValue(const void *V) {
106 (void)PtrTraits::getFromVoidPointer(V);
107 return getFromOpaqueValue(const_cast<void *>(V));
108 }
109
110 bool operator==(const PointerIntPair &RHS) const {
111 return Value == RHS.Value;
112 }
113
114 bool operator!=(const PointerIntPair &RHS) const {
115 return Value != RHS.Value;
116 }
117
118 bool operator<(const PointerIntPair &RHS) const { return Value < RHS.Value; }
119 bool operator>(const PointerIntPair &RHS) const { return Value > RHS.Value; }
120
121 bool operator<=(const PointerIntPair &RHS) const {
122 return Value <= RHS.Value;
123 }
124
125 bool operator>=(const PointerIntPair &RHS) const {
126 return Value >= RHS.Value;
127 }
128};
129
130// Specialize is_trivially_copyable to avoid a limitation of llvm::is_trivially_copyable
131// when compiled with gcc 4.9.
132template <typename PointerTy, unsigned IntBits, typename IntType,
133 typename PtrTraits,
134 typename Info>
135struct is_trivially_copyable<PointerIntPair<PointerTy, IntBits, IntType, PtrTraits, Info>> : std::true_type {
136#ifdef HAVE_STD_IS_TRIVIALLY_COPYABLE
137 static_assert(std::is_trivially_copyable<PointerIntPair<PointerTy, IntBits, IntType, PtrTraits, Info>>::value,
138 "inconsistent behavior between llvm:: and std:: implementation of is_trivially_copyable");
139#endif
140};
141
142
143template <typename PointerT, unsigned IntBits, typename PtrTraits>
144struct PointerIntPairInfo {
145 static_assert(PtrTraits::NumLowBitsAvailable <
146 std::numeric_limits<uintptr_t>::digits,
147 "cannot use a pointer type that has all bits free");
148 static_assert(IntBits <= PtrTraits::NumLowBitsAvailable,
149 "PointerIntPair with integer size too large for pointer");
150 enum MaskAndShiftConstants : uintptr_t {
151 /// PointerBitMask - The bits that come from the pointer.
152 PointerBitMask =
153 ~(uintptr_t)(((intptr_t)1 << PtrTraits::NumLowBitsAvailable) - 1),
154
155 /// IntShift - The number of low bits that we reserve for other uses, and
156 /// keep zero.
157 IntShift = (uintptr_t)PtrTraits::NumLowBitsAvailable - IntBits,
158
159 /// IntMask - This is the unshifted mask for valid bits of the int type.
160 IntMask = (uintptr_t)(((intptr_t)1 << IntBits) - 1),
161
162 // ShiftedIntMask - This is the bits for the integer shifted in place.
163 ShiftedIntMask = (uintptr_t)(IntMask << IntShift)
164 };
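  // Editor's note: e.g. with PtrTraits::NumLowBitsAvailable == 3 and
  // IntBits == 1 this gives PointerBitMask == ~uintptr_t(7), IntShift == 2,
  // IntMask == 1 and ShiftedIntMask == 4, so the integer occupies bit #2
  // while bits #0 and #1 stay clear.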
165
166 static PointerT getPointer(intptr_t Value) {
167 return PtrTraits::getFromVoidPointer(
Step 17: Calling 'PointerLikeTypeTraits::getFromVoidPointer'
Step 22: Returning from 'PointerLikeTypeTraits::getFromVoidPointer'
Step 23: Returning null pointer, which participates in a condition later
168 reinterpret_cast<void *>(Value & PointerBitMask));
169 }
170
171 static intptr_t getInt(intptr_t Value) {
172 return (Value >> IntShift) & IntMask;
173 }
174
175 static intptr_t updatePointer(intptr_t OrigValue, PointerT Ptr) {
176 intptr_t PtrWord =
177 reinterpret_cast<intptr_t>(PtrTraits::getAsVoidPointer(Ptr));
178 assert((PtrWord & ~PointerBitMask) == 0 &&
179        "Pointer is not sufficiently aligned");
180 // Preserve all low bits, just update the pointer.
181 return PtrWord | (OrigValue & ~PointerBitMask);
182 }
183
184 static intptr_t updateInt(intptr_t OrigValue, intptr_t Int) {
185 intptr_t IntWord = static_cast<intptr_t>(Int);
186 assert((IntWord & ~IntMask) == 0 && "Integer too large for field");
187
188 // Preserve all bits other than the ones we are updating.
189 return (OrigValue & ~ShiftedIntMask) | IntWord << IntShift;
190 }
191};
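// Editor's sketch (not part of the original header): a round trip through the
// pair, packing one flag bit into the spare low bits of an int pointer.
static inline bool examplePointerIntPairRoundTrip(int *P) {
  PointerIntPair<int *, 1, bool> PtrAndFlag(P, true);
  return PtrAndFlag.getPointer() == P && PtrAndFlag.getInt();
}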
192
193// Provide specialization of DenseMapInfo for PointerIntPair.
194template <typename PointerTy, unsigned IntBits, typename IntType>
195struct DenseMapInfo<PointerIntPair<PointerTy, IntBits, IntType>> {
196 using Ty = PointerIntPair<PointerTy, IntBits, IntType>;
197
198 static Ty getEmptyKey() {
199 uintptr_t Val = static_cast<uintptr_t>(-1);
200 Val <<= PointerLikeTypeTraits<Ty>::NumLowBitsAvailable;
201 return Ty::getFromOpaqueValue(reinterpret_cast<void *>(Val));
202 }
203
204 static Ty getTombstoneKey() {
205 uintptr_t Val = static_cast<uintptr_t>(-2);
206 Val <<= PointerLikeTypeTraits<PointerTy>::NumLowBitsAvailable;
207 return Ty::getFromOpaqueValue(reinterpret_cast<void *>(Val));
208 }
209
210 static unsigned getHashValue(Ty V) {
211 uintptr_t IV = reinterpret_cast<uintptr_t>(V.getOpaqueValue());
212 return unsigned(IV) ^ unsigned(IV >> 9);
213 }
214
215 static bool isEqual(const Ty &LHS, const Ty &RHS) { return LHS == RHS; }
216};
217
218// Teach SmallPtrSet that PointerIntPair is "basically a pointer".
219template <typename PointerTy, unsigned IntBits, typename IntType,
220 typename PtrTraits>
221struct PointerLikeTypeTraits<
222 PointerIntPair<PointerTy, IntBits, IntType, PtrTraits>> {
223 static inline void *
224 getAsVoidPointer(const PointerIntPair<PointerTy, IntBits, IntType> &P) {
225 return P.getOpaqueValue();
226 }
227
228 static inline PointerIntPair<PointerTy, IntBits, IntType>
229 getFromVoidPointer(void *P) {
230 return PointerIntPair<PointerTy, IntBits, IntType>::getFromOpaqueValue(P);
231 }
232
233 static inline PointerIntPair<PointerTy, IntBits, IntType>
234 getFromVoidPointer(const void *P) {
235 return PointerIntPair<PointerTy, IntBits, IntType>::getFromOpaqueValue(P);
236 }
237
238 static constexpr int NumLowBitsAvailable =
239 PtrTraits::NumLowBitsAvailable - IntBits;
240};
241
242} // end namespace llvm
243
244#endif // LLVM_ADT_POINTERINTPAIR_H

/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/Support/PointerLikeTypeTraits.h

1//===- llvm/Support/PointerLikeTypeTraits.h - Pointer Traits ----*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the PointerLikeTypeTraits class. This allows data
10// structures to reason about pointers and other things that are pointer sized.
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_SUPPORT_POINTERLIKETYPETRAITS_H
15#define LLVM_SUPPORT_POINTERLIKETYPETRAITS_H
16
17#include "llvm/Support/DataTypes.h"
18#include <assert.h>
19#include <type_traits>
20
21namespace llvm {
22
23/// A traits type that is used to handle pointer types and things that are just
24/// wrappers for pointers as a uniform entity.
25template <typename T> struct PointerLikeTypeTraits;
26
27namespace detail {
28/// A tiny meta function to compute the log2 of a compile time constant.
29template <size_t N>
30struct ConstantLog2
31 : std::integral_constant<size_t, ConstantLog2<N / 2>::value + 1> {};
32template <> struct ConstantLog2<1> : std::integral_constant<size_t, 0> {};
33
34// Provide a trait to check if T is pointer-like.
35template <typename T, typename U = void> struct HasPointerLikeTypeTraits {
36 static const bool value = false;
37};
38
39// sizeof(T) is valid only for a complete T.
40template <typename T> struct HasPointerLikeTypeTraits<
41 T, decltype((sizeof(PointerLikeTypeTraits<T>) + sizeof(T)), void())> {
42 static const bool value = true;
43};
44
45template <typename T> struct IsPointerLike {
46 static const bool value = HasPointerLikeTypeTraits<T>::value;
47};
48
49template <typename T> struct IsPointerLike<T *> {
50 static const bool value = true;
51};
52} // namespace detail
53
54// Provide PointerLikeTypeTraits for non-cvr pointers.
55template <typename T> struct PointerLikeTypeTraits<T *> {
56 static inline void *getAsVoidPointer(T *P) { return P; }
57 static inline T *getFromVoidPointer(void *P) { return static_cast<T *>(P); }
Step 19: Returning null pointer (loaded from 'P'), which participates in a condition later
58
59 static constexpr int NumLowBitsAvailable =
60 detail::ConstantLog2<alignof(T)>::value;
61};
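// Editor's note: for example, with T = uint64_t and a typical 8-byte
// alignment, ConstantLog2<alignof(uint64_t)>::value == 3, so a uint64_t*
// leaves its three low bits available for tagging.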
62
63template <> struct PointerLikeTypeTraits<void *> {
64 static inline void *getAsVoidPointer(void *P) { return P; }
65 static inline void *getFromVoidPointer(void *P) { return P; }
66
67 /// Note, we assume here that void* is related to raw malloc'ed memory and
68 /// that malloc returns objects at least 4-byte aligned. However, this may be
69 /// wrong, or pointers may be from something other than malloc. In this case,
70 /// you should specify a real typed pointer or avoid this template.
71 ///
72 /// All clients should use assertions to do a run-time check to ensure that
73 /// this is actually true.
74 static constexpr int NumLowBitsAvailable = 2;
75};
76
77// Provide PointerLikeTypeTraits for const things.
78template <typename T> struct PointerLikeTypeTraits<const T> {
79 typedef PointerLikeTypeTraits<T> NonConst;
80
81 static inline const void *getAsVoidPointer(const T P) {
82 return NonConst::getAsVoidPointer(P);
83 }
84 static inline const T getFromVoidPointer(const void *P) {
85 return NonConst::getFromVoidPointer(const_cast<void *>(P));
86 }
87 static constexpr int NumLowBitsAvailable = NonConst::NumLowBitsAvailable;
88};
89
90// Provide PointerLikeTypeTraits for const pointers.
91template <typename T> struct PointerLikeTypeTraits<const T *> {
92 typedef PointerLikeTypeTraits<T *> NonConst;
93
94 static inline const void *getAsVoidPointer(const T *P) {
95 return NonConst::getAsVoidPointer(const_cast<T *>(P));
96 }
97 static inline const T *getFromVoidPointer(const void *P) {
98 return NonConst::getFromVoidPointer(const_cast<void *>(P));
Step 18: Calling 'PointerLikeTypeTraits::getFromVoidPointer'
Step 20: Returning from 'PointerLikeTypeTraits::getFromVoidPointer'
Step 21: Returning null pointer, which participates in a condition later
99 }
100 static constexpr int NumLowBitsAvailable = NonConst::NumLowBitsAvailable;
101};
102
103// Provide PointerLikeTypeTraits for uintptr_t.
104template <> struct PointerLikeTypeTraits<uintptr_t> {
105 static inline void *getAsVoidPointer(uintptr_t P) {
106 return reinterpret_cast<void *>(P);
107 }
108 static inline uintptr_t getFromVoidPointer(void *P) {
109 return reinterpret_cast<uintptr_t>(P);
110 }
111 // No bits are available!
112 static constexpr int NumLowBitsAvailable = 0;
113};
114
115/// Provide suitable custom traits struct for function pointers.
116///
117/// Function pointers can't be directly given these traits as functions can't
118/// have their alignment computed with `alignof` and we need different casting.
119///
120/// To rely on higher alignment for a specialized use, you can provide a
121/// customized form of this template explicitly with higher alignment, and
122/// potentially use alignment attributes on functions to satisfy that.
123template <int Alignment, typename FunctionPointerT>
124struct FunctionPointerLikeTypeTraits {
125 static constexpr int NumLowBitsAvailable =
126 detail::ConstantLog2<Alignment>::value;
127 static inline void *getAsVoidPointer(FunctionPointerT P) {
128 assert((reinterpret_cast<uintptr_t>(P) &
129         ~((uintptr_t)-1 << NumLowBitsAvailable)) == 0 &&
130        "Alignment not satisfied for an actual function pointer!");
131 return reinterpret_cast<void *>(P);
132 }
133 static inline FunctionPointerT getFromVoidPointer(void *P) {
134 return reinterpret_cast<FunctionPointerT>(P);
135 }
136};
137
138/// Provide a default specialization for function pointers that assumes 4-byte
139/// alignment.
140///
141/// We assume here that functions used with this are always at least 4-byte
142/// aligned. This means that, for example, Thumb functions and systems with
143/// unusual, unaligned function pointers won't work, but all practical systems
144/// we support satisfy this requirement.
145template <typename ReturnT, typename... ParamTs>
146struct PointerLikeTypeTraits<ReturnT (*)(ParamTs...)>
147 : FunctionPointerLikeTypeTraits<4, ReturnT (*)(ParamTs...)> {};
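// Editor's note: a hypothetical custom specialization for callbacks that are
// known (e.g. via an alignment attribute) to be 16-byte aligned; not part of
// the original header:
//
//   template <>
//   struct PointerLikeTypeTraits<void (*)(int)>
//       : FunctionPointerLikeTypeTraits<16, void (*)(int)> {};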
148
149} // end namespace llvm
150
151#endif

/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/IR/Operator.h

1//===-- llvm/Operator.h - Operator utility subclass -------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines various classes for working with Instructions and
10// ConstantExprs.
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_IR_OPERATOR_H
15#define LLVM_IR_OPERATOR_H
16
17#include "llvm/ADT/None.h"
18#include "llvm/ADT/Optional.h"
19#include "llvm/IR/Constants.h"
20#include "llvm/IR/Instruction.h"
21#include "llvm/IR/Type.h"
22#include "llvm/IR/Value.h"
23#include "llvm/Support/Casting.h"
24#include <cstddef>
25
26namespace llvm {
27
28/// This is a utility class that provides an abstraction for the common
29/// functionality between Instructions and ConstantExprs.
30class Operator : public User {
31public:
32 // The Operator class is intended to be used as a utility, and is never itself
33 // instantiated.
34 Operator() = delete;
35 ~Operator() = delete;
36
37 void *operator new(size_t s) = delete;
38
39 /// Return the opcode for this Instruction or ConstantExpr.
40 unsigned getOpcode() const {
41 if (const Instruction *I = dyn_cast<Instruction>(this))
42 return I->getOpcode();
43 return cast<ConstantExpr>(this)->getOpcode();
44 }
45
46 /// If V is an Instruction or ConstantExpr, return its opcode.
47 /// Otherwise return UserOp1.
48 static unsigned getOpcode(const Value *V) {
49 if (const Instruction *I = dyn_cast<Instruction>(V))
Step 35: Assuming 'V' is not a 'Instruction'
Step 35.1: 'I' is null
Step 36: Taking false branch
50 return I->getOpcode();
51 if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
Step 37: Assuming 'V' is a 'ConstantExpr'
Step 37.1: 'CE' is non-null
Step 38: Taking true branch
52 return CE->getOpcode();
Step 39: Returning value, which participates in a condition later
53 return Instruction::UserOp1;
54 }
55
56 static bool classof(const Instruction *) { return true; }
57 static bool classof(const ConstantExpr *) { return true; }
58 static bool classof(const Value *V) {
59 return isa<Instruction>(V) || isa<ConstantExpr>(V);
60 }
61};
62
63/// Utility class for integer operators which may exhibit overflow - Add, Sub,
64/// Mul, and Shl. It does not include SDiv, despite that operator having the
65/// potential for overflow.
66class OverflowingBinaryOperator : public Operator {
67public:
68 enum {
69 AnyWrap = 0,
70 NoUnsignedWrap = (1 << 0),
71 NoSignedWrap = (1 << 1)
72 };
73
74private:
75 friend class Instruction;
76 friend class ConstantExpr;
77
78 void setHasNoUnsignedWrap(bool B) {
79 SubclassOptionalData =
80 (SubclassOptionalData & ~NoUnsignedWrap) | (B * NoUnsignedWrap);
81 }
82 void setHasNoSignedWrap(bool B) {
83 SubclassOptionalData =
84 (SubclassOptionalData & ~NoSignedWrap) | (B * NoSignedWrap);
85 }
86
87public:
88 /// Test whether this operation is known to never
89 /// undergo unsigned overflow, aka the nuw property.
90 bool hasNoUnsignedWrap() const {
91 return SubclassOptionalData & NoUnsignedWrap;
92 }
93
94 /// Test whether this operation is known to never
95 /// undergo signed overflow, aka the nsw property.
96 bool hasNoSignedWrap() const {
97 return (SubclassOptionalData & NoSignedWrap) != 0;
98 }
99
100 static bool classof(const Instruction *I) {
101 return I->getOpcode() == Instruction::Add ||
102 I->getOpcode() == Instruction::Sub ||
103 I->getOpcode() == Instruction::Mul ||
104 I->getOpcode() == Instruction::Shl;
105 }
106 static bool classof(const ConstantExpr *CE) {
107 return CE->getOpcode() == Instruction::Add ||
108 CE->getOpcode() == Instruction::Sub ||
109 CE->getOpcode() == Instruction::Mul ||
110 CE->getOpcode() == Instruction::Shl;
111 }
112 static bool classof(const Value *V) {
113 return (isa<Instruction>(V) && classof(cast<Instruction>(V))) ||
114 (isa<ConstantExpr>(V) && classof(cast<ConstantExpr>(V)));
115 }
116};
117
118/// A udiv or sdiv instruction, which can be marked as "exact",
119/// indicating that no bits are destroyed.
120class PossiblyExactOperator : public Operator {
121public:
122 enum {
123 IsExact = (1 << 0)
124 };
125
126private:
127 friend class Instruction;
128 friend class ConstantExpr;
129
130 void setIsExact(bool B) {
131 SubclassOptionalData = (SubclassOptionalData & ~IsExact) | (B * IsExact);
132 }
133
134public:
135 /// Test whether this division is known to be exact, with zero remainder.
136 bool isExact() const {
137 return SubclassOptionalData & IsExact;
138 }
139
140 static bool isPossiblyExactOpcode(unsigned OpC) {
141 return OpC == Instruction::SDiv ||
142 OpC == Instruction::UDiv ||
143 OpC == Instruction::AShr ||
144 OpC == Instruction::LShr;
145 }
146
147 static bool classof(const ConstantExpr *CE) {
148 return isPossiblyExactOpcode(CE->getOpcode());
149 }
150 static bool classof(const Instruction *I) {
151 return isPossiblyExactOpcode(I->getOpcode());
152 }
153 static bool classof(const Value *V) {
154 return (isa<Instruction>(V) && classof(cast<Instruction>(V))) ||
155 (isa<ConstantExpr>(V) && classof(cast<ConstantExpr>(V)));
156 }
157};
158
159/// Convenience struct for specifying and reasoning about fast-math flags.
160class FastMathFlags {
161private:
162 friend class FPMathOperator;
163
164 unsigned Flags = 0;
165
166 FastMathFlags(unsigned F) {
167 // If all 7 bits are set, turn this into -1. If the number of bits grows,
168 // this must be updated. This is intended to provide some forward binary
169 // compatibility insurance for the meaning of 'fast' in case bits are added.
170 if (F == 0x7F) Flags = ~0U;
171 else Flags = F;
172 }
173
174public:
175 // This is how the bits are used in Value::SubclassOptionalData so they
176 // should fit there too.
177 // WARNING: We're out of space. SubclassOptionalData only has 7 bits. New
178 // functionality will require a change in how this information is stored.
179 enum {
180 AllowReassoc = (1 << 0),
181 NoNaNs = (1 << 1),
182 NoInfs = (1 << 2),
183 NoSignedZeros = (1 << 3),
184 AllowReciprocal = (1 << 4),
185 AllowContract = (1 << 5),
186 ApproxFunc = (1 << 6)
187 };
188
189 FastMathFlags() = default;
190
191 static FastMathFlags getFast() {
192 FastMathFlags FMF;
193 FMF.setFast();
194 return FMF;
195 }
196
197 bool any() const { return Flags != 0; }
198 bool none() const { return Flags == 0; }
199 bool all() const { return Flags == ~0U; }
200
201 void clear() { Flags = 0; }
202 void set() { Flags = ~0U; }
203
204 /// Flag queries
205 bool allowReassoc() const { return 0 != (Flags & AllowReassoc); }
206 bool noNaNs() const { return 0 != (Flags & NoNaNs); }
207 bool noInfs() const { return 0 != (Flags & NoInfs); }
208 bool noSignedZeros() const { return 0 != (Flags & NoSignedZeros); }
209 bool allowReciprocal() const { return 0 != (Flags & AllowReciprocal); }
210 bool allowContract() const { return 0 != (Flags & AllowContract); }
211 bool approxFunc() const { return 0 != (Flags & ApproxFunc); }
212 /// 'Fast' means all bits are set.
213 bool isFast() const { return all(); }
214
215 /// Flag setters
216 void setAllowReassoc(bool B = true) {
217 Flags = (Flags & ~AllowReassoc) | B * AllowReassoc;
218 }
219 void setNoNaNs(bool B = true) {
220 Flags = (Flags & ~NoNaNs) | B * NoNaNs;
221 }
222 void setNoInfs(bool B = true) {
223 Flags = (Flags & ~NoInfs) | B * NoInfs;
224 }
225 void setNoSignedZeros(bool B = true) {
226 Flags = (Flags & ~NoSignedZeros) | B * NoSignedZeros;
227 }
228 void setAllowReciprocal(bool B = true) {
229 Flags = (Flags & ~AllowReciprocal) | B * AllowReciprocal;
230 }
231 void setAllowContract(bool B = true) {
232 Flags = (Flags & ~AllowContract) | B * AllowContract;
233 }
234 void setApproxFunc(bool B = true) {
235 Flags = (Flags & ~ApproxFunc) | B * ApproxFunc;
236 }
237 void setFast(bool B = true) { B ? set() : clear(); }
238
239 void operator&=(const FastMathFlags &OtherFlags) {
240 Flags &= OtherFlags.Flags;
241 }
242};
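// Editor's sketch (not part of the original header): building a flag set that
// permits reassociation and contraction but nothing else.
static inline FastMathFlags exampleReassocContractOnly() {
  FastMathFlags FMF;
  FMF.setAllowReassoc();
  FMF.setAllowContract();
  return FMF; // FMF.isFast() is false since not all flag bits are set.
}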
243
244/// Utility class for floating point operations which can have
245/// information about relaxed accuracy requirements attached to them.
246class FPMathOperator : public Operator {
247private:
248 friend class Instruction;
249
250 /// 'Fast' means all bits are set.
251 void setFast(bool B) {
252 setHasAllowReassoc(B);
253 setHasNoNaNs(B);
254 setHasNoInfs(B);
255 setHasNoSignedZeros(B);
256 setHasAllowReciprocal(B);
257 setHasAllowContract(B);
258 setHasApproxFunc(B);
259 }
260
261 void setHasAllowReassoc(bool B) {
262 SubclassOptionalData =
263 (SubclassOptionalData & ~FastMathFlags::AllowReassoc) |
264 (B * FastMathFlags::AllowReassoc);
265 }
266
267 void setHasNoNaNs(bool B) {
268 SubclassOptionalData =
269 (SubclassOptionalData & ~FastMathFlags::NoNaNs) |
270 (B * FastMathFlags::NoNaNs);
271 }
272
273 void setHasNoInfs(bool B) {
274 SubclassOptionalData =
275 (SubclassOptionalData & ~FastMathFlags::NoInfs) |
276 (B * FastMathFlags::NoInfs);
277 }
278
279 void setHasNoSignedZeros(bool B) {
280 SubclassOptionalData =
281 (SubclassOptionalData & ~FastMathFlags::NoSignedZeros) |
282 (B * FastMathFlags::NoSignedZeros);
283 }
284
285 void setHasAllowReciprocal(bool B) {
286 SubclassOptionalData =
287 (SubclassOptionalData & ~FastMathFlags::AllowReciprocal) |
288 (B * FastMathFlags::AllowReciprocal);
289 }
290
291 void setHasAllowContract(bool B) {
292 SubclassOptionalData =
293 (SubclassOptionalData & ~FastMathFlags::AllowContract) |
294 (B * FastMathFlags::AllowContract);
295 }
296
297 void setHasApproxFunc(bool B) {
298 SubclassOptionalData =
299 (SubclassOptionalData & ~FastMathFlags::ApproxFunc) |
300 (B * FastMathFlags::ApproxFunc);
301 }
302
303 /// Convenience function for setting multiple fast-math flags.
304 /// FMF is a mask of the bits to set.
305 void setFastMathFlags(FastMathFlags FMF) {
306 SubclassOptionalData |= FMF.Flags;
307 }
308
309 /// Convenience function for copying all fast-math flags.
310 /// All values in FMF are transferred to this operator.
311 void copyFastMathFlags(FastMathFlags FMF) {
312 SubclassOptionalData = FMF.Flags;
313 }
314
315public:
316 /// Test if this operation allows all non-strict floating-point transforms.
317 bool isFast() const {
318 return ((SubclassOptionalData & FastMathFlags::AllowReassoc) != 0 &&
319 (SubclassOptionalData & FastMathFlags::NoNaNs) != 0 &&
320 (SubclassOptionalData & FastMathFlags::NoInfs) != 0 &&
321 (SubclassOptionalData & FastMathFlags::NoSignedZeros) != 0 &&
322 (SubclassOptionalData & FastMathFlags::AllowReciprocal) != 0 &&
323 (SubclassOptionalData & FastMathFlags::AllowContract) != 0 &&
324 (SubclassOptionalData & FastMathFlags::ApproxFunc) != 0);
325 }
326
327 /// Test if this operation may be simplified with reassociative transforms.
328 bool hasAllowReassoc() const {
329 return (SubclassOptionalData & FastMathFlags::AllowReassoc) != 0;
330 }
331
332 /// Test if this operation's arguments and results are assumed not-NaN.
333 bool hasNoNaNs() const {
334 return (SubclassOptionalData & FastMathFlags::NoNaNs) != 0;
335 }
336
337 /// Test if this operation's arguments and results are assumed not-infinite.
338 bool hasNoInfs() const {
339 return (SubclassOptionalData & FastMathFlags::NoInfs) != 0;
340 }
341
342 /// Test if this operation can ignore the sign of zero.
343 bool hasNoSignedZeros() const {
344 return (SubclassOptionalData & FastMathFlags::NoSignedZeros) != 0;
345 }
346
347 /// Test if this operation can use reciprocal multiply instead of division.
348 bool hasAllowReciprocal() const {
349 return (SubclassOptionalData & FastMathFlags::AllowReciprocal) != 0;
350 }
351
352 /// Test if this operation can be floating-point contracted (FMA).
353 bool hasAllowContract() const {
354 return (SubclassOptionalData & FastMathFlags::AllowContract) != 0;
355 }
356
357 /// Test if this operation allows approximations of math library functions or
358 /// intrinsics.
359 bool hasApproxFunc() const {
360 return (SubclassOptionalData & FastMathFlags::ApproxFunc) != 0;
361 }
362
363 /// Convenience function for getting all the fast-math flags
364 FastMathFlags getFastMathFlags() const {
365 return FastMathFlags(SubclassOptionalData);
366 }
367
368 /// Get the maximum error permitted by this operation in ULPs. An accuracy of
369 /// 0.0 means that the operation should be performed with the default
370 /// precision.
371 float getFPAccuracy() const;
372
373 static bool classof(const Value *V) {
374 unsigned Opcode;
375 if (auto *I = dyn_cast<Instruction>(V))
376 Opcode = I->getOpcode();
377 else if (auto *CE = dyn_cast<ConstantExpr>(V))
378 Opcode = CE->getOpcode();
379 else
380 return false;
381
382 switch (Opcode) {
383 case Instruction::FNeg:
384 case Instruction::FAdd:
385 case Instruction::FSub:
386 case Instruction::FMul:
387 case Instruction::FDiv:
388 case Instruction::FRem:
389 // FIXME: To clean up and correct the semantics of fast-math-flags, FCmp
390 // should not be treated as a math op, but the other opcodes should.
391 // This would make things consistent with Select/PHI (FP value type
392 // determines whether they are math ops and, therefore, capable of
393 // having fast-math-flags).
394 case Instruction::FCmp:
395 return true;
396 case Instruction::PHI:
397 case Instruction::Select:
398 case Instruction::Call: {
399 Type *Ty = V->getType();
400 while (ArrayType *ArrTy = dyn_cast<ArrayType>(Ty))
401 Ty = ArrTy->getElementType();
402 return Ty->isFPOrFPVectorTy();
403 }
404 default:
405 return false;
406 }
407 }
408};
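// Editor's sketch (not part of the original header): querying fast-math flags
// on an arbitrary value, e.g. from inside an optimization pass.
static inline bool exampleAllowsReassociation(const Value *V) {
  if (const auto *FPOp = dyn_cast<FPMathOperator>(V))
    return FPOp->hasAllowReassoc();
  return false;
}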
409
410/// A helper template for defining operators for individual opcodes.
411template<typename SuperClass, unsigned Opc>
412class ConcreteOperator : public SuperClass {
413public:
414 static bool classof(const Instruction *I) {
415 return I->getOpcode() == Opc;
416 }
417 static bool classof(const ConstantExpr *CE) {
418 return CE->getOpcode() == Opc;
419 }
420 static bool classof(const Value *V) {
421 return (isa<Instruction>(V) && classof(cast<Instruction>(V))) ||
422 (isa<ConstantExpr>(V) && classof(cast<ConstantExpr>(V)));
423 }
424};
425
426class AddOperator
427 : public ConcreteOperator<OverflowingBinaryOperator, Instruction::Add> {
428};
429class SubOperator
430 : public ConcreteOperator<OverflowingBinaryOperator, Instruction::Sub> {
431};
432class MulOperator
433 : public ConcreteOperator<OverflowingBinaryOperator, Instruction::Mul> {
434};
435class ShlOperator
436 : public ConcreteOperator<OverflowingBinaryOperator, Instruction::Shl> {
437};
438
439class SDivOperator
440 : public ConcreteOperator<PossiblyExactOperator, Instruction::SDiv> {
441};
442class UDivOperator
443 : public ConcreteOperator<PossiblyExactOperator, Instruction::UDiv> {
444};
445class AShrOperator
446 : public ConcreteOperator<PossiblyExactOperator, Instruction::AShr> {
447};
448class LShrOperator
449 : public ConcreteOperator<PossiblyExactOperator, Instruction::LShr> {
450};
451
452class ZExtOperator : public ConcreteOperator<Operator, Instruction::ZExt> {};
453
454class GEPOperator
455 : public ConcreteOperator<Operator, Instruction::GetElementPtr> {
456 friend class GetElementPtrInst;
457 friend class ConstantExpr;
458
459 enum {
460 IsInBounds = (1 << 0),
461 // InRangeIndex: bits 1-6
462 };
463
464 void setIsInBounds(bool B) {
465 SubclassOptionalData =
466 (SubclassOptionalData & ~IsInBounds) | (B * IsInBounds);
467 }
468
469public:
470 /// Test whether this is an inbounds GEP, as defined by LangRef.html.
471 bool isInBounds() const {
472 return SubclassOptionalData & IsInBounds;
473 }
474
475 /// Returns the offset of the index with an inrange attachment, or None if
476 /// none.
477 Optional<unsigned> getInRangeIndex() const {
478 if (SubclassOptionalData >> 1 == 0) return None;
479 return (SubclassOptionalData >> 1) - 1;
480 }
481
482 inline op_iterator idx_begin() { return op_begin()+1; }
483 inline const_op_iterator idx_begin() const { return op_begin()+1; }
484 inline op_iterator idx_end() { return op_end(); }
485 inline const_op_iterator idx_end() const { return op_end(); }
486
487 Value *getPointerOperand() {
488 return getOperand(0);
489 }
490 const Value *getPointerOperand() const {
491 return getOperand(0);
492 }
493 static unsigned getPointerOperandIndex() {
494 return 0U; // get index for modifying correct operand
495 }
496
497 /// Method to return the pointer operand as a PointerType.
498 Type *getPointerOperandType() const {
499 return getPointerOperand()->getType();
500 }
501
502 Type *getSourceElementType() const;
503 Type *getResultElementType() const;
504
505 /// Method to return the address space of the pointer operand.
506 unsigned getPointerAddressSpace() const {
507 return getPointerOperandType()->getPointerAddressSpace();
508 }
509
510 unsigned getNumIndices() const { // Note: always non-negative
511 return getNumOperands() - 1;
512 }
513
514 bool hasIndices() const {
515 return getNumOperands() > 1;
516 }
517
518 /// Return true if all of the indices of this GEP are zeros.
519 /// If so, the result pointer and the first operand have the same
520 /// value, just potentially different types.
521 bool hasAllZeroIndices() const {
522 for (const_op_iterator I = idx_begin(), E = idx_end(); I != E; ++I) {
523 if (ConstantInt *C = dyn_cast<ConstantInt>(I))
524 if (C->isZero())
525 continue;
526 return false;
527 }
528 return true;
529 }
530
531 /// Return true if all of the indices of this GEP are constant integers.
532 /// If so, the result pointer and the first operand have
533 /// a constant offset between them.
534 bool hasAllConstantIndices() const {
535 for (const_op_iterator I = idx_begin(), E = idx_end(); I != E; ++I) {
536 if (!isa<ConstantInt>(I))
537 return false;
538 }
539 return true;
540 }
541
542 unsigned countNonConstantIndices() const {
543 return count_if(make_range(idx_begin(), idx_end()), [](const Use& use) {
544 return !isa<ConstantInt>(*use);
545 });
546 }
547
548 /// Accumulate the constant address offset of this GEP if possible.
549 ///
550 /// This routine accepts an APInt into which it will accumulate the constant
551 /// offset of this GEP if the GEP is in fact constant. If the GEP is not
552 /// all-constant, it returns false and the value of the offset APInt is
553 /// undefined (it is *not* preserved!). The APInt passed into this routine
554/// must be exactly as wide as the IntPtr type for the address space of the
555 /// base GEP pointer.
556 bool accumulateConstantOffset(const DataLayout &DL, APInt &Offset) const;
557};
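// Editor's sketch (not part of the original header): folding a GEP into a
// constant byte offset when all of its indices are constant. It assumes
// llvm/IR/DataLayout.h is available to the including translation unit.
static inline bool exampleGetConstantGEPOffset(const Value *V,
                                               const DataLayout &DL,
                                               APInt &OffsetOut) {
  const auto *GEP = dyn_cast<GEPOperator>(V);
  if (!GEP)
    return false;
  // The APInt must be exactly as wide as the index type of the GEP's pointer
  // address space (see the documentation above).
  OffsetOut = APInt(DL.getIndexSizeInBits(GEP->getPointerAddressSpace()), 0);
  return GEP->accumulateConstantOffset(DL, OffsetOut);
}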
558
559class PtrToIntOperator
560 : public ConcreteOperator<Operator, Instruction::PtrToInt> {
561 friend class PtrToInt;
562 friend class ConstantExpr;
563
564public:
565 Value *getPointerOperand() {
566 return getOperand(0);
567 }
568 const Value *getPointerOperand() const {
569 return getOperand(0);
570 }
571
572 static unsigned getPointerOperandIndex() {
573 return 0U; // get index for modifying correct operand
574 }
575
576 /// Method to return the pointer operand as a PointerType.
577 Type *getPointerOperandType() const {
578 return getPointerOperand()->getType();
579 }
580
581 /// Method to return the address space of the pointer operand.
582 unsigned getPointerAddressSpace() const {
583 return cast<PointerType>(getPointerOperandType())->getAddressSpace();
584 }
585};
586
587class BitCastOperator
588 : public ConcreteOperator<Operator, Instruction::BitCast> {
589 friend class BitCastInst;
590 friend class ConstantExpr;
591
592public:
593 Type *getSrcTy() const {
594 return getOperand(0)->getType();
595 }
596
597 Type *getDestTy() const {
598 return getType();
599 }
600};
601
602} // end namespace llvm
603
604#endif // LLVM_IR_OPERATOR_H

/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/CodeGen/BasicTTIImpl.h

1//===- BasicTTIImpl.h -------------------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// This file provides a helper that implements much of the TTI interface in
11/// terms of the target-independent code generator and TargetLowering
12/// interfaces.
13//
14//===----------------------------------------------------------------------===//
15
16#ifndef LLVM_CODEGEN_BASICTTIIMPL_H
17#define LLVM_CODEGEN_BASICTTIIMPL_H
18
19#include "llvm/ADT/APInt.h"
20#include "llvm/ADT/ArrayRef.h"
21#include "llvm/ADT/BitVector.h"
22#include "llvm/ADT/SmallPtrSet.h"
23#include "llvm/ADT/SmallVector.h"
24#include "llvm/Analysis/LoopInfo.h"
25#include "llvm/Analysis/TargetTransformInfo.h"
26#include "llvm/Analysis/TargetTransformInfoImpl.h"
27#include "llvm/CodeGen/ISDOpcodes.h"
28#include "llvm/CodeGen/TargetLowering.h"
29#include "llvm/CodeGen/TargetSubtargetInfo.h"
30#include "llvm/CodeGen/ValueTypes.h"
31#include "llvm/IR/BasicBlock.h"
32#include "llvm/IR/CallSite.h"
33#include "llvm/IR/Constant.h"
34#include "llvm/IR/Constants.h"
35#include "llvm/IR/DataLayout.h"
36#include "llvm/IR/DerivedTypes.h"
37#include "llvm/IR/InstrTypes.h"
38#include "llvm/IR/Instruction.h"
39#include "llvm/IR/Instructions.h"
40#include "llvm/IR/Intrinsics.h"
41#include "llvm/IR/Operator.h"
42#include "llvm/IR/Type.h"
43#include "llvm/IR/Value.h"
44#include "llvm/MC/MCSchedule.h"
45#include "llvm/Support/Casting.h"
46#include "llvm/Support/CommandLine.h"
47#include "llvm/Support/ErrorHandling.h"
48#include "llvm/Support/MachineValueType.h"
49#include "llvm/Support/MathExtras.h"
50#include <algorithm>
51#include <cassert>
52#include <cstdint>
53#include <limits>
54#include <utility>
55
56namespace llvm {
57
58class Function;
59class GlobalValue;
60class LLVMContext;
61class ScalarEvolution;
62class SCEV;
63class TargetMachine;
64
65extern cl::opt<unsigned> PartialUnrollingThreshold;
66
67/// Base class which can be used to help build a TTI implementation.
68///
69/// This class provides as much implementation of the TTI interface as is
70/// possible using the target independent parts of the code generator.
71///
72/// In order to subclass it, your class must implement a getST() method to
73/// return the subtarget, and a getTLI() method to return the target lowering.
74/// We need these methods implemented in the derived class so that this class
75/// doesn't have to duplicate storage for them.
76template <typename T>
77class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
78private:
79 using BaseT = TargetTransformInfoImplCRTPBase<T>;
80 using TTI = TargetTransformInfo;
81
82 /// Estimate a cost of Broadcast as an extract and sequence of insert
83 /// operations.
84 unsigned getBroadcastShuffleOverhead(Type *Ty) {
85 assert(Ty->isVectorTy() && "Can only shuffle vectors");
86 unsigned Cost = 0;
87 // Broadcast cost is equal to the cost of extracting the zero'th element
88 // plus the cost of inserting it into every element of the result vector.
89 Cost += static_cast<T *>(this)->getVectorInstrCost(
90 Instruction::ExtractElement, Ty, 0);
91
92 for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) {
93 Cost += static_cast<T *>(this)->getVectorInstrCost(
94 Instruction::InsertElement, Ty, i);
95 }
96 return Cost;
97 }
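  // Editor's note: for a <4 x float> broadcast this amounts to one
  // ExtractElement query (index 0) plus four InsertElement queries
  // (indices 0..3), i.e. five getVectorInstrCost() calls in total.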
98
99 /// Estimate a cost of shuffle as a sequence of extract and insert
100 /// operations.
101 unsigned getPermuteShuffleOverhead(Type *Ty) {
102 assert(Ty->isVectorTy() && "Can only shuffle vectors");
103 unsigned Cost = 0;
104 // Shuffle cost is equal to the cost of extracting the elements from the
105 // source arguments plus the cost of inserting them into the result vector.
106
107 // E.g. a <4 x float> shuffle with a mask of <0,5,2,7> needs to extract
108 // index 0 of the first vector, index 1 of the second vector, index 2 of
109 // the first vector, and finally index 3 of the second vector, and insert
110 // them at indices <0,1,2,3> of the result vector.
111 for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) {
112 Cost += static_cast<T *>(this)
113 ->getVectorInstrCost(Instruction::InsertElement, Ty, i);
114 Cost += static_cast<T *>(this)
115 ->getVectorInstrCost(Instruction::ExtractElement, Ty, i);
116 }
117 return Cost;
118 }
119
120 /// Estimate a cost of subvector extraction as a sequence of extract and
121 /// insert operations.
122 unsigned getExtractSubvectorOverhead(Type *Ty, int Index, Type *SubTy) {
123 assert(Ty && Ty->isVectorTy() && SubTy && SubTy->isVectorTy() &&
124        "Can only extract subvectors from vectors");
125 int NumSubElts = SubTy->getVectorNumElements();
126 assert((Index + NumSubElts) <= (int)Ty->getVectorNumElements() &&
127        "SK_ExtractSubvector index out of range");
128
129 unsigned Cost = 0;
130 // Subvector extraction cost is equal to the cost of extracting the elements
131 // from the source type plus the cost of inserting them into the result
132 // vector type.
133 for (int i = 0; i != NumSubElts; ++i) {
134 Cost += static_cast<T *>(this)->getVectorInstrCost(
135 Instruction::ExtractElement, Ty, i + Index);
136 Cost += static_cast<T *>(this)->getVectorInstrCost(
137 Instruction::InsertElement, SubTy, i);
138 }
139 return Cost;
140 }
141
142 /// Estimate a cost of subvector insertion as a sequence of extract and
143 /// insert operations.
144 unsigned getInsertSubvectorOverhead(Type *Ty, int Index, Type *SubTy) {
145 assert(Ty && Ty->isVectorTy() && SubTy && SubTy->isVectorTy() &&
146        "Can only insert subvectors into vectors");
147 int NumSubElts = SubTy->getVectorNumElements();
148 assert((Index + NumSubElts) <= (int)Ty->getVectorNumElements() &&
149        "SK_InsertSubvector index out of range");
150
151 unsigned Cost = 0;
152 // Subvector insertion cost is equal to the cost of extracting element from
153 // the source type plus the cost of inserting them into the result vector
154 // type.
155 for (int i = 0; i != NumSubElts; ++i) {
156 Cost += static_cast<T *>(this)->getVectorInstrCost(
157 Instruction::ExtractElement, SubTy, i);
158 Cost += static_cast<T *>(this)->getVectorInstrCost(
159 Instruction::InsertElement, Ty, i + Index);
160 }
161 return Cost;
162 }
163
164 /// Local query method delegates up to T which *must* implement this!
165 const TargetSubtargetInfo *getST() const {
166 return static_cast<const T *>(this)->getST();
167 }
168
169 /// Local query method delegates up to T which *must* implement this!
170 const TargetLoweringBase *getTLI() const {
171 return static_cast<const T *>(this)->getTLI();
172 }
173
174 static ISD::MemIndexedMode getISDIndexedMode(TTI::MemIndexedMode M) {
175 switch (M) {
176 case TTI::MIM_Unindexed:
177 return ISD::UNINDEXED;
178 case TTI::MIM_PreInc:
179 return ISD::PRE_INC;
180 case TTI::MIM_PreDec:
181 return ISD::PRE_DEC;
182 case TTI::MIM_PostInc:
183 return ISD::POST_INC;
184 case TTI::MIM_PostDec:
185 return ISD::POST_DEC;
186 }
187     llvm_unreachable("Unexpected MemIndexedMode");
188 }
189
190protected:
191 explicit BasicTTIImplBase(const TargetMachine *TM, const DataLayout &DL)
192 : BaseT(DL) {}
193 virtual ~BasicTTIImplBase() = default;
194
195 using TargetTransformInfoImplBase::DL;
196
197public:
198 /// \name Scalar TTI Implementations
199 /// @{
200 bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth,
201 unsigned AddressSpace, unsigned Alignment,
202 bool *Fast) const {
203 EVT E = EVT::getIntegerVT(Context, BitWidth);
204 return getTLI()->allowsMisalignedMemoryAccesses(
205 E, AddressSpace, Alignment, MachineMemOperand::MONone, Fast);
206 }
207
208 bool hasBranchDivergence() { return false; }
209
210 bool useGPUDivergenceAnalysis() { return false; }
211
212 bool isSourceOfDivergence(const Value *V) { return false; }
213
214 bool isAlwaysUniform(const Value *V) { return false; }
215
216 unsigned getFlatAddressSpace() {
217 // Return an invalid address space.
218 return -1;
219 }
220
221 bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
222 Intrinsic::ID IID) const {
223 return false;
224 }
225
226 bool rewriteIntrinsicWithAddressSpace(IntrinsicInst *II,
227 Value *OldV, Value *NewV) const {
228 return false;
229 }
230
231 bool isLegalAddImmediate(int64_t imm) {
232 return getTLI()->isLegalAddImmediate(imm);
233 }
234
235 bool isLegalICmpImmediate(int64_t imm) {
236 return getTLI()->isLegalICmpImmediate(imm);
237 }
238
239 bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
240 bool HasBaseReg, int64_t Scale,
241 unsigned AddrSpace, Instruction *I = nullptr) {
242 TargetLoweringBase::AddrMode AM;
243 AM.BaseGV = BaseGV;
244 AM.BaseOffs = BaseOffset;
245 AM.HasBaseReg = HasBaseReg;
246 AM.Scale = Scale;
247 return getTLI()->isLegalAddressingMode(DL, AM, Ty, AddrSpace, I);
248 }
249
250 bool isIndexedLoadLegal(TTI::MemIndexedMode M, Type *Ty,
251 const DataLayout &DL) const {
252 EVT VT = getTLI()->getValueType(DL, Ty);
253 return getTLI()->isIndexedLoadLegal(getISDIndexedMode(M), VT);
254 }
255
256 bool isIndexedStoreLegal(TTI::MemIndexedMode M, Type *Ty,
257 const DataLayout &DL) const {
258 EVT VT = getTLI()->getValueType(DL, Ty);
259 return getTLI()->isIndexedStoreLegal(getISDIndexedMode(M), VT);
260 }
261
262 bool isLSRCostLess(TTI::LSRCost C1, TTI::LSRCost C2) {
263 return TargetTransformInfoImplBase::isLSRCostLess(C1, C2);
264 }
265
266 int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
267 bool HasBaseReg, int64_t Scale, unsigned AddrSpace) {
268 TargetLoweringBase::AddrMode AM;
269 AM.BaseGV = BaseGV;
270 AM.BaseOffs = BaseOffset;
271 AM.HasBaseReg = HasBaseReg;
272 AM.Scale = Scale;
273 return getTLI()->getScalingFactorCost(DL, AM, Ty, AddrSpace);
274 }
275
276 bool isTruncateFree(Type *Ty1, Type *Ty2) {
277 return getTLI()->isTruncateFree(Ty1, Ty2);
278 }
279
280 bool isProfitableToHoist(Instruction *I) {
281 return getTLI()->isProfitableToHoist(I);
282 }
283
284 bool useAA() const { return getST()->useAA(); }
285
286 bool isTypeLegal(Type *Ty) {
287 EVT VT = getTLI()->getValueType(DL, Ty);
288 return getTLI()->isTypeLegal(VT);
289 }
290
291 int getGEPCost(Type *PointeeType, const Value *Ptr,
292 ArrayRef<const Value *> Operands) {
293 return BaseT::getGEPCost(PointeeType, Ptr, Operands);
294 }
295
296 int getExtCost(const Instruction *I, const Value *Src) {
297 if (getTLI()->isExtFree(I))
298 return TargetTransformInfo::TCC_Free;
299
300 if (isa<ZExtInst>(I) || isa<SExtInst>(I))
301 if (const LoadInst *LI = dyn_cast<LoadInst>(Src))
302 if (getTLI()->isExtLoad(LI, I, DL))
303 return TargetTransformInfo::TCC_Free;
304
305 return TargetTransformInfo::TCC_Basic;
306 }
307
308 unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
309 ArrayRef<const Value *> Arguments, const User *U) {
310 return BaseT::getIntrinsicCost(IID, RetTy, Arguments, U);
311 }
312
313 unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
314 ArrayRef<Type *> ParamTys, const User *U) {
315 if (IID == Intrinsic::cttz) {
316 if (getTLI()->isCheapToSpeculateCttz())
317 return TargetTransformInfo::TCC_Basic;
318 return TargetTransformInfo::TCC_Expensive;
319 }
320
321 if (IID == Intrinsic::ctlz) {
322 if (getTLI()->isCheapToSpeculateCtlz())
323 return TargetTransformInfo::TCC_Basic;
324 return TargetTransformInfo::TCC_Expensive;
325 }
326
327 return BaseT::getIntrinsicCost(IID, RetTy, ParamTys, U);
328 }
329
330 unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
331 unsigned &JumpTableSize,
332 ProfileSummaryInfo *PSI,
333 BlockFrequencyInfo *BFI) {
334 /// Try to find the estimated number of clusters. Note that the number of
335 /// clusters identified in this function could be different from the actual
336 /// numbers found in lowering. This function ignores switches that are
337 /// lowered with a mix of jump table / bit test / BTree. This function was
338 /// initially intended to be used when estimating the cost of a switch in
339 /// the inline cost heuristic, but it's a generic cost model to be used in other
340 /// places (e.g., in loop unrolling).
341 unsigned N = SI.getNumCases();
342 const TargetLoweringBase *TLI = getTLI();
343 const DataLayout &DL = this->getDataLayout();
344
345 JumpTableSize = 0;
346 bool IsJTAllowed = TLI->areJTsAllowed(SI.getParent()->getParent());
347
348 // Early exit if both a jump table and bit test are not allowed.
349 if (N < 1 || (!IsJTAllowed && DL.getIndexSizeInBits(0u) < N))
350 return N;
351
352 APInt MaxCaseVal = SI.case_begin()->getCaseValue()->getValue();
353 APInt MinCaseVal = MaxCaseVal;
354 for (auto CI : SI.cases()) {
355 const APInt &CaseVal = CI.getCaseValue()->getValue();
356 if (CaseVal.sgt(MaxCaseVal))
357 MaxCaseVal = CaseVal;
358 if (CaseVal.slt(MinCaseVal))
359 MinCaseVal = CaseVal;
360 }
361
362 // Check if suitable for a bit test
363 if (N <= DL.getIndexSizeInBits(0u)) {
364 SmallPtrSet<const BasicBlock *, 4> Dests;
365 for (auto I : SI.cases())
366 Dests.insert(I.getCaseSuccessor());
367
368 if (TLI->isSuitableForBitTests(Dests.size(), N, MinCaseVal, MaxCaseVal,
369 DL))
370 return 1;
371 }
372
373 // Check if suitable for a jump table.
374 if (IsJTAllowed) {
375 if (N < 2 || N < TLI->getMinimumJumpTableEntries())
376 return N;
377 uint64_t Range =
378 (MaxCaseVal - MinCaseVal)
379 .getLimitedValue(std::numeric_limits<uint64_t>::max() - 1) + 1;
380 // Check whether a range of clusters is dense enough for a jump table
381 if (TLI->isSuitableForJumpTable(&SI, N, Range, PSI, BFI)) {
382 JumpTableSize = Range;
383 return 1;
384 }
385 }
386 return N;
387 }
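As a worked example of the jump-table path above (the case values are hypothetical): a switch with four cases {0, 2, 5, 7} has MinCaseVal = 0 and MaxCaseVal = 7, so Range = (7 - 0) + 1 = 8, and whether this counts as a single cluster is then up to the target hook:

// Hypothetical: N = 4 cases {0, 2, 5, 7}, jump tables allowed, no bit-test hit.
//   Range = (MaxCaseVal - MinCaseVal) + 1 = 8
//   TLI->isSuitableForJumpTable(&SI, /*N=*/4, /*Range=*/8, PSI, BFI)
//     true  -> JumpTableSize = 8, return 1 cluster
//     false -> return N (= 4 clusters)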
388
389 bool shouldBuildLookupTables() {
390 const TargetLoweringBase *TLI = getTLI();
391 return TLI->isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) ||
392 TLI->isOperationLegalOrCustom(ISD::BRIND, MVT::Other);
393 }
394
395 bool haveFastSqrt(Type *Ty) {
396 const TargetLoweringBase *TLI = getTLI();
397 EVT VT = TLI->getValueType(DL, Ty);
398 return TLI->isTypeLegal(VT) &&
399 TLI->isOperationLegalOrCustom(ISD::FSQRT, VT);
400 }
401
402 bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) {
403 return true;
404 }
405
406 unsigned getFPOpCost(Type *Ty) {
407 // Check whether FADD is available, as a proxy for floating-point in
408 // general.
409 const TargetLoweringBase *TLI = getTLI();
410 EVT VT = TLI->getValueType(DL, Ty);
411 if (TLI->isOperationLegalOrCustomOrPromote(ISD::FADD, VT))
412 return TargetTransformInfo::TCC_Basic;
413 return TargetTransformInfo::TCC_Expensive;
414 }
415
416 unsigned getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) {
417 const TargetLoweringBase *TLI = getTLI();
418 switch (Opcode) {
45. Control jumps to 'case AddrSpaceCast:' at line 429
419 default: break;
420 case Instruction::Trunc:
421 if (TLI->isTruncateFree(OpTy, Ty))
422 return TargetTransformInfo::TCC_Free;
423 return TargetTransformInfo::TCC_Basic;
424 case Instruction::ZExt:
425 if (TLI->isZExtFree(OpTy, Ty))
426 return TargetTransformInfo::TCC_Free;
427 return TargetTransformInfo::TCC_Basic;
428
429 case Instruction::AddrSpaceCast:
430 if (TLI->isFreeAddrSpaceCast(OpTy->getPointerAddressSpace(),
46. Called C++ object pointer is null
431 Ty->getPointerAddressSpace()))
432 return TargetTransformInfo::TCC_Free;
433 return TargetTransformInfo::TCC_Basic;
434 }
435
436 return BaseT::getOperationCost(Opcode, Ty, OpTy);
437 }
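The report's step 46 above is the point of the warning: along the analyzed path the analyzer concludes that the TLI pointer obtained from getTLI() can be null when the AddrSpaceCast case calls isFreeAddrSpaceCast at line 430. A purely illustrative way to make the dereference conditional is a guard like the following sketch (it only shows where the null assumption bites; it is not the upstream fix):

// Illustration only: guard the TLI pointer the analyzer reports as null.
case Instruction::AddrSpaceCast:
  if (TLI && TLI->isFreeAddrSpaceCast(OpTy->getPointerAddressSpace(),
                                      Ty->getPointerAddressSpace()))
    return TargetTransformInfo::TCC_Free;
  return TargetTransformInfo::TCC_Basic;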
438
439 unsigned getInliningThresholdMultiplier() { return 1; }
440
441 int getInlinerVectorBonusPercent() { return 150; }
442
443 void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
444 TTI::UnrollingPreferences &UP) {
445 // This unrolling functionality is target independent, but to provide some
446 // motivation for its intended use, for x86:
447
448 // According to the Intel 64 and IA-32 Architectures Optimization Reference
449 // Manual, Intel Core models and later have a loop stream detector (and
450 // associated uop queue) that can benefit from partial unrolling.
451 // The relevant requirements are:
452 // - The loop must have no more than 4 (8 for Nehalem and later) branches
453 // taken, and none of them may be calls.
454 // - The loop can have no more than 18 (28 for Nehalem and later) uops.
455
456 // According to the Software Optimization Guide for AMD Family 15h
457 // Processors, models 30h-4fh (Steamroller and later) have a loop predictor
458 // and loop buffer which can benefit from partial unrolling.
459 // The relevant requirements are:
460 // - The loop must have fewer than 16 branches
461 // - The loop must have less than 40 uops in all executed loop branches
462
463 // The number of taken branches in a loop is hard to estimate here, and
464 // benchmarking has revealed that it is better not to be conservative when
465 // estimating the branch count. As a result, we'll ignore the branch limits
466 // until someone finds a case where it matters in practice.
467
468 unsigned MaxOps;
469 const TargetSubtargetInfo *ST = getST();
470 if (PartialUnrollingThreshold.getNumOccurrences() > 0)
471 MaxOps = PartialUnrollingThreshold;
472 else if (ST->getSchedModel().LoopMicroOpBufferSize > 0)
473 MaxOps = ST->getSchedModel().LoopMicroOpBufferSize;
474 else
475 return;
476
477 // Scan the loop: don't unroll loops with calls.
478 for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); I != E;
479 ++I) {
480 BasicBlock *BB = *I;
481
482 for (BasicBlock::iterator J = BB->begin(), JE = BB->end(); J != JE; ++J)
483 if (isa<CallInst>(J) || isa<InvokeInst>(J)) {
484 ImmutableCallSite CS(&*J);
485 if (const Function *F = CS.getCalledFunction()) {
486 if (!static_cast<T *>(this)->isLoweredToCall(F))
487 continue;
488 }
489
490 return;
491 }
492 }
493
494 // Enable runtime and partial unrolling up to the specified size.
495 // Enable using trip count upper bound to unroll loops.
496 UP.Partial = UP.Runtime = UP.UpperBound = true;
497 UP.PartialThreshold = MaxOps;
498
499 // Avoid unrolling when optimizing for size.
500 UP.OptSizeThreshold = 0;
501 UP.PartialOptSizeThreshold = 0;
502
503 // Set number of instructions optimized when "back edge"
504 // becomes "fall through" to default value of 2.
505 UP.BEInsns = 2;
506 }
507
508 bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
509 AssumptionCache &AC,
510 TargetLibraryInfo *LibInfo,
511 HardwareLoopInfo &HWLoopInfo) {
512 return BaseT::isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo);
513 }
514
515 bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE,
516 AssumptionCache &AC, TargetLibraryInfo *TLI,
517 DominatorTree *DT,
518 const LoopAccessInfo *LAI) {
519 return BaseT::preferPredicateOverEpilogue(L, LI, SE, AC, TLI, DT, LAI);
520 }
521
522 int getInstructionLatency(const Instruction *I) {
523 if (isa<LoadInst>(I))
524 return getST()->getSchedModel().DefaultLoadLatency;
525
526 return BaseT::getInstructionLatency(I);
527 }
528
529 virtual Optional<unsigned>
530 getCacheSize(TargetTransformInfo::CacheLevel Level) const {
531 return Optional<unsigned>(
532 getST()->getCacheSize(static_cast<unsigned>(Level)));
533 }
534
535 virtual Optional<unsigned>
536 getCacheAssociativity(TargetTransformInfo::CacheLevel Level) const {
537 Optional<unsigned> TargetResult =
538 getST()->getCacheAssociativity(static_cast<unsigned>(Level));
539
540 if (TargetResult)
541 return TargetResult;
542
543 return BaseT::getCacheAssociativity(Level);
544 }
545
546 virtual unsigned getCacheLineSize() const {
547 return getST()->getCacheLineSize();
548 }
549
550 virtual unsigned getPrefetchDistance() const {
551 return getST()->getPrefetchDistance();
552 }
553
554 virtual unsigned getMinPrefetchStride() const {
555 return getST()->getMinPrefetchStride();
556 }
557
558 virtual unsigned getMaxPrefetchIterationsAhead() const {
559 return getST()->getMaxPrefetchIterationsAhead();
560 }
561
562 /// @}
563
564 /// \name Vector TTI Implementations
565 /// @{
566
567 unsigned getRegisterBitWidth(bool Vector) const { return 32; }
568
569 /// Estimate the overhead of scalarizing an instruction. Insert and Extract
570 /// are set if the result needs to be inserted and/or extracted from vectors.
571 unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) {
572     assert(Ty->isVectorTy() && "Can only scalarize vectors");
573 unsigned Cost = 0;
574
575 for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) {
576 if (Insert)
577 Cost += static_cast<T *>(this)
578 ->getVectorInstrCost(Instruction::InsertElement, Ty, i);
579 if (Extract)
580 Cost += static_cast<T *>(this)
581 ->getVectorInstrCost(Instruction::ExtractElement, Ty, i);
582 }
583
584 return Cost;
585 }
586
587 /// Estimate the overhead of scalarizing an instruction's unique
588 /// non-constant operands. The types of the arguments are ordinarily
589 /// scalar, in which case the costs are multiplied with VF.
590 unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
591 unsigned VF) {
592 unsigned Cost = 0;
593 SmallPtrSet<const Value*, 4> UniqueOperands;
594 for (const Value *A : Args) {
595 if (!isa<Constant>(A) && UniqueOperands.insert(A).second) {
596 Type *VecTy = nullptr;
597 if (A->getType()->isVectorTy()) {
598 VecTy = A->getType();
599 // If A is a vector operand, VF should be 1 or correspond to A.
600         assert((VF == 1 || VF == VecTy->getVectorNumElements()) &&
601                "Vector argument does not match VF");
602 }
603 else
604 VecTy = VectorType::get(A->getType(), VF);
605
606 Cost += getScalarizationOverhead(VecTy, false, true);
607 }
608 }
609
610 return Cost;
611 }
612
613 unsigned getScalarizationOverhead(Type *VecTy, ArrayRef<const Value *> Args) {
614     assert(VecTy->isVectorTy());
615
616 unsigned Cost = 0;
617
618 Cost += getScalarizationOverhead(VecTy, true, false);
619 if (!Args.empty())
620 Cost += getOperandsScalarizationOverhead(Args,
621 VecTy->getVectorNumElements());
622 else
623 // When no information on arguments is provided, we add the cost
624 // associated with one argument as a heuristic.
625 Cost += getScalarizationOverhead(VecTy, false, true);
626
627 return Cost;
628 }
629
630 unsigned getMaxInterleaveFactor(unsigned VF) { return 1; }
631
632 unsigned getArithmeticInstrCost(
633 unsigned Opcode, Type *Ty,
634 TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
635 TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
636 TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
637 TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
638 ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
639 const Instruction *CxtI = nullptr) {
640 // Check if any of the operands are vector operands.
641 const TargetLoweringBase *TLI = getTLI();
642 int ISD = TLI->InstructionOpcodeToISD(Opcode);
643     assert(ISD && "Invalid opcode");
644
645 std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
646
647 bool IsFloat = Ty->isFPOrFPVectorTy();
648 // Assume that floating point arithmetic operations cost twice as much as
649 // integer operations.
650 unsigned OpCost = (IsFloat ? 2 : 1);
651
652 if (TLI->isOperationLegalOrPromote(ISD, LT.second)) {
653 // The operation is legal. Assume it costs 1.
654 // TODO: Once we have extract/insert subvector cost we need to use them.
655 return LT.first * OpCost;
656 }
657
658 if (!TLI->isOperationExpand(ISD, LT.second)) {
659 // If the operation is custom lowered, then assume that the code is twice
660 // as expensive.
661 return LT.first * 2 * OpCost;
662 }
663
664 // Else, assume that we need to scalarize this op.
665 // TODO: If one of the types get legalized by splitting, handle this
666 // similarly to what getCastInstrCost() does.
667 if (Ty->isVectorTy()) {
668 unsigned Num = Ty->getVectorNumElements();
669 unsigned Cost = static_cast<T *>(this)
670 ->getArithmeticInstrCost(Opcode, Ty->getScalarType());
671 // Return the cost of multiple scalar invocation plus the cost of
672 // inserting and extracting the values.
673 return getScalarizationOverhead(Ty, Args) + Num * Cost;
674 }
675
676 // We don't know anything about this scalar instruction.
677 return OpCost;
678 }
679
680 unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
681 Type *SubTp) {
682 switch (Kind) {
683 case TTI::SK_Broadcast:
684 return getBroadcastShuffleOverhead(Tp);
685 case TTI::SK_Select:
686 case TTI::SK_Reverse:
687 case TTI::SK_Transpose:
688 case TTI::SK_PermuteSingleSrc:
689 case TTI::SK_PermuteTwoSrc:
690 return getPermuteShuffleOverhead(Tp);
691 case TTI::SK_ExtractSubvector:
692 return getExtractSubvectorOverhead(Tp, Index, SubTp);
693 case TTI::SK_InsertSubvector:
694 return getInsertSubvectorOverhead(Tp, Index, SubTp);
695 }
696     llvm_unreachable("Unknown TTI::ShuffleKind");
697 }
698
699 unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
700 const Instruction *I = nullptr) {
701 const TargetLoweringBase *TLI = getTLI();
702 int ISD = TLI->InstructionOpcodeToISD(Opcode);
703     assert(ISD && "Invalid opcode");
704 std::pair<unsigned, MVT> SrcLT = TLI->getTypeLegalizationCost(DL, Src);
705 std::pair<unsigned, MVT> DstLT = TLI->getTypeLegalizationCost(DL, Dst);
706
707 // Check for NOOP conversions.
708 if (SrcLT.first == DstLT.first &&
709 SrcLT.second.getSizeInBits() == DstLT.second.getSizeInBits()) {
710
711 // Bitcast between types that are legalized to the same type are free.
712 if (Opcode == Instruction::BitCast || Opcode == Instruction::Trunc)
713 return 0;
714 }
715
716 if (Opcode == Instruction::Trunc &&
717 TLI->isTruncateFree(SrcLT.second, DstLT.second))
718 return 0;
719
720 if (Opcode == Instruction::ZExt &&
721 TLI->isZExtFree(SrcLT.second, DstLT.second))
722 return 0;
723
724 if (Opcode == Instruction::AddrSpaceCast &&
725 TLI->isFreeAddrSpaceCast(Src->getPointerAddressSpace(),
726 Dst->getPointerAddressSpace()))
727 return 0;
728
729 // If this is a zext/sext of a load, return 0 if the corresponding
730 // extending load exists on target.
731 if ((Opcode == Instruction::ZExt || Opcode == Instruction::SExt) &&
732 I && isa<LoadInst>(I->getOperand(0))) {
733 EVT ExtVT = EVT::getEVT(Dst);
734 EVT LoadVT = EVT::getEVT(Src);
735 unsigned LType =
736 ((Opcode == Instruction::ZExt) ? ISD::ZEXTLOAD : ISD::SEXTLOAD);
737 if (TLI->isLoadExtLegal(LType, ExtVT, LoadVT))
738 return 0;
739 }
740
741 // If the cast is marked as legal (or promote) then assume low cost.
742 if (SrcLT.first == DstLT.first &&
743 TLI->isOperationLegalOrPromote(ISD, DstLT.second))
744 return 1;
745
746 // Handle scalar conversions.
747 if (!Src->isVectorTy() && !Dst->isVectorTy()) {
748 // Scalar bitcasts are usually free.
749 if (Opcode == Instruction::BitCast)
750 return 0;
751
752 // Just check the op cost. If the operation is legal then assume it costs
753 // 1.
754 if (!TLI->isOperationExpand(ISD, DstLT.second))
755 return 1;
756
757     // Assume that illegal scalar instructions are expensive.
758 return 4;
759 }
760
761 // Check vector-to-vector casts.
762 if (Dst->isVectorTy() && Src->isVectorTy()) {
763 // If the cast is between same-sized registers, then the check is simple.
764 if (SrcLT.first == DstLT.first &&
765 SrcLT.second.getSizeInBits() == DstLT.second.getSizeInBits()) {
766
767 // Assume that Zext is done using AND.
768 if (Opcode == Instruction::ZExt)
769 return 1;
770
771 // Assume that sext is done using SHL and SRA.
772 if (Opcode == Instruction::SExt)
773 return 2;
774
775 // Just check the op cost. If the operation is legal then assume it
776 // costs
777 // 1 and multiply by the type-legalization overhead.
778 if (!TLI->isOperationExpand(ISD, DstLT.second))
779 return SrcLT.first * 1;
780 }
781
782 // If we are legalizing by splitting, query the concrete TTI for the cost
783 // of casting the original vector twice. We also need to factor in the
784 // cost of the split itself. Count that as 1, to be consistent with
785 // TLI->getTypeLegalizationCost().
786 if ((TLI->getTypeAction(Src->getContext(), TLI->getValueType(DL, Src)) ==
787 TargetLowering::TypeSplitVector ||
788 TLI->getTypeAction(Dst->getContext(), TLI->getValueType(DL, Dst)) ==
789 TargetLowering::TypeSplitVector) &&
790 Src->getVectorNumElements() > 1 && Dst->getVectorNumElements() > 1) {
791 Type *SplitDst = VectorType::get(Dst->getVectorElementType(),
792 Dst->getVectorNumElements() / 2);
793 Type *SplitSrc = VectorType::get(Src->getVectorElementType(),
794 Src->getVectorNumElements() / 2);
795 T *TTI = static_cast<T *>(this);
796 return TTI->getVectorSplitCost() +
797 (2 * TTI->getCastInstrCost(Opcode, SplitDst, SplitSrc, I));
798 }
799
800 // In other cases where the source or destination are illegal, assume
801 // the operation will get scalarized.
802 unsigned Num = Dst->getVectorNumElements();
803 unsigned Cost = static_cast<T *>(this)->getCastInstrCost(
804 Opcode, Dst->getScalarType(), Src->getScalarType(), I);
805
806 // Return the cost of multiple scalar invocation plus the cost of
807 // inserting and extracting the values.
808 return getScalarizationOverhead(Dst, true, true) + Num * Cost;
809 }
810
811 // We already handled vector-to-vector and scalar-to-scalar conversions.
812 // This
813 // is where we handle bitcast between vectors and scalars. We need to assume
814 // that the conversion is scalarized in one way or another.
815 if (Opcode == Instruction::BitCast)
816 // Illegal bitcasts are done by storing and loading from a stack slot.
817 return (Src->isVectorTy() ? getScalarizationOverhead(Src, false, true)
818 : 0) +
819 (Dst->isVectorTy() ? getScalarizationOverhead(Dst, true, false)
820 : 0);
821
822     llvm_unreachable("Unhandled cast");
823 }
824
825 unsigned getExtractWithExtendCost(unsigned Opcode, Type *Dst,
826 VectorType *VecTy, unsigned Index) {
827 return static_cast<T *>(this)->getVectorInstrCost(
828 Instruction::ExtractElement, VecTy, Index) +
829 static_cast<T *>(this)->getCastInstrCost(Opcode, Dst,
830 VecTy->getElementType());
831 }
832
833 unsigned getCFInstrCost(unsigned Opcode) {
834 // Branches are assumed to be predicted.
835 return 0;
836 }
837
838 unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
839 const Instruction *I) {
840 const TargetLoweringBase *TLI = getTLI();
841 int ISD = TLI->InstructionOpcodeToISD(Opcode);
842     assert(ISD && "Invalid opcode");
843
844 // Selects on vectors are actually vector selects.
845 if (ISD == ISD::SELECT) {
846       assert(CondTy && "CondTy must exist");
847 if (CondTy->isVectorTy())
848 ISD = ISD::VSELECT;
849 }
850 std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
851
852 if (!(ValTy->isVectorTy() && !LT.second.isVector()) &&
853 !TLI->isOperationExpand(ISD, LT.second)) {
854 // The operation is legal. Assume it costs 1. Multiply
855 // by the type-legalization overhead.
856 return LT.first * 1;
857 }
858
859 // Otherwise, assume that the cast is scalarized.
860 // TODO: If one of the types get legalized by splitting, handle this
861 // similarly to what getCastInstrCost() does.
862 if (ValTy->isVectorTy()) {
863 unsigned Num = ValTy->getVectorNumElements();
864 if (CondTy)
865 CondTy = CondTy->getScalarType();
866 unsigned Cost = static_cast<T *>(this)->getCmpSelInstrCost(
867 Opcode, ValTy->getScalarType(), CondTy, I);
868
869 // Return the cost of multiple scalar invocation plus the cost of
870 // inserting and extracting the values.
871 return getScalarizationOverhead(ValTy, true, false) + Num * Cost;
872 }
873
874 // Unknown scalar opcode.
875 return 1;
876 }
877
878 unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
879 std::pair<unsigned, MVT> LT =
880 getTLI()->getTypeLegalizationCost(DL, Val->getScalarType());
881
882 return LT.first;
883 }
884
885 unsigned getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
886 unsigned AddressSpace,
887 const Instruction *I = nullptr) {
888     assert(!Src->isVoidTy() && "Invalid type");
889 std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(DL, Src);
890
891 // Assuming that all loads of legal types cost 1.
892 unsigned Cost = LT.first;
893
894 if (Src->isVectorTy() &&
895 Src->getPrimitiveSizeInBits() < LT.second.getSizeInBits()) {
896 // This is a vector load that legalizes to a larger type than the vector
897 // itself. Unless the corresponding extending load or truncating store is
898 // legal, then this will scalarize.
899 TargetLowering::LegalizeAction LA = TargetLowering::Expand;
900 EVT MemVT = getTLI()->getValueType(DL, Src);
901 if (Opcode == Instruction::Store)
902 LA = getTLI()->getTruncStoreAction(LT.second, MemVT);
903 else
904 LA = getTLI()->getLoadExtAction(ISD::EXTLOAD, LT.second, MemVT);
905
906 if (LA != TargetLowering::Legal && LA != TargetLowering::Custom) {
907 // This is a vector load/store for some illegal type that is scalarized.
908 // We must account for the cost of building or decomposing the vector.
909 Cost += getScalarizationOverhead(Src, Opcode != Instruction::Store,
910 Opcode == Instruction::Store);
911 }
912 }
913
914 return Cost;
915 }
916
917 unsigned getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
918 unsigned Factor,
919 ArrayRef<unsigned> Indices,
920 unsigned Alignment, unsigned AddressSpace,
921 bool UseMaskForCond = false,
922 bool UseMaskForGaps = false) {
923 VectorType *VT = dyn_cast<VectorType>(VecTy);
924     assert(VT && "Expect a vector type for interleaved memory op");
925
926 unsigned NumElts = VT->getNumElements();
927     assert(Factor > 1 && NumElts % Factor == 0 && "Invalid interleave factor");
928
929 unsigned NumSubElts = NumElts / Factor;
930 VectorType *SubVT = VectorType::get(VT->getElementType(), NumSubElts);
931
932 // Firstly, the cost of load/store operation.
933 unsigned Cost;
934 if (UseMaskForCond || UseMaskForGaps)
935 Cost = static_cast<T *>(this)->getMaskedMemoryOpCost(
936 Opcode, VecTy, Alignment, AddressSpace);
937 else
938 Cost = static_cast<T *>(this)->getMemoryOpCost(
939 Opcode, VecTy, MaybeAlign(Alignment), AddressSpace);
940
941 // Legalize the vector type, and get the legalized and unlegalized type
942 // sizes.
943 MVT VecTyLT = getTLI()->getTypeLegalizationCost(DL, VecTy).second;
944 unsigned VecTySize =
945 static_cast<T *>(this)->getDataLayout().getTypeStoreSize(VecTy);
946 unsigned VecTyLTSize = VecTyLT.getStoreSize();
947
948 // Return the ceiling of dividing A by B.
949 auto ceil = [](unsigned A, unsigned B) { return (A + B - 1) / B; };
950
951 // Scale the cost of the memory operation by the fraction of legalized
952 // instructions that will actually be used. We shouldn't account for the
953 // cost of dead instructions since they will be removed.
954 //
955 // E.g., An interleaved load of factor 8:
956 // %vec = load <16 x i64>, <16 x i64>* %ptr
957 // %v0 = shufflevector %vec, undef, <0, 8>
958 //
959 // If <16 x i64> is legalized to 8 v2i64 loads, only 2 of the loads will be
960 // used (those corresponding to elements [0:1] and [8:9] of the unlegalized
961 // type). The other loads are unused.
962 //
963 // We only scale the cost of loads since interleaved store groups aren't
964 // allowed to have gaps.
965 if (Opcode == Instruction::Load && VecTySize > VecTyLTSize) {
966 // The number of loads of a legal type it will take to represent a load
967 // of the unlegalized vector type.
968 unsigned NumLegalInsts = ceil(VecTySize, VecTyLTSize);
969
970 // The number of elements of the unlegalized type that correspond to a
971 // single legal instruction.
972 unsigned NumEltsPerLegalInst = ceil(NumElts, NumLegalInsts);
973
974 // Determine which legal instructions will be used.
975 BitVector UsedInsts(NumLegalInsts, false);
976 for (unsigned Index : Indices)
977 for (unsigned Elt = 0; Elt < NumSubElts; ++Elt)
978 UsedInsts.set((Index + Elt * Factor) / NumEltsPerLegalInst);
979
980 // Scale the cost of the load by the fraction of legal instructions that
981 // will be used.
982 Cost *= UsedInsts.count() / NumLegalInsts;
983 }
984
985     // Then add the cost of the interleave operation.
986 if (Opcode == Instruction::Load) {
987       // The interleave cost is similar to extracting the sub vectors' elements
988       // from the wide vector and inserting them into the sub vectors.
989 //
990 // E.g. An interleaved load of factor 2 (with one member of index 0):
991 // %vec = load <8 x i32>, <8 x i32>* %ptr
992 // %v0 = shuffle %vec, undef, <0, 2, 4, 6> ; Index 0
993 // The cost is estimated as extract elements at 0, 2, 4, 6 from the
994 // <8 x i32> vector and insert them into a <4 x i32> vector.
995
996       assert(Indices.size() <= Factor &&
997              "Interleaved memory op has too many members");
998
999 for (unsigned Index : Indices) {
1000        assert(Index < Factor && "Invalid index for interleaved memory op");
1001
1002 // Extract elements from loaded vector for each sub vector.
1003 for (unsigned i = 0; i < NumSubElts; i++)
1004 Cost += static_cast<T *>(this)->getVectorInstrCost(
1005 Instruction::ExtractElement, VT, Index + i * Factor);
1006 }
1007
1008 unsigned InsSubCost = 0;
1009 for (unsigned i = 0; i < NumSubElts; i++)
1010 InsSubCost += static_cast<T *>(this)->getVectorInstrCost(
1011 Instruction::InsertElement, SubVT, i);
1012
1013 Cost += Indices.size() * InsSubCost;
1014 } else {
1015      // The interleave cost is extracting all elements from the sub vectors, and
1016      // inserting them into the wide vector.
1017 //
1018 // E.g. An interleaved store of factor 2:
1019 // %v0_v1 = shuffle %v0, %v1, <0, 4, 1, 5, 2, 6, 3, 7>
1020 // store <8 x i32> %interleaved.vec, <8 x i32>* %ptr
1021 // The cost is estimated as extract all elements from both <4 x i32>
1022 // vectors and insert into the <8 x i32> vector.
1023
1024 unsigned ExtSubCost = 0;
1025 for (unsigned i = 0; i < NumSubElts; i++)
1026 ExtSubCost += static_cast<T *>(this)->getVectorInstrCost(
1027 Instruction::ExtractElement, SubVT, i);
1028 Cost += ExtSubCost * Factor;
1029
1030 for (unsigned i = 0; i < NumElts; i++)
1031 Cost += static_cast<T *>(this)
1032 ->getVectorInstrCost(Instruction::InsertElement, VT, i);
1033 }
1034
1035 if (!UseMaskForCond)
1036 return Cost;
1037
1038 Type *I8Type = Type::getInt8Ty(VT->getContext());
1039 VectorType *MaskVT = VectorType::get(I8Type, NumElts);
1040 SubVT = VectorType::get(I8Type, NumSubElts);
1041
1042    // The Mask shuffling cost is extracting all the elements of the Mask
1043    // and inserting each of them Factor times into the wide vector:
1044 //
1045 // E.g. an interleaved group with factor 3:
1046 // %mask = icmp ult <8 x i32> %vec1, %vec2
1047 // %interleaved.mask = shufflevector <8 x i1> %mask, <8 x i1> undef,
1048 // <24 x i32> <0,0,0,1,1,1,2,2,2,3,3,3,4,4,4,5,5,5,6,6,6,7,7,7>
1049 // The cost is estimated as extract all mask elements from the <8xi1> mask
1050 // vector and insert them factor times into the <24xi1> shuffled mask
1051 // vector.
1052 for (unsigned i = 0; i < NumSubElts; i++)
1053 Cost += static_cast<T *>(this)->getVectorInstrCost(
1054 Instruction::ExtractElement, SubVT, i);
1055
1056 for (unsigned i = 0; i < NumElts; i++)
1057 Cost += static_cast<T *>(this)->getVectorInstrCost(
1058 Instruction::InsertElement, MaskVT, i);
1059
1060 // The Gaps mask is invariant and created outside the loop, therefore the
1061 // cost of creating it is not accounted for here. However if we have both
1062 // a MaskForGaps and some other mask that guards the execution of the
1063 // memory access, we need to account for the cost of And-ing the two masks
1064 // inside the loop.
1065 if (UseMaskForGaps)
1066 Cost += static_cast<T *>(this)->getArithmeticInstrCost(
1067 BinaryOperator::And, MaskVT);
1068
1069 return Cost;
1070 }
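A worked instance of the factor-2 load example from the comments above, assuming each getVectorInstrCost query returns 1 (a unit-cost assumption made purely for illustration):

// Hypothetical: Factor = 2, Indices = {0}, VT = <8 x i32>, SubVT = <4 x i32>.
//   4 x ExtractElement from <8 x i32> at indices 0, 2, 4, 6  -> +4
//   4 x InsertElement  into <4 x i32> at indices 0, 1, 2, 3  -> +4
//   => the interleave part adds 8 on top of the cost of the load itself.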
1071
1072 /// Get intrinsic cost based on arguments.
1073 unsigned getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
1074 ArrayRef<Value *> Args, FastMathFlags FMF,
1075 unsigned VF = 1) {
1076 unsigned RetVF = (RetTy->isVectorTy() ? RetTy->getVectorNumElements() : 1);
1077     assert((RetVF == 1 || VF == 1) && "VF > 1 and RetVF is a vector type");
1078 auto *ConcreteTTI = static_cast<T *>(this);
1079
1080 switch (IID) {
1081 default: {
1082 // Assume that we need to scalarize this intrinsic.
1083 SmallVector<Type *, 4> Types;
1084 for (Value *Op : Args) {
1085 Type *OpTy = Op->getType();
1086        assert(VF == 1 || !OpTy->isVectorTy());
1087 Types.push_back(VF == 1 ? OpTy : VectorType::get(OpTy, VF));
1088 }
1089
1090 if (VF > 1 && !RetTy->isVoidTy())
1091 RetTy = VectorType::get(RetTy, VF);
1092
1093 // Compute the scalarization overhead based on Args for a vector
1094 // intrinsic. A vectorizer will pass a scalar RetTy and VF > 1, while
1095 // CostModel will pass a vector RetTy and VF is 1.
1096 unsigned ScalarizationCost = std::numeric_limits<unsigned>::max();
1097 if (RetVF > 1 || VF > 1) {
1098 ScalarizationCost = 0;
1099 if (!RetTy->isVoidTy())
1100 ScalarizationCost += getScalarizationOverhead(RetTy, true, false);
1101 ScalarizationCost += getOperandsScalarizationOverhead(Args, VF);
1102 }
1103
1104 return ConcreteTTI->getIntrinsicInstrCost(IID, RetTy, Types, FMF,
1105 ScalarizationCost);
1106 }
1107 case Intrinsic::masked_scatter: {
1108      assert(VF == 1 && "Can't vectorize types here.");
1109 Value *Mask = Args[3];
1110 bool VarMask = !isa<Constant>(Mask);
1111 unsigned Alignment = cast<ConstantInt>(Args[2])->getZExtValue();
1112 return ConcreteTTI->getGatherScatterOpCost(
1113 Instruction::Store, Args[0]->getType(), Args[1], VarMask, Alignment);
1114 }
1115 case Intrinsic::masked_gather: {
1116      assert(VF == 1 && "Can't vectorize types here.");
1117 Value *Mask = Args[2];
1118 bool VarMask = !isa<Constant>(Mask);
1119 unsigned Alignment = cast<ConstantInt>(Args[1])->getZExtValue();
1120 return ConcreteTTI->getGatherScatterOpCost(Instruction::Load, RetTy,
1121 Args[0], VarMask, Alignment);
1122 }
1123 case Intrinsic::experimental_vector_reduce_add:
1124 case Intrinsic::experimental_vector_reduce_mul:
1125 case Intrinsic::experimental_vector_reduce_and:
1126 case Intrinsic::experimental_vector_reduce_or:
1127 case Intrinsic::experimental_vector_reduce_xor:
1128 case Intrinsic::experimental_vector_reduce_v2_fadd:
1129 case Intrinsic::experimental_vector_reduce_v2_fmul:
1130 case Intrinsic::experimental_vector_reduce_smax:
1131 case Intrinsic::experimental_vector_reduce_smin:
1132 case Intrinsic::experimental_vector_reduce_fmax:
1133 case Intrinsic::experimental_vector_reduce_fmin:
1134 case Intrinsic::experimental_vector_reduce_umax:
1135 case Intrinsic::experimental_vector_reduce_umin:
1136 return getIntrinsicInstrCost(IID, RetTy, Args[0]->getType(), FMF);
1137 case Intrinsic::fshl:
1138 case Intrinsic::fshr: {
1139 Value *X = Args[0];
1140 Value *Y = Args[1];
1141 Value *Z = Args[2];
1142 TTI::OperandValueProperties OpPropsX, OpPropsY, OpPropsZ, OpPropsBW;
1143 TTI::OperandValueKind OpKindX = TTI::getOperandInfo(X, OpPropsX);
1144 TTI::OperandValueKind OpKindY = TTI::getOperandInfo(Y, OpPropsY);
1145 TTI::OperandValueKind OpKindZ = TTI::getOperandInfo(Z, OpPropsZ);
1146 TTI::OperandValueKind OpKindBW = TTI::OK_UniformConstantValue;
1147 OpPropsBW = isPowerOf2_32(RetTy->getScalarSizeInBits()) ? TTI::OP_PowerOf2
1148 : TTI::OP_None;
1149 // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
1150 // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
1151 unsigned Cost = 0;
1152 Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::Or, RetTy);
1153 Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::Sub, RetTy);
1154 Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::Shl, RetTy,
1155 OpKindX, OpKindZ, OpPropsX);
1156 Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::LShr, RetTy,
1157 OpKindY, OpKindZ, OpPropsY);
1158       // Non-constant shift amounts require a modulo.
1159 if (OpKindZ != TTI::OK_UniformConstantValue &&
1160 OpKindZ != TTI::OK_NonUniformConstantValue)
1161 Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::URem, RetTy,
1162 OpKindZ, OpKindBW, OpPropsZ,
1163 OpPropsBW);
1164 // For non-rotates (X != Y) we must add shift-by-zero handling costs.
1165 if (X != Y) {
1166 Type *CondTy = RetTy->getWithNewBitWidth(1);
1167 Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy,
1168 CondTy, nullptr);
1169 Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::Select, RetTy,
1170 CondTy, nullptr);
1171 }
1172 return Cost;
1173 }
1174 }
1175 }
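To make the fshl/fshr decomposition above concrete, assume every queried sub-cost is 1 (purely an illustrative unit-cost assumption); a funnel shift with a non-constant shift amount and distinct X and Y operands then accumulates:

// Hypothetical unit costs for the expansion in the fshl/fshr case:
//   Or + Sub + Shl + LShr                        -> 4
//   non-constant shift amount adds URem          -> 5
//   X != Y adds ICmp + Select (shift-by-zero)    -> 7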
1176
1177 /// Get intrinsic cost based on argument types.
1178 /// If ScalarizationCostPassed is std::numeric_limits<unsigned>::max(), the
1179 /// cost of scalarizing the arguments and the return value will be computed
1180 /// based on types.
1181 unsigned getIntrinsicInstrCost(
1182 Intrinsic::ID IID, Type *RetTy, ArrayRef<Type *> Tys, FastMathFlags FMF,
1183 unsigned ScalarizationCostPassed = std::numeric_limits<unsigned>::max()) {
1184 auto *ConcreteTTI = static_cast<T *>(this);
1185
1186 SmallVector<unsigned, 2> ISDs;
1187 unsigned SingleCallCost = 10; // Library call cost. Make it expensive.
1188 switch (IID) {
1189 default: {
1190 // Assume that we need to scalarize this intrinsic.
1191 unsigned ScalarizationCost = ScalarizationCostPassed;
1192 unsigned ScalarCalls = 1;
1193 Type *ScalarRetTy = RetTy;
1194 if (RetTy->isVectorTy()) {
1195 if (ScalarizationCostPassed == std::numeric_limits<unsigned>::max())
1196 ScalarizationCost = getScalarizationOverhead(RetTy, true, false);
1197 ScalarCalls = std::max(ScalarCalls, RetTy->getVectorNumElements());
1198 ScalarRetTy = RetTy->getScalarType();
1199 }
1200 SmallVector<Type *, 4> ScalarTys;
1201 for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) {
1202 Type *Ty = Tys[i];
1203 if (Ty->isVectorTy()) {
1204 if (ScalarizationCostPassed == std::numeric_limits<unsigned>::max())
1205 ScalarizationCost += getScalarizationOverhead(Ty, false, true);
1206 ScalarCalls = std::max(ScalarCalls, Ty->getVectorNumElements());
1207 Ty = Ty->getScalarType();
1208 }
1209 ScalarTys.push_back(Ty);
1210 }
1211 if (ScalarCalls == 1)
1212 return 1; // Return cost of a scalar intrinsic. Assume it to be cheap.
1213
1214 unsigned ScalarCost =
1215 ConcreteTTI->getIntrinsicInstrCost(IID, ScalarRetTy, ScalarTys, FMF);
1216
1217 return ScalarCalls * ScalarCost + ScalarizationCost;
1218 }
1219 // Look for intrinsics that can be lowered directly or turned into a scalar
1220 // intrinsic call.
1221 case Intrinsic::sqrt:
1222 ISDs.push_back(ISD::FSQRT);
1223 break;
1224 case Intrinsic::sin:
1225 ISDs.push_back(ISD::FSIN);
1226 break;
1227 case Intrinsic::cos:
1228 ISDs.push_back(ISD::FCOS);
1229 break;
1230 case Intrinsic::exp:
1231 ISDs.push_back(ISD::FEXP);
1232 break;
1233 case Intrinsic::exp2:
1234 ISDs.push_back(ISD::FEXP2);
1235 break;
1236 case Intrinsic::log:
1237 ISDs.push_back(ISD::FLOG);
1238 break;
1239 case Intrinsic::log10:
1240 ISDs.push_back(ISD::FLOG10);
1241 break;
1242 case Intrinsic::log2:
1243 ISDs.push_back(ISD::FLOG2);
1244 break;
1245 case Intrinsic::fabs:
1246 ISDs.push_back(ISD::FABS);
1247 break;
1248 case Intrinsic::canonicalize:
1249 ISDs.push_back(ISD::FCANONICALIZE);
1250 break;
1251 case Intrinsic::minnum:
1252 ISDs.push_back(ISD::FMINNUM);
1253 if (FMF.noNaNs())
1254 ISDs.push_back(ISD::FMINIMUM);
1255 break;
1256 case Intrinsic::maxnum:
1257 ISDs.push_back(ISD::FMAXNUM);
1258 if (FMF.noNaNs())
1259 ISDs.push_back(ISD::FMAXIMUM);
1260 break;
1261 case Intrinsic::copysign:
1262 ISDs.push_back(ISD::FCOPYSIGN);
1263 break;
1264 case Intrinsic::floor:
1265 ISDs.push_back(ISD::FFLOOR);
1266 break;
1267 case Intrinsic::ceil:
1268 ISDs.push_back(ISD::FCEIL);
1269 break;
1270 case Intrinsic::trunc:
1271 ISDs.push_back(ISD::FTRUNC);
1272 break;
1273 case Intrinsic::nearbyint:
1274 ISDs.push_back(ISD::FNEARBYINT);
1275 break;
1276 case Intrinsic::rint:
1277 ISDs.push_back(ISD::FRINT);
1278 break;
1279 case Intrinsic::round:
1280 ISDs.push_back(ISD::FROUND);
1281 break;
1282 case Intrinsic::pow:
1283 ISDs.push_back(ISD::FPOW);
1284 break;
1285 case Intrinsic::fma:
1286 ISDs.push_back(ISD::FMA);
1287 break;
1288 case Intrinsic::fmuladd:
1289 ISDs.push_back(ISD::FMA);
1290 break;
1291 case Intrinsic::experimental_constrained_fmuladd:
1292 ISDs.push_back(ISD::STRICT_FMA);
1293 break;
1294 // FIXME: We should return 0 whenever getIntrinsicCost == TCC_Free.
1295 case Intrinsic::lifetime_start:
1296 case Intrinsic::lifetime_end:
1297 case Intrinsic::sideeffect:
1298 return 0;
1299 case Intrinsic::masked_store:
1300 return ConcreteTTI->getMaskedMemoryOpCost(Instruction::Store, Tys[0], 0,
1301 0);
1302 case Intrinsic::masked_load:
1303 return ConcreteTTI->getMaskedMemoryOpCost(Instruction::Load, RetTy, 0, 0);
1304 case Intrinsic::experimental_vector_reduce_add:
1305 return ConcreteTTI->getArithmeticReductionCost(Instruction::Add, Tys[0],
1306 /*IsPairwiseForm=*/false);
1307 case Intrinsic::experimental_vector_reduce_mul:
1308 return ConcreteTTI->getArithmeticReductionCost(Instruction::Mul, Tys[0],
1309 /*IsPairwiseForm=*/false);
1310 case Intrinsic::experimental_vector_reduce_and:
1311 return ConcreteTTI->getArithmeticReductionCost(Instruction::And, Tys[0],
1312 /*IsPairwiseForm=*/false);
1313 case Intrinsic::experimental_vector_reduce_or:
1314 return ConcreteTTI->getArithmeticReductionCost(Instruction::Or, Tys[0],
1315 /*IsPairwiseForm=*/false);
1316 case Intrinsic::experimental_vector_reduce_xor:
1317 return ConcreteTTI->getArithmeticReductionCost(Instruction::Xor, Tys[0],
1318 /*IsPairwiseForm=*/false);
1319 case Intrinsic::experimental_vector_reduce_v2_fadd:
1320 return ConcreteTTI->getArithmeticReductionCost(
1321 Instruction::FAdd, Tys[0],
1322 /*IsPairwiseForm=*/false); // FIXME: Add new flag for cost of strict
1323 // reductions.
1324 case Intrinsic::experimental_vector_reduce_v2_fmul:
1325 return ConcreteTTI->getArithmeticReductionCost(
1326 Instruction::FMul, Tys[0],
1327 /*IsPairwiseForm=*/false); // FIXME: Add new flag for cost of strict
1328 // reductions.
1329 case Intrinsic::experimental_vector_reduce_smax:
1330 case Intrinsic::experimental_vector_reduce_smin:
1331 case Intrinsic::experimental_vector_reduce_fmax:
1332 case Intrinsic::experimental_vector_reduce_fmin:
1333 return ConcreteTTI->getMinMaxReductionCost(
1334 Tys[0], CmpInst::makeCmpResultType(Tys[0]), /*IsPairwiseForm=*/false,
1335 /*IsUnsigned=*/true);
1336 case Intrinsic::experimental_vector_reduce_umax:
1337 case Intrinsic::experimental_vector_reduce_umin:
1338 return ConcreteTTI->getMinMaxReductionCost(
1339 Tys[0], CmpInst::makeCmpResultType(Tys[0]), /*IsPairwiseForm=*/false,
1340 /*IsUnsigned=*/false);
1341 case Intrinsic::sadd_sat:
1342 case Intrinsic::ssub_sat: {
1343 Type *CondTy = RetTy->getWithNewBitWidth(1);
1344
1345 Type *OpTy = StructType::create({RetTy, CondTy});
1346 Intrinsic::ID OverflowOp = IID == Intrinsic::sadd_sat
1347 ? Intrinsic::sadd_with_overflow
1348 : Intrinsic::ssub_with_overflow;
1349
1350 // SatMax -> Overflow && SumDiff < 0
1351 // SatMin -> Overflow && SumDiff >= 0
1352 unsigned Cost = 0;
1353 Cost += ConcreteTTI->getIntrinsicInstrCost(
1354 OverflowOp, OpTy, {RetTy, RetTy}, FMF, ScalarizationCostPassed);
1355 Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy,
1356 CondTy, nullptr);
1357 Cost += 2 * ConcreteTTI->getCmpSelInstrCost(BinaryOperator::Select, RetTy,
1358 CondTy, nullptr);
1359 return Cost;
1360 }
1361 case Intrinsic::uadd_sat:
1362 case Intrinsic::usub_sat: {
1363 Type *CondTy = RetTy->getWithNewBitWidth(1);
1364
1365 Type *OpTy = StructType::create({RetTy, CondTy});
1366 Intrinsic::ID OverflowOp = IID == Intrinsic::uadd_sat
1367 ? Intrinsic::uadd_with_overflow
1368 : Intrinsic::usub_with_overflow;
1369
1370 unsigned Cost = 0;
1371 Cost += ConcreteTTI->getIntrinsicInstrCost(
1372 OverflowOp, OpTy, {RetTy, RetTy}, FMF, ScalarizationCostPassed);
1373 Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::Select, RetTy,
1374 CondTy, nullptr);
1375 return Cost;
1376 }
1377 case Intrinsic::smul_fix:
1378 case Intrinsic::umul_fix: {
1379 unsigned ExtSize = RetTy->getScalarSizeInBits() * 2;
1380 Type *ExtTy = RetTy->getWithNewBitWidth(ExtSize);
1381
1382 unsigned ExtOp =
1383 IID == Intrinsic::smul_fix ? Instruction::SExt : Instruction::ZExt;
1384
1385 unsigned Cost = 0;
1386 Cost += 2 * ConcreteTTI->getCastInstrCost(ExtOp, ExtTy, RetTy);
1387 Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Mul, ExtTy);
1388 Cost +=
1389 2 * ConcreteTTI->getCastInstrCost(Instruction::Trunc, RetTy, ExtTy);
1390 Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::LShr, RetTy,
1391 TTI::OK_AnyValue,
1392 TTI::OK_UniformConstantValue);
1393 Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Shl, RetTy,
1394 TTI::OK_AnyValue,
1395 TTI::OK_UniformConstantValue);
1396 Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Or, RetTy);
1397 return Cost;
1398 }
1399 case Intrinsic::sadd_with_overflow:
1400 case Intrinsic::ssub_with_overflow: {
1401 Type *SumTy = RetTy->getContainedType(0);
1402 Type *OverflowTy = RetTy->getContainedType(1);
1403 unsigned Opcode = IID == Intrinsic::sadd_with_overflow
1404 ? BinaryOperator::Add
1405 : BinaryOperator::Sub;
1406
1407 // LHSSign -> LHS >= 0
1408 // RHSSign -> RHS >= 0
1409 // SumSign -> Sum >= 0
1410 //
1411 // Add:
1412 // Overflow -> (LHSSign == RHSSign) && (LHSSign != SumSign)
1413 // Sub:
1414 // Overflow -> (LHSSign != RHSSign) && (LHSSign != SumSign)
1415 unsigned Cost = 0;
1416 Cost += ConcreteTTI->getArithmeticInstrCost(Opcode, SumTy);
1417 Cost += 3 * ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, SumTy,
1418 OverflowTy, nullptr);
1419 Cost += 2 * ConcreteTTI->getCmpSelInstrCost(
1420 BinaryOperator::ICmp, OverflowTy, OverflowTy, nullptr);
1421 Cost +=
1422 ConcreteTTI->getArithmeticInstrCost(BinaryOperator::And, OverflowTy);
1423 return Cost;
1424 }
1425 case Intrinsic::uadd_with_overflow:
1426 case Intrinsic::usub_with_overflow: {
1427 Type *SumTy = RetTy->getContainedType(0);
1428 Type *OverflowTy = RetTy->getContainedType(1);
1429 unsigned Opcode = IID == Intrinsic::uadd_with_overflow
1430 ? BinaryOperator::Add
1431 : BinaryOperator::Sub;
1432
1433 unsigned Cost = 0;
1434 Cost += ConcreteTTI->getArithmeticInstrCost(Opcode, SumTy);
1435 Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, SumTy,
1436 OverflowTy, nullptr);
1437 return Cost;
1438 }
1439 case Intrinsic::smul_with_overflow:
1440 case Intrinsic::umul_with_overflow: {
1441 Type *MulTy = RetTy->getContainedType(0);
1442 Type *OverflowTy = RetTy->getContainedType(1);
1443 unsigned ExtSize = MulTy->getScalarSizeInBits() * 2;
1444 Type *ExtTy = MulTy->getWithNewBitWidth(ExtSize);
1445
1446 unsigned ExtOp =
1447 IID == Intrinsic::smul_fix ? Instruction::SExt : Instruction::ZExt;
1448
1449 unsigned Cost = 0;
1450 Cost += 2 * ConcreteTTI->getCastInstrCost(ExtOp, ExtTy, MulTy);
1451 Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Mul, ExtTy);
1452 Cost +=
1453 2 * ConcreteTTI->getCastInstrCost(Instruction::Trunc, MulTy, ExtTy);
1454 Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::LShr, MulTy,
1455 TTI::OK_AnyValue,
1456 TTI::OK_UniformConstantValue);
1457
1458 if (IID == Intrinsic::smul_with_overflow)
1459 Cost += ConcreteTTI->getArithmeticInstrCost(
1460 Instruction::AShr, MulTy, TTI::OK_AnyValue,
1461 TTI::OK_UniformConstantValue);
1462
1463 Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, MulTy,
1464 OverflowTy, nullptr);
1465 return Cost;
1466 }
1467 case Intrinsic::ctpop:
1468 ISDs.push_back(ISD::CTPOP);
1469 // In case of legalization use TCC_Expensive. This is cheaper than a
1470 // library call but still not a cheap instruction.
1471 SingleCallCost = TargetTransformInfo::TCC_Expensive;
1472 break;
1473 // FIXME: ctlz, cttz, ...
1474 case Intrinsic::bswap:
1475 ISDs.push_back(ISD::BSWAP);
1476 break;
1477 case Intrinsic::bitreverse:
1478 ISDs.push_back(ISD::BITREVERSE);
1479 break;
1480 }
1481
1482 const TargetLoweringBase *TLI = getTLI();
1483 std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, RetTy);
1484
1485 SmallVector<unsigned, 2> LegalCost;
1486 SmallVector<unsigned, 2> CustomCost;
1487 for (unsigned ISD : ISDs) {
1488 if (TLI->isOperationLegalOrPromote(ISD, LT.second)) {
1489 if (IID == Intrinsic::fabs && LT.second.isFloatingPoint() &&
1490 TLI->isFAbsFree(LT.second)) {
1491 return 0;
1492 }
1493
1494 // The operation is legal. Assume it costs 1.
1495 // If the type is split to multiple registers, assume that there is some
1496 // overhead to this.
1497 // TODO: Once we have extract/insert subvector cost we need to use them.
1498 if (LT.first > 1)
1499 LegalCost.push_back(LT.first * 2);
1500 else
1501 LegalCost.push_back(LT.first * 1);
1502 } else if (!TLI->isOperationExpand(ISD, LT.second)) {
1503 // If the operation is custom lowered then assume
1504 // that the code is twice as expensive.
1505 CustomCost.push_back(LT.first * 2);
1506 }
1507 }
1508
1509 auto MinLegalCostI = std::min_element(LegalCost.begin(), LegalCost.end());
1510 if (MinLegalCostI != LegalCost.end())
1511 return *MinLegalCostI;
1512
1513 auto MinCustomCostI =
1514 std::min_element(CustomCost.begin(), CustomCost.end());
1515 if (MinCustomCostI != CustomCost.end())
1516 return *MinCustomCostI;
1517
1518 // If we can't lower fmuladd into an FMA estimate the cost as a floating
1519 // point mul followed by an add.
1520 if (IID == Intrinsic::fmuladd)
1521 return ConcreteTTI->getArithmeticInstrCost(BinaryOperator::FMul, RetTy) +
1522 ConcreteTTI->getArithmeticInstrCost(BinaryOperator::FAdd, RetTy);
1523 if (IID == Intrinsic::experimental_constrained_fmuladd)
1524 return ConcreteTTI->getIntrinsicCost(
1525 Intrinsic::experimental_constrained_fmul, RetTy, Tys,
1526 nullptr) +
1527 ConcreteTTI->getIntrinsicCost(
1528 Intrinsic::experimental_constrained_fadd, RetTy, Tys, nullptr);
1529
1530 // Else, assume that we need to scalarize this intrinsic. For math builtins
1531 // this will emit a costly libcall, adding call overhead and spills. Make it
1532 // very expensive.
1533 if (RetTy->isVectorTy()) {
1534 unsigned ScalarizationCost =
1535 ((ScalarizationCostPassed != std::numeric_limits<unsigned>::max())
1536 ? ScalarizationCostPassed
1537 : getScalarizationOverhead(RetTy, true, false));
1538 unsigned ScalarCalls = RetTy->getVectorNumElements();
1539 SmallVector<Type *, 4> ScalarTys;
1540 for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) {
1541 Type *Ty = Tys[i];
1542 if (Ty->isVectorTy())
1543 Ty = Ty->getScalarType();
1544 ScalarTys.push_back(Ty);
1545 }
1546 unsigned ScalarCost = ConcreteTTI->getIntrinsicInstrCost(
1547 IID, RetTy->getScalarType(), ScalarTys, FMF);
1548 for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) {
1549 if (Tys[i]->isVectorTy()) {
1550 if (ScalarizationCostPassed == std::numeric_limits<unsigned>::max())
1551 ScalarizationCost += getScalarizationOverhead(Tys[i], false, true);
1552 ScalarCalls = std::max(ScalarCalls, Tys[i]->getVectorNumElements());
1553 }
1554 }
1555
1556 return ScalarCalls * ScalarCost + ScalarizationCost;
1557 }
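// Worked example with hypothetical numbers: scalarizing a <4 x float>
// intrinsic with one vector argument gives ScalarCalls == 4; if the scalar
// call is costed at, say, 10 and the insert/extract overhead at, say, 8, the
// total comes out as 4 * 10 + 8 = 48.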
1558
1559 // This is going to be turned into a library call, make it expensive.
1560 return SingleCallCost;
1561 }
1562
1563 /// Compute the cost of the given call instruction.
1564 ///
1565 /// Compute the cost of calling function F with return type RetTy and
1566 /// argument types Tys. F might be nullptr, in this case the cost of an
1567 /// arbitrary call with the specified signature will be returned.
1568 /// This is used, for instance, when we estimate the cost of calling a vector
1569 /// counterpart of the given function.
1570 /// \param F Called function, might be nullptr.
1571 /// \param RetTy Return value types.
1572 /// \param Tys Argument types.
1573 /// \returns The cost of Call instruction.
1574 unsigned getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys) {
1575 return 10;
1576 }
1577
1578 unsigned getNumberOfParts(Type *Tp) {
1579 std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(DL, Tp);
1580 return LT.first;
1581 }
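// For example, on a target with 128-bit vector registers, an <8 x i32> type
// legalizes to two v4i32 parts, so getNumberOfParts returns 2 (illustrative;
// the actual split depends on the target's legal types).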
1582
1583 unsigned getAddressComputationCost(Type *Ty, ScalarEvolution *,
1584 const SCEV *) {
1585 return 0;
1586 }
1587
1588 /// Try to calculate arithmetic and shuffle op costs for reduction operations.
1589 /// We're assuming that reduction operations are performed in the following way:
1590 /// 1. Non-pairwise reduction
1591 /// %val1 = shufflevector<n x t> %val, <n x t> %undef,
1592 /// <n x i32> <i32 n/2, i32 n/2 + 1, ..., i32 n-1, i32 undef, ..., i32 undef>
1593 /// \----------------v-------------/ \----------v------------/
1594 /// n/2 elements n/2 elements
1595 /// %red1 = op <n x t> %val, <n x t> %val1
1596 /// After this operation we have a vector %red1 where only the first n/2
1597 /// elements are meaningful, the second n/2 elements are undefined and can be
1598 /// dropped. All other operations are actually working with the vector of
1599 /// length n/2, not n, though the real vector length is still n.
1600 /// %val2 = shufflevector<n x t> %red1, <n x t> %undef,
1601 /// <n x i32> <i32 n/4, i32 n/4 + 1, ..., i32 n/2-1, i32 undef, ..., i32 undef>
1602 /// \----------------v-------------/ \----------v------------/
1603 /// n/4 elements 3*n/4 elements
1604 /// %red2 = op <n x t> %red1, <n x t> %val2 - working with the vector of
1605 /// length n/2, the resulting vector has length n/4 etc.
1606 /// 2. Pairwise reduction:
1607 /// Everything is the same except for an additional shuffle operation which
1608 /// is used to produce operands for pairwise kind of reductions.
1609 /// %val1 = shufflevector<n x t> %val, <n x t> %undef,
1610 /// <n x i32> <i32 0, i32 2, ..., i32 n-2, i32 undef, ..., i32 undef>
1611 /// \-------------v----------/ \----------v------------/
1612 /// n/2 elements n/2 elements
1613 /// %val2 = shufflevector<n x t> %val, <n x t> %undef,
1614 /// <n x i32> <i32 1, i32 3, ..., i32 n-1, i32 undef, ..., i32 undef>
1615 /// \-------------v----------/ \----------v------------/
1616 /// n/2 elements n/2 elements
1617 /// %red1 = op <n x t> %val1, <n x t> %val2
1618 /// Again, the operation is performed on <n x t> vector, but the resulting
1619 /// vector %red1 is <n/2 x t> vector.
1620 ///
1621 /// The cost model should take into account that the actual length of the
1622 /// vector is reduced on each iteration.
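///
/// Worked example (illustrative, assuming the whole vector is already legal
/// on the target): an 8-element non-pairwise reduction has Log2(8) = 3
/// levels, i.e. 3 shuffles and 3 vector ops, plus one final extractelement of
/// lane 0. The pairwise form of the same reduction needs two shuffles on each
/// level except the last, i.e. 5 shuffles in total.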
1623 unsigned getArithmeticReductionCost(unsigned Opcode, Type *Ty,
1624 bool IsPairwise) {
1625 assert(Ty->isVectorTy() && "Expect a vector type");
1626 Type *ScalarTy = Ty->getVectorElementType();
1627 unsigned NumVecElts = Ty->getVectorNumElements();
1628 unsigned NumReduxLevels = Log2_32(NumVecElts);
1629 unsigned ArithCost = 0;
1630 unsigned ShuffleCost = 0;
1631 auto *ConcreteTTI = static_cast<T *>(this);
1632 std::pair<unsigned, MVT> LT =
1633 ConcreteTTI->getTLI()->getTypeLegalizationCost(DL, Ty);
1634 unsigned LongVectorCount = 0;
1635 unsigned MVTLen =
1636 LT.second.isVector() ? LT.second.getVectorNumElements() : 1;
1637 while (NumVecElts > MVTLen) {
1638 NumVecElts /= 2;
1639 Type *SubTy = VectorType::get(ScalarTy, NumVecElts);
1640 // Assume the pairwise shuffles add a cost.
1641 ShuffleCost += (IsPairwise + 1) *
1642 ConcreteTTI->getShuffleCost(TTI::SK_ExtractSubvector, Ty,
1643 NumVecElts, SubTy);
1644 ArithCost += ConcreteTTI->getArithmeticInstrCost(Opcode, SubTy);
1645 Ty = SubTy;
1646 ++LongVectorCount;
1647 }
1648
1649 NumReduxLevels -= LongVectorCount;
1650
1651 // The minimal length of the vector is limited by the real length of vector
1652 // operations performed on the current platform. That's why several final
1653 // reduction operations are performed on the vectors with the same
1654 // architecture-dependent length.
1655
1656 // Non-pairwise reductions need one shuffle per reduction level. Pairwise
1657 // reductions need two shuffles on every level except the last one; on that
1658 // level one of the shuffles is <0, u, u, ...>, which is an identity shuffle.
1659 unsigned NumShuffles = NumReduxLevels;
1660 if (IsPairwise && NumReduxLevels >= 1)
1661 NumShuffles += NumReduxLevels - 1;
1662 ShuffleCost += NumShuffles *
1663 ConcreteTTI->getShuffleCost(TTI::SK_PermuteSingleSrc, Ty,
1664 0, Ty);
1665 ArithCost += NumReduxLevels *
1666 ConcreteTTI->getArithmeticInstrCost(Opcode, Ty);
1667 return ShuffleCost + ArithCost +
1668 ConcreteTTI->getVectorInstrCost(Instruction::ExtractElement, Ty, 0);
1669 }
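// Worked example (assumed 128-bit vector target): reducing <8 x float> with
// fadd first splits 8 -> 4 inside the loop above (one ExtractSubvector
// shuffle plus one v4f32 fadd), then performs the remaining Log2(4) = 2
// levels on v4f32 (2 shuffles plus 2 fadds), and finally extracts lane 0.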
1670
1671 /// Try to calculate op costs for min/max reduction operations.
1672 /// \param CondTy Conditional type for the Select instruction.
1673 unsigned getMinMaxReductionCost(Type *Ty, Type *CondTy, bool IsPairwise,
1674 bool) {
1675 assert(Ty->isVectorTy() && "Expect a vector type");
1676 Type *ScalarTy = Ty->getVectorElementType();
1677 Type *ScalarCondTy = CondTy->getVectorElementType();
1678 unsigned NumVecElts = Ty->getVectorNumElements();
1679 unsigned NumReduxLevels = Log2_32(NumVecElts);
1680 unsigned CmpOpcode;
1681 if (Ty->isFPOrFPVectorTy()) {
1682 CmpOpcode = Instruction::FCmp;
1683 } else {
1684 assert(Ty->isIntOrIntVectorTy() &&
1685 "expecting floating point or integer type for min/max reduction");
1686 CmpOpcode = Instruction::ICmp;
1687 }
1688 unsigned MinMaxCost = 0;
1689 unsigned ShuffleCost = 0;
1690 auto *ConcreteTTI = static_cast<T *>(this);
1691 std::pair<unsigned, MVT> LT =
1692 ConcreteTTI->getTLI()->getTypeLegalizationCost(DL, Ty);
1693 unsigned LongVectorCount = 0;
1694 unsigned MVTLen =
1695 LT.second.isVector() ? LT.second.getVectorNumElements() : 1;
1696 while (NumVecElts > MVTLen) {
1697 NumVecElts /= 2;
1698 Type *SubTy = VectorType::get(ScalarTy, NumVecElts);
1699 CondTy = VectorType::get(ScalarCondTy, NumVecElts);
1700
1701 // Assume the pairwise shuffles add a cost.
1702 ShuffleCost += (IsPairwise + 1) *
1703 ConcreteTTI->getShuffleCost(TTI::SK_ExtractSubvector, Ty,
1704 NumVecElts, SubTy);
1705 MinMaxCost +=
1706 ConcreteTTI->getCmpSelInstrCost(CmpOpcode, SubTy, CondTy, nullptr) +
1707 ConcreteTTI->getCmpSelInstrCost(Instruction::Select, SubTy, CondTy,
1708 nullptr);
1709 Ty = SubTy;
1710 ++LongVectorCount;
1711 }
1712
1713 NumReduxLevels -= LongVectorCount;
1714
1715 // The minimal length of the vector is limited by the real length of vector
1716 // operations performed on the current platform. That's why several final
1717 // reduction operations are performed on the vectors with the same
1718 // architecture-dependent length.
1719
1720 // Non-pairwise reductions need one shuffle per reduction level. Pairwise
1721 // reductions need two shuffles on every level except the last one; on that
1722 // level one of the shuffles is <0, u, u, ...>, which is an identity shuffle.
1723 unsigned NumShuffles = NumReduxLevels;
1724 if (IsPairwise && NumReduxLevels >= 1)
1725 NumShuffles += NumReduxLevels - 1;
1726 ShuffleCost += NumShuffles *
1727 ConcreteTTI->getShuffleCost(TTI::SK_PermuteSingleSrc, Ty,
1728 0, Ty);
1729 MinMaxCost +=
1730 NumReduxLevels *
1731 (ConcreteTTI->getCmpSelInstrCost(CmpOpcode, Ty, CondTy, nullptr) +
1732 ConcreteTTI->getCmpSelInstrCost(Instruction::Select, Ty, CondTy,
1733 nullptr));
1734 // The last min/max should be in vector registers and we counted it above.
1735 // So just need a single extractelement.
1736 return ShuffleCost + MinMaxCost +
1737 ConcreteTTI->getVectorInstrCost(Instruction::ExtractElement, Ty, 0);
1738 }
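// The min/max reduction follows the same shuffle ladder as the arithmetic
// reduction above, but each level is costed as a compare plus a select
// instead of a single arithmetic instruction.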
1739
1740 unsigned getVectorSplitCost() { return 1; }
1741
1742 /// @}
1743};
1744
1745/// Concrete BasicTTIImpl that can be used if no further customization
1746/// is needed.
1747class BasicTTIImpl : public BasicTTIImplBase<BasicTTIImpl> {
1748 using BaseT = BasicTTIImplBase<BasicTTIImpl>;
1749
1750 friend class BasicTTIImplBase<BasicTTIImpl>;
1751
1752 const TargetSubtargetInfo *ST;
1753 const TargetLoweringBase *TLI;
1754
1755 const TargetSubtargetInfo *getST() const { return ST; }
1756 const TargetLoweringBase *getTLI() const { return TLI; }
1757
1758public:
1759 explicit BasicTTIImpl(const TargetMachine *TM, const Function &F);
1760};
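// Targets usually provide their own cost models by deriving from
// BasicTTIImplBase via CRTP rather than using BasicTTIImpl directly. A
// minimal sketch with hypothetical names (the subtarget, lowering, and
// target-machine types are assumptions, not part of this header):
//
//   class MyTargetTTIImpl : public BasicTTIImplBase<MyTargetTTIImpl> {
//     using BaseT = BasicTTIImplBase<MyTargetTTIImpl>;
//     friend BaseT;
//
//     const MyTargetSubtarget *ST;
//     const MyTargetLowering *TLI;
//
//     const MyTargetSubtarget *getST() const { return ST; }
//     const MyTargetLowering *getTLI() const { return TLI; }
//
//   public:
//     explicit MyTargetTTIImpl(const MyTargetMachine *TM, const Function &F);
//
//     // Override hooks such as getArithmeticInstrCost to refine BaseT's
//     // generic estimates with target-specific numbers.
//   };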
1761
1762} // end namespace llvm
1763
1764#endif // LLVM_CODEGEN_BASICTTIIMPL_H