Bug Summary

File: llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
Warning: line 76, column 25
Called C++ object pointer is null
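
The warning points at `getOperationCost` in TargetTransformInfoImpl.h, where `OpTy->getScalarSizeInBits()` is evaluated on the `IntToPtr` path while the analyzer assumes `OpTy` may be null (the frames between path steps 3 and 47 are not shown in this report excerpt). Below is a minimal, self-contained sketch of the flagged pattern only, not the actual LLVM code; the names `OperandType`, `operationCost`, and `userCost` are hypothetical stand-ins used for illustration.

```cpp
#include <cstdio>

// Hypothetical stand-in for llvm::Type; only what the sketch needs.
struct OperandType {
  unsigned Bits;
  unsigned getScalarSizeInBits() const { return Bits; }
};

enum Opcode { IntToPtr, Other };

// Mirrors the shape of the flagged helper: OpTy is dereferenced only on the
// IntToPtr branch, so a null OpTy is harmless for other opcodes but crashes here.
unsigned operationCost(Opcode Op, const OperandType *OpTy) {
  switch (Op) {
  case IntToPtr:
    return OpTy->getScalarSizeInBits(); // analyzer: pointer may be null here
  default:
    return 1;
  }
}

// Mirrors the caller: when no operand type is available it forwards nullptr,
// which is only safe if the opcode never reaches the dereferencing branch.
unsigned userCost(Opcode Op, const OperandType *MaybeOpTy) {
  return operationCost(Op, MaybeOpTy);
}

int main() {
  OperandType I64{64};
  std::printf("%u\n", userCost(IntToPtr, &I64)); // fine: 64
  std::printf("%u\n", userCost(Other, nullptr)); // fine: never dereferenced
  // userCost(IntToPtr, nullptr) would reproduce the reported null dereference.
  return 0;
}
```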

Annotated Source Code

clang -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name AMDGPUTargetTransformInfo.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mthread-model posix -mframe-pointer=none -fmath-errno -fno-rounding-math -masm-verbose -mconstructor-aliases -munwind-tables -fuse-init-array -target-cpu x86-64 -dwarf-column-info -debugger-tuning=gdb -ffunction-sections -fdata-sections -resource-dir /usr/lib/llvm-10/lib/clang/10.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I /build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/build-llvm/lib/Target/AMDGPU -I /build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/AMDGPU -I /build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/build-llvm/include -I /build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/include -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0/backward -internal-isystem /usr/local/include -internal-isystem /usr/lib/llvm-10/lib/clang/10.0.0/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir /build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/build-llvm/lib/Target/AMDGPU -fdebug-prefix-map=/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809=. -ferror-limit 19 -fmessage-length 0 -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -fobjc-runtime=gcc -fdiagnostics-show-option -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -o /tmp/scan-build-2019-12-11-181444-25759-1 -x c++ /build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp

/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp

1//===- AMDGPUTargetTransformInfo.cpp - AMDGPU specific TTI pass -----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// \file
10// This file implements a TargetTransformInfo analysis pass specific to the
11// AMDGPU target machine. It uses the target's detailed information to provide
12// more precise answers to certain TTI queries, while letting the target
13// independent and default TTI implementations handle the rest.
14//
15//===----------------------------------------------------------------------===//
16
17#include "AMDGPUTargetTransformInfo.h"
18#include "AMDGPUSubtarget.h"
19#include "Utils/AMDGPUBaseInfo.h"
20#include "llvm/ADT/STLExtras.h"
21#include "llvm/Analysis/LoopInfo.h"
22#include "llvm/Analysis/TargetTransformInfo.h"
23#include "llvm/Analysis/ValueTracking.h"
24#include "llvm/CodeGen/ISDOpcodes.h"
25#include "llvm/CodeGen/ValueTypes.h"
26#include "llvm/IR/Argument.h"
27#include "llvm/IR/Attributes.h"
28#include "llvm/IR/BasicBlock.h"
29#include "llvm/IR/CallingConv.h"
30#include "llvm/IR/DataLayout.h"
31#include "llvm/IR/DerivedTypes.h"
32#include "llvm/IR/Function.h"
33#include "llvm/IR/Instruction.h"
34#include "llvm/IR/Instructions.h"
35#include "llvm/IR/IntrinsicInst.h"
36#include "llvm/IR/Module.h"
37#include "llvm/IR/PatternMatch.h"
38#include "llvm/IR/Type.h"
39#include "llvm/IR/Value.h"
40#include "llvm/MC/SubtargetFeature.h"
41#include "llvm/Support/Casting.h"
42#include "llvm/Support/CommandLine.h"
43#include "llvm/Support/Debug.h"
44#include "llvm/Support/ErrorHandling.h"
45#include "llvm/Support/MachineValueType.h"
46#include "llvm/Support/raw_ostream.h"
47#include "llvm/Target/TargetMachine.h"
48#include <algorithm>
49#include <cassert>
50#include <limits>
51#include <utility>
52
53using namespace llvm;
54
55#define DEBUG_TYPE "AMDGPUtti"
56
57static cl::opt<unsigned> UnrollThresholdPrivate(
58 "amdgpu-unroll-threshold-private",
59 cl::desc("Unroll threshold for AMDGPU if private memory used in a loop"),
60 cl::init(2000), cl::Hidden);
61
62static cl::opt<unsigned> UnrollThresholdLocal(
63 "amdgpu-unroll-threshold-local",
64 cl::desc("Unroll threshold for AMDGPU if local memory used in a loop"),
65 cl::init(1000), cl::Hidden);
66
67static cl::opt<unsigned> UnrollThresholdIf(
68 "amdgpu-unroll-threshold-if",
69 cl::desc("Unroll threshold increment for AMDGPU for each if statement inside loop"),
70 cl::init(150), cl::Hidden);
71
72static bool dependsOnLocalPhi(const Loop *L, const Value *Cond,
73 unsigned Depth = 0) {
74 const Instruction *I = dyn_cast<Instruction>(Cond);
75 if (!I)
76 return false;
77
78 for (const Value *V : I->operand_values()) {
79 if (!L->contains(I))
80 continue;
81 if (const PHINode *PHI = dyn_cast<PHINode>(V)) {
82 if (llvm::none_of(L->getSubLoops(), [PHI](const Loop* SubLoop) {
83 return SubLoop->contains(PHI); }))
84 return true;
85 } else if (Depth < 10 && dependsOnLocalPhi(L, V, Depth+1))
86 return true;
87 }
88 return false;
89}
90
91void AMDGPUTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
92 TTI::UnrollingPreferences &UP) {
93 UP.Threshold = 300; // Twice the default.
94 UP.MaxCount = std::numeric_limits<unsigned>::max();
95 UP.Partial = true;
96
97 // TODO: Do we want runtime unrolling?
98
99 // Maximum alloca size than can fit registers. Reserve 16 registers.
100 const unsigned MaxAlloca = (256 - 16) * 4;
101 unsigned ThresholdPrivate = UnrollThresholdPrivate;
102 unsigned ThresholdLocal = UnrollThresholdLocal;
103 unsigned MaxBoost = std::max(ThresholdPrivate, ThresholdLocal);
104 for (const BasicBlock *BB : L->getBlocks()) {
105 const DataLayout &DL = BB->getModule()->getDataLayout();
106 unsigned LocalGEPsSeen = 0;
107
108 if (llvm::any_of(L->getSubLoops(), [BB](const Loop* SubLoop) {
109 return SubLoop->contains(BB); }))
110 continue; // Block belongs to an inner loop.
111
112 for (const Instruction &I : *BB) {
113 // Unroll a loop which contains an "if" statement whose condition
114 // defined by a PHI belonging to the loop. This may help to eliminate
115 // if region and potentially even PHI itself, saving on both divergence
116 // and registers used for the PHI.
117 // Add a small bonus for each of such "if" statements.
118 if (const BranchInst *Br = dyn_cast<BranchInst>(&I)) {
119 if (UP.Threshold < MaxBoost && Br->isConditional()) {
120 BasicBlock *Succ0 = Br->getSuccessor(0);
121 BasicBlock *Succ1 = Br->getSuccessor(1);
122 if ((L->contains(Succ0) && L->isLoopExiting(Succ0)) ||
123 (L->contains(Succ1) && L->isLoopExiting(Succ1)))
124 continue;
125 if (dependsOnLocalPhi(L, Br->getCondition())) {
126 UP.Threshold += UnrollThresholdIf;
127 LLVM_DEBUG(dbgs() << "Set unroll threshold " << UP.Threshold
128 << " for loop:\n"
129 << *L << " due to " << *Br << '\n');
130 if (UP.Threshold >= MaxBoost)
131 return;
132 }
133 }
134 continue;
135 }
136
137 const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&I);
138 if (!GEP)
139 continue;
140
141 unsigned AS = GEP->getAddressSpace();
142 unsigned Threshold = 0;
143 if (AS == AMDGPUAS::PRIVATE_ADDRESS)
144 Threshold = ThresholdPrivate;
145 else if (AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS)
146 Threshold = ThresholdLocal;
147 else
148 continue;
149
150 if (UP.Threshold >= Threshold)
151 continue;
152
153 if (AS == AMDGPUAS::PRIVATE_ADDRESS) {
154 const Value *Ptr = GEP->getPointerOperand();
155 const AllocaInst *Alloca =
156 dyn_cast<AllocaInst>(GetUnderlyingObject(Ptr, DL));
157 if (!Alloca || !Alloca->isStaticAlloca())
158 continue;
159 Type *Ty = Alloca->getAllocatedType();
160 unsigned AllocaSize = Ty->isSized() ? DL.getTypeAllocSize(Ty) : 0;
161 if (AllocaSize > MaxAlloca)
162 continue;
163 } else if (AS == AMDGPUAS::LOCAL_ADDRESS ||
164 AS == AMDGPUAS::REGION_ADDRESS) {
165 LocalGEPsSeen++;
166 // Inhibit unroll for local memory if we have seen addressing not to
167 // a variable, most likely we will be unable to combine it.
168 // Do not unroll too deep inner loops for local memory to give a chance
169 // to unroll an outer loop for a more important reason.
170 if (LocalGEPsSeen > 1 || L->getLoopDepth() > 2 ||
171 (!isa<GlobalVariable>(GEP->getPointerOperand()) &&
172 !isa<Argument>(GEP->getPointerOperand())))
173 continue;
174 }
175
176 // Check if GEP depends on a value defined by this loop itself.
177 bool HasLoopDef = false;
178 for (const Value *Op : GEP->operands()) {
179 const Instruction *Inst = dyn_cast<Instruction>(Op);
180 if (!Inst || L->isLoopInvariant(Op))
181 continue;
182
183 if (llvm::any_of(L->getSubLoops(), [Inst](const Loop* SubLoop) {
184 return SubLoop->contains(Inst); }))
185 continue;
186 HasLoopDef = true;
187 break;
188 }
189 if (!HasLoopDef)
190 continue;
191
192 // We want to do whatever we can to limit the number of alloca
193 // instructions that make it through to the code generator. allocas
194 // require us to use indirect addressing, which is slow and prone to
195 // compiler bugs. If this loop does an address calculation on an
196 // alloca ptr, then we want to use a higher than normal loop unroll
197 // threshold. This will give SROA a better chance to eliminate these
198 // allocas.
199 //
200 // We also want to have more unrolling for local memory to let ds
201 // instructions with different offsets combine.
202 //
203 // Don't use the maximum allowed value here as it will make some
204 // programs way too big.
205 UP.Threshold = Threshold;
206 LLVM_DEBUG(dbgs() << "Set unroll threshold " << Threshold
207 << " for loop:\n"
208 << *L << " due to " << *GEP << '\n');
209 if (UP.Threshold >= MaxBoost)
210 return;
211 }
212 }
213}
214
215unsigned GCNTTIImpl::getHardwareNumberOfRegisters(bool Vec) const {
216 // The concept of vector registers doesn't really exist. Some packed vector
217 // operations operate on the normal 32-bit registers.
218 return 256;
219}
220
221unsigned GCNTTIImpl::getNumberOfRegisters(bool Vec) const {
222 // This is really the number of registers to fill when vectorizing /
223 // interleaving loops, so we lie to avoid trying to use all registers.
224 return getHardwareNumberOfRegisters(Vec) >> 3;
225}
226
227unsigned GCNTTIImpl::getRegisterBitWidth(bool Vector) const {
228 return 32;
229}
230
231unsigned GCNTTIImpl::getMinVectorRegisterBitWidth() const {
232 return 32;
233}
234
235unsigned GCNTTIImpl::getLoadVectorFactor(unsigned VF, unsigned LoadSize,
236 unsigned ChainSizeInBytes,
237 VectorType *VecTy) const {
238 unsigned VecRegBitWidth = VF * LoadSize;
239 if (VecRegBitWidth > 128 && VecTy->getScalarSizeInBits() < 32)
240 // TODO: Support element-size less than 32bit?
241 return 128 / LoadSize;
242
243 return VF;
244}
245
246unsigned GCNTTIImpl::getStoreVectorFactor(unsigned VF, unsigned StoreSize,
247 unsigned ChainSizeInBytes,
248 VectorType *VecTy) const {
249 unsigned VecRegBitWidth = VF * StoreSize;
250 if (VecRegBitWidth > 128)
251 return 128 / StoreSize;
252
253 return VF;
254}
255
256unsigned GCNTTIImpl::getLoadStoreVecRegBitWidth(unsigned AddrSpace) const {
257 if (AddrSpace == AMDGPUAS::GLOBAL_ADDRESS ||
258 AddrSpace == AMDGPUAS::CONSTANT_ADDRESS ||
259 AddrSpace == AMDGPUAS::CONSTANT_ADDRESS_32BIT ||
260 AddrSpace == AMDGPUAS::BUFFER_FAT_POINTER) {
261 return 512;
262 }
263
264 if (AddrSpace == AMDGPUAS::FLAT_ADDRESS ||
265 AddrSpace == AMDGPUAS::LOCAL_ADDRESS ||
266 AddrSpace == AMDGPUAS::REGION_ADDRESS)
267 return 128;
268
269 if (AddrSpace == AMDGPUAS::PRIVATE_ADDRESS)
270 return 8 * ST->getMaxPrivateElementSize();
271
272 llvm_unreachable("unhandled address space");
273}
274
275bool GCNTTIImpl::isLegalToVectorizeMemChain(unsigned ChainSizeInBytes,
276 unsigned Alignment,
277 unsigned AddrSpace) const {
278 // We allow vectorization of flat stores, even though we may need to decompose
279 // them later if they may access private memory. We don't have enough context
280 // here, and legalization can handle it.
281 if (AddrSpace == AMDGPUAS::PRIVATE_ADDRESS) {
282 return (Alignment >= 4 || ST->hasUnalignedScratchAccess()) &&
283 ChainSizeInBytes <= ST->getMaxPrivateElementSize();
284 }
285 return true;
286}
287
288bool GCNTTIImpl::isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
289 unsigned Alignment,
290 unsigned AddrSpace) const {
291 return isLegalToVectorizeMemChain(ChainSizeInBytes, Alignment, AddrSpace);
292}
293
294bool GCNTTIImpl::isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
295 unsigned Alignment,
296 unsigned AddrSpace) const {
297 return isLegalToVectorizeMemChain(ChainSizeInBytes, Alignment, AddrSpace);
298}
299
300unsigned GCNTTIImpl::getMaxInterleaveFactor(unsigned VF) {
301 // Disable unrolling if the loop is not vectorized.
302 // TODO: Enable this again.
303 if (VF == 1)
304 return 1;
305
306 return 8;
307}
308
309bool GCNTTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst,
310 MemIntrinsicInfo &Info) const {
311 switch (Inst->getIntrinsicID()) {
312 case Intrinsic::amdgcn_atomic_inc:
313 case Intrinsic::amdgcn_atomic_dec:
314 case Intrinsic::amdgcn_ds_ordered_add:
315 case Intrinsic::amdgcn_ds_ordered_swap:
316 case Intrinsic::amdgcn_ds_fadd:
317 case Intrinsic::amdgcn_ds_fmin:
318 case Intrinsic::amdgcn_ds_fmax: {
319 auto *Ordering = dyn_cast<ConstantInt>(Inst->getArgOperand(2));
320 auto *Volatile = dyn_cast<ConstantInt>(Inst->getArgOperand(4));
321 if (!Ordering || !Volatile)
322 return false; // Invalid.
323
324 unsigned OrderingVal = Ordering->getZExtValue();
325 if (OrderingVal > static_cast<unsigned>(AtomicOrdering::SequentiallyConsistent))
326 return false;
327
328 Info.PtrVal = Inst->getArgOperand(0);
329 Info.Ordering = static_cast<AtomicOrdering>(OrderingVal);
330 Info.ReadMem = true;
331 Info.WriteMem = true;
332 Info.IsVolatile = !Volatile->isNullValue();
333 return true;
334 }
335 default:
336 return false;
337 }
338}
339
340int GCNTTIImpl::getArithmeticInstrCost(
341 unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info,
342 TTI::OperandValueKind Opd2Info, TTI::OperandValueProperties Opd1PropInfo,
343 TTI::OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args ) {
344 EVT OrigTy = TLI->getValueType(DL, Ty);
345 if (!OrigTy.isSimple()) {
346 return BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
347 Opd1PropInfo, Opd2PropInfo);
348 }
349
350 // Legalize the type.
351 std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
352 int ISD = TLI->InstructionOpcodeToISD(Opcode);
353
354 // Because we don't have any legal vector operations, but the legal types, we
355 // need to account for split vectors.
356 unsigned NElts = LT.second.isVector() ?
357 LT.second.getVectorNumElements() : 1;
358
359 MVT::SimpleValueType SLT = LT.second.getScalarType().SimpleTy;
360
361 switch (ISD) {
362 case ISD::SHL:
363 case ISD::SRL:
364 case ISD::SRA:
365 if (SLT == MVT::i64)
366 return get64BitInstrCost() * LT.first * NElts;
367
368 // i32
369 return getFullRateInstrCost() * LT.first * NElts;
370 case ISD::ADD:
371 case ISD::SUB:
372 case ISD::AND:
373 case ISD::OR:
374 case ISD::XOR:
375 if (SLT == MVT::i64){
376 // and, or and xor are typically split into 2 VALU instructions.
377 return 2 * getFullRateInstrCost() * LT.first * NElts;
378 }
379
380 return LT.first * NElts * getFullRateInstrCost();
381 case ISD::MUL: {
382 const int QuarterRateCost = getQuarterRateInstrCost();
383 if (SLT == MVT::i64) {
384 const int FullRateCost = getFullRateInstrCost();
385 return (4 * QuarterRateCost + (2 * 2) * FullRateCost) * LT.first * NElts;
386 }
387
388 // i32
389 return QuarterRateCost * NElts * LT.first;
390 }
391 case ISD::FADD:
392 case ISD::FSUB:
393 case ISD::FMUL:
394 if (SLT == MVT::f64)
395 return LT.first * NElts * get64BitInstrCost();
396
397 if (SLT == MVT::f32 || SLT == MVT::f16)
398 return LT.first * NElts * getFullRateInstrCost();
399 break;
400 case ISD::FDIV:
401 case ISD::FREM:
402 // FIXME: frem should be handled separately. The fdiv in it is most of it,
403 // but the current lowering is also not entirely correct.
404 if (SLT == MVT::f64) {
405 int Cost = 4 * get64BitInstrCost() + 7 * getQuarterRateInstrCost();
406 // Add cost of workaround.
407 if (!ST->hasUsableDivScaleConditionOutput())
408 Cost += 3 * getFullRateInstrCost();
409
410 return LT.first * Cost * NElts;
411 }
412
413 if (!Args.empty() && match(Args[0], PatternMatch::m_FPOne())) {
414 // TODO: This is more complicated, unsafe flags etc.
415 if ((SLT == MVT::f32 && !ST->hasFP32Denormals()) ||
416 (SLT == MVT::f16 && ST->has16BitInsts())) {
417 return LT.first * getQuarterRateInstrCost() * NElts;
418 }
419 }
420
421 if (SLT == MVT::f16 && ST->has16BitInsts()) {
422 // 2 x v_cvt_f32_f16
423 // f32 rcp
424 // f32 fmul
425 // v_cvt_f16_f32
426 // f16 div_fixup
427 int Cost = 4 * getFullRateInstrCost() + 2 * getQuarterRateInstrCost();
428 return LT.first * Cost * NElts;
429 }
430
431 if (SLT == MVT::f32 || SLT == MVT::f16) {
432 int Cost = 7 * getFullRateInstrCost() + 1 * getQuarterRateInstrCost();
433
434 if (!ST->hasFP32Denormals()) {
435 // FP mode switches.
436 Cost += 2 * getFullRateInstrCost();
437 }
438
439 return LT.first * NElts * Cost;
440 }
441 break;
442 default:
443 break;
444 }
445
446 return BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
447 Opd1PropInfo, Opd2PropInfo);
448}
449
450unsigned GCNTTIImpl::getCFInstrCost(unsigned Opcode) {
451 // XXX - For some reason this isn't called for switch.
452 switch (Opcode) {
453 case Instruction::Br:
454 case Instruction::Ret:
455 return 10;
456 default:
457 return BaseT::getCFInstrCost(Opcode);
458 }
459}
460
461int GCNTTIImpl::getArithmeticReductionCost(unsigned Opcode, Type *Ty,
462 bool IsPairwise) {
463 EVT OrigTy = TLI->getValueType(DL, Ty);
464
465 // Computes cost on targets that have packed math instructions(which support
466 // 16-bit types only).
467 if (IsPairwise ||
468 !ST->hasVOP3PInsts() ||
469 OrigTy.getScalarSizeInBits() != 16)
470 return BaseT::getArithmeticReductionCost(Opcode, Ty, IsPairwise);
471
472 std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
473 return LT.first * getFullRateInstrCost();
474}
475
476int GCNTTIImpl::getMinMaxReductionCost(Type *Ty, Type *CondTy,
477 bool IsPairwise,
478 bool IsUnsigned) {
479 EVT OrigTy = TLI->getValueType(DL, Ty);
480
481 // Computes cost on targets that have packed math instructions(which support
482 // 16-bit types only).
483 if (IsPairwise ||
484 !ST->hasVOP3PInsts() ||
485 OrigTy.getScalarSizeInBits() != 16)
486 return BaseT::getMinMaxReductionCost(Ty, CondTy, IsPairwise, IsUnsigned);
487
488 std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
489 return LT.first * getHalfRateInstrCost();
490}
491
492int GCNTTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy,
493 unsigned Index) {
494 switch (Opcode) {
495 case Instruction::ExtractElement:
496 case Instruction::InsertElement: {
497 unsigned EltSize
498 = DL.getTypeSizeInBits(cast<VectorType>(ValTy)->getElementType());
499 if (EltSize < 32) {
500 if (EltSize == 16 && Index == 0 && ST->has16BitInsts())
501 return 0;
502 return BaseT::getVectorInstrCost(Opcode, ValTy, Index);
503 }
504
505 // Extracts are just reads of a subregister, so are free. Inserts are
506 // considered free because we don't want to have any cost for scalarizing
507 // operations, and we don't have to copy into a different register class.
508
509 // Dynamic indexing isn't free and is best avoided.
510 return Index == ~0u ? 2 : 0;
511 }
512 default:
513 return BaseT::getVectorInstrCost(Opcode, ValTy, Index);
514 }
515}
516
517
518
519static bool isArgPassedInSGPR(const Argument *A) {
520 const Function *F = A->getParent();
521
522 // Arguments to compute shaders are never a source of divergence.
523 CallingConv::ID CC = F->getCallingConv();
524 switch (CC) {
525 case CallingConv::AMDGPU_KERNEL:
526 case CallingConv::SPIR_KERNEL:
527 return true;
528 case CallingConv::AMDGPU_VS:
529 case CallingConv::AMDGPU_LS:
530 case CallingConv::AMDGPU_HS:
531 case CallingConv::AMDGPU_ES:
532 case CallingConv::AMDGPU_GS:
533 case CallingConv::AMDGPU_PS:
534 case CallingConv::AMDGPU_CS:
535 // For non-compute shaders, SGPR inputs are marked with either inreg or byval.
536 // Everything else is in VGPRs.
537 return F->getAttributes().hasParamAttribute(A->getArgNo(), Attribute::InReg) ||
538 F->getAttributes().hasParamAttribute(A->getArgNo(), Attribute::ByVal);
539 default:
540 // TODO: Should calls support inreg for SGPR inputs?
541 return false;
542 }
543}
544
545/// \returns true if the result of the value could potentially be
546/// different across workitems in a wavefront.
547bool GCNTTIImpl::isSourceOfDivergence(const Value *V) const {
548 if (const Argument *A = dyn_cast<Argument>(V))
549 return !isArgPassedInSGPR(A);
550
551 // Loads from the private and flat address spaces are divergent, because
552 // threads can execute the load instruction with the same inputs and get
553 // different results.
554 //
555 // All other loads are not divergent, because if threads issue loads with the
556 // same arguments, they will always get the same result.
557 if (const LoadInst *Load = dyn_cast<LoadInst>(V))
558 return Load->getPointerAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS ||
559 Load->getPointerAddressSpace() == AMDGPUAS::FLAT_ADDRESS;
560
561 // Atomics are divergent because they are executed sequentially: when an
562 // atomic operation refers to the same address in each thread, then each
563 // thread after the first sees the value written by the previous thread as
564 // original value.
565 if (isa<AtomicRMWInst>(V) || isa<AtomicCmpXchgInst>(V))
566 return true;
567
568 if (const IntrinsicInst *Intrinsic = dyn_cast<IntrinsicInst>(V))
569 return AMDGPU::isIntrinsicSourceOfDivergence(Intrinsic->getIntrinsicID());
570
571 // Assume all function calls are a source of divergence.
572 if (isa<CallInst>(V) || isa<InvokeInst>(V))
573 return true;
574
575 return false;
576}
577
578bool GCNTTIImpl::isAlwaysUniform(const Value *V) const {
579 if (const IntrinsicInst *Intrinsic = dyn_cast<IntrinsicInst>(V)) {
580 switch (Intrinsic->getIntrinsicID()) {
581 default:
582 return false;
583 case Intrinsic::amdgcn_readfirstlane:
584 case Intrinsic::amdgcn_readlane:
585 case Intrinsic::amdgcn_icmp:
586 case Intrinsic::amdgcn_fcmp:
587 return true;
588 }
589 }
590 return false;
591}
592
593bool GCNTTIImpl::collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
594 Intrinsic::ID IID) const {
595 switch (IID) {
596 case Intrinsic::amdgcn_atomic_inc:
597 case Intrinsic::amdgcn_atomic_dec:
598 case Intrinsic::amdgcn_ds_fadd:
599 case Intrinsic::amdgcn_ds_fmin:
600 case Intrinsic::amdgcn_ds_fmax:
601 case Intrinsic::amdgcn_is_shared:
602 case Intrinsic::amdgcn_is_private:
603 OpIndexes.push_back(0);
604 return true;
605 default:
606 return false;
607 }
608}
609
610bool GCNTTIImpl::rewriteIntrinsicWithAddressSpace(
611 IntrinsicInst *II, Value *OldV, Value *NewV) const {
612 auto IntrID = II->getIntrinsicID();
613 switch (IntrID) {
614 case Intrinsic::amdgcn_atomic_inc:
615 case Intrinsic::amdgcn_atomic_dec:
616 case Intrinsic::amdgcn_ds_fadd:
617 case Intrinsic::amdgcn_ds_fmin:
618 case Intrinsic::amdgcn_ds_fmax: {
619 const ConstantInt *IsVolatile = cast<ConstantInt>(II->getArgOperand(4));
620 if (!IsVolatile->isZero())
621 return false;
622 Module *M = II->getParent()->getParent()->getParent();
623 Type *DestTy = II->getType();
624 Type *SrcTy = NewV->getType();
625 Function *NewDecl =
626 Intrinsic::getDeclaration(M, II->getIntrinsicID(), {DestTy, SrcTy});
627 II->setArgOperand(0, NewV);
628 II->setCalledFunction(NewDecl);
629 return true;
630 }
631 case Intrinsic::amdgcn_is_shared:
632 case Intrinsic::amdgcn_is_private: {
633 unsigned TrueAS = IntrID == Intrinsic::amdgcn_is_shared ?
634 AMDGPUAS::LOCAL_ADDRESS : AMDGPUAS::PRIVATE_ADDRESS;
635 unsigned NewAS = NewV->getType()->getPointerAddressSpace();
636 LLVMContext &Ctx = NewV->getType()->getContext();
637 ConstantInt *NewVal = (TrueAS == NewAS) ?
638 ConstantInt::getTrue(Ctx) : ConstantInt::getFalse(Ctx);
639 II->replaceAllUsesWith(NewVal);
640 II->eraseFromParent();
641 return true;
642 }
643 default:
644 return false;
645 }
646}
647
648unsigned GCNTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
649 Type *SubTp) {
650 if (ST->hasVOP3PInsts()) {
651 VectorType *VT = cast<VectorType>(Tp);
652 if (VT->getNumElements() == 2 &&
653 DL.getTypeSizeInBits(VT->getElementType()) == 16) {
654 // With op_sel VOP3P instructions freely can access the low half or high
655 // half of a register, so any swizzle is free.
656
657 switch (Kind) {
658 case TTI::SK_Broadcast:
659 case TTI::SK_Reverse:
660 case TTI::SK_PermuteSingleSrc:
661 return 0;
662 default:
663 break;
664 }
665 }
666 }
667
668 return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
669}
670
671bool GCNTTIImpl::areInlineCompatible(const Function *Caller,
672 const Function *Callee) const {
673 const TargetMachine &TM = getTLI()->getTargetMachine();
674 const FeatureBitset &CallerBits =
675 TM.getSubtargetImpl(*Caller)->getFeatureBits();
676 const FeatureBitset &CalleeBits =
677 TM.getSubtargetImpl(*Callee)->getFeatureBits();
678
679 FeatureBitset RealCallerBits = CallerBits & ~InlineFeatureIgnoreList;
680 FeatureBitset RealCalleeBits = CalleeBits & ~InlineFeatureIgnoreList;
681 if ((RealCallerBits & RealCalleeBits) != RealCalleeBits)
682 return false;
683
684 // FIXME: dx10_clamp can just take the caller setting, but there seems to be
685 // no way to support merge for backend defined attributes.
686 AMDGPU::SIModeRegisterDefaults CallerMode(*Caller);
687 AMDGPU::SIModeRegisterDefaults CalleeMode(*Callee);
688 return CallerMode.isInlineCompatible(CalleeMode);
689}
690
691void GCNTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
692 TTI::UnrollingPreferences &UP) {
693 CommonTTI.getUnrollingPreferences(L, SE, UP);
694}
695
696unsigned GCNTTIImpl::getUserCost(const User *U,
697 ArrayRef<const Value *> Operands) {
698 const Instruction *I = dyn_cast<Instruction>(U);
1
Assuming 'U' is not a 'Instruction'
699 if (!I)
1.1
'I' is null
2
Taking true branch
700 return BaseT::getUserCost(U, Operands);
3
Calling 'TargetTransformInfoImplCRTPBase::getUserCost'
701
702 // Estimate different operations to be optimized out
703 switch (I->getOpcode()) {
704 case Instruction::ExtractElement: {
705 ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1));
706 unsigned Idx = -1;
707 if (CI)
708 Idx = CI->getZExtValue();
709 return getVectorInstrCost(I->getOpcode(), I->getOperand(0)->getType(), Idx);
710 }
711 case Instruction::InsertElement: {
712 ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(2));
713 unsigned Idx = -1;
714 if (CI)
715 Idx = CI->getZExtValue();
716 return getVectorInstrCost(I->getOpcode(), I->getType(), Idx);
717 }
718 case Instruction::Call: {
719 if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) {
720 SmallVector<Value *, 4> Args(II->arg_operands());
721 FastMathFlags FMF;
722 if (auto *FPMO = dyn_cast<FPMathOperator>(II))
723 FMF = FPMO->getFastMathFlags();
724 return getIntrinsicInstrCost(II->getIntrinsicID(), II->getType(), Args,
725 FMF);
726 } else {
727 return BaseT::getUserCost(U, Operands);
728 }
729 }
730 case Instruction::ShuffleVector: {
731 const ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(I);
732 Type *Ty = Shuffle->getType();
733 Type *SrcTy = Shuffle->getOperand(0)->getType();
734
735 // TODO: Identify and add costs for insert subvector, etc.
736 int SubIndex;
737 if (Shuffle->isExtractSubvectorMask(SubIndex))
738 return getShuffleCost(TTI::SK_ExtractSubvector, SrcTy, SubIndex, Ty);
739
740 if (Shuffle->changesLength())
741 return BaseT::getUserCost(U, Operands);
742
743 if (Shuffle->isIdentity())
744 return 0;
745
746 if (Shuffle->isReverse())
747 return getShuffleCost(TTI::SK_Reverse, Ty, 0, nullptr);
748
749 if (Shuffle->isSelect())
750 return getShuffleCost(TTI::SK_Select, Ty, 0, nullptr);
751
752 if (Shuffle->isTranspose())
753 return getShuffleCost(TTI::SK_Transpose, Ty, 0, nullptr);
754
755 if (Shuffle->isZeroEltSplat())
756 return getShuffleCost(TTI::SK_Broadcast, Ty, 0, nullptr);
757
758 if (Shuffle->isSingleSource())
759 return getShuffleCost(TTI::SK_PermuteSingleSrc, Ty, 0, nullptr);
760
761 return getShuffleCost(TTI::SK_PermuteTwoSrc, Ty, 0, nullptr);
762 }
763 case Instruction::ZExt:
764 case Instruction::SExt:
765 case Instruction::FPToUI:
766 case Instruction::FPToSI:
767 case Instruction::FPExt:
768 case Instruction::PtrToInt:
769 case Instruction::IntToPtr:
770 case Instruction::SIToFP:
771 case Instruction::UIToFP:
772 case Instruction::Trunc:
773 case Instruction::FPTrunc:
774 case Instruction::BitCast:
775 case Instruction::AddrSpaceCast: {
776 return getCastInstrCost(I->getOpcode(), I->getType(),
777 I->getOperand(0)->getType(), I);
778 }
779 case Instruction::Add:
780 case Instruction::FAdd:
781 case Instruction::Sub:
782 case Instruction::FSub:
783 case Instruction::Mul:
784 case Instruction::FMul:
785 case Instruction::UDiv:
786 case Instruction::SDiv:
787 case Instruction::FDiv:
788 case Instruction::URem:
789 case Instruction::SRem:
790 case Instruction::FRem:
791 case Instruction::Shl:
792 case Instruction::LShr:
793 case Instruction::AShr:
794 case Instruction::And:
795 case Instruction::Or:
796 case Instruction::Xor:
797 case Instruction::FNeg: {
798 return getArithmeticInstrCost(I->getOpcode(), I->getType(),
799 TTI::OK_AnyValue, TTI::OK_AnyValue,
800 TTI::OP_None, TTI::OP_None, Operands);
801 }
802 default:
803 break;
804 }
805
806 return BaseT::getUserCost(U, Operands);
807}
808
809unsigned R600TTIImpl::getHardwareNumberOfRegisters(bool Vec) const {
810 return 4 * 128; // XXX - 4 channels. Should these count as vector instead?
811}
812
813unsigned R600TTIImpl::getNumberOfRegisters(bool Vec) const {
814 return getHardwareNumberOfRegisters(Vec);
815}
816
817unsigned R600TTIImpl::getRegisterBitWidth(bool Vector) const {
818 return 32;
819}
820
821unsigned R600TTIImpl::getMinVectorRegisterBitWidth() const {
822 return 32;
823}
824
825unsigned R600TTIImpl::getLoadStoreVecRegBitWidth(unsigned AddrSpace) const {
826 if (AddrSpace == AMDGPUAS::GLOBAL_ADDRESS ||
827 AddrSpace == AMDGPUAS::CONSTANT_ADDRESS)
828 return 128;
829 if (AddrSpace == AMDGPUAS::LOCAL_ADDRESS ||
830 AddrSpace == AMDGPUAS::REGION_ADDRESS)
831 return 64;
832 if (AddrSpace == AMDGPUAS::PRIVATE_ADDRESS)
833 return 32;
834
835 if ((AddrSpace == AMDGPUAS::PARAM_D_ADDRESS ||
836 AddrSpace == AMDGPUAS::PARAM_I_ADDRESS ||
837 (AddrSpace >= AMDGPUAS::CONSTANT_BUFFER_0 &&
838 AddrSpace <= AMDGPUAS::CONSTANT_BUFFER_15)))
839 return 128;
840 llvm_unreachable("unhandled address space");
841}
842
843bool R600TTIImpl::isLegalToVectorizeMemChain(unsigned ChainSizeInBytes,
844 unsigned Alignment,
845 unsigned AddrSpace) const {
846 // We allow vectorization of flat stores, even though we may need to decompose
847 // them later if they may access private memory. We don't have enough context
848 // here, and legalization can handle it.
849 return (AddrSpace != AMDGPUAS::PRIVATE_ADDRESS);
850}
851
852bool R600TTIImpl::isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
853 unsigned Alignment,
854 unsigned AddrSpace) const {
855 return isLegalToVectorizeMemChain(ChainSizeInBytes, Alignment, AddrSpace);
856}
857
858bool R600TTIImpl::isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
859 unsigned Alignment,
860 unsigned AddrSpace) const {
861 return isLegalToVectorizeMemChain(ChainSizeInBytes, Alignment, AddrSpace);
862}
863
864unsigned R600TTIImpl::getMaxInterleaveFactor(unsigned VF) {
865 // Disable unrolling if the loop is not vectorized.
866 // TODO: Enable this again.
867 if (VF == 1)
868 return 1;
869
870 return 8;
871}
872
873unsigned R600TTIImpl::getCFInstrCost(unsigned Opcode) {
874 // XXX - For some reason this isn't called for switch.
875 switch (Opcode) {
876 case Instruction::Br:
877 case Instruction::Ret:
878 return 10;
879 default:
880 return BaseT::getCFInstrCost(Opcode);
881 }
882}
883
884int R600TTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy,
885 unsigned Index) {
886 switch (Opcode) {
887 case Instruction::ExtractElement:
888 case Instruction::InsertElement: {
889 unsigned EltSize
890 = DL.getTypeSizeInBits(cast<VectorType>(ValTy)->getElementType());
891 if (EltSize < 32) {
892 return BaseT::getVectorInstrCost(Opcode, ValTy, Index);
893 }
894
895 // Extracts are just reads of a subregister, so are free. Inserts are
896 // considered free because we don't want to have any cost for scalarizing
897 // operations, and we don't have to copy into a different register class.
898
899 // Dynamic indexing isn't free and is best avoided.
900 return Index == ~0u ? 2 : 0;
901 }
902 default:
903 return BaseT::getVectorInstrCost(Opcode, ValTy, Index);
904 }
905}
906
907void R600TTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
908 TTI::UnrollingPreferences &UP) {
909 CommonTTI.getUnrollingPreferences(L, SE, UP);
910}

/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h

1//===- TargetTransformInfoImpl.h --------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9/// This file provides helpers for the implementation of
10/// a TargetTransformInfo-conforming class.
11///
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFOIMPL_H
15#define LLVM_ANALYSIS_TARGETTRANSFORMINFOIMPL_H
16
17#include "llvm/Analysis/ScalarEvolutionExpressions.h"
18#include "llvm/Analysis/TargetTransformInfo.h"
19#include "llvm/Analysis/VectorUtils.h"
20#include "llvm/IR/CallSite.h"
21#include "llvm/IR/DataLayout.h"
22#include "llvm/IR/Function.h"
23#include "llvm/IR/GetElementPtrTypeIterator.h"
24#include "llvm/IR/Operator.h"
25#include "llvm/IR/Type.h"
26
27namespace llvm {
28
29/// Base class for use as a mix-in that aids implementing
30/// a TargetTransformInfo-compatible class.
31class TargetTransformInfoImplBase {
32protected:
33 typedef TargetTransformInfo TTI;
34
35 const DataLayout &DL;
36
37 explicit TargetTransformInfoImplBase(const DataLayout &DL) : DL(DL) {}
38
39public:
40 // Provide value semantics. MSVC requires that we spell all of these out.
41 TargetTransformInfoImplBase(const TargetTransformInfoImplBase &Arg)
42 : DL(Arg.DL) {}
43 TargetTransformInfoImplBase(TargetTransformInfoImplBase &&Arg) : DL(Arg.DL) {}
44
45 const DataLayout &getDataLayout() const { return DL; }
46
47 unsigned getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) {
48 switch (Opcode) {
47
Control jumps to 'case IntToPtr:' at line 73
49 default:
50 // By default, just classify everything as 'basic'.
51 return TTI::TCC_Basic;
52
53 case Instruction::GetElementPtr:
54 llvm_unreachable("Use getGEPCost for GEP operations!");
55
56 case Instruction::BitCast:
57 assert(OpTy && "Cast instructions must provide the operand type");
58 if (Ty == OpTy || (Ty->isPointerTy() && OpTy->isPointerTy()))
59 // Identity and pointer-to-pointer casts are free.
60 return TTI::TCC_Free;
61
62 // Otherwise, the default basic cost is used.
63 return TTI::TCC_Basic;
64
65 case Instruction::FDiv:
66 case Instruction::FRem:
67 case Instruction::SDiv:
68 case Instruction::SRem:
69 case Instruction::UDiv:
70 case Instruction::URem:
71 return TTI::TCC_Expensive;
72
73 case Instruction::IntToPtr: {
74 // An inttoptr cast is free so long as the input is a legal integer type
75 // which doesn't contain values outside the range of a pointer.
76 unsigned OpSize = OpTy->getScalarSizeInBits();
48
Called C++ object pointer is null
77 if (DL.isLegalInteger(OpSize) &&
78 OpSize <= DL.getPointerTypeSizeInBits(Ty))
79 return TTI::TCC_Free;
80
81 // Otherwise it's not a no-op.
82 return TTI::TCC_Basic;
83 }
84 case Instruction::PtrToInt: {
85 // A ptrtoint cast is free so long as the result is large enough to store
86 // the pointer, and a legal integer type.
87 unsigned DestSize = Ty->getScalarSizeInBits();
88 if (DL.isLegalInteger(DestSize) &&
89 DestSize >= DL.getPointerTypeSizeInBits(OpTy))
90 return TTI::TCC_Free;
91
92 // Otherwise it's not a no-op.
93 return TTI::TCC_Basic;
94 }
95 case Instruction::Trunc:
96 // trunc to a native type is free (assuming the target has compare and
97 // shift-right of the same width).
98 if (DL.isLegalInteger(DL.getTypeSizeInBits(Ty)))
99 return TTI::TCC_Free;
100
101 return TTI::TCC_Basic;
102 }
103 }
104
105 int getGEPCost(Type *PointeeType, const Value *Ptr,
106 ArrayRef<const Value *> Operands) {
107 // In the basic model, we just assume that all-constant GEPs will be folded
108 // into their uses via addressing modes.
109 for (unsigned Idx = 0, Size = Operands.size(); Idx != Size; ++Idx)
110 if (!isa<Constant>(Operands[Idx]))
111 return TTI::TCC_Basic;
112
113 return TTI::TCC_Free;
114 }
115
116 unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
117 unsigned &JTSize,
118 ProfileSummaryInfo *PSI,
119 BlockFrequencyInfo *BFI) {
120 (void)PSI;
121 (void)BFI;
122 JTSize = 0;
123 return SI.getNumCases();
124 }
125
126 int getExtCost(const Instruction *I, const Value *Src) {
127 return TTI::TCC_Basic;
128 }
129
130 unsigned getCallCost(FunctionType *FTy, int NumArgs, const User *U) {
131 assert(FTy && "FunctionType must be provided to this routine.");
132
133 // The target-independent implementation just measures the size of the
134 // function by approximating that each argument will take on average one
135 // instruction to prepare.
136
137 if (NumArgs < 0)
138 // Set the argument number to the number of explicit arguments in the
139 // function.
140 NumArgs = FTy->getNumParams();
141
142 return TTI::TCC_Basic * (NumArgs + 1);
143 }
144
145 unsigned getInliningThresholdMultiplier() { return 1; }
146
147 int getInlinerVectorBonusPercent() { return 150; }
148
149 unsigned getMemcpyCost(const Instruction *I) {
150 return TTI::TCC_Expensive;
151 }
152
153 bool hasBranchDivergence() { return false; }
154
155 bool isSourceOfDivergence(const Value *V) { return false; }
156
157 bool isAlwaysUniform(const Value *V) { return false; }
158
159 unsigned getFlatAddressSpace () {
160 return -1;
161 }
162
163 bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
164 Intrinsic::ID IID) const {
165 return false;
166 }
167
168 bool rewriteIntrinsicWithAddressSpace(IntrinsicInst *II,
169 Value *OldV, Value *NewV) const {
170 return false;
171 }
172
173 bool isLoweredToCall(const Function *F) {
174 assert(F && "A concrete function must be provided to this routine.");
175
176 // FIXME: These should almost certainly not be handled here, and instead
177 // handled with the help of TLI or the target itself. This was largely
178 // ported from existing analysis heuristics here so that such refactorings
179 // can take place in the future.
180
181 if (F->isIntrinsic())
182 return false;
183
184 if (F->hasLocalLinkage() || !F->hasName())
185 return true;
186
187 StringRef Name = F->getName();
188
189 // These will all likely lower to a single selection DAG node.
190 if (Name == "copysign" || Name == "copysignf" || Name == "copysignl" ||
191 Name == "fabs" || Name == "fabsf" || Name == "fabsl" || Name == "sin" ||
192 Name == "fmin" || Name == "fminf" || Name == "fminl" ||
193 Name == "fmax" || Name == "fmaxf" || Name == "fmaxl" ||
194 Name == "sinf" || Name == "sinl" || Name == "cos" || Name == "cosf" ||
195 Name == "cosl" || Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl")
196 return false;
197
198 // These are all likely to be optimized into something smaller.
199 if (Name == "pow" || Name == "powf" || Name == "powl" || Name == "exp2" ||
200 Name == "exp2l" || Name == "exp2f" || Name == "floor" ||
201 Name == "floorf" || Name == "ceil" || Name == "round" ||
202 Name == "ffs" || Name == "ffsl" || Name == "abs" || Name == "labs" ||
203 Name == "llabs")
204 return false;
205
206 return true;
207 }
208
209 bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
210 AssumptionCache &AC,
211 TargetLibraryInfo *LibInfo,
212 HardwareLoopInfo &HWLoopInfo) {
213 return false;
214 }
215
216 bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE,
217 AssumptionCache &AC, TargetLibraryInfo *TLI,
218 DominatorTree *DT,
219 const LoopAccessInfo *LAI) const {
220 return false;
221 }
222
223 void getUnrollingPreferences(Loop *, ScalarEvolution &,
224 TTI::UnrollingPreferences &) {}
225
226 bool isLegalAddImmediate(int64_t Imm) { return false; }
227
228 bool isLegalICmpImmediate(int64_t Imm) { return false; }
229
230 bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
231 bool HasBaseReg, int64_t Scale,
232 unsigned AddrSpace, Instruction *I = nullptr) {
233 // Guess that only reg and reg+reg addressing is allowed. This heuristic is
234 // taken from the implementation of LSR.
235 return !BaseGV && BaseOffset == 0 && (Scale == 0 || Scale == 1);
236 }
237
238 bool isLSRCostLess(TTI::LSRCost &C1, TTI::LSRCost &C2) {
239 return std::tie(C1.NumRegs, C1.AddRecCost, C1.NumIVMuls, C1.NumBaseAdds,
240 C1.ScaleCost, C1.ImmCost, C1.SetupCost) <
241 std::tie(C2.NumRegs, C2.AddRecCost, C2.NumIVMuls, C2.NumBaseAdds,
242 C2.ScaleCost, C2.ImmCost, C2.SetupCost);
243 }
244
245 bool canMacroFuseCmp() { return false; }
246
247 bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI,
248 DominatorTree *DT, AssumptionCache *AC,
249 TargetLibraryInfo *LibInfo) {
250 return false;
251 }
252
253 bool shouldFavorPostInc() const { return false; }
254
255 bool shouldFavorBackedgeIndex(const Loop *L) const { return false; }
256
257 bool isLegalMaskedStore(Type *DataType, MaybeAlign Alignment) { return false; }
258
259 bool isLegalMaskedLoad(Type *DataType, MaybeAlign Alignment) { return false; }
260
261 bool isLegalNTStore(Type *DataType, Align Alignment) {
262 // By default, assume nontemporal memory stores are available for stores
263 // that are aligned and have a size that is a power of 2.
264 unsigned DataSize = DL.getTypeStoreSize(DataType);
265 return Alignment >= DataSize && isPowerOf2_32(DataSize);
266 }
267
268 bool isLegalNTLoad(Type *DataType, Align Alignment) {
269 // By default, assume nontemporal memory loads are available for loads that
270 // are aligned and have a size that is a power of 2.
271 unsigned DataSize = DL.getTypeStoreSize(DataType);
272 return Alignment >= DataSize && isPowerOf2_32(DataSize);
273 }
274
275 bool isLegalMaskedScatter(Type *DataType) { return false; }
276
277 bool isLegalMaskedGather(Type *DataType) { return false; }
278
279 bool isLegalMaskedCompressStore(Type *DataType) { return false; }
280
281 bool isLegalMaskedExpandLoad(Type *DataType) { return false; }
282
283 bool hasDivRemOp(Type *DataType, bool IsSigned) { return false; }
284
285 bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) { return false; }
286
287 bool prefersVectorizedAddressing() { return true; }
288
289 int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
290 bool HasBaseReg, int64_t Scale, unsigned AddrSpace) {
291 // Guess that all legal addressing mode are free.
292 if (isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg,
293 Scale, AddrSpace))
294 return 0;
295 return -1;
296 }
297
298 bool LSRWithInstrQueries() { return false; }
299
300 bool isTruncateFree(Type *Ty1, Type *Ty2) { return false; }
301
302 bool isProfitableToHoist(Instruction *I) { return true; }
303
304 bool useAA() { return false; }
305
306 bool isTypeLegal(Type *Ty) { return false; }
307
308 bool shouldBuildLookupTables() { return true; }
309 bool shouldBuildLookupTablesForConstant(Constant *C) { return true; }
310
311 bool useColdCCForColdCall(Function &F) { return false; }
312
313 unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) {
314 return 0;
315 }
316
317 unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
318 unsigned VF) { return 0; }
319
320 bool supportsEfficientVectorElementLoadStore() { return false; }
321
322 bool enableAggressiveInterleaving(bool LoopHasReductions) { return false; }
323
324 TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
325 bool IsZeroCmp) const {
326 return {};
327 }
328
329 bool enableInterleavedAccessVectorization() { return false; }
330
331 bool enableMaskedInterleavedAccessVectorization() { return false; }
332
333 bool isFPVectorizationPotentiallyUnsafe() { return false; }
334
335 bool allowsMisalignedMemoryAccesses(LLVMContext &Context,
336 unsigned BitWidth,
337 unsigned AddressSpace,
338 unsigned Alignment,
339 bool *Fast) { return false; }
340
341 TTI::PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) {
342 return TTI::PSK_Software;
343 }
344
345 bool haveFastSqrt(Type *Ty) { return false; }
346
347 bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) { return true; }
348
349 unsigned getFPOpCost(Type *Ty) { return TargetTransformInfo::TCC_Basic; }
350
351 int getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
352 Type *Ty) {
353 return 0;
354 }
355
356 unsigned getIntImmCost(const APInt &Imm, Type *Ty) { return TTI::TCC_Basic; }
357
358 unsigned getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
359 Type *Ty) {
360 return TTI::TCC_Free;
361 }
362
363 unsigned getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
364 Type *Ty) {
365 return TTI::TCC_Free;
366 }
367
368 unsigned getNumberOfRegisters(unsigned ClassID) const { return 8; }
369
370 unsigned getRegisterClassForType(bool Vector, Type *Ty = nullptr) const {
371 return Vector ? 1 : 0;
372 };
373
374 const char* getRegisterClassName(unsigned ClassID) const {
375 switch (ClassID) {
376 default:
377 return "Generic::Unknown Register Class";
378 case 0: return "Generic::ScalarRC";
379 case 1: return "Generic::VectorRC";
380 }
381 }
382
383 unsigned getRegisterBitWidth(bool Vector) const { return 32; }
384
385 unsigned getMinVectorRegisterBitWidth() { return 128; }
386
387 bool shouldMaximizeVectorBandwidth(bool OptSize) const { return false; }
388
389 unsigned getMinimumVF(unsigned ElemWidth) const { return 0; }
390
391 bool
392 shouldConsiderAddressTypePromotion(const Instruction &I,
393 bool &AllowPromotionWithoutCommonHeader) {
394 AllowPromotionWithoutCommonHeader = false;
395 return false;
396 }
397
398 unsigned getCacheLineSize() const { return 0; }
399
400 llvm::Optional<unsigned> getCacheSize(TargetTransformInfo::CacheLevel Level) const {
401 switch (Level) {
402 case TargetTransformInfo::CacheLevel::L1D:
403 LLVM_FALLTHROUGH;
404 case TargetTransformInfo::CacheLevel::L2D:
405 return llvm::Optional<unsigned>();
406 }
407 llvm_unreachable("Unknown TargetTransformInfo::CacheLevel");
408 }
409
410 llvm::Optional<unsigned> getCacheAssociativity(
411 TargetTransformInfo::CacheLevel Level) const {
412 switch (Level) {
413 case TargetTransformInfo::CacheLevel::L1D:
414 LLVM_FALLTHROUGH;
415 case TargetTransformInfo::CacheLevel::L2D:
416 return llvm::Optional<unsigned>();
417 }
418
419 llvm_unreachable("Unknown TargetTransformInfo::CacheLevel");
420 }
421
422 unsigned getPrefetchDistance() const { return 0; }
423 unsigned getMinPrefetchStride() const { return 1; }
424 unsigned getMaxPrefetchIterationsAhead() const { return UINT_MAX; }
425
426 unsigned getMaxInterleaveFactor(unsigned VF) { return 1; }
427
428 unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty,
429 TTI::OperandValueKind Opd1Info,
430 TTI::OperandValueKind Opd2Info,
431 TTI::OperandValueProperties Opd1PropInfo,
432 TTI::OperandValueProperties Opd2PropInfo,
433 ArrayRef<const Value *> Args) {
434 return 1;
435 }
436
437 unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Ty, int Index,
438 Type *SubTp) {
439 return 1;
440 }
441
442 unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
443 const Instruction *I) { return 1; }
444
445 unsigned getExtractWithExtendCost(unsigned Opcode, Type *Dst,
446 VectorType *VecTy, unsigned Index) {
447 return 1;
448 }
449
450 unsigned getCFInstrCost(unsigned Opcode) { return 1; }
451
452 unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
453 const Instruction *I) {
454 return 1;
455 }
456
457 unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
458 return 1;
459 }
460
461 unsigned getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
462 unsigned AddressSpace, const Instruction *I) {
463 return 1;
464 }
465
466 unsigned getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
467 unsigned AddressSpace) {
468 return 1;
469 }
470
471 unsigned getGatherScatterOpCost(unsigned Opcode, Type *DataTy, Value *Ptr,
472 bool VariableMask,
473 unsigned Alignment) {
474 return 1;
475 }
476
477 unsigned getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
478 unsigned Factor,
479 ArrayRef<unsigned> Indices,
480 unsigned Alignment, unsigned AddressSpace,
481 bool UseMaskForCond = false,
482 bool UseMaskForGaps = false) {
483 return 1;
484 }
485
486 unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
487 ArrayRef<Type *> Tys, FastMathFlags FMF,
488 unsigned ScalarizationCostPassed) {
489 return 1;
490 }
491 unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
492 ArrayRef<Value *> Args, FastMathFlags FMF, unsigned VF) {
493 return 1;
494 }
495
496 unsigned getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys) {
497 return 1;
498 }
499
500 unsigned getNumberOfParts(Type *Tp) { return 0; }
501
502 unsigned getAddressComputationCost(Type *Tp, ScalarEvolution *,
503 const SCEV *) {
504 return 0;
505 }
506
507 unsigned getArithmeticReductionCost(unsigned, Type *, bool) { return 1; }
508
509 unsigned getMinMaxReductionCost(Type *, Type *, bool, bool) { return 1; }
510
511 unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) { return 0; }
512
513 bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) {
514 return false;
515 }
516
517 unsigned getAtomicMemIntrinsicMaxElementSize() const {
518 // Note for overrides: You must ensure for all element unordered-atomic
519 // memory intrinsics that all power-of-2 element sizes up to, and
520 // including, the return value of this method have a corresponding
521 // runtime lib call. These runtime lib call definitions can be found
522 // in RuntimeLibcalls.h
523 return 0;
524 }
525
526 Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
527 Type *ExpectedType) {
528 return nullptr;
529 }
530
531 Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
532 unsigned SrcAlign, unsigned DestAlign) const {
533 return Type::getInt8Ty(Context);
534 }
535
536 void getMemcpyLoopResidualLoweringType(SmallVectorImpl<Type *> &OpsOut,
537 LLVMContext &Context,
538 unsigned RemainingBytes,
539 unsigned SrcAlign,
540 unsigned DestAlign) const {
541 for (unsigned i = 0; i != RemainingBytes; ++i)
542 OpsOut.push_back(Type::getInt8Ty(Context));
543 }
544
545 bool areInlineCompatible(const Function *Caller,
546 const Function *Callee) const {
547 return (Caller->getFnAttribute("target-cpu") ==
548 Callee->getFnAttribute("target-cpu")) &&
549 (Caller->getFnAttribute("target-features") ==
550 Callee->getFnAttribute("target-features"));
551 }
552
553 bool areFunctionArgsABICompatible(const Function *Caller, const Function *Callee,
554 SmallPtrSetImpl<Argument *> &Args) const {
555 return (Caller->getFnAttribute("target-cpu") ==
556 Callee->getFnAttribute("target-cpu")) &&
557 (Caller->getFnAttribute("target-features") ==
558 Callee->getFnAttribute("target-features"));
559 }
560
561 bool isIndexedLoadLegal(TTI::MemIndexedMode Mode, Type *Ty,
562 const DataLayout &DL) const {
563 return false;
564 }
565
566 bool isIndexedStoreLegal(TTI::MemIndexedMode Mode, Type *Ty,
567 const DataLayout &DL) const {
568 return false;
569 }
570
571 unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const { return 128; }
572
573 bool isLegalToVectorizeLoad(LoadInst *LI) const { return true; }
574
575 bool isLegalToVectorizeStore(StoreInst *SI) const { return true; }
576
577 bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
578 unsigned Alignment,
579 unsigned AddrSpace) const {
580 return true;
581 }
582
583 bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
584 unsigned Alignment,
585 unsigned AddrSpace) const {
586 return true;
587 }
588
589 unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
590 unsigned ChainSizeInBytes,
591 VectorType *VecTy) const {
592 return VF;
593 }
594
595 unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
596 unsigned ChainSizeInBytes,
597 VectorType *VecTy) const {
598 return VF;
599 }
600
601 bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
602 TTI::ReductionFlags Flags) const {
603 return false;
604 }
605
606 bool shouldExpandReduction(const IntrinsicInst *II) const {
607 return true;
608 }
609
610 unsigned getGISelRematGlobalCost() const {
611 return 1;
612 }
613
614protected:
615 // Obtain the minimum required size to hold the value (without the sign)
616 // In case of a vector it returns the min required size for one element.
617 unsigned minRequiredElementSize(const Value* Val, bool &isSigned) {
618 if (isa<ConstantDataVector>(Val) || isa<ConstantVector>(Val)) {
619 const auto* VectorValue = cast<Constant>(Val);
620
621 // In case of a vector need to pick the max between the min
622 // required size for each element
623 auto *VT = cast<VectorType>(Val->getType());
624
625 // Assume unsigned elements
626 isSigned = false;
627
628 // The max required size is the total vector width divided by num
629 // of elements in the vector
630 unsigned MaxRequiredSize = VT->getBitWidth() / VT->getNumElements();
631
632 unsigned MinRequiredSize = 0;
633 for(unsigned i = 0, e = VT->getNumElements(); i < e; ++i) {
634 if (auto* IntElement =
635 dyn_cast<ConstantInt>(VectorValue->getAggregateElement(i))) {
636 bool signedElement = IntElement->getValue().isNegative();
637 // Get the element min required size.
638 unsigned ElementMinRequiredSize =
639 IntElement->getValue().getMinSignedBits() - 1;
640 // In case one element is signed then all the vector is signed.
641 isSigned |= signedElement;
642 // Save the max required bit size between all the elements.
643 MinRequiredSize = std::max(MinRequiredSize, ElementMinRequiredSize);
644 }
645 else {
646 // not an int constant element
647 return MaxRequiredSize;
648 }
649 }
650 return MinRequiredSize;
651 }
652
653 if (const auto* CI = dyn_cast<ConstantInt>(Val)) {
654 isSigned = CI->getValue().isNegative();
655 return CI->getValue().getMinSignedBits() - 1;
656 }
657
658 if (const auto* Cast = dyn_cast<SExtInst>(Val)) {
659 isSigned = true;
660 return Cast->getSrcTy()->getScalarSizeInBits() - 1;
661 }
662
663 if (const auto* Cast = dyn_cast<ZExtInst>(Val)) {
664 isSigned = false;
665 return Cast->getSrcTy()->getScalarSizeInBits();
666 }
667
668 isSigned = false;
669 return Val->getType()->getScalarSizeInBits();
670 }
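// A minimal sketch (standalone, not part of the analyzed headers) of the APInt
// arithmetic the scalar ConstantInt path above relies on; the values are illustrative.
#include "llvm/ADT/APInt.h"
static void minRequiredSizeIllustration() {
  llvm::APInt Seven(32, 7);                          // 0b0111: getMinSignedBits() == 4
  llvm::APInt MinusEight(32, -8, /*isSigned=*/true); // ...1000: getMinSignedBits() == 4
  unsigned SevenBits = Seven.getMinSignedBits() - 1;           // 3 value bits, not signed
  unsigned MinusEightBits = MinusEight.getMinSignedBits() - 1; // 3 value bits, signed
  (void)SevenBits; (void)MinusEightBits;
}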
671
672 bool isStridedAccess(const SCEV *Ptr) {
673 return Ptr && isa<SCEVAddRecExpr>(Ptr);
674 }
675
676 const SCEVConstant *getConstantStrideStep(ScalarEvolution *SE,
677 const SCEV *Ptr) {
678 if (!isStridedAccess(Ptr))
679 return nullptr;
680 const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ptr);
681 return dyn_cast<SCEVConstant>(AddRec->getStepRecurrence(*SE));
682 }
683
684 bool isConstantStridedAccessLessThan(ScalarEvolution *SE, const SCEV *Ptr,
685 int64_t MergeDistance) {
686 const SCEVConstant *Step = getConstantStrideStep(SE, Ptr);
687 if (!Step)
688 return false;
689 APInt StrideVal = Step->getAPInt();
690 if (StrideVal.getBitWidth() > 64)
691 return false;
692 // FIXME: Need to take absolute value for negative stride case.
693 return StrideVal.getSExtValue() < MergeDistance;
694 }
695};
696
697/// CRTP base class for use as a mix-in that aids implementing
698/// a TargetTransformInfo-compatible class.
699template <typename T>
700class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase {
701private:
702 typedef TargetTransformInfoImplBase BaseT;
703
704protected:
705 explicit TargetTransformInfoImplCRTPBase(const DataLayout &DL) : BaseT(DL) {}
706
707public:
708 using BaseT::getCallCost;
709
710 unsigned getCallCost(const Function *F, int NumArgs, const User *U) {
711 assert(F && "A concrete function must be provided to this routine.");
712
713 if (NumArgs < 0)
714 // Set the argument number to the number of explicit arguments in the
715 // function.
716 NumArgs = F->arg_size();
717
718 if (Intrinsic::ID IID = F->getIntrinsicID()) {
719 FunctionType *FTy = F->getFunctionType();
720 SmallVector<Type *, 8> ParamTys(FTy->param_begin(), FTy->param_end());
721 return static_cast<T *>(this)
722 ->getIntrinsicCost(IID, FTy->getReturnType(), ParamTys, U);
723 }
724
725 if (!static_cast<T *>(this)->isLoweredToCall(F))
726 return TTI::TCC_Basic; // Give a basic cost if it will be lowered
727 // directly.
728
729 return static_cast<T *>(this)->getCallCost(F->getFunctionType(), NumArgs, U);
730 }
731
732 unsigned getCallCost(const Function *F, ArrayRef<const Value *> Arguments,
733 const User *U) {
734 // Simply delegate to generic handling of the call.
735 // FIXME: We should use instsimplify or something else to catch calls which
736 // will constant fold with these arguments.
737 return static_cast<T *>(this)->getCallCost(F, Arguments.size(), U);
738 }
739
740 using BaseT::getGEPCost;
741
742 int getGEPCost(Type *PointeeType, const Value *Ptr,
743 ArrayRef<const Value *> Operands) {
744 assert(PointeeType && Ptr && "can't get GEPCost of nullptr");
745 // TODO: will remove this when pointers have an opaque type.
746 assert(Ptr->getType()->getScalarType()->getPointerElementType() ==
747 PointeeType &&
748 "explicit pointee type doesn't match operand's pointee type");
749 auto *BaseGV = dyn_cast<GlobalValue>(Ptr->stripPointerCasts());
750 bool HasBaseReg = (BaseGV == nullptr);
751
752 auto PtrSizeBits = DL.getPointerTypeSizeInBits(Ptr->getType());
753 APInt BaseOffset(PtrSizeBits, 0);
754 int64_t Scale = 0;
755
756 auto GTI = gep_type_begin(PointeeType, Operands);
757 Type *TargetType = nullptr;
758
759 // Handle the case where the GEP instruction has a single operand,
760 // the basis, therefore TargetType is a nullptr.
761 if (Operands.empty())
762 return !BaseGV ? TTI::TCC_Free : TTI::TCC_Basic;
763
764 for (auto I = Operands.begin(); I != Operands.end(); ++I, ++GTI) {
765 TargetType = GTI.getIndexedType();
766 // We assume that the cost of Scalar GEP with constant index and the
767 // cost of Vector GEP with splat constant index are the same.
768 const ConstantInt *ConstIdx = dyn_cast<ConstantInt>(*I);
769 if (!ConstIdx)
770 if (auto Splat = getSplatValue(*I))
771 ConstIdx = dyn_cast<ConstantInt>(Splat);
772 if (StructType *STy = GTI.getStructTypeOrNull()) {
773 // For structures the index is always splat or scalar constant
774 assert(ConstIdx && "Unexpected GEP index");
775 uint64_t Field = ConstIdx->getZExtValue();
776 BaseOffset += DL.getStructLayout(STy)->getElementOffset(Field);
777 } else {
778 int64_t ElementSize = DL.getTypeAllocSize(GTI.getIndexedType());
779 if (ConstIdx) {
780 BaseOffset +=
781 ConstIdx->getValue().sextOrTrunc(PtrSizeBits) * ElementSize;
782 } else {
783 // Needs scale register.
784 if (Scale != 0)
785 // No addressing mode takes two scale registers.
786 return TTI::TCC_Basic;
787 Scale = ElementSize;
788 }
789 }
790 }
791
792 if (static_cast<T *>(this)->isLegalAddressingMode(
793 TargetType, const_cast<GlobalValue *>(BaseGV),
794 BaseOffset.sextOrTrunc(64).getSExtValue(), HasBaseReg, Scale,
795 Ptr->getType()->getPointerAddressSpace()))
796 return TTI::TCC_Free;
797 return TTI::TCC_Basic;
798 }
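// A minimal sketch (standalone, not part of the analyzed headers) of the BaseOffset /
// Scale bookkeeping above, using DataLayout directly. For a hypothetical
// 'getelementptr { i32, i64 }, { i32, i64 }* %p, i64 %i, i32 1' the variable index
// contributes Scale = 16 (the struct's alloc size) and the struct field index
// contributes BaseOffset = 8, which is what isLegalAddressingMode is then asked about.
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/LLVMContext.h"
#include <cstdint>
static void gepCostBookkeepingSketch() {
  llvm::LLVMContext Ctx;
  llvm::DataLayout DL("e-m:e-i64:64-f80:128-n8:16:32:64-S128"); // x86_64-like layout
  auto *STy = llvm::StructType::get(llvm::Type::getInt32Ty(Ctx),
                                    llvm::Type::getInt64Ty(Ctx));
  int64_t Scale = DL.getTypeAllocSize(STy);                           // 16
  uint64_t BaseOffset = DL.getStructLayout(STy)->getElementOffset(1); // 8
  (void)Scale; (void)BaseOffset;
}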
799
800 unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
801 ArrayRef<Type *> ParamTys, const User *U) {
802 switch (IID) {
803 default:
804 // Intrinsics rarely (if ever) have normal argument setup constraints.
805 // Model them as having a basic instruction cost.
806 return TTI::TCC_Basic;
807
808 // TODO: other libc intrinsics.
809 case Intrinsic::memcpy:
810 return static_cast<T *>(this)->getMemcpyCost(dyn_cast<Instruction>(U));
811
812 case Intrinsic::annotation:
813 case Intrinsic::assume:
814 case Intrinsic::sideeffect:
815 case Intrinsic::dbg_declare:
816 case Intrinsic::dbg_value:
817 case Intrinsic::dbg_label:
818 case Intrinsic::invariant_start:
819 case Intrinsic::invariant_end:
820 case Intrinsic::launder_invariant_group:
821 case Intrinsic::strip_invariant_group:
822 case Intrinsic::is_constant:
823 case Intrinsic::lifetime_start:
824 case Intrinsic::lifetime_end:
825 case Intrinsic::objectsize:
826 case Intrinsic::ptr_annotation:
827 case Intrinsic::var_annotation:
828 case Intrinsic::experimental_gc_result:
829 case Intrinsic::experimental_gc_relocate:
830 case Intrinsic::coro_alloc:
831 case Intrinsic::coro_begin:
832 case Intrinsic::coro_free:
833 case Intrinsic::coro_end:
834 case Intrinsic::coro_frame:
835 case Intrinsic::coro_size:
836 case Intrinsic::coro_suspend:
837 case Intrinsic::coro_param:
838 case Intrinsic::coro_subfn_addr:
839 // These intrinsics don't actually represent code after lowering.
840 return TTI::TCC_Free;
841 }
842 }
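// For reference, a sketch (not part of this header) of the constants returned above:
// they come from TargetTransformInfo::TargetCostConstants, where a "free" operation
// costs 0, a "basic" one costs 1, and TCC_Expensive (used elsewhere) is 4.
static_assert(llvm::TargetTransformInfo::TCC_Free == 0 &&
              llvm::TargetTransformInfo::TCC_Basic == 1,
              "cost constants referenced by getIntrinsicCost above");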
843
844 unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
845 ArrayRef<const Value *> Arguments, const User *U) {
846 // Delegate to the generic intrinsic handling code. This mostly provides an
847 // opportunity for targets to (for example) special case the cost of
848 // certain intrinsics based on constants used as arguments.
849 SmallVector<Type *, 8> ParamTys;
850 ParamTys.reserve(Arguments.size());
851 for (unsigned Idx = 0, Size = Arguments.size(); Idx != Size; ++Idx)
852 ParamTys.push_back(Arguments[Idx]->getType());
853 return static_cast<T *>(this)->getIntrinsicCost(IID, RetTy, ParamTys, U);
854 }
855
856 unsigned getUserCost(const User *U, ArrayRef<const Value *> Operands) {
857 if (isa<PHINode>(U))
4
Assuming 'U' is not a 'PHINode'
5
Taking false branch
858 return TTI::TCC_Free; // Model all PHI nodes as free.
859
860 if (isa<ExtractValueInst>(U))
6
Assuming 'U' is not a 'ExtractValueInst'
7
Taking false branch
861 return TTI::TCC_Free; // Model all ExtractValue nodes as free.
862
863 // Static alloca doesn't generate target instructions.
864 if (auto *A = dyn_cast<AllocaInst>(U))
8
Assuming 'U' is not a 'AllocaInst'
8.1
'A' is null
9
Taking false branch
865 if (A->isStaticAlloca())
866 return TTI::TCC_Free;
867
868 if (const GEPOperator *GEP = dyn_cast<GEPOperator>(U)) {
10
Assuming 'U' is not a 'GEPOperator'
10.1
'GEP' is null
11
Taking false branch
869 return static_cast<T *>(this)->getGEPCost(GEP->getSourceElementType(),
870 GEP->getPointerOperand(),
871 Operands.drop_front());
872 }
873
874 if (auto CS = ImmutableCallSite(U)) {
12
Calling 'CallSiteBase::operator bool'
26
Returning from 'CallSiteBase::operator bool'
27
Taking false branch
875 const Function *F = CS.getCalledFunction();
876 if (!F) {
877 // Just use the called value type.
878 Type *FTy = CS.getCalledValue()->getType()->getPointerElementType();
879 return static_cast<T *>(this)
880 ->getCallCost(cast<FunctionType>(FTy), CS.arg_size(), U);
881 }
882
883 SmallVector<const Value *, 8> Arguments(CS.arg_begin(), CS.arg_end());
884 return static_cast<T *>(this)->getCallCost(F, Arguments, U);
885 }
886
887 if (isa<SExtInst>(U) || isa<ZExtInst>(U) || isa<FPExtInst>(U))
28
Assuming 'U' is not a 'SExtInst'
29
Assuming 'U' is not a 'ZExtInst'
30
Assuming 'U' is not a 'FPExtInst'
31
Taking false branch
888 // The old behaviour of generally treating extensions of icmp to be free
889 // has been removed. A target that needs it should override getUserCost().
890 return static_cast<T *>(this)->getExtCost(cast<Instruction>(U),
891 Operands.back());
892
893 return static_cast<T *>(this)->getOperationCost(
42
Calling 'BasicTTIImplBase::getOperationCost'
894 Operator::getOpcode(U), U->getType(),
32
Calling 'Operator::getOpcode'
38
Returning from 'Operator::getOpcode'
895 U->getNumOperands() == 1 ? U->getOperand(0)->getType() : nullptr);
39
Assuming the condition is false
40
'?' condition is false
41
Passing null pointer value via 3rd parameter 'OpTy'
896 }
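// A minimal sketch (standalone, not part of this header) of how getUserCost() above is
// normally reached: passes query the public TargetTransformInfo wrapper, which forwards
// to this CRTP implementation. The helper name and loop are illustrative only.
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Function.h"
static int sumUserCosts(const llvm::Function &F,
                        const llvm::TargetTransformInfo &TTI) {
  int Total = 0;
  for (const llvm::BasicBlock &BB : F)
    for (const llvm::Instruction &I : BB)
      Total += TTI.getUserCost(&I); // ends up in the routine above for generic users
  return Total;
}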
897
898 int getInstructionLatency(const Instruction *I) {
899 SmallVector<const Value *, 4> Operands(I->value_op_begin(),
900 I->value_op_end());
901 if (getUserCost(I, Operands) == TTI::TCC_Free)
902 return 0;
903
904 if (isa<LoadInst>(I))
905 return 4;
906
907 Type *DstTy = I->getType();
908
909 // Usually an intrinsic is a simple instruction.
910 // A real function call is much slower.
911 if (auto *CI = dyn_cast<CallInst>(I)) {
912 const Function *F = CI->getCalledFunction();
913 if (!F || static_cast<T *>(this)->isLoweredToCall(F))
914 return 40;
915 // Some intrinsics return a value and a flag, we use the value type
916 // to decide its latency.
917 if (StructType* StructTy = dyn_cast<StructType>(DstTy))
918 DstTy = StructTy->getElementType(0);
919 // Fall through to simple instructions.
920 }
921
922 if (VectorType *VectorTy = dyn_cast<VectorType>(DstTy))
923 DstTy = VectorTy->getElementType();
924 if (DstTy->isFloatingPointTy())
925 return 3;
926
927 return 1;
928 }
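// A minimal sketch (standalone, not part of this header): the heuristic above (0 for
// free users, 4 for loads, 40 for real calls, 3 for floating-point results, 1 otherwise)
// is what the TCK_Latency cost kind of the public interface reports.
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Instruction.h"
static int latencyOf(const llvm::Instruction &I,
                     const llvm::TargetTransformInfo &TTI) {
  return TTI.getInstructionCost(&I, llvm::TargetTransformInfo::TCK_Latency);
}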
929};
930}
931
932#endif
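// A minimal sketch (standalone, not part of this header) of the CRTP mix-in pattern the
// classes above implement: a hypothetical target passes itself as the template argument
// so the base can statically dispatch to whatever hooks the target chooses to shadow.
#include "llvm/Analysis/TargetTransformInfoImpl.h"
#include "llvm/IR/DataLayout.h"
namespace {
class MyTargetTTIImpl final
    : public llvm::TargetTransformInfoImplCRTPBase<MyTargetTTIImpl> {
  using BaseT = llvm::TargetTransformInfoImplCRTPBase<MyTargetTTIImpl>;
public:
  explicit MyTargetTTIImpl(const llvm::DataLayout &DL) : BaseT(DL) {}
  // Shadowing a hook replaces the conservative default above (128 bits).
  unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const { return 256; }
};
} // end anonymous namespace
// (In-tree targets, including the AMDGPU implementation this report analyzes, derive
// via BasicTTIImplBase, which itself builds on this CRTP base.)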

/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/include/llvm/IR/CallSite.h

1//===- CallSite.h - Abstract Call & Invoke instrs ---------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the CallSite class, which is a handy wrapper for code that
10// wants to treat Call, Invoke and CallBr instructions in a generic way. When
11// in non-mutation context (e.g. an analysis) ImmutableCallSite should be used.
12// Finally, when some degree of customization is necessary between these two
13// extremes, CallSiteBase<> can be supplied with fine-tuned parameters.
14//
15// NOTE: These classes are supposed to have "value semantics". So they should be
16// passed by value, not by reference; they should not be "new"ed or "delete"d.
17// They are efficiently copyable, assignable and constructable, with cost
18// equivalent to copying a pointer (notice that they have only a single data
19// member). The internal representation carries a flag which indicates which of
20// the three variants is enclosed. This allows for cheaper checks when various
21// accessors of CallSite are employed.
22//
23//===----------------------------------------------------------------------===//
24
25#ifndef LLVM_IR_CALLSITE_H
26#define LLVM_IR_CALLSITE_H
27
28#include "llvm/ADT/Optional.h"
29#include "llvm/ADT/PointerIntPair.h"
30#include "llvm/ADT/iterator_range.h"
31#include "llvm/IR/Attributes.h"
32#include "llvm/IR/CallingConv.h"
33#include "llvm/IR/Function.h"
34#include "llvm/IR/InstrTypes.h"
35#include "llvm/IR/Instruction.h"
36#include "llvm/IR/Instructions.h"
37#include "llvm/IR/Use.h"
38#include "llvm/IR/User.h"
39#include "llvm/IR/Value.h"
40#include "llvm/Support/Casting.h"
41#include <cassert>
42#include <cstdint>
43#include <iterator>
44
45namespace llvm {
46
47namespace Intrinsic {
48enum ID : unsigned;
49}
50
51template <typename FunTy = const Function, typename BBTy = const BasicBlock,
52 typename ValTy = const Value, typename UserTy = const User,
53 typename UseTy = const Use, typename InstrTy = const Instruction,
54 typename CallTy = const CallInst,
55 typename InvokeTy = const InvokeInst,
56 typename CallBrTy = const CallBrInst,
57 typename IterTy = User::const_op_iterator>
58class CallSiteBase {
59protected:
60 PointerIntPair<InstrTy *, 2, int> I;
61
62 CallSiteBase() = default;
63 CallSiteBase(CallTy *CI) : I(CI, 1) { assert(CI); }
64 CallSiteBase(InvokeTy *II) : I(II, 0) { assert(II); }
65 CallSiteBase(CallBrTy *CBI) : I(CBI, 2) { assert(CBI); }
66 explicit CallSiteBase(ValTy *II) { *this = get(II); }
67
68private:
69 /// This static method is like a constructor. It will create an appropriate
70 /// call site for a Call, Invoke or CallBr instruction, but it can also create
71 /// a null initialized CallSiteBase object for something which is NOT a call
72 /// site.
73 static CallSiteBase get(ValTy *V) {
74 if (InstrTy *II = dyn_cast<InstrTy>(V)) {
75 if (II->getOpcode() == Instruction::Call)
76 return CallSiteBase(static_cast<CallTy*>(II));
77 if (II->getOpcode() == Instruction::Invoke)
78 return CallSiteBase(static_cast<InvokeTy*>(II));
79 if (II->getOpcode() == Instruction::CallBr)
80 return CallSiteBase(static_cast<CallBrTy *>(II));
81 }
82 return CallSiteBase();
83 }
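// Sketch of the behaviour described above (not additional code in this header): for a
// Value that is not a call, invoke or callbr -- say an 'add' instruction -- get() falls
// through to 'return CallSiteBase();', leaving the PointerIntPair null. That null state
// is what 'operator bool()' below reports, and it is why the analyzer path in this
// report takes the false branch at 'if (auto CS = ImmutableCallSite(U))'.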
84
85public:
86 /// Return true if a CallInst is enclosed.
87 bool isCall() const { return I.getInt() == 1; }
88
89 /// Return true if an InvokeInst is enclosed. !I.getInt() may also signify a
90 /// NULL instruction pointer, so check that.
91 bool isInvoke() const { return getInstruction() && I.getInt() == 0; }
92
93 /// Return true if a CallBrInst is enclosed.
94 bool isCallBr() const { return I.getInt() == 2; }
95
96 InstrTy *getInstruction() const { return I.getPointer(); }
97 InstrTy *operator->() const { return I.getPointer(); }
98 explicit operator bool() const { return I.getPointer(); }
13
Calling 'PointerIntPair::getPointer'
24
Returning from 'PointerIntPair::getPointer'
25
Returning zero, which participates in a condition later
99
100 /// Get the basic block containing the call site.
101 BBTy* getParent() const { return getInstruction()->getParent(); }
102
103 /// Return the pointer to function that is being called.
104 ValTy *getCalledValue() const {
105 assert(getInstruction() && "Not a call, invoke or callbr instruction!");
106 return *getCallee();
107 }
108
109 /// Return the function being called if this is a direct call, otherwise
110 /// return null (if it's an indirect call).
111 FunTy *getCalledFunction() const {
112 return dyn_cast<FunTy>(getCalledValue());
113 }
114
115 /// Return true if the callsite is an indirect call.
116 bool isIndirectCall() const {
117 const Value *V = getCalledValue();
118 if (!V)
119 return false;
120 if (isa<FunTy>(V) || isa<Constant>(V))
121 return false;
122 if (const CallBase *CB = dyn_cast<CallBase>(getInstruction()))
123 if (CB->isInlineAsm())
124 return false;
125 return true;
126 }
127
128 /// Set the callee to the specified value. Unlike the function of the same
129 /// name on CallBase, does not modify the type!
130 void setCalledFunction(Value *V) {
131 assert(getInstruction() && "Not a call, callbr, or invoke instruction!");
132 assert(cast<PointerType>(V->getType())->getElementType() ==
133 cast<CallBase>(getInstruction())->getFunctionType() &&
134 "New callee type does not match FunctionType on call");
135 *getCallee() = V;
136 }
137
138 /// Return the intrinsic ID of the intrinsic called by this CallSite,
139 /// or Intrinsic::not_intrinsic if the called function is not an
140 /// intrinsic, or if this CallSite is an indirect call.
141 Intrinsic::ID getIntrinsicID() const {
142 if (auto *F = getCalledFunction())
143 return F->getIntrinsicID();
144 // Don't use Intrinsic::not_intrinsic, as it will require pulling
145 // Intrinsics.h into every header that uses CallSite.
146 return static_cast<Intrinsic::ID>(0);
147 }
148
149 /// Determine whether the passed iterator points to the callee operand's Use.
150 bool isCallee(Value::const_user_iterator UI) const {
151 return isCallee(&UI.getUse());
152 }
153
154 /// Determine whether this Use is the callee operand's Use.
155 bool isCallee(const Use *U) const { return getCallee() == U; }
156
157 /// Determine whether the passed iterator points to an argument operand.
158 bool isArgOperand(Value::const_user_iterator UI) const {
159 return isArgOperand(&UI.getUse());
160 }
161
162 /// Determine whether the passed use points to an argument operand.
163 bool isArgOperand(const Use *U) const {
164 assert(getInstruction() == U->getUser());
165 return arg_begin() <= U && U < arg_end();
166 }
167
168 /// Determine whether the passed iterator points to a bundle operand.
169 bool isBundleOperand(Value::const_user_iterator UI) const {
170 return isBundleOperand(&UI.getUse());
171 }
172
173 /// Determine whether the passed use points to a bundle operand.
174 bool isBundleOperand(const Use *U) const {
175 assert(getInstruction() == U->getUser());
176 if (!hasOperandBundles())
177 return false;
178 unsigned OperandNo = U - (*this)->op_begin();
179 return getBundleOperandsStartIndex() <= OperandNo &&
180 OperandNo < getBundleOperandsEndIndex();
181 }
182
183 /// Determine whether the passed iterator points to a data operand.
184 bool isDataOperand(Value::const_user_iterator UI) const {
185 return isDataOperand(&UI.getUse());
186 }
187
188 /// Determine whether the passed use points to a data operand.
189 bool isDataOperand(const Use *U) const {
190 return data_operands_begin() <= U && U < data_operands_end();
191 }
192
193 ValTy *getArgument(unsigned ArgNo) const {
194 assert(arg_begin() + ArgNo < arg_end() && "Argument # out of range!");
195 return *(arg_begin() + ArgNo);
196 }
197
198 void setArgument(unsigned ArgNo, Value* newVal) {
199 assert(getInstruction() && "Not a call, invoke or callbr instruction!");
200 assert(arg_begin() + ArgNo < arg_end() && "Argument # out of range!");
201 getInstruction()->setOperand(ArgNo, newVal);
202 }
203
204 /// Given a value use iterator, returns the argument that corresponds to it.
205 /// Iterator must actually correspond to an argument.
206 unsigned getArgumentNo(Value::const_user_iterator I) const {
207 return getArgumentNo(&I.getUse());
208 }
209
210 /// Given a use for an argument, get the argument number that corresponds to
211 /// it.
212 unsigned getArgumentNo(const Use *U) const {
213 assert(getInstruction() && "Not a call, invoke or callbr instruction!");
214 assert(isArgOperand(U) && "Argument # out of range!");
215 return U - arg_begin();
216 }
217
218 /// The type of iterator to use when looping over actual arguments at this
219 /// call site.
220 using arg_iterator = IterTy;
221
222 iterator_range<IterTy> args() const {
223 return make_range(arg_begin(), arg_end());
224 }
225 bool arg_empty() const { return arg_end() == arg_begin(); }
226 unsigned arg_size() const { return unsigned(arg_end() - arg_begin()); }
227
228 /// Given a value use iterator, return the data operand corresponding to it.
229 /// Iterator must actually correspond to a data operand.
230 unsigned getDataOperandNo(Value::const_user_iterator UI) const {
231 return getDataOperandNo(&UI.getUse());
232 }
233
234 /// Given a use for a data operand, get the data operand number that
235 /// corresponds to it.
236 unsigned getDataOperandNo(const Use *U) const {
237 assert(getInstruction() && "Not a call, invoke or callbr instruction!");
238 assert(isDataOperand(U) && "Data operand # out of range!");
239 return U - data_operands_begin();
240 }
241
242 /// Type of iterator to use when looping over data operands at this call site
243 /// (see below).
244 using data_operand_iterator = IterTy;
245
246 /// data_operands_begin/data_operands_end - Return iterators iterating over
247 /// the call / invoke / callbr argument list and bundle operands. For invokes,
248 /// this is the set of instruction operands except the invoke target and the
249 /// two successor blocks; for calls this is the set of instruction operands
250 /// except the call target; for callbrs the number of labels to skip must be
251 /// determined first.
252
253 IterTy data_operands_begin() const {
254 assert(getInstruction() && "Not a call or invoke instruction!");
255 return cast<CallBase>(getInstruction())->data_operands_begin();
256 }
257 IterTy data_operands_end() const {
258 assert(getInstruction() && "Not a call or invoke instruction!");
259 return cast<CallBase>(getInstruction())->data_operands_end();
260 }
261 iterator_range<IterTy> data_ops() const {
262 return make_range(data_operands_begin(), data_operands_end());
263 }
264 bool data_operands_empty() const {
265 return data_operands_end() == data_operands_begin();
266 }
267 unsigned data_operands_size() const {
268 return std::distance(data_operands_begin(), data_operands_end());
269 }
270
271 /// Return the type of the instruction that generated this call site.
272 Type *getType() const { return (*this)->getType(); }
273
274 /// Return the caller function for this call site.
275 FunTy *getCaller() const { return (*this)->getParent()->getParent(); }
276
277 /// Tests if this call site must be tail call optimized. Only a CallInst can
278 /// be tail call optimized.
279 bool isMustTailCall() const {
280 return isCall() && cast<CallInst>(getInstruction())->isMustTailCall();
281 }
282
283 /// Tests if this call site is marked as a tail call.
284 bool isTailCall() const {
285 return isCall() && cast<CallInst>(getInstruction())->isTailCall();
286 }
287
288#define CALLSITE_DELEGATE_GETTER(METHOD) \
289 InstrTy *II = getInstruction(); \
290 return isCall() ? cast<CallInst>(II)->METHOD \
291 : isCallBr() ? cast<CallBrInst>(II)->METHOD \
292 : cast<InvokeInst>(II)->METHOD
293
294#define CALLSITE_DELEGATE_SETTER(METHOD) \
295 InstrTy *II = getInstruction(); \
296 if (isCall()) \
297 cast<CallInst>(II)->METHOD; \
298 else if (isCallBr()) \
299 cast<CallBrInst>(II)->METHOD; \
300 else \
301 cast<InvokeInst>(II)->METHOD
302
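// For illustration (not additional code in this header), the CALLSITE_DELEGATE_GETTER
// uses below expand as follows for, e.g., getCallingConv():
//   InstrTy *II = getInstruction();
//   return isCall()   ? cast<CallInst>(II)->getCallingConv()
//        : isCallBr() ? cast<CallBrInst>(II)->getCallingConv()
//                     : cast<InvokeInst>(II)->getCallingConv();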
303 unsigned getNumArgOperands() const {
304 CALLSITE_DELEGATE_GETTER(getNumArgOperands());
305 }
306
307 ValTy *getArgOperand(unsigned i) const {
308 CALLSITE_DELEGATE_GETTER(getArgOperand(i));
309 }
310
311 ValTy *getReturnedArgOperand() const {
312 CALLSITE_DELEGATE_GETTER(getReturnedArgOperand());
313 }
314
315 bool isInlineAsm() const {
316 return cast<CallBase>(getInstruction())->isInlineAsm();
317 }
318
319 /// Get the calling convention of the call.
320 CallingConv::ID getCallingConv() const {
321 CALLSITE_DELEGATE_GETTER(getCallingConv());
322 }
323 /// Set the calling convention of the call.
324 void setCallingConv(CallingConv::ID CC) {
325 CALLSITE_DELEGATE_SETTER(setCallingConv(CC));
326 }
327
328 FunctionType *getFunctionType() const {
329 CALLSITE_DELEGATE_GETTER(getFunctionType());
330 }
331
332 void mutateFunctionType(FunctionType *Ty) const {
333 CALLSITE_DELEGATE_SETTER(mutateFunctionType(Ty));
334 }
335
336 /// Get the parameter attributes of the call.
337 AttributeList getAttributes() const {
338 CALLSITE_DELEGATE_GETTER(getAttributes());
339 }
340 /// Set the parameter attributes of the call.
341 void setAttributes(AttributeList PAL) {
342 CALLSITE_DELEGATE_SETTER(setAttributes(PAL));
343 }
344
345 void addAttribute(unsigned i, Attribute::AttrKind Kind) {
346 CALLSITE_DELEGATE_SETTER(addAttribute(i, Kind));
347 }
348
349 void addAttribute(unsigned i, Attribute Attr) {
350 CALLSITE_DELEGATE_SETTER(addAttribute(i, Attr));
351 }
352
353 void addParamAttr(unsigned ArgNo, Attribute::AttrKind Kind) {
354 CALLSITE_DELEGATE_SETTER(addParamAttr(ArgNo, Kind));
355 }
356
357 void removeAttribute(unsigned i, Attribute::AttrKind Kind) {
358 CALLSITE_DELEGATE_SETTER(removeAttribute(i, Kind));
359 }
360
361 void removeAttribute(unsigned i, StringRef Kind) {
362 CALLSITE_DELEGATE_SETTER(removeAttribute(i, Kind));
363 }
364
365 void removeParamAttr(unsigned ArgNo, Attribute::AttrKind Kind) {
366 CALLSITE_DELEGATE_SETTER(removeParamAttr(ArgNo, Kind));
367 }
368
369 /// Return true if this function has the given attribute.
370 bool hasFnAttr(Attribute::AttrKind Kind) const {
371 CALLSITE_DELEGATE_GETTER(hasFnAttr(Kind));
372 }
373
374 /// Return true if this function has the given attribute.
375 bool hasFnAttr(StringRef Kind) const {
376 CALLSITE_DELEGATE_GETTER(hasFnAttr(Kind));
377 }
378
379 /// Return true if this return value has the given attribute.
380 bool hasRetAttr(Attribute::AttrKind Kind) const {
381 CALLSITE_DELEGATE_GETTER(hasRetAttr(Kind));
382 }
383
384 /// Return true if the call or the callee has the given attribute.
385 bool paramHasAttr(unsigned ArgNo, Attribute::AttrKind Kind) const {
386 CALLSITE_DELEGATE_GETTER(paramHasAttr(ArgNo, Kind));
387 }
388
389 Attribute getAttribute(unsigned i, Attribute::AttrKind Kind) const {
390 CALLSITE_DELEGATE_GETTER(getAttribute(i, Kind));
391 }
392
393 Attribute getAttribute(unsigned i, StringRef Kind) const {
394 CALLSITE_DELEGATE_GETTER(getAttribute(i, Kind));
395 }
396
397 /// Return true if the data operand at index \p i directly or indirectly has
398 /// the attribute \p A.
399 ///
400 /// Normal call, invoke or callbr arguments have per operand attributes, as
401 /// specified in the attribute set attached to this instruction, while operand
402 /// bundle operands may have some attributes implied by the type of its
403 /// containing operand bundle.
404 bool dataOperandHasImpliedAttr(unsigned i, Attribute::AttrKind Kind) const {
405 CALLSITE_DELEGATE_GETTER(dataOperandHasImpliedAttr(i, Kind));
406 }
407
408 /// Extract the alignment of the return value.
409 unsigned getRetAlignment() const {
410 CALLSITE_DELEGATE_GETTER(getRetAlignment());
411 }
412
413 /// Extract the alignment for a call or parameter (0=unknown).
414 unsigned getParamAlignment(unsigned ArgNo) const {
415 CALLSITE_DELEGATE_GETTER(getParamAlignment(ArgNo));
416 }
417
418 /// Extract the byval type for a call or parameter (nullptr=unknown).
419 Type *getParamByValType(unsigned ArgNo) const {
420 CALLSITE_DELEGATE_GETTER(getParamByValType(ArgNo));
421 }
422
423 /// Extract the number of dereferenceable bytes for a call or parameter
424 /// (0=unknown).
425 uint64_t getDereferenceableBytes(unsigned i) const {
426 CALLSITE_DELEGATE_GETTER(getDereferenceableBytes(i));
427 }
428
429 /// Extract the number of dereferenceable_or_null bytes for a call or
430 /// parameter (0=unknown).
431 uint64_t getDereferenceableOrNullBytes(unsigned i) const {
432 CALLSITE_DELEGATE_GETTER(getDereferenceableOrNullBytes(i));
433 }
434
435 /// Determine if the return value is marked with NoAlias attribute.
436 bool returnDoesNotAlias() const {
437 CALLSITE_DELEGATE_GETTER(returnDoesNotAlias());
438 }
439
440 /// Return true if the call should not be treated as a call to a builtin.
441 bool isNoBuiltin() const {
442 CALLSITE_DELEGATE_GETTER(isNoBuiltin());
443 }
444
445 /// Return true if the call requires strict floating point semantics.
446 bool isStrictFP() const {
447 CALLSITE_DELEGATE_GETTER(isStrictFP());
448 }
449
450 /// Return true if the call should not be inlined.
451 bool isNoInline() const {
452 CALLSITE_DELEGATE_GETTER(isNoInline());
453 }
454 void setIsNoInline(bool Value = true) {
455 CALLSITE_DELEGATE_SETTER(setIsNoInline(Value));
456 }
457
458 /// Determine if the call does not access memory.
459 bool doesNotAccessMemory() const {
460 CALLSITE_DELEGATE_GETTER(doesNotAccessMemory());
461 }
462 void setDoesNotAccessMemory() {
463 CALLSITE_DELEGATE_SETTER(setDoesNotAccessMemory());
464 }
465
466 /// Determine if the call does not access or only reads memory.
467 bool onlyReadsMemory() const {
468 CALLSITE_DELEGATE_GETTER(onlyReadsMemory());
469 }
470 void setOnlyReadsMemory() {
471 CALLSITE_DELEGATE_SETTER(setOnlyReadsMemory());
472 }
473
474 /// Determine if the call does not access or only writes memory.
475 bool doesNotReadMemory() const {
476 CALLSITE_DELEGATE_GETTER(doesNotReadMemory());
477 }
478 void setDoesNotReadMemory() {
479 CALLSITE_DELEGATE_SETTER(setDoesNotReadMemory());
480 }
481
482 /// Determine if the call can access memory only using pointers based
483 /// on its arguments.
484 bool onlyAccessesArgMemory() const {
485 CALLSITE_DELEGATE_GETTER(onlyAccessesArgMemory());
486 }
487 void setOnlyAccessesArgMemory() {
488 CALLSITE_DELEGATE_SETTER(setOnlyAccessesArgMemory());
489 }
490
491 /// Determine if the function may only access memory that is
492 /// inaccessible from the IR.
493 bool onlyAccessesInaccessibleMemory() const {
494 CALLSITE_DELEGATE_GETTER(onlyAccessesInaccessibleMemory());
495 }
496 void setOnlyAccessesInaccessibleMemory() {
497 CALLSITE_DELEGATE_SETTER(setOnlyAccessesInaccessibleMemory());
498 }
499
500 /// Determine if the function may only access memory that is
501 /// either inaccessible from the IR or pointed to by its arguments.
502 bool onlyAccessesInaccessibleMemOrArgMem() const {
503 CALLSITE_DELEGATE_GETTER(onlyAccessesInaccessibleMemOrArgMem());
504 }
505 void setOnlyAccessesInaccessibleMemOrArgMem() {
506 CALLSITE_DELEGATE_SETTER(setOnlyAccessesInaccessibleMemOrArgMem());
507 }
508
509 /// Determine if the call cannot return.
510 bool doesNotReturn() const {
511 CALLSITE_DELEGATE_GETTER(doesNotReturn());
512 }
513 void setDoesNotReturn() {
514 CALLSITE_DELEGATE_SETTER(setDoesNotReturn());
515 }
516
517 /// Determine if the call cannot unwind.
518 bool doesNotThrow() const {
519 CALLSITE_DELEGATE_GETTER(doesNotThrow());
520 }
521 void setDoesNotThrow() {
522 CALLSITE_DELEGATE_SETTER(setDoesNotThrow());
523 }
524
525 /// Determine if the call can be duplicated.
526 bool cannotDuplicate() const {
527 CALLSITE_DELEGATE_GETTER(cannotDuplicate());
528 }
529 void setCannotDuplicate() {
530 CALLSITE_DELEGATE_SETTER(setCannotDuplicate());
531 }
532
533 /// Determine if the call is convergent.
534 bool isConvergent() const {
535 CALLSITE_DELEGATE_GETTER(isConvergent());
536 }
537 void setConvergent() {
538 CALLSITE_DELEGATE_SETTER(setConvergent());
539 }
540 void setNotConvergent() {
541 CALLSITE_DELEGATE_SETTER(setNotConvergent());
542 }
543
544 unsigned getNumOperandBundles() const {
545 CALLSITE_DELEGATE_GETTER(getNumOperandBundles());
546 }
547
548 bool hasOperandBundles() const {
549 CALLSITE_DELEGATE_GETTER(hasOperandBundles());
550 }
551
552 unsigned getBundleOperandsStartIndex() const {
553 CALLSITE_DELEGATE_GETTER(getBundleOperandsStartIndex());
554 }
555
556 unsigned getBundleOperandsEndIndex() const {
557 CALLSITE_DELEGATE_GETTER(getBundleOperandsEndIndex());
558 }
559
560 unsigned getNumTotalBundleOperands() const {
561 CALLSITE_DELEGATE_GETTER(getNumTotalBundleOperands());
562 }
563
564 OperandBundleUse getOperandBundleAt(unsigned Index) const {
565 CALLSITE_DELEGATE_GETTER(getOperandBundleAt(Index));
566 }
567
568 Optional<OperandBundleUse> getOperandBundle(StringRef Name) const {
569 CALLSITE_DELEGATE_GETTER(getOperandBundle(Name));
570 }
571
572 Optional<OperandBundleUse> getOperandBundle(uint32_t ID) const {
573 CALLSITE_DELEGATE_GETTER(getOperandBundle(ID));
574 }
575
576 unsigned countOperandBundlesOfType(uint32_t ID) const {
577 CALLSITE_DELEGATE_GETTER(countOperandBundlesOfType(ID));
578 }
579
580 bool isBundleOperand(unsigned Idx) const {
581 CALLSITE_DELEGATE_GETTER(isBundleOperand(Idx));
582 }
583
584 IterTy arg_begin() const {
585 CALLSITE_DELEGATE_GETTER(arg_begin());
586 }
587
588 IterTy arg_end() const {
589 CALLSITE_DELEGATE_GETTER(arg_end());
590 }
591
592#undef CALLSITE_DELEGATE_GETTER
593#undef CALLSITE_DELEGATE_SETTER
594
595 void getOperandBundlesAsDefs(SmallVectorImpl<OperandBundleDef> &Defs) const {
596 // Since this is actually a getter that "looks like" a setter, don't use the
597 // above macros to avoid confusion.
598 cast<CallBase>(getInstruction())->getOperandBundlesAsDefs(Defs);
599 }
600
601 /// Determine whether this data operand is not captured.
602 bool doesNotCapture(unsigned OpNo) const {
603 return dataOperandHasImpliedAttr(OpNo + 1, Attribute::NoCapture);
604 }
605
606 /// Determine whether this argument is passed by value.
607 bool isByValArgument(unsigned ArgNo) const {
608 return paramHasAttr(ArgNo, Attribute::ByVal);
609 }
610
611 /// Determine whether this argument is passed in an alloca.
612 bool isInAllocaArgument(unsigned ArgNo) const {
613 return paramHasAttr(ArgNo, Attribute::InAlloca);
614 }
615
616 /// Determine whether this argument is passed by value or in an alloca.
617 bool isByValOrInAllocaArgument(unsigned ArgNo) const {
618 return paramHasAttr(ArgNo, Attribute::ByVal) ||
619 paramHasAttr(ArgNo, Attribute::InAlloca);
620 }
621
622 /// Determine if there is an inalloca argument. Only the last argument can
623 /// have the inalloca attribute.
624 bool hasInAllocaArgument() const {
625 return !arg_empty() && paramHasAttr(arg_size() - 1, Attribute::InAlloca);
626 }
627
628 bool doesNotAccessMemory(unsigned OpNo) const {
629 return dataOperandHasImpliedAttr(OpNo + 1, Attribute::ReadNone);
630 }
631
632 bool onlyReadsMemory(unsigned OpNo) const {
633 return dataOperandHasImpliedAttr(OpNo + 1, Attribute::ReadOnly) ||
634 dataOperandHasImpliedAttr(OpNo + 1, Attribute::ReadNone);
635 }
636
637 bool doesNotReadMemory(unsigned OpNo) const {
638 return dataOperandHasImpliedAttr(OpNo + 1, Attribute::WriteOnly) ||
639 dataOperandHasImpliedAttr(OpNo + 1, Attribute::ReadNone);
640 }
641
642 /// Return true if the return value is known to be not null.
643 /// This may be because it has the nonnull attribute, or because at least
644 /// one byte is dereferenceable and the pointer is in addrspace(0).
645 bool isReturnNonNull() const {
646 if (hasRetAttr(Attribute::NonNull))
647 return true;
648 else if (getDereferenceableBytes(AttributeList::ReturnIndex) > 0 &&
649 !NullPointerIsDefined(getCaller(),
650 getType()->getPointerAddressSpace()))
651 return true;
652
653 return false;
654 }
655
656 /// Returns true if this CallSite passes the given Value* as an argument to
657 /// the called function.
658 bool hasArgument(const Value *Arg) const {
659 for (arg_iterator AI = this->arg_begin(), E = this->arg_end(); AI != E;
660 ++AI)
661 if (AI->get() == Arg)
662 return true;
663 return false;
664 }
665
666private:
667 IterTy getCallee() const {
668 return cast<CallBase>(getInstruction())->op_end() - 1;
669 }
670};
671
672class CallSite : public CallSiteBase<Function, BasicBlock, Value, User, Use,
673 Instruction, CallInst, InvokeInst,
674 CallBrInst, User::op_iterator> {
675public:
676 CallSite() = default;
677 CallSite(CallSiteBase B) : CallSiteBase(B) {}
678 CallSite(CallInst *CI) : CallSiteBase(CI) {}
679 CallSite(InvokeInst *II) : CallSiteBase(II) {}
680 CallSite(CallBrInst *CBI) : CallSiteBase(CBI) {}
681 explicit CallSite(Instruction *II) : CallSiteBase(II) {}
682 explicit CallSite(Value *V) : CallSiteBase(V) {}
683
684 bool operator==(const CallSite &CS) const { return I == CS.I; }
685 bool operator!=(const CallSite &CS) const { return I != CS.I; }
686 bool operator<(const CallSite &CS) const {
687 return getInstruction() < CS.getInstruction();
688 }
689
690private:
691 friend struct DenseMapInfo<CallSite>;
692
693 User::op_iterator getCallee() const;
694};
695
696/// AbstractCallSite
697///
698 /// An abstract call site is a wrapper that allows treating direct,
699/// indirect, and callback calls the same. If an abstract call site
700/// represents a direct or indirect call site it behaves like a stripped
701/// down version of a normal call site object. The abstract call site can
702/// also represent a callback call, thus the fact that the initially
703/// called function (=broker) may invoke a third one (=callback callee).
704/// In this case, the abstract call site hides the middle man, hence the
705/// broker function. The result is a representation of the callback call,
706/// inside the broker, but in the context of the original call to the broker.
707///
708/// There are up to three functions involved when we talk about callback call
709/// sites. The caller (1), which invokes the broker function. The broker
710/// function (2), that will invoke the callee zero or more times. And finally
711/// the callee (3), which is the target of the callback call.
712///
713/// The abstract call site will handle the mapping from parameters to arguments
714/// depending on the semantic of the broker function. However, it is important
715/// to note that the mapping is often partial. Thus, some arguments of the
716/// call/invoke instruction are mapped to parameters of the callee while others
717/// are not.
718class AbstractCallSite {
719public:
720
721 /// The encoding of a callback with regards to the underlying instruction.
722 struct CallbackInfo {
723
724 /// For direct/indirect calls the parameter encoding is empty. If it is not,
725 /// the abstract call site represents a callback. In that case, the first
726 /// element of the encoding vector represents which argument of the call
727 /// site CS is the callback callee. The remaining elements map parameters
728 /// (identified by their position) to the arguments that will be passed
729 /// through (also identified by position but in the call site instruction).
730 ///
731 /// NOTE that we use LLVM argument numbers (starting at 0) and not
732 /// clang/source argument numbers (starting at 1). The -1 entries represent
733 /// unknown values that are passed to the callee.
734 using ParameterEncodingTy = SmallVector<int, 0>;
735 ParameterEncodingTy ParameterEncoding;
736
737 };
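// A hypothetical example (not from this header) of the encoding described above: for a
// broker call 'call void @broker(i8* %payload, void (i8*)* %cb)' whose callback
// metadata says the broker invokes %cb(%payload), ParameterEncoding would be {1, 0} --
// element 0 names call operand #1 as the callback callee, element 1 maps callee
// parameter #0 to call operand #0 (%payload). A -1 entry instead would mean that
// parameter receives an unknown value.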
738
739private:
740
741 /// The underlying call site:
742 /// caller -> callee, if this is a direct or indirect call site
743 /// caller -> broker function, if this is a callback call site
744 CallSite CS;
745
746 /// The encoding of a callback with regards to the underlying instruction.
747 CallbackInfo CI;
748
749public:
750 /// Sole constructor for abstract call sites (ACS).
751 ///
752 /// An abstract call site can only be constructed through a llvm::Use because
753 /// each operand (=use) of an instruction could potentially be a different
754 /// abstract call site. Furthermore, even if the value of the llvm::Use is the
755 /// same, and the user is as well, the abstract call sites might not be.
756 ///
757 /// If a use is not associated with an abstract call site the constructed ACS
758 /// will evaluate to false if converted to a boolean.
759 ///
760 /// If the use is the callee use of a call or invoke instruction, the
761 /// constructed abstract call site will behave as a llvm::CallSite would.
762 ///
763 /// If the use is not a callee use of a call or invoke instruction, the
764 /// callback metadata is used to determine the argument <-> parameter mapping
765 /// as well as the callee of the abstract call site.
766 AbstractCallSite(const Use *U);
767
768 /// Conversion operator to conveniently check for a valid/initialized ACS.
769 explicit operator bool() const { return (bool)CS; }
770
771 /// Return the underlying instruction.
772 Instruction *getInstruction() const { return CS.getInstruction(); }
773
774 /// Return the call site abstraction for the underlying instruction.
775 CallSite getCallSite() const { return CS; }
776
777 /// Return true if this ACS represents a direct call.
778 bool isDirectCall() const {
779 return !isCallbackCall() && !CS.isIndirectCall();
780 }
781
782 /// Return true if this ACS represents an indirect call.
783 bool isIndirectCall() const {
784 return !isCallbackCall() && CS.isIndirectCall();
785 }
786
787 /// Return true if this ACS represents a callback call.
788 bool isCallbackCall() const {
789 // For a callback call site the callee is ALWAYS stored first in the
790 // transitive values vector. Thus, a non-empty vector indicates a callback.
791 return !CI.ParameterEncoding.empty();
792 }
793
794 /// Return true if @p UI is the use that defines the callee of this ACS.
795 bool isCallee(Value::const_user_iterator UI) const {
796 return isCallee(&UI.getUse());
797 }
798
799 /// Return true if @p U is the use that defines the callee of this ACS.
800 bool isCallee(const Use *U) const {
801 if (isDirectCall())
802 return CS.isCallee(U);
803
804 assert(!CI.ParameterEncoding.empty() &&
805 "Callback without parameter encoding!");
806
807 return (int)CS.getArgumentNo(U) == CI.ParameterEncoding[0];
808 }
809
810 /// Return the number of parameters of the callee.
811 unsigned getNumArgOperands() const {
812 if (isDirectCall())
813 return CS.getNumArgOperands();
814 // Subtract 1 for the callee encoding.
815 return CI.ParameterEncoding.size() - 1;
816 }
817
818 /// Return the operand index of the underlying instruction associated with @p
819 /// Arg.
820 int getCallArgOperandNo(Argument &Arg) const {
821 return getCallArgOperandNo(Arg.getArgNo());
822 }
823
824 /// Return the operand index of the underlying instruction associated with
825 /// the function parameter number @p ArgNo or -1 if there is none.
826 int getCallArgOperandNo(unsigned ArgNo) const {
827 if (isDirectCall())
828 return ArgNo;
829 // Add 1 for the callee encoding.
830 return CI.ParameterEncoding[ArgNo + 1];
831 }
832
833 /// Return the operand of the underlying instruction associated with @p Arg.
834 Value *getCallArgOperand(Argument &Arg) const {
835 return getCallArgOperand(Arg.getArgNo());
836 }
837
838 /// Return the operand of the underlying instruction associated with the
839 /// function parameter number @p ArgNo or nullptr if there is none.
840 Value *getCallArgOperand(unsigned ArgNo) const {
841 if (isDirectCall())
842 return CS.getArgOperand(ArgNo);
843 // Add 1 for the callee encoding.
844 return CI.ParameterEncoding[ArgNo + 1] >= 0
845 ? CS.getArgOperand(CI.ParameterEncoding[ArgNo + 1])
846 : nullptr;
847 }
848
849 /// Return the operand index of the underlying instruction associated with the
850 /// callee of this ACS. Only valid for callback calls!
851 int getCallArgOperandNoForCallee() const {
852 assert(isCallbackCall());
853 assert(CI.ParameterEncoding.size() && CI.ParameterEncoding[0] >= 0);
854 return CI.ParameterEncoding[0];
855 }
856
857 /// Return the use of the callee value in the underlying instruction. Only
858 /// valid for callback calls!
859 const Use &getCalleeUseForCallback() const {
860 int CalleeArgIdx = getCallArgOperandNoForCallee();
861 assert(CalleeArgIdx >= 0 &&
862 unsigned(CalleeArgIdx) < getInstruction()->getNumOperands());
863 return getInstruction()->getOperandUse(CalleeArgIdx);
864 }
865
866 /// Return the pointer to function that is being called.
867 Value *getCalledValue() const {
868 if (isDirectCall())
869 return CS.getCalledValue();
870 return CS.getArgOperand(getCallArgOperandNoForCallee());
871 }
872
873 /// Return the function being called if this is a direct call, otherwise
874 /// return null (if it's an indirect call).
875 Function *getCalledFunction() const {
876 Value *V = getCalledValue();
877 return V ? dyn_cast<Function>(V->stripPointerCasts()) : nullptr;
878 }
879};
880
881template <> struct DenseMapInfo<CallSite> {
882 using BaseInfo = DenseMapInfo<decltype(CallSite::I)>;
883
884 static CallSite getEmptyKey() {
885 CallSite CS;
886 CS.I = BaseInfo::getEmptyKey();
887 return CS;
888 }
889
890 static CallSite getTombstoneKey() {
891 CallSite CS;
892 CS.I = BaseInfo::getTombstoneKey();
893 return CS;
894 }
895
896 static unsigned getHashValue(const CallSite &CS) {
897 return BaseInfo::getHashValue(CS.I);
898 }
899
900 static bool isEqual(const CallSite &LHS, const CallSite &RHS) {
901 return LHS == RHS;
902 }
903};
904
905/// Establish a view to a call site for examination.
906class ImmutableCallSite : public CallSiteBase<> {
907public:
908 ImmutableCallSite() = default;
909 ImmutableCallSite(const CallInst *CI) : CallSiteBase(CI) {}
910 ImmutableCallSite(const InvokeInst *II) : CallSiteBase(II) {}
911 ImmutableCallSite(const CallBrInst *CBI) : CallSiteBase(CBI) {}
912 explicit ImmutableCallSite(const Instruction *II) : CallSiteBase(II) {}
913 explicit ImmutableCallSite(const Value *V) : CallSiteBase(V) {}
914 ImmutableCallSite(CallSite CS) : CallSiteBase(CS.getInstruction()) {}
915};
916
917} // end namespace llvm
918
919#endif // LLVM_IR_CALLSITE_H

/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/include/llvm/ADT/PointerIntPair.h

1//===- llvm/ADT/PointerIntPair.h - Pair for pointer and int -----*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the PointerIntPair class.
10//
11//===----------------------------------------------------------------------===//
12
13#ifndef LLVM_ADT_POINTERINTPAIR_H
14#define LLVM_ADT_POINTERINTPAIR_H
15
16#include "llvm/Support/Compiler.h"
17#include "llvm/Support/PointerLikeTypeTraits.h"
18#include "llvm/Support/type_traits.h"
19#include <cassert>
20#include <cstdint>
21#include <limits>
22
23namespace llvm {
24
25template <typename T> struct DenseMapInfo;
26template <typename PointerT, unsigned IntBits, typename PtrTraits>
27struct PointerIntPairInfo;
28
29/// PointerIntPair - This class implements a pair of a pointer and small
30/// integer. It is designed to represent this in the space required by one
31/// pointer by bitmangling the integer into the low part of the pointer. This
32/// can only be done for small integers: typically up to 3 bits, but it depends
33/// on the number of bits available according to PointerLikeTypeTraits for the
34/// type.
35///
36/// Note that PointerIntPair always puts the IntVal part in the highest bits
37/// possible. For example, PointerIntPair<void*, 1, bool> will put the bit for
38/// the bool into bit #2, not bit #0, which allows the low two bits to be used
39/// for something else. For example, this allows:
40/// PointerIntPair<PointerIntPair<void*, 1, bool>, 1, bool>
41/// ... and the two bools will land in different bits.
42template <typename PointerTy, unsigned IntBits, typename IntType = unsigned,
43 typename PtrTraits = PointerLikeTypeTraits<PointerTy>,
44 typename Info = PointerIntPairInfo<PointerTy, IntBits, PtrTraits>>
45class PointerIntPair {
46 // Used by MSVC visualizer and generally helpful for debugging/visualizing.
47 using InfoTy = Info;
48 intptr_t Value = 0;
49
50public:
51 constexpr PointerIntPair() = default;
52
53 PointerIntPair(PointerTy PtrVal, IntType IntVal) {
54 setPointerAndInt(PtrVal, IntVal);
55 }
56
57 explicit PointerIntPair(PointerTy PtrVal) { initWithPointer(PtrVal); }
58
59 PointerTy getPointer() const { return Info::getPointer(Value); }
  [14] Calling 'PointerIntPairInfo::getPointer'
  [22] Returning from 'PointerIntPairInfo::getPointer'
  [23] Returning null pointer, which participates in a condition later
60
61 IntType getInt() const { return (IntType)Info::getInt(Value); }
62
63 void setPointer(PointerTy PtrVal) LLVM_LVALUE_FUNCTION {
64 Value = Info::updatePointer(Value, PtrVal);
65 }
66
67 void setInt(IntType IntVal) LLVM_LVALUE_FUNCTION {
68 Value = Info::updateInt(Value, static_cast<intptr_t>(IntVal));
69 }
70
71 void initWithPointer(PointerTy PtrVal) LLVM_LVALUE_FUNCTION {
72 Value = Info::updatePointer(0, PtrVal);
73 }
74
75 void setPointerAndInt(PointerTy PtrVal, IntType IntVal) LLVM_LVALUE_FUNCTION {
76 Value = Info::updateInt(Info::updatePointer(0, PtrVal),
77 static_cast<intptr_t>(IntVal));
78 }
79
80 PointerTy const *getAddrOfPointer() const {
81 return const_cast<PointerIntPair *>(this)->getAddrOfPointer();
82 }
83
84 PointerTy *getAddrOfPointer() {
85 assert(Value == reinterpret_cast<intptr_t>(getPointer()) &&
86 "Can only return the address if IntBits is cleared and "
87 "PtrTraits doesn't change the pointer");
88 return reinterpret_cast<PointerTy *>(&Value);
89 }
90
91 void *getOpaqueValue() const { return reinterpret_cast<void *>(Value); }
92
93 void setFromOpaqueValue(void *Val) LLVM_LVALUE_FUNCTION {
94 Value = reinterpret_cast<intptr_t>(Val);
95 }
96
97 static PointerIntPair getFromOpaqueValue(void *V) {
98 PointerIntPair P;
99 P.setFromOpaqueValue(V);
100 return P;
101 }
102
103 // Allow PointerIntPairs to be created from const void * if and only if the
104 // pointer type could be created from a const void *.
105 static PointerIntPair getFromOpaqueValue(const void *V) {
106 (void)PtrTraits::getFromVoidPointer(V);
107 return getFromOpaqueValue(const_cast<void *>(V));
108 }
109
110 bool operator==(const PointerIntPair &RHS) const {
111 return Value == RHS.Value;
112 }
113
114 bool operator!=(const PointerIntPair &RHS) const {
115 return Value != RHS.Value;
116 }
117
118 bool operator<(const PointerIntPair &RHS) const { return Value < RHS.Value; }
119 bool operator>(const PointerIntPair &RHS) const { return Value > RHS.Value; }
120
121 bool operator<=(const PointerIntPair &RHS) const {
122 return Value <= RHS.Value;
123 }
124
125 bool operator>=(const PointerIntPair &RHS) const {
126 return Value >= RHS.Value;
127 }
128};
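
As a quick illustration of the interface above, here is a minimal usage sketch (not part of the analyzed source; the Node type is made up for the example):

    #include "llvm/ADT/PointerIntPair.h"

    struct Node { int Data; };   // alignof(Node) == 4, so 2 low bits are free

    void tagPointer(Node *N) {
      // The bool is folded into a spare low bit of the pointer word.
      llvm::PointerIntPair<Node *, 1, bool> P(N, /*IntVal=*/true);
      Node *Ptr = P.getPointer();  // N, with the tag bits masked off
      bool Flag = P.getInt();      // true
      P.setInt(false);             // updates only the integer bits
      (void)Ptr; (void)Flag;
    }
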
129
130// Specialize is_trivially_copyable to avoid limitation of llvm::is_trivially_copyable
131// when compiled with gcc 4.9.
132template <typename PointerTy, unsigned IntBits, typename IntType,
133 typename PtrTraits,
134 typename Info>
135struct is_trivially_copyable<PointerIntPair<PointerTy, IntBits, IntType, PtrTraits, Info>> : std::true_type {
136#ifdef HAVE_STD_IS_TRIVIALLY_COPYABLE
137 static_assert(std::is_trivially_copyable<PointerIntPair<PointerTy, IntBits, IntType, PtrTraits, Info>>::value,
138 "inconsistent behavior between llvm:: and std:: implementation of is_trivially_copyable");
139#endif
140};
141
142
143template <typename PointerT, unsigned IntBits, typename PtrTraits>
144struct PointerIntPairInfo {
145 static_assert(PtrTraits::NumLowBitsAvailable <
146 std::numeric_limits<uintptr_t>::digits,
147 "cannot use a pointer type that has all bits free");
148 static_assert(IntBits <= PtrTraits::NumLowBitsAvailable,
149 "PointerIntPair with integer size too large for pointer");
150 enum : uintptr_t {
151 /// PointerBitMask - The bits that come from the pointer.
152 PointerBitMask =
153 ~(uintptr_t)(((intptr_t)1 << PtrTraits::NumLowBitsAvailable) - 1),
154
155 /// IntShift - The number of low bits that we reserve for other uses, and
156 /// keep zero.
157 IntShift = (uintptr_t)PtrTraits::NumLowBitsAvailable - IntBits,
158
159 /// IntMask - This is the unshifted mask for valid bits of the int type.
160 IntMask = (uintptr_t)(((intptr_t)1 << IntBits) - 1),
161
162 // ShiftedIntMask - This is the bits for the integer shifted in place.
163 ShiftedIntMask = (uintptr_t)(IntMask << IntShift)
164 };
165
166 static PointerT getPointer(intptr_t Value) {
167 return PtrTraits::getFromVoidPointer(
  [15] Calling 'PointerLikeTypeTraits::getFromVoidPointer'
  [20] Returning from 'PointerLikeTypeTraits::getFromVoidPointer'
  [21] Returning null pointer, which participates in a condition later
168 reinterpret_cast<void *>(Value & PointerBitMask));
169 }
170
171 static intptr_t getInt(intptr_t Value) {
172 return (Value >> IntShift) & IntMask;
173 }
174
175 static intptr_t updatePointer(intptr_t OrigValue, PointerT Ptr) {
176 intptr_t PtrWord =
177 reinterpret_cast<intptr_t>(PtrTraits::getAsVoidPointer(Ptr));
178 assert((PtrWord & ~PointerBitMask) == 0 &&
179 "Pointer is not sufficiently aligned");
180 // Preserve all low bits, just update the pointer.
181 return PtrWord | (OrigValue & ~PointerBitMask);
182 }
183
184 static intptr_t updateInt(intptr_t OrigValue, intptr_t Int) {
185 intptr_t IntWord = static_cast<intptr_t>(Int);
186 assert((IntWord & ~IntMask) == 0 && "Integer too large for field");
187
188 // Preserve all bits other than the ones we are updating.
189 return (OrigValue & ~ShiftedIntMask) | IntWord << IntShift;
190 }
191};
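
Plugging a concrete instantiation into the masks above may help: for a pointee aligned to 4 bytes and IntBits == 1, NumLowBitsAvailable is 2, so IntShift is 1 and the integer occupies bit #1 while bit #0 stays clear. A compile-time sketch (not from the analyzed source; Node is made up):

    #include "llvm/ADT/PointerIntPair.h"

    struct Node { int Data; };                           // alignof(Node) == 4
    using Info = llvm::PointerIntPairInfo<
        Node *, 1, llvm::PointerLikeTypeTraits<Node *>>;

    static_assert(Info::IntShift == 1, "int stored in bit #1");
    static_assert(Info::IntMask == 1, "one value bit");
    static_assert(Info::ShiftedIntMask == 2, "bit #1 in place");
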
192
193// Provide specialization of DenseMapInfo for PointerIntPair.
194template <typename PointerTy, unsigned IntBits, typename IntType>
195struct DenseMapInfo<PointerIntPair<PointerTy, IntBits, IntType>> {
196 using Ty = PointerIntPair<PointerTy, IntBits, IntType>;
197
198 static Ty getEmptyKey() {
199 uintptr_t Val = static_cast<uintptr_t>(-1);
200 Val <<= PointerLikeTypeTraits<Ty>::NumLowBitsAvailable;
201 return Ty::getFromOpaqueValue(reinterpret_cast<void *>(Val));
202 }
203
204 static Ty getTombstoneKey() {
205 uintptr_t Val = static_cast<uintptr_t>(-2);
206 Val <<= PointerLikeTypeTraits<PointerTy>::NumLowBitsAvailable;
207 return Ty::getFromOpaqueValue(reinterpret_cast<void *>(Val));
208 }
209
210 static unsigned getHashValue(Ty V) {
211 uintptr_t IV = reinterpret_cast<uintptr_t>(V.getOpaqueValue());
212 return unsigned(IV) ^ unsigned(IV >> 9);
213 }
214
215 static bool isEqual(const Ty &LHS, const Ty &RHS) { return LHS == RHS; }
216};
217
218// Teach SmallPtrSet that PointerIntPair is "basically a pointer".
219template <typename PointerTy, unsigned IntBits, typename IntType,
220 typename PtrTraits>
221struct PointerLikeTypeTraits<
222 PointerIntPair<PointerTy, IntBits, IntType, PtrTraits>> {
223 static inline void *
224 getAsVoidPointer(const PointerIntPair<PointerTy, IntBits, IntType> &P) {
225 return P.getOpaqueValue();
226 }
227
228 static inline PointerIntPair<PointerTy, IntBits, IntType>
229 getFromVoidPointer(void *P) {
230 return PointerIntPair<PointerTy, IntBits, IntType>::getFromOpaqueValue(P);
231 }
232
233 static inline PointerIntPair<PointerTy, IntBits, IntType>
234 getFromVoidPointer(const void *P) {
235 return PointerIntPair<PointerTy, IntBits, IntType>::getFromOpaqueValue(P);
236 }
237
238 enum { NumLowBitsAvailable = PtrTraits::NumLowBitsAvailable - IntBits };
239};
240
241} // end namespace llvm
242
243#endif // LLVM_ADT_POINTERINTPAIR_H

/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/include/llvm/Support/PointerLikeTypeTraits.h

1//===- llvm/Support/PointerLikeTypeTraits.h - Pointer Traits ----*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the PointerLikeTypeTraits class. This allows data
10// structures to reason about pointers and other things that are pointer sized.
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_SUPPORT_POINTERLIKETYPETRAITS_H
15#define LLVM_SUPPORT_POINTERLIKETYPETRAITS_H
16
17#include "llvm/Support/DataTypes.h"
18#include <assert.h>
19#include <type_traits>
20
21namespace llvm {
22
23/// A traits type that is used to handle pointer types and things that are just
24/// wrappers for pointers as a uniform entity.
25template <typename T> struct PointerLikeTypeTraits;
26
27namespace detail {
28/// A tiny meta function to compute the log2 of a compile time constant.
29template <size_t N>
30struct ConstantLog2
31 : std::integral_constant<size_t, ConstantLog2<N / 2>::value + 1> {};
32template <> struct ConstantLog2<1> : std::integral_constant<size_t, 0> {};
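
The recursion bottoms out at the specialization for 1. A couple of compile-time checks (a sketch, not part of the analyzed source) show the values it produces:

    static_assert(llvm::detail::ConstantLog2<1>::value == 0, "log2(1) == 0");
    static_assert(llvm::detail::ConstantLog2<8>::value == 3, "log2(8) == 3");
    static_assert(llvm::detail::ConstantLog2<4096>::value == 12, "log2(4096) == 12");
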
33
34// Provide a trait to check if T is pointer-like.
35template <typename T, typename U = void> struct HasPointerLikeTypeTraits {
36 static const bool value = false;
37};
38
39// sizeof(T) is valid only for a complete T.
40template <typename T> struct HasPointerLikeTypeTraits<
41 T, decltype((sizeof(PointerLikeTypeTraits<T>) + sizeof(T)), void())> {
42 static const bool value = true;
43};
44
45template <typename T> struct IsPointerLike {
46 static const bool value = HasPointerLikeTypeTraits<T>::value;
47};
48
49template <typename T> struct IsPointerLike<T *> {
50 static const bool value = true;
51};
52} // namespace detail
53
54// Provide PointerLikeTypeTraits for non-cvr pointers.
55template <typename T> struct PointerLikeTypeTraits<T *> {
56 static inline void *getAsVoidPointer(T *P) { return P; }
57 static inline T *getFromVoidPointer(void *P) { return static_cast<T *>(P); }
  [17] Returning null pointer (loaded from 'P'), which participates in a condition later
58
59 enum { NumLowBitsAvailable = detail::ConstantLog2<alignof(T)>::value };
60};
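
Concretely, the number of spare low bits tracks the pointee's alignment. A compile-time sketch (not from the analyzed source; Block is made up):

    #include "llvm/Support/PointerLikeTypeTraits.h"

    struct alignas(8) Block { char Bytes[8]; };
    static_assert(llvm::PointerLikeTypeTraits<Block *>::NumLowBitsAvailable == 3,
                  "8-byte alignment leaves the 3 low bits free for tags");
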
61
62template <> struct PointerLikeTypeTraits<void *> {
63 static inline void *getAsVoidPointer(void *P) { return P; }
64 static inline void *getFromVoidPointer(void *P) { return P; }
65
66 /// Note, we assume here that void* is related to raw malloc'ed memory and
67 /// that malloc returns objects at least 4-byte aligned. However, this may be
68 /// wrong, or pointers may be from something other than malloc. In this case,
69 /// you should specify a real typed pointer or avoid this template.
70 ///
71 /// All clients should use assertions to do a run-time check to ensure that
72 /// this is actually true.
73 enum { NumLowBitsAvailable = 2 };
74};
75
76// Provide PointerLikeTypeTraits for const things.
77template <typename T> struct PointerLikeTypeTraits<const T> {
78 typedef PointerLikeTypeTraits<T> NonConst;
79
80 static inline const void *getAsVoidPointer(const T P) {
81 return NonConst::getAsVoidPointer(P);
82 }
83 static inline const T getFromVoidPointer(const void *P) {
84 return NonConst::getFromVoidPointer(const_cast<void *>(P));
85 }
86 enum { NumLowBitsAvailable = NonConst::NumLowBitsAvailable };
87};
88
89// Provide PointerLikeTypeTraits for const pointers.
90template <typename T> struct PointerLikeTypeTraits<const T *> {
91 typedef PointerLikeTypeTraits<T *> NonConst;
92
93 static inline const void *getAsVoidPointer(const T *P) {
94 return NonConst::getAsVoidPointer(const_cast<T *>(P));
95 }
96 static inline const T *getFromVoidPointer(const void *P) {
97 return NonConst::getFromVoidPointer(const_cast<void *>(P));
  [16] Calling 'PointerLikeTypeTraits::getFromVoidPointer'
  [18] Returning from 'PointerLikeTypeTraits::getFromVoidPointer'
  [19] Returning null pointer, which participates in a condition later
98 }
99 enum { NumLowBitsAvailable = NonConst::NumLowBitsAvailable };
100};
101
102// Provide PointerLikeTypeTraits for uintptr_t.
103template <> struct PointerLikeTypeTraits<uintptr_t> {
104 static inline void *getAsVoidPointer(uintptr_t P) {
105 return reinterpret_cast<void *>(P);
106 }
107 static inline uintptr_t getFromVoidPointer(void *P) {
108 return reinterpret_cast<uintptr_t>(P);
109 }
110 // No bits are available!
111 enum { NumLowBitsAvailable = 0 };
112};
113
114/// Provide suitable custom traits struct for function pointers.
115///
116/// Function pointers can't be directly given these traits as functions can't
117/// have their alignment computed with `alignof` and we need different casting.
118///
119/// To rely on higher alignment for a specialized use, you can provide a
120/// customized form of this template explicitly with higher alignment, and
121/// potentially use alignment attributes on functions to satisfy that.
122template <int Alignment, typename FunctionPointerT>
123struct FunctionPointerLikeTypeTraits {
124 enum { NumLowBitsAvailable = detail::ConstantLog2<Alignment>::value };
125 static inline void *getAsVoidPointer(FunctionPointerT P) {
126 assert((reinterpret_cast<uintptr_t>(P) &
127 ~((uintptr_t)-1 << NumLowBitsAvailable)) == 0 &&
128 "Alignment not satisfied for an actual function pointer!");
129 return reinterpret_cast<void *>(P);
130 }
131 static inline FunctionPointerT getFromVoidPointer(void *P) {
132 return reinterpret_cast<FunctionPointerT>(P);
133 }
134};
135
136/// Provide a default specialization for function pointers that assumes 4-byte
137/// alignment.
138///
139/// We assume here that functions used with this are always at least 4-byte
140/// aligned. This means that, for example, thumb functions won't work or systems
141/// with weird unaligned function pointers won't work. But all practical systems
142/// we support satisfy this requirement.
143template <typename ReturnT, typename... ParamTs>
144struct PointerLikeTypeTraits<ReturnT (*)(ParamTs...)>
145 : FunctionPointerLikeTypeTraits<4, ReturnT (*)(ParamTs...)> {};
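
So, by default, an ordinary function pointer advertises two spare low bits. A compile-time sketch (not part of the analyzed source):

    using Callback = void (*)(int);
    static_assert(llvm::PointerLikeTypeTraits<Callback>::NumLowBitsAvailable == 2,
                  "default function-pointer traits assume 4-byte alignment");
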
146
147} // end namespace llvm
148
149#endif

/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/include/llvm/IR/Operator.h

1//===-- llvm/Operator.h - Operator utility subclass -------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines various classes for working with Instructions and
10// ConstantExprs.
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_IR_OPERATOR_H
15#define LLVM_IR_OPERATOR_H
16
17#include "llvm/ADT/None.h"
18#include "llvm/ADT/Optional.h"
19#include "llvm/IR/Constants.h"
20#include "llvm/IR/Instruction.h"
21#include "llvm/IR/Type.h"
22#include "llvm/IR/Value.h"
23#include "llvm/Support/Casting.h"
24#include <cstddef>
25
26namespace llvm {
27
28/// This is a utility class that provides an abstraction for the common
29/// functionality between Instructions and ConstantExprs.
30class Operator : public User {
31public:
32 // The Operator class is intended to be used as a utility, and is never itself
33 // instantiated.
34 Operator() = delete;
35 ~Operator() = delete;
36
37 void *operator new(size_t s) = delete;
38
39 /// Return the opcode for this Instruction or ConstantExpr.
40 unsigned getOpcode() const {
41 if (const Instruction *I = dyn_cast<Instruction>(this))
42 return I->getOpcode();
43 return cast<ConstantExpr>(this)->getOpcode();
44 }
45
46 /// If V is an Instruction or ConstantExpr, return its opcode.
47 /// Otherwise return UserOp1.
48 static unsigned getOpcode(const Value *V) {
49 if (const Instruction *I = dyn_cast<Instruction>(V))
  [33] Assuming 'V' is not a 'Instruction'
  [33.1] 'I' is null
  [34] Taking false branch
50 return I->getOpcode();
51 if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
  [35] Assuming 'V' is a 'ConstantExpr'
  [35.1] 'CE' is non-null
  [36] Taking true branch
52 return CE->getOpcode();
  [37] Returning value, which participates in a condition later
53 return Instruction::UserOp1;
54 }
55
56 static bool classof(const Instruction *) { return true; }
57 static bool classof(const ConstantExpr *) { return true; }
58 static bool classof(const Value *V) {
59 return isa<Instruction>(V) || isa<ConstantExpr>(V);
60 }
61};
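
This uniform opcode view is what lets the analyzer path above (steps 33-37) fall through to the ConstantExpr branch when the value is not an Instruction. A minimal sketch (not part of the analyzed source):

    #include "llvm/IR/Operator.h"

    // Returns true for both an 'add' instruction and a constant expression
    // such as 'add (i64 ptrtoint (i32* @g to i64), i64 8)'; a plain Argument
    // yields Instruction::UserOp1 and so returns false.
    bool isAddLike(const llvm::Value *V) {
      return llvm::Operator::getOpcode(V) == llvm::Instruction::Add;
    }
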
62
63/// Utility class for integer operators which may exhibit overflow - Add, Sub,
64/// Mul, and Shl. It does not include SDiv, despite that operator having the
65/// potential for overflow.
66class OverflowingBinaryOperator : public Operator {
67public:
68 enum {
69 NoUnsignedWrap = (1 << 0),
70 NoSignedWrap = (1 << 1)
71 };
72
73private:
74 friend class Instruction;
75 friend class ConstantExpr;
76
77 void setHasNoUnsignedWrap(bool B) {
78 SubclassOptionalData =
79 (SubclassOptionalData & ~NoUnsignedWrap) | (B * NoUnsignedWrap);
80 }
81 void setHasNoSignedWrap(bool B) {
82 SubclassOptionalData =
83 (SubclassOptionalData & ~NoSignedWrap) | (B * NoSignedWrap);
84 }
85
86public:
87 /// Test whether this operation is known to never
88 /// undergo unsigned overflow, aka the nuw property.
89 bool hasNoUnsignedWrap() const {
90 return SubclassOptionalData & NoUnsignedWrap;
91 }
92
93 /// Test whether this operation is known to never
94 /// undergo signed overflow, aka the nsw property.
95 bool hasNoSignedWrap() const {
96 return (SubclassOptionalData & NoSignedWrap) != 0;
97 }
98
99 static bool classof(const Instruction *I) {
100 return I->getOpcode() == Instruction::Add ||
101 I->getOpcode() == Instruction::Sub ||
102 I->getOpcode() == Instruction::Mul ||
103 I->getOpcode() == Instruction::Shl;
104 }
105 static bool classof(const ConstantExpr *CE) {
106 return CE->getOpcode() == Instruction::Add ||
107 CE->getOpcode() == Instruction::Sub ||
108 CE->getOpcode() == Instruction::Mul ||
109 CE->getOpcode() == Instruction::Shl;
110 }
111 static bool classof(const Value *V) {
112 return (isa<Instruction>(V) && classof(cast<Instruction>(V))) ||
113 (isa<ConstantExpr>(V) && classof(cast<ConstantExpr>(V)));
114 }
115};
116
117/// A udiv or sdiv instruction, which can be marked as "exact",
118/// indicating that no bits are destroyed.
119class PossiblyExactOperator : public Operator {
120public:
121 enum {
122 IsExact = (1 << 0)
123 };
124
125private:
126 friend class Instruction;
127 friend class ConstantExpr;
128
129 void setIsExact(bool B) {
130 SubclassOptionalData = (SubclassOptionalData & ~IsExact) | (B * IsExact);
131 }
132
133public:
134 /// Test whether this division is known to be exact, with zero remainder.
135 bool isExact() const {
136 return SubclassOptionalData & IsExact;
137 }
138
139 static bool isPossiblyExactOpcode(unsigned OpC) {
140 return OpC == Instruction::SDiv ||
141 OpC == Instruction::UDiv ||
142 OpC == Instruction::AShr ||
143 OpC == Instruction::LShr;
144 }
145
146 static bool classof(const ConstantExpr *CE) {
147 return isPossiblyExactOpcode(CE->getOpcode());
148 }
149 static bool classof(const Instruction *I) {
150 return isPossiblyExactOpcode(I->getOpcode());
151 }
152 static bool classof(const Value *V) {
153 return (isa<Instruction>(V) && classof(cast<Instruction>(V))) ||
154 (isa<ConstantExpr>(V) && classof(cast<ConstantExpr>(V)));
155 }
156};
157
158/// Convenience struct for specifying and reasoning about fast-math flags.
159class FastMathFlags {
160private:
161 friend class FPMathOperator;
162
163 unsigned Flags = 0;
164
165 FastMathFlags(unsigned F) {
166 // If all 7 bits are set, turn this into -1. If the number of bits grows,
167 // this must be updated. This is intended to provide some forward binary
168 // compatibility insurance for the meaning of 'fast' in case bits are added.
169 if (F == 0x7F) Flags = ~0U;
170 else Flags = F;
171 }
172
173public:
174 // This is how the bits are used in Value::SubclassOptionalData so they
175 // should fit there too.
176 // WARNING: We're out of space. SubclassOptionalData only has 7 bits. New
177 // functionality will require a change in how this information is stored.
178 enum {
179 AllowReassoc = (1 << 0),
180 NoNaNs = (1 << 1),
181 NoInfs = (1 << 2),
182 NoSignedZeros = (1 << 3),
183 AllowReciprocal = (1 << 4),
184 AllowContract = (1 << 5),
185 ApproxFunc = (1 << 6)
186 };
187
188 FastMathFlags() = default;
189
190 static FastMathFlags getFast() {
191 FastMathFlags FMF;
192 FMF.setFast();
193 return FMF;
194 }
195
196 bool any() const { return Flags != 0; }
197 bool none() const { return Flags == 0; }
198 bool all() const { return Flags == ~0U; }
199
200 void clear() { Flags = 0; }
201 void set() { Flags = ~0U; }
202
203 /// Flag queries
204 bool allowReassoc() const { return 0 != (Flags & AllowReassoc); }
205 bool noNaNs() const { return 0 != (Flags & NoNaNs); }
206 bool noInfs() const { return 0 != (Flags & NoInfs); }
207 bool noSignedZeros() const { return 0 != (Flags & NoSignedZeros); }
208 bool allowReciprocal() const { return 0 != (Flags & AllowReciprocal); }
209 bool allowContract() const { return 0 != (Flags & AllowContract); }
210 bool approxFunc() const { return 0 != (Flags & ApproxFunc); }
211 /// 'Fast' means all bits are set.
212 bool isFast() const { return all(); }
213
214 /// Flag setters
215 void setAllowReassoc(bool B = true) {
216 Flags = (Flags & ~AllowReassoc) | B * AllowReassoc;
217 }
218 void setNoNaNs(bool B = true) {
219 Flags = (Flags & ~NoNaNs) | B * NoNaNs;
220 }
221 void setNoInfs(bool B = true) {
222 Flags = (Flags & ~NoInfs) | B * NoInfs;
223 }
224 void setNoSignedZeros(bool B = true) {
225 Flags = (Flags & ~NoSignedZeros) | B * NoSignedZeros;
226 }
227 void setAllowReciprocal(bool B = true) {
228 Flags = (Flags & ~AllowReciprocal) | B * AllowReciprocal;
229 }
230 void setAllowContract(bool B = true) {
231 Flags = (Flags & ~AllowContract) | B * AllowContract;
232 }
233 void setApproxFunc(bool B = true) {
234 Flags = (Flags & ~ApproxFunc) | B * ApproxFunc;
235 }
236 void setFast(bool B = true) { B ? set() : clear(); }
237
238 void operator&=(const FastMathFlags &OtherFlags) {
239 Flags &= OtherFlags.Flags;
240 }
241};
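
A short usage sketch (not part of the analyzed source) of the flag interface above; note that isFast() only reports true when every one of the seven bits is set:

    #include "llvm/IR/Operator.h"

    void fastMathDemo() {
      llvm::FastMathFlags FMF;
      FMF.setNoNaNs();
      FMF.setAllowReassoc();
      bool Fast = FMF.isFast();                            // false: not all bits set
      bool All  = llvm::FastMathFlags::getFast().isFast(); // true
      (void)Fast; (void)All;
    }
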
242
243/// Utility class for floating point operations which can have
244/// information about relaxed accuracy requirements attached to them.
245class FPMathOperator : public Operator {
246private:
247 friend class Instruction;
248
249 /// 'Fast' means all bits are set.
250 void setFast(bool B) {
251 setHasAllowReassoc(B);
252 setHasNoNaNs(B);
253 setHasNoInfs(B);
254 setHasNoSignedZeros(B);
255 setHasAllowReciprocal(B);
256 setHasAllowContract(B);
257 setHasApproxFunc(B);
258 }
259
260 void setHasAllowReassoc(bool B) {
261 SubclassOptionalData =
262 (SubclassOptionalData & ~FastMathFlags::AllowReassoc) |
263 (B * FastMathFlags::AllowReassoc);
264 }
265
266 void setHasNoNaNs(bool B) {
267 SubclassOptionalData =
268 (SubclassOptionalData & ~FastMathFlags::NoNaNs) |
269 (B * FastMathFlags::NoNaNs);
270 }
271
272 void setHasNoInfs(bool B) {
273 SubclassOptionalData =
274 (SubclassOptionalData & ~FastMathFlags::NoInfs) |
275 (B * FastMathFlags::NoInfs);
276 }
277
278 void setHasNoSignedZeros(bool B) {
279 SubclassOptionalData =
280 (SubclassOptionalData & ~FastMathFlags::NoSignedZeros) |
281 (B * FastMathFlags::NoSignedZeros);
282 }
283
284 void setHasAllowReciprocal(bool B) {
285 SubclassOptionalData =
286 (SubclassOptionalData & ~FastMathFlags::AllowReciprocal) |
287 (B * FastMathFlags::AllowReciprocal);
288 }
289
290 void setHasAllowContract(bool B) {
291 SubclassOptionalData =
292 (SubclassOptionalData & ~FastMathFlags::AllowContract) |
293 (B * FastMathFlags::AllowContract);
294 }
295
296 void setHasApproxFunc(bool B) {
297 SubclassOptionalData =
298 (SubclassOptionalData & ~FastMathFlags::ApproxFunc) |
299 (B * FastMathFlags::ApproxFunc);
300 }
301
302 /// Convenience function for setting multiple fast-math flags.
303 /// FMF is a mask of the bits to set.
304 void setFastMathFlags(FastMathFlags FMF) {
305 SubclassOptionalData |= FMF.Flags;
306 }
307
308 /// Convenience function for copying all fast-math flags.
309 /// All values in FMF are transferred to this operator.
310 void copyFastMathFlags(FastMathFlags FMF) {
311 SubclassOptionalData = FMF.Flags;
312 }
313
314public:
315 /// Test if this operation allows all non-strict floating-point transforms.
316 bool isFast() const {
317 return ((SubclassOptionalData & FastMathFlags::AllowReassoc) != 0 &&
318 (SubclassOptionalData & FastMathFlags::NoNaNs) != 0 &&
319 (SubclassOptionalData & FastMathFlags::NoInfs) != 0 &&
320 (SubclassOptionalData & FastMathFlags::NoSignedZeros) != 0 &&
321 (SubclassOptionalData & FastMathFlags::AllowReciprocal) != 0 &&
322 (SubclassOptionalData & FastMathFlags::AllowContract) != 0 &&
323 (SubclassOptionalData & FastMathFlags::ApproxFunc) != 0);
324 }
325
326 /// Test if this operation may be simplified with reassociative transforms.
327 bool hasAllowReassoc() const {
328 return (SubclassOptionalData & FastMathFlags::AllowReassoc) != 0;
329 }
330
331 /// Test if this operation's arguments and results are assumed not-NaN.
332 bool hasNoNaNs() const {
333 return (SubclassOptionalData & FastMathFlags::NoNaNs) != 0;
334 }
335
336 /// Test if this operation's arguments and results are assumed not-infinite.
337 bool hasNoInfs() const {
338 return (SubclassOptionalData & FastMathFlags::NoInfs) != 0;
339 }
340
341 /// Test if this operation can ignore the sign of zero.
342 bool hasNoSignedZeros() const {
343 return (SubclassOptionalData & FastMathFlags::NoSignedZeros) != 0;
344 }
345
346 /// Test if this operation can use reciprocal multiply instead of division.
347 bool hasAllowReciprocal() const {
348 return (SubclassOptionalData & FastMathFlags::AllowReciprocal) != 0;
349 }
350
351 /// Test if this operation can be floating-point contracted (FMA).
352 bool hasAllowContract() const {
353 return (SubclassOptionalData & FastMathFlags::AllowContract) != 0;
354 }
355
356 /// Test if this operation allows approximations of math library functions or
357 /// intrinsics.
358 bool hasApproxFunc() const {
359 return (SubclassOptionalData & FastMathFlags::ApproxFunc) != 0;
360 }
361
362 /// Convenience function for getting all the fast-math flags
363 FastMathFlags getFastMathFlags() const {
364 return FastMathFlags(SubclassOptionalData);
365 }
366
367 /// Get the maximum error permitted by this operation in ULPs. An accuracy of
368 /// 0.0 means that the operation should be performed with the default
369 /// precision.
370 float getFPAccuracy() const;
371
372 static bool classof(const Value *V) {
373 unsigned Opcode;
374 if (auto *I = dyn_cast<Instruction>(V))
375 Opcode = I->getOpcode();
376 else if (auto *CE = dyn_cast<ConstantExpr>(V))
377 Opcode = CE->getOpcode();
378 else
379 return false;
380
381 switch (Opcode) {
382 case Instruction::FNeg:
383 case Instruction::FAdd:
384 case Instruction::FSub:
385 case Instruction::FMul:
386 case Instruction::FDiv:
387 case Instruction::FRem:
388 // FIXME: To clean up and correct the semantics of fast-math-flags, FCmp
389 // should not be treated as a math op, but the other opcodes should.
390 // This would make things consistent with Select/PHI (FP value type
391 // determines whether they are math ops and, therefore, capable of
392 // having fast-math-flags).
393 case Instruction::FCmp:
394 return true;
395 case Instruction::PHI:
396 case Instruction::Select:
397 case Instruction::Call: {
398 Type *Ty = V->getType();
399 while (ArrayType *ArrTy = dyn_cast<ArrayType>(Ty))
400 Ty = ArrTy->getElementType();
401 return Ty->isFPOrFPVectorTy();
402 }
403 default:
404 return false;
405 }
406 }
407};
408
409/// A helper template for defining operators for individual opcodes.
410template<typename SuperClass, unsigned Opc>
411class ConcreteOperator : public SuperClass {
412public:
413 static bool classof(const Instruction *I) {
414 return I->getOpcode() == Opc;
415 }
416 static bool classof(const ConstantExpr *CE) {
417 return CE->getOpcode() == Opc;
418 }
419 static bool classof(const Value *V) {
420 return (isa<Instruction>(V) && classof(cast<Instruction>(V))) ||
421 (isa<ConstantExpr>(V) && classof(cast<ConstantExpr>(V)));
422 }
423};
424
425class AddOperator
426 : public ConcreteOperator<OverflowingBinaryOperator, Instruction::Add> {
427};
428class SubOperator
429 : public ConcreteOperator<OverflowingBinaryOperator, Instruction::Sub> {
430};
431class MulOperator
432 : public ConcreteOperator<OverflowingBinaryOperator, Instruction::Mul> {
433};
434class ShlOperator
435 : public ConcreteOperator<OverflowingBinaryOperator, Instruction::Shl> {
436};
437
438class SDivOperator
439 : public ConcreteOperator<PossiblyExactOperator, Instruction::SDiv> {
440};
441class UDivOperator
442 : public ConcreteOperator<PossiblyExactOperator, Instruction::UDiv> {
443};
444class AShrOperator
445 : public ConcreteOperator<PossiblyExactOperator, Instruction::AShr> {
446};
447class LShrOperator
448 : public ConcreteOperator<PossiblyExactOperator, Instruction::LShr> {
449};
450
451class ZExtOperator : public ConcreteOperator<Operator, Instruction::ZExt> {};
452
453class GEPOperator
454 : public ConcreteOperator<Operator, Instruction::GetElementPtr> {
455 friend class GetElementPtrInst;
456 friend class ConstantExpr;
457
458 enum {
459 IsInBounds = (1 << 0),
460 // InRangeIndex: bits 1-6
461 };
462
463 void setIsInBounds(bool B) {
464 SubclassOptionalData =
465 (SubclassOptionalData & ~IsInBounds) | (B * IsInBounds);
466 }
467
468public:
469 /// Test whether this is an inbounds GEP, as defined by LangRef.html.
470 bool isInBounds() const {
471 return SubclassOptionalData & IsInBounds;
472 }
473
474 /// Returns the offset of the index with an inrange attachment, or None if
475 /// none.
476 Optional<unsigned> getInRangeIndex() const {
477 if (SubclassOptionalData >> 1 == 0) return None;
478 return (SubclassOptionalData >> 1) - 1;
479 }
480
481 inline op_iterator idx_begin() { return op_begin()+1; }
482 inline const_op_iterator idx_begin() const { return op_begin()+1; }
483 inline op_iterator idx_end() { return op_end(); }
484 inline const_op_iterator idx_end() const { return op_end(); }
485
486 Value *getPointerOperand() {
487 return getOperand(0);
488 }
489 const Value *getPointerOperand() const {
490 return getOperand(0);
491 }
492 static unsigned getPointerOperandIndex() {
493 return 0U; // get index for modifying correct operand
494 }
495
496 /// Method to return the pointer operand as a PointerType.
497 Type *getPointerOperandType() const {
498 return getPointerOperand()->getType();
499 }
500
501 Type *getSourceElementType() const;
502 Type *getResultElementType() const;
503
504 /// Method to return the address space of the pointer operand.
505 unsigned getPointerAddressSpace() const {
506 return getPointerOperandType()->getPointerAddressSpace();
507 }
508
509 unsigned getNumIndices() const { // Note: always non-negative
510 return getNumOperands() - 1;
511 }
512
513 bool hasIndices() const {
514 return getNumOperands() > 1;
515 }
516
517 /// Return true if all of the indices of this GEP are zeros.
518 /// If so, the result pointer and the first operand have the same
519 /// value, just potentially different types.
520 bool hasAllZeroIndices() const {
521 for (const_op_iterator I = idx_begin(), E = idx_end(); I != E; ++I) {
522 if (ConstantInt *C = dyn_cast<ConstantInt>(I))
523 if (C->isZero())
524 continue;
525 return false;
526 }
527 return true;
528 }
529
530 /// Return true if all of the indices of this GEP are constant integers.
531 /// If so, the result pointer and the first operand have
532 /// a constant offset between them.
533 bool hasAllConstantIndices() const {
534 for (const_op_iterator I = idx_begin(), E = idx_end(); I != E; ++I) {
535 if (!isa<ConstantInt>(I))
536 return false;
537 }
538 return true;
539 }
540
541 unsigned countNonConstantIndices() const {
542 return count_if(make_range(idx_begin(), idx_end()), [](const Use& use) {
543 return !isa<ConstantInt>(*use);
544 });
545 }
546
547 /// Accumulate the constant address offset of this GEP if possible.
548 ///
549 /// This routine accepts an APInt into which it will accumulate the constant
550 /// offset of this GEP if the GEP is in fact constant. If the GEP is not
551 /// all-constant, it returns false and the value of the offset APInt is
552 /// undefined (it is *not* preserved!). The APInt passed into this routine
553 /// must be exactly as wide as the IntPtr type for the address space of the
554 /// base GEP pointer.
555 bool accumulateConstantOffset(const DataLayout &DL, APInt &Offset) const;
556};
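
A minimal sketch (not part of the analyzed source) of the accumulateConstantOffset contract described above; it assumes DL is the module's DataLayout and sizes the APInt with DataLayout::getIndexSizeInBits, which also appears in the BasicTTIImpl.h excerpt further down:

    #include "llvm/ADT/APInt.h"
    #include "llvm/IR/DataLayout.h"
    #include "llvm/IR/Operator.h"

    // Returns true and fills ByteOffset only when every GEP index is constant.
    bool getConstantGEPOffset(const llvm::DataLayout &DL,
                              const llvm::GEPOperator &GEP,
                              llvm::APInt &ByteOffset) {
      // Width must match the index type of the GEP's address space.
      ByteOffset = llvm::APInt(
          DL.getIndexSizeInBits(GEP.getPointerAddressSpace()), 0);
      return GEP.accumulateConstantOffset(DL, ByteOffset);
    }
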
557
558class PtrToIntOperator
559 : public ConcreteOperator<Operator, Instruction::PtrToInt> {
560 friend class PtrToInt;
561 friend class ConstantExpr;
562
563public:
564 Value *getPointerOperand() {
565 return getOperand(0);
566 }
567 const Value *getPointerOperand() const {
568 return getOperand(0);
569 }
570
571 static unsigned getPointerOperandIndex() {
572 return 0U; // get index for modifying correct operand
573 }
574
575 /// Method to return the pointer operand as a PointerType.
576 Type *getPointerOperandType() const {
577 return getPointerOperand()->getType();
578 }
579
580 /// Method to return the address space of the pointer operand.
581 unsigned getPointerAddressSpace() const {
582 return cast<PointerType>(getPointerOperandType())->getAddressSpace();
583 }
584};
585
586class BitCastOperator
587 : public ConcreteOperator<Operator, Instruction::BitCast> {
588 friend class BitCastInst;
589 friend class ConstantExpr;
590
591public:
592 Type *getSrcTy() const {
593 return getOperand(0)->getType();
594 }
595
596 Type *getDestTy() const {
597 return getType();
598 }
599};
600
601class FreezeOperator : public ConcreteOperator<Operator, Instruction::Freeze>
602{};
603
604} // end namespace llvm
605
606#endif // LLVM_IR_OPERATOR_H

/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/include/llvm/CodeGen/BasicTTIImpl.h

1//===- BasicTTIImpl.h -------------------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// This file provides a helper that implements much of the TTI interface in
11/// terms of the target-independent code generator and TargetLowering
12/// interfaces.
13//
14//===----------------------------------------------------------------------===//
15
16#ifndef LLVM_CODEGEN_BASICTTIIMPL_H
17#define LLVM_CODEGEN_BASICTTIIMPL_H
18
19#include "llvm/ADT/APInt.h"
20#include "llvm/ADT/ArrayRef.h"
21#include "llvm/ADT/BitVector.h"
22#include "llvm/ADT/SmallPtrSet.h"
23#include "llvm/ADT/SmallVector.h"
24#include "llvm/Analysis/LoopInfo.h"
25#include "llvm/Analysis/TargetTransformInfo.h"
26#include "llvm/Analysis/TargetTransformInfoImpl.h"
27#include "llvm/CodeGen/ISDOpcodes.h"
28#include "llvm/CodeGen/TargetLowering.h"
29#include "llvm/CodeGen/TargetSubtargetInfo.h"
30#include "llvm/CodeGen/ValueTypes.h"
31#include "llvm/IR/BasicBlock.h"
32#include "llvm/IR/CallSite.h"
33#include "llvm/IR/Constant.h"
34#include "llvm/IR/Constants.h"
35#include "llvm/IR/DataLayout.h"
36#include "llvm/IR/DerivedTypes.h"
37#include "llvm/IR/InstrTypes.h"
38#include "llvm/IR/Instruction.h"
39#include "llvm/IR/Instructions.h"
40#include "llvm/IR/Intrinsics.h"
41#include "llvm/IR/Operator.h"
42#include "llvm/IR/Type.h"
43#include "llvm/IR/Value.h"
44#include "llvm/MC/MCSchedule.h"
45#include "llvm/Support/Casting.h"
46#include "llvm/Support/CommandLine.h"
47#include "llvm/Support/ErrorHandling.h"
48#include "llvm/Support/MachineValueType.h"
49#include "llvm/Support/MathExtras.h"
50#include <algorithm>
51#include <cassert>
52#include <cstdint>
53#include <limits>
54#include <utility>
55
56namespace llvm {
57
58class Function;
59class GlobalValue;
60class LLVMContext;
61class ScalarEvolution;
62class SCEV;
63class TargetMachine;
64
65extern cl::opt<unsigned> PartialUnrollingThreshold;
66
67/// Base class which can be used to help build a TTI implementation.
68///
69/// This class provides as much implementation of the TTI interface as is
70/// possible using the target independent parts of the code generator.
71///
72/// In order to subclass it, your class must implement a getST() method to
73/// return the subtarget, and a getTLI() method to return the target lowering.
74/// We need these methods implemented in the derived class so that this class
75/// doesn't have to duplicate storage for them.
76template <typename T>
77class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
78private:
79 using BaseT = TargetTransformInfoImplCRTPBase<T>;
80 using TTI = TargetTransformInfo;
81
82 /// Estimate a cost of Broadcast as an extract and sequence of insert
83 /// operations.
84 unsigned getBroadcastShuffleOverhead(Type *Ty) {
85 assert(Ty->isVectorTy() && "Can only shuffle vectors");
86 unsigned Cost = 0;
87 // Broadcast cost is equal to the cost of extracting the zero'th element
88 // plus the cost of inserting it into every element of the result vector.
89 Cost += static_cast<T *>(this)->getVectorInstrCost(
90 Instruction::ExtractElement, Ty, 0);
91
92 for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) {
93 Cost += static_cast<T *>(this)->getVectorInstrCost(
94 Instruction::InsertElement, Ty, i);
95 }
96 return Cost;
97 }
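
To make the formula concrete, here is a worked example with hypothetical unit per-element costs (not taken from any real target):

    // Broadcasting a <4 x float>:
    //   Cost = 1 (ExtractElement of element 0)
    //        + 4 (InsertElement into indices 0..3)
    //        = 5
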
98
99 /// Estimate a cost of shuffle as a sequence of extract and insert
100 /// operations.
101 unsigned getPermuteShuffleOverhead(Type *Ty) {
102 assert(Ty->isVectorTy() && "Can only shuffle vectors");
103 unsigned Cost = 0;
104 // Shuffle cost is equal to the cost of extracting each element from its
105 // argument plus the cost of inserting them into the result vector.
106
107 // e.g. <4 x float> has a mask of <0,5,2,7>, i.e. we need to extract from
108 // index 0 of first vector, index 1 of second vector, index 2 of first
109 // vector and finally index 3 of second vector and insert them at index
110 // <0,1,2,3> of result vector.
111 for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) {
112 Cost += static_cast<T *>(this)
113 ->getVectorInstrCost(Instruction::InsertElement, Ty, i);
114 Cost += static_cast<T *>(this)
115 ->getVectorInstrCost(Instruction::ExtractElement, Ty, i);
116 }
117 return Cost;
118 }
119
120 /// Estimate a cost of subvector extraction as a sequence of extract and
121 /// insert operations.
122 unsigned getExtractSubvectorOverhead(Type *Ty, int Index, Type *SubTy) {
123 assert(Ty && Ty->isVectorTy() && SubTy && SubTy->isVectorTy() &&
124 "Can only extract subvectors from vectors");
125 int NumSubElts = SubTy->getVectorNumElements();
126 assert((Index + NumSubElts) <= (int)Ty->getVectorNumElements() &&
127 "SK_ExtractSubvector index out of range");
128
129 unsigned Cost = 0;
130 // Subvector extraction cost is equal to the cost of extracting each element
131 // from the source type plus the cost of inserting them into the result
132 // vector type.
133 for (int i = 0; i != NumSubElts; ++i) {
134 Cost += static_cast<T *>(this)->getVectorInstrCost(
135 Instruction::ExtractElement, Ty, i + Index);
136 Cost += static_cast<T *>(this)->getVectorInstrCost(
137 Instruction::InsertElement, SubTy, i);
138 }
139 return Cost;
140 }
141
142 /// Estimate a cost of subvector insertion as a sequence of extract and
143 /// insert operations.
144 unsigned getInsertSubvectorOverhead(Type *Ty, int Index, Type *SubTy) {
145 assert(Ty && Ty->isVectorTy() && SubTy && SubTy->isVectorTy() &&
146 "Can only insert subvectors into vectors");
147 int NumSubElts = SubTy->getVectorNumElements();
148 assert((Index + NumSubElts) <= (int)Ty->getVectorNumElements() &&
149 "SK_InsertSubvector index out of range");
150
151 unsigned Cost = 0;
152 // Subvector insertion cost is equal to the cost of extracting each element
153 // from the source type plus the cost of inserting them into the result
154 // vector type.
155 for (int i = 0; i != NumSubElts; ++i) {
156 Cost += static_cast<T *>(this)->getVectorInstrCost(
157 Instruction::ExtractElement, SubTy, i);
158 Cost += static_cast<T *>(this)->getVectorInstrCost(
159 Instruction::InsertElement, Ty, i + Index);
160 }
161 return Cost;
162 }
163
164 /// Local query method delegates up to T which *must* implement this!
165 const TargetSubtargetInfo *getST() const {
166 return static_cast<const T *>(this)->getST();
167 }
168
169 /// Local query method delegates up to T which *must* implement this!
170 const TargetLoweringBase *getTLI() const {
171 return static_cast<const T *>(this)->getTLI();
172 }
173
174 static ISD::MemIndexedMode getISDIndexedMode(TTI::MemIndexedMode M) {
175 switch (M) {
176 case TTI::MIM_Unindexed:
177 return ISD::UNINDEXED;
178 case TTI::MIM_PreInc:
179 return ISD::PRE_INC;
180 case TTI::MIM_PreDec:
181 return ISD::PRE_DEC;
182 case TTI::MIM_PostInc:
183 return ISD::POST_INC;
184 case TTI::MIM_PostDec:
185 return ISD::POST_DEC;
186 }
187 llvm_unreachable("Unexpected MemIndexedMode");
188 }
189
190protected:
191 explicit BasicTTIImplBase(const TargetMachine *TM, const DataLayout &DL)
192 : BaseT(DL) {}
193 virtual ~BasicTTIImplBase() = default;
194
195 using TargetTransformInfoImplBase::DL;
196
197public:
198 /// \name Scalar TTI Implementations
199 /// @{
200 bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth,
201 unsigned AddressSpace, unsigned Alignment,
202 bool *Fast) const {
203 EVT E = EVT::getIntegerVT(Context, BitWidth);
204 return getTLI()->allowsMisalignedMemoryAccesses(
205 E, AddressSpace, Alignment, MachineMemOperand::MONone, Fast);
206 }
207
208 bool hasBranchDivergence() { return false; }
209
210 bool isSourceOfDivergence(const Value *V) { return false; }
211
212 bool isAlwaysUniform(const Value *V) { return false; }
213
214 unsigned getFlatAddressSpace() {
215 // Return an invalid address space.
216 return -1;
217 }
218
219 bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
220 Intrinsic::ID IID) const {
221 return false;
222 }
223
224 bool rewriteIntrinsicWithAddressSpace(IntrinsicInst *II,
225 Value *OldV, Value *NewV) const {
226 return false;
227 }
228
229 bool isLegalAddImmediate(int64_t imm) {
230 return getTLI()->isLegalAddImmediate(imm);
231 }
232
233 bool isLegalICmpImmediate(int64_t imm) {
234 return getTLI()->isLegalICmpImmediate(imm);
235 }
236
237 bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
238 bool HasBaseReg, int64_t Scale,
239 unsigned AddrSpace, Instruction *I = nullptr) {
240 TargetLoweringBase::AddrMode AM;
241 AM.BaseGV = BaseGV;
242 AM.BaseOffs = BaseOffset;
243 AM.HasBaseReg = HasBaseReg;
244 AM.Scale = Scale;
245 return getTLI()->isLegalAddressingMode(DL, AM, Ty, AddrSpace, I);
246 }
247
248 bool isIndexedLoadLegal(TTI::MemIndexedMode M, Type *Ty,
249 const DataLayout &DL) const {
250 EVT VT = getTLI()->getValueType(DL, Ty);
251 return getTLI()->isIndexedLoadLegal(getISDIndexedMode(M), VT);
252 }
253
254 bool isIndexedStoreLegal(TTI::MemIndexedMode M, Type *Ty,
255 const DataLayout &DL) const {
256 EVT VT = getTLI()->getValueType(DL, Ty);
257 return getTLI()->isIndexedStoreLegal(getISDIndexedMode(M), VT);
258 }
259
260 bool isLSRCostLess(TTI::LSRCost C1, TTI::LSRCost C2) {
261 return TargetTransformInfoImplBase::isLSRCostLess(C1, C2);
262 }
263
264 int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
265 bool HasBaseReg, int64_t Scale, unsigned AddrSpace) {
266 TargetLoweringBase::AddrMode AM;
267 AM.BaseGV = BaseGV;
268 AM.BaseOffs = BaseOffset;
269 AM.HasBaseReg = HasBaseReg;
270 AM.Scale = Scale;
271 return getTLI()->getScalingFactorCost(DL, AM, Ty, AddrSpace);
272 }
273
274 bool isTruncateFree(Type *Ty1, Type *Ty2) {
275 return getTLI()->isTruncateFree(Ty1, Ty2);
276 }
277
278 bool isProfitableToHoist(Instruction *I) {
279 return getTLI()->isProfitableToHoist(I);
280 }
281
282 bool useAA() const { return getST()->useAA(); }
283
284 bool isTypeLegal(Type *Ty) {
285 EVT VT = getTLI()->getValueType(DL, Ty);
286 return getTLI()->isTypeLegal(VT);
287 }
288
289 int getGEPCost(Type *PointeeType, const Value *Ptr,
290 ArrayRef<const Value *> Operands) {
291 return BaseT::getGEPCost(PointeeType, Ptr, Operands);
292 }
293
294 int getExtCost(const Instruction *I, const Value *Src) {
295 if (getTLI()->isExtFree(I))
296 return TargetTransformInfo::TCC_Free;
297
298 if (isa<ZExtInst>(I) || isa<SExtInst>(I))
299 if (const LoadInst *LI = dyn_cast<LoadInst>(Src))
300 if (getTLI()->isExtLoad(LI, I, DL))
301 return TargetTransformInfo::TCC_Free;
302
303 return TargetTransformInfo::TCC_Basic;
304 }
305
306 unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
307 ArrayRef<const Value *> Arguments, const User *U) {
308 return BaseT::getIntrinsicCost(IID, RetTy, Arguments, U);
309 }
310
311 unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
312 ArrayRef<Type *> ParamTys, const User *U) {
313 if (IID == Intrinsic::cttz) {
314 if (getTLI()->isCheapToSpeculateCttz())
315 return TargetTransformInfo::TCC_Basic;
316 return TargetTransformInfo::TCC_Expensive;
317 }
318
319 if (IID == Intrinsic::ctlz) {
320 if (getTLI()->isCheapToSpeculateCtlz())
321 return TargetTransformInfo::TCC_Basic;
322 return TargetTransformInfo::TCC_Expensive;
323 }
324
325 return BaseT::getIntrinsicCost(IID, RetTy, ParamTys, U);
326 }
327
328 unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
329 unsigned &JumpTableSize,
330 ProfileSummaryInfo *PSI,
331 BlockFrequencyInfo *BFI) {
332 /// Try to find the estimated number of clusters. Note that the number of
333 /// clusters identified in this function could be different from the actual
334 /// numbers found in lowering. This function ignores switches that are
335 /// lowered with a mix of jump table / bit test / BTree. This function was
336 /// initially intended to be used when estimating the cost of a switch in
337 /// the inline cost heuristic, but it's a generic cost model to be used in other
338 /// places (e.g., in loop unrolling).
339 unsigned N = SI.getNumCases();
340 const TargetLoweringBase *TLI = getTLI();
341 const DataLayout &DL = this->getDataLayout();
342
343 JumpTableSize = 0;
344 bool IsJTAllowed = TLI->areJTsAllowed(SI.getParent()->getParent());
345
346 // Early exit if both a jump table and bit test are not allowed.
347 if (N < 1 || (!IsJTAllowed && DL.getIndexSizeInBits(0u) < N))
348 return N;
349
350 APInt MaxCaseVal = SI.case_begin()->getCaseValue()->getValue();
351 APInt MinCaseVal = MaxCaseVal;
352 for (auto CI : SI.cases()) {
353 const APInt &CaseVal = CI.getCaseValue()->getValue();
354 if (CaseVal.sgt(MaxCaseVal))
355 MaxCaseVal = CaseVal;
356 if (CaseVal.slt(MinCaseVal))
357 MinCaseVal = CaseVal;
358 }
359
360 // Check if suitable for a bit test
361 if (N <= DL.getIndexSizeInBits(0u)) {
362 SmallPtrSet<const BasicBlock *, 4> Dests;
363 for (auto I : SI.cases())
364 Dests.insert(I.getCaseSuccessor());
365
366 if (TLI->isSuitableForBitTests(Dests.size(), N, MinCaseVal, MaxCaseVal,
367 DL))
368 return 1;
369 }
370
371 // Check if suitable for a jump table.
372 if (IsJTAllowed) {
373 if (N < 2 || N < TLI->getMinimumJumpTableEntries())
374 return N;
375 uint64_t Range =
376 (MaxCaseVal - MinCaseVal)
377 .getLimitedValue(std::numeric_limits<uint64_t>::max() - 1) + 1;
378 // Check whether a range of clusters is dense enough for a jump table
379 if (TLI->isSuitableForJumpTable(&SI, N, Range, PSI, BFI)) {
380 JumpTableSize = Range;
381 return 1;
382 }
383 }
384 return N;
385 }
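The decision above reduces to three outcomes: every case becomes its own cluster when neither lowering applies, the whole switch collapses to one cluster when it fits a bit test or a dense jump table, and the jump-table size is reported through the out-parameter. A minimal standalone sketch of that arithmetic, where the 64-bit word width, the limit of 3 destinations, the minimum of 4 cases and the 50% density bound are illustrative stand-ins for the target hooks queried above:

#include <cstdint>

// Simplified model of the clustering decision above. MinCase/MaxCase are the
// smallest and largest case values, NumCases the number of cases, NumDests
// the number of distinct successor blocks.
unsigned estimateCaseClusters(int64_t MinCase, int64_t MaxCase,
                              unsigned NumCases, unsigned NumDests,
                              uint64_t &JumpTableSize) {
  JumpTableSize = 0;
  if (NumCases < 1)
    return NumCases;

  uint64_t Range = static_cast<uint64_t>(MaxCase - MinCase) + 1;

  // Bit test: all case values fit in one machine word (assumed 64 bits here)
  // and there are few distinct destinations (3 is an illustrative limit).
  if (Range <= 64 && NumDests <= 3)
    return 1;

  // Jump table: enough cases (4 stands in for getMinimumJumpTableEntries) and
  // a dense enough range (50% density stands in for isSuitableForJumpTable).
  if (NumCases >= 4 && Range <= 2 * static_cast<uint64_t>(NumCases)) {
    JumpTableSize = Range;
    return 1;
  }

  // Otherwise each case is its own cluster.
  return NumCases;
}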
386
387 bool shouldBuildLookupTables() {
388 const TargetLoweringBase *TLI = getTLI();
389 return TLI->isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) ||
390 TLI->isOperationLegalOrCustom(ISD::BRIND, MVT::Other);
391 }
392
393 bool haveFastSqrt(Type *Ty) {
394 const TargetLoweringBase *TLI = getTLI();
395 EVT VT = TLI->getValueType(DL, Ty);
396 return TLI->isTypeLegal(VT) &&
397 TLI->isOperationLegalOrCustom(ISD::FSQRT, VT);
398 }
399
400 bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) {
401 return true;
402 }
403
404 unsigned getFPOpCost(Type *Ty) {
405 // Check whether FADD is available, as a proxy for floating-point in
406 // general.
407 const TargetLoweringBase *TLI = getTLI();
408 EVT VT = TLI->getValueType(DL, Ty);
409 if (TLI->isOperationLegalOrCustomOrPromote(ISD::FADD, VT))
410 return TargetTransformInfo::TCC_Basic;
411 return TargetTransformInfo::TCC_Expensive;
412 }
413
414 unsigned getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) {
415 const TargetLoweringBase *TLI = getTLI();
416 switch (Opcode) {
43
Control jumps to the 'default' case at line 417
417 default: break;
44
Execution continues on line 434
418 case Instruction::Trunc:
419 if (TLI->isTruncateFree(OpTy, Ty))
420 return TargetTransformInfo::TCC_Free;
421 return TargetTransformInfo::TCC_Basic;
422 case Instruction::ZExt:
423 if (TLI->isZExtFree(OpTy, Ty))
424 return TargetTransformInfo::TCC_Free;
425 return TargetTransformInfo::TCC_Basic;
426
427 case Instruction::AddrSpaceCast:
428 if (TLI->isFreeAddrSpaceCast(OpTy->getPointerAddressSpace(),
429 Ty->getPointerAddressSpace()))
430 return TargetTransformInfo::TCC_Free;
431 return TargetTransformInfo::TCC_Basic;
432 }
433
434 return BaseT::getOperationCost(Opcode, Ty, OpTy);
45
Passing null pointer value via 3rd parameter 'OpTy'
46
Calling 'TargetTransformInfoImplBase::getOperationCost'
435 }
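The two path notes above summarize the defect this report tracks: on the analyzer's path the 3rd parameter OpTy is null, the fallthrough forwards it unchanged to TargetTransformInfoImplBase::getOperationCost, and that base implementation eventually calls a member function on it. A stripped-down sketch of the same shape; the type, the opcode values and both function names below are illustrative, not the real LLVM code:

struct IllustrativeType {
  unsigned sizeInBits() const { return 32; }
};

// Base layer: for some opcodes it dereferences OpTy without a null check,
// mirroring the shape of the flagged callee.
unsigned baseOperationCost(unsigned Opcode, const IllustrativeType *DstTy,
                           const IllustrativeType *OpTy) {
  if (Opcode == 1) // e.g. an int<->pointer conversion
    return OpTy->sizeInBits() == DstTy->sizeInBits() ? 0 : 1; // crashes if OpTy is null
  return 1;
}

// Derived layer: handles a few opcodes itself and forwards everything else,
// passing OpTy through unchanged -- including when the caller left it null.
unsigned derivedOperationCost(unsigned Opcode, const IllustrativeType *DstTy,
                              const IllustrativeType *OpTy = nullptr) {
  if (Opcode == 0)
    return 0;                                    // handled locally, OpTy unused
  return baseOperationCost(Opcode, DstTy, OpTy); // OpTy may still be null here
}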
436
437 unsigned getInliningThresholdMultiplier() { return 1; }
438
439 int getInlinerVectorBonusPercent() { return 150; }
440
441 void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
442 TTI::UnrollingPreferences &UP) {
443 // This unrolling functionality is target independent, but to provide some
444 // motivation for its intended use, for x86:
445
446 // According to the Intel 64 and IA-32 Architectures Optimization Reference
447 // Manual, Intel Core models and later have a loop stream detector (and
448 // associated uop queue) that can benefit from partial unrolling.
449 // The relevant requirements are:
450 // - The loop must have no more than 4 (8 for Nehalem and later) branches
451 // taken, and none of them may be calls.
452 // - The loop can have no more than 18 (28 for Nehalem and later) uops.
453
454 // According to the Software Optimization Guide for AMD Family 15h
455 // Processors, models 30h-4fh (Steamroller and later) have a loop predictor
456 // and loop buffer which can benefit from partial unrolling.
457 // The relevant requirements are:
458 // - The loop must have fewer than 16 branches
459 // - The loop must have less than 40 uops in all executed loop branches
460
461 // The number of taken branches in a loop is hard to estimate here, and
462 // benchmarking has revealed that it is better not to be conservative when
463 // estimating the branch count. As a result, we'll ignore the branch limits
464 // until someone finds a case where it matters in practice.
465
466 unsigned MaxOps;
467 const TargetSubtargetInfo *ST = getST();
468 if (PartialUnrollingThreshold.getNumOccurrences() > 0)
469 MaxOps = PartialUnrollingThreshold;
470 else if (ST->getSchedModel().LoopMicroOpBufferSize > 0)
471 MaxOps = ST->getSchedModel().LoopMicroOpBufferSize;
472 else
473 return;
474
475 // Scan the loop: don't unroll loops with calls.
476 for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); I != E;
477 ++I) {
478 BasicBlock *BB = *I;
479
480 for (BasicBlock::iterator J = BB->begin(), JE = BB->end(); J != JE; ++J)
481 if (isa<CallInst>(J) || isa<InvokeInst>(J)) {
482 ImmutableCallSite CS(&*J);
483 if (const Function *F = CS.getCalledFunction()) {
484 if (!static_cast<T *>(this)->isLoweredToCall(F))
485 continue;
486 }
487
488 return;
489 }
490 }
491
492 // Enable runtime and partial unrolling up to the specified size.
493 // Enable using trip count upper bound to unroll loops.
494 UP.Partial = UP.Runtime = UP.UpperBound = true;
495 UP.PartialThreshold = MaxOps;
496
497 // Avoid unrolling when optimizing for size.
498 UP.OptSizeThreshold = 0;
499 UP.PartialOptSizeThreshold = 0;
500
501 // Set number of instructions optimized when "back edge"
502 // becomes "fall through" to default value of 2.
503 UP.BEInsns = 2;
504 }
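Net effect of the routine above: an explicit partial-unrolling threshold wins, otherwise the subtarget's loop micro-op buffer size supplies the budget, otherwise the preferences are left untouched, and any loop containing a real call is also left untouched. A condensed standalone sketch of that selection (the struct fields mirror the UP members set above; the parameters are illustrative inputs rather than the real hooks):

struct UnrollPrefs {
  bool Partial = false, Runtime = false, UpperBound = false;
  unsigned PartialThreshold = 0, OptSizeThreshold = 0,
           PartialOptSizeThreshold = 0, BEInsns = 0;
};

// Fills UP and returns true when a partial-unrolling budget can be chosen;
// returns false for the two early-return paths above (no budget source, or
// the loop contains a real call).
bool pickUnrollPrefs(bool HasCmdLineThreshold, unsigned CmdLineThreshold,
                     unsigned LoopMicroOpBufferSize, bool LoopHasCalls,
                     UnrollPrefs &UP) {
  unsigned MaxOps;
  if (HasCmdLineThreshold)
    MaxOps = CmdLineThreshold;          // explicit command-line override
  else if (LoopMicroOpBufferSize > 0)
    MaxOps = LoopMicroOpBufferSize;     // scheduling model's loop buffer
  else
    return false;                       // nothing to base a budget on

  if (LoopHasCalls)                     // calls defeat the loop buffer
    return false;

  UP.Partial = UP.Runtime = UP.UpperBound = true;
  UP.PartialThreshold = MaxOps;
  UP.OptSizeThreshold = UP.PartialOptSizeThreshold = 0; // no unroll at -Os/-Oz
  UP.BEInsns = 2;
  return true;
}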
505
506 bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
507 AssumptionCache &AC,
508 TargetLibraryInfo *LibInfo,
509 HardwareLoopInfo &HWLoopInfo) {
510 return BaseT::isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo);
511 }
512
513 bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE,
514 AssumptionCache &AC, TargetLibraryInfo *TLI,
515 DominatorTree *DT,
516 const LoopAccessInfo *LAI) {
517 return BaseT::preferPredicateOverEpilogue(L, LI, SE, AC, TLI, DT, LAI);
518 }
519
520 int getInstructionLatency(const Instruction *I) {
521 if (isa<LoadInst>(I))
522 return getST()->getSchedModel().DefaultLoadLatency;
523
524 return BaseT::getInstructionLatency(I);
525 }
526
527 virtual Optional<unsigned>
528 getCacheSize(TargetTransformInfo::CacheLevel Level) const {
529 return Optional<unsigned>(
530 getST()->getCacheSize(static_cast<unsigned>(Level)));
531 }
532
533 virtual Optional<unsigned>
534 getCacheAssociativity(TargetTransformInfo::CacheLevel Level) const {
535 Optional<unsigned> TargetResult =
536 getST()->getCacheAssociativity(static_cast<unsigned>(Level));
537
538 if (TargetResult)
539 return TargetResult;
540
541 return BaseT::getCacheAssociativity(Level);
542 }
543
544 virtual unsigned getCacheLineSize() const {
545 return getST()->getCacheLineSize();
546 }
547
548 virtual unsigned getPrefetchDistance() const {
549 return getST()->getPrefetchDistance();
550 }
551
552 virtual unsigned getMinPrefetchStride() const {
553 return getST()->getMinPrefetchStride();
554 }
555
556 virtual unsigned getMaxPrefetchIterationsAhead() const {
557 return getST()->getMaxPrefetchIterationsAhead();
558 }
559
560 /// @}
561
562 /// \name Vector TTI Implementations
563 /// @{
564
565 unsigned getRegisterBitWidth(bool Vector) const { return 32; }
566
567 /// Estimate the overhead of scalarizing an instruction. Insert and Extract
568 /// are set if the result needs to be inserted and/or extracted from vectors.
569 unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) {
570 assert(Ty->isVectorTy() && "Can only scalarize vectors");
571 unsigned Cost = 0;
572
573 for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) {
574 if (Insert)
575 Cost += static_cast<T *>(this)
576 ->getVectorInstrCost(Instruction::InsertElement, Ty, i);
577 if (Extract)
578 Cost += static_cast<T *>(this)
579 ->getVectorInstrCost(Instruction::ExtractElement, Ty, i);
580 }
581
582 return Cost;
583 }
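For a vector of N elements this is just N insert-element costs plus N extract-element costs, whichever of the two are requested; fully scalarizing a 4-element vector with unit per-element costs gives 4 + 4 = 8. The same loop as a standalone sketch, with the per-element costs passed in rather than queried through getVectorInstrCost:

// Simplified scalarization-overhead model: per-element insert/extract costs
// are passed in instead of being queried from the target.
unsigned scalarizationOverhead(unsigned NumElts, bool Insert, bool Extract,
                               unsigned InsertCost = 1,
                               unsigned ExtractCost = 1) {
  unsigned Cost = 0;
  for (unsigned i = 0; i < NumElts; ++i) {
    if (Insert)
      Cost += InsertCost;  // build the result vector element by element
    if (Extract)
      Cost += ExtractCost; // pull the operands out element by element
  }
  return Cost;             // scalarizationOverhead(4, true, true) == 8
}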
584
585 /// Estimate the overhead of scalarizing an instruction's unique
586 /// non-constant operands. The types of the arguments are ordinarily
587 /// scalar, in which case the costs are multiplied by VF.
588 unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
589 unsigned VF) {
590 unsigned Cost = 0;
591 SmallPtrSet<const Value*, 4> UniqueOperands;
592 for (const Value *A : Args) {
593 if (!isa<Constant>(A) && UniqueOperands.insert(A).second) {
594 Type *VecTy = nullptr;
595 if (A->getType()->isVectorTy()) {
596 VecTy = A->getType();
597 // If A is a vector operand, VF should be 1 or correspond to A.
598 assert((VF == 1 || VF == VecTy->getVectorNumElements()) &&
599 "Vector argument does not match VF");
600 }
601 else
602 VecTy = VectorType::get(A->getType(), VF);
603
604 Cost += getScalarizationOverhead(VecTy, false, true);
605 }
606 }
607
608 return Cost;
609 }
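Two details of the loop above are easy to miss: constants are skipped (assumed free to materialize per lane) and an operand that appears several times is only counted once, thanks to the UniqueOperands set. A small standalone model of that accounting, with operands reduced to (id, isConstant) pairs and a unit extract cost; both are assumptions of this sketch:

#include <set>
#include <utility>
#include <vector>

// Each operand is an (id, isConstant) pair. Duplicate ids and constants add
// nothing; every other unique operand costs one extract per vector lane (VF).
unsigned operandsScalarizationOverhead(
    const std::vector<std::pair<int, bool>> &Operands, unsigned VF,
    unsigned ExtractCost = 1) {
  std::set<int> Seen;
  unsigned Cost = 0;
  for (const std::pair<int, bool> &Op : Operands) {
    bool IsConstant = Op.second;
    if (!IsConstant && Seen.insert(Op.first).second)
      Cost += VF * ExtractCost; // extract this operand once per lane
  }
  // e.g. {{1,false},{1,false},{2,true}} with VF == 4 costs 4.
  return Cost;
}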
610
611 unsigned getScalarizationOverhead(Type *VecTy, ArrayRef<const Value *> Args) {
612 assert(VecTy->isVectorTy());
613
614 unsigned Cost = 0;
615
616 Cost += getScalarizationOverhead(VecTy, true, false);
617 if (!Args.empty())
618 Cost += getOperandsScalarizationOverhead(Args,
619 VecTy->getVectorNumElements());
620 else
621 // When no information on arguments is provided, we add the cost
622 // associated with one argument as a heuristic.
623 Cost += getScalarizationOverhead(VecTy, false, true);
624
625 return Cost;
626 }
627
628 unsigned getMaxInterleaveFactor(unsigned VF) { return 1; }
629
630 unsigned getArithmeticInstrCost(
631 unsigned Opcode, Type *Ty,
632 TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
633 TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
634 TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
635 TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
636 ArrayRef<const Value *> Args = ArrayRef<const Value *>()) {
637 // Check if any of the operands are vector operands.
638 const TargetLoweringBase *TLI = getTLI();
639 int ISD = TLI->InstructionOpcodeToISD(Opcode);
640 assert(ISD && "Invalid opcode");
641
642 std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
643
644 bool IsFloat = Ty->isFPOrFPVectorTy();
645 // Assume that floating point arithmetic operations cost twice as much as
646 // integer operations.
647 unsigned OpCost = (IsFloat ? 2 : 1);
648
649 if (TLI->isOperationLegalOrPromote(ISD, LT.second)) {
650 // The operation is legal. Assume it costs 1.
651 // TODO: Once we have extract/insert subvector cost we need to use them.
652 return LT.first * OpCost;
653 }
654
655 if (!TLI->isOperationExpand(ISD, LT.second)) {
656 // If the operation is custom lowered, then assume that the code is twice
657 // as expensive.
658 return LT.first * 2 * OpCost;
659 }
660
661 // Else, assume that we need to scalarize this op.
662 // TODO: If one of the types get legalized by splitting, handle this
663 // similarly to what getCastInstrCost() does.
664 if (Ty->isVectorTy()) {
665 unsigned Num = Ty->getVectorNumElements();
666 unsigned Cost = static_cast<T *>(this)
667 ->getArithmeticInstrCost(Opcode, Ty->getScalarType());
668 // Return the cost of multiple scalar invocation plus the cost of
669 // inserting and extracting the values.
670 return getScalarizationOverhead(Ty, Args) + Num * Cost;
671 }
672
673 // We don't know anything about this scalar instruction.
674 return OpCost;
675 }
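The tiers above are: a legal or promoted operation costs LT.first * OpCost, a custom-lowered one costs twice that, and an operation that must expand on a vector type is scalarized, paying a per-element cost plus the insert/extract overhead; floating-point ops carry a 2x base cost. A condensed standalone sketch of that decision tree, where the enum and the per-element scalar cost (approximated by OpCost) stand in for the TLI queries:

enum class Lowering { Legal, Custom, Expand };

// NumLegalizedParts plays the role of LT.first; ScalarizationOverhead stands
// in for getScalarizationOverhead(Ty, Args). The per-element scalar cost is
// approximated by OpCost rather than queried recursively.
unsigned arithmeticInstrCost(Lowering Action, bool IsFloat,
                             unsigned NumLegalizedParts, unsigned NumElts,
                             unsigned ScalarizationOverhead) {
  unsigned OpCost = IsFloat ? 2 : 1;       // FP assumed twice as expensive
  if (Action == Lowering::Legal)
    return NumLegalizedParts * OpCost;     // one unit per legalized part
  if (Action == Lowering::Custom)
    return NumLegalizedParts * 2 * OpCost; // custom lowering: assume 2x
  if (NumElts > 1)                         // expanded vector op: scalarize
    return ScalarizationOverhead + NumElts * OpCost;
  return OpCost;                           // unknown scalar instruction
}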
676
677 unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
678 Type *SubTp) {
679 switch (Kind) {
680 case TTI::SK_Broadcast:
681 return getBroadcastShuffleOverhead(Tp);
682 case TTI::SK_Select:
683 case TTI::SK_Reverse:
684 case TTI::SK_Transpose:
685 case TTI::SK_PermuteSingleSrc:
686 case TTI::SK_PermuteTwoSrc:
687 return getPermuteShuffleOverhead(Tp);
688 case TTI::SK_ExtractSubvector:
689 return getExtractSubvectorOverhead(Tp, Index, SubTp);
690 case TTI::SK_InsertSubvector:
691 return getInsertSubvectorOverhead(Tp, Index, SubTp);
692 }
693 llvm_unreachable("Unknown TTI::ShuffleKind");
694 }
695
696 unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
697 const Instruction *I = nullptr) {
698 const TargetLoweringBase *TLI = getTLI();
699 int ISD = TLI->InstructionOpcodeToISD(Opcode);
700 assert(ISD && "Invalid opcode");
701 std::pair<unsigned, MVT> SrcLT = TLI->getTypeLegalizationCost(DL, Src);
702 std::pair<unsigned, MVT> DstLT = TLI->getTypeLegalizationCost(DL, Dst);
703
704 // Check for NOOP conversions.
705 if (SrcLT.first == DstLT.first &&
706 SrcLT.second.getSizeInBits() == DstLT.second.getSizeInBits()) {
707
708 // Bitcast between types that are legalized to the same type are free.
709 if (Opcode == Instruction::BitCast || Opcode == Instruction::Trunc)
710 return 0;
711 }
712
713 if (Opcode == Instruction::Trunc &&
714 TLI->isTruncateFree(SrcLT.second, DstLT.second))
715 return 0;
716
717 if (Opcode == Instruction::ZExt &&
718 TLI->isZExtFree(SrcLT.second, DstLT.second))
719 return 0;
720
721 if (Opcode == Instruction::AddrSpaceCast &&
722 TLI->isFreeAddrSpaceCast(Src->getPointerAddressSpace(),
723 Dst->getPointerAddressSpace()))
724 return 0;
725
726 // If this is a zext/sext of a load, return 0 if the corresponding
727 // extending load exists on target.
728 if ((Opcode == Instruction::ZExt || Opcode == Instruction::SExt) &&
729 I && isa<LoadInst>(I->getOperand(0))) {
730 EVT ExtVT = EVT::getEVT(Dst);
731 EVT LoadVT = EVT::getEVT(Src);
732 unsigned LType =
733 ((Opcode == Instruction::ZExt) ? ISD::ZEXTLOAD : ISD::SEXTLOAD);
734 if (TLI->isLoadExtLegal(LType, ExtVT, LoadVT))
735 return 0;
736 }
737
738 // If the cast is marked as legal (or promote) then assume low cost.
739 if (SrcLT.first == DstLT.first &&
740 TLI->isOperationLegalOrPromote(ISD, DstLT.second))
741 return 1;
742
743 // Handle scalar conversions.
744 if (!Src->isVectorTy() && !Dst->isVectorTy()) {
745 // Scalar bitcasts are usually free.
746 if (Opcode == Instruction::BitCast)
747 return 0;
748
749 // Just check the op cost. If the operation is legal then assume it costs
750 // 1.
751 if (!TLI->isOperationExpand(ISD, DstLT.second))
752 return 1;
753
754 // Assume that illegal scalar instruction are expensive.
755 return 4;
756 }
757
758 // Check vector-to-vector casts.
759 if (Dst->isVectorTy() && Src->isVectorTy()) {
760 // If the cast is between same-sized registers, then the check is simple.
761 if (SrcLT.first == DstLT.first &&
762 SrcLT.second.getSizeInBits() == DstLT.second.getSizeInBits()) {
763
764 // Assume that Zext is done using AND.
765 if (Opcode == Instruction::ZExt)
766 return 1;
767
768 // Assume that sext is done using SHL and SRA.
769 if (Opcode == Instruction::SExt)
770 return 2;
771
772 // Just check the op cost. If the operation is legal then assume it
773 // costs
774 // 1 and multiply by the type-legalization overhead.
775 if (!TLI->isOperationExpand(ISD, DstLT.second))
776 return SrcLT.first * 1;
777 }
778
779 // If we are legalizing by splitting, query the concrete TTI for the cost
780 // of casting the original vector twice. We also need to factor in the
781 // cost of the split itself. Count that as 1, to be consistent with
782 // TLI->getTypeLegalizationCost().
783 if ((TLI->getTypeAction(Src->getContext(), TLI->getValueType(DL, Src)) ==
784 TargetLowering::TypeSplitVector) ||
785 (TLI->getTypeAction(Dst->getContext(), TLI->getValueType(DL, Dst)) ==
786 TargetLowering::TypeSplitVector)) {
787 Type *SplitDst = VectorType::get(Dst->getVectorElementType(),
788 Dst->getVectorNumElements() / 2);
789 Type *SplitSrc = VectorType::get(Src->getVectorElementType(),
790 Src->getVectorNumElements() / 2);
791 T *TTI = static_cast<T *>(this);
792 return TTI->getVectorSplitCost() +
793 (2 * TTI->getCastInstrCost(Opcode, SplitDst, SplitSrc, I));
794 }
795
796 // In other cases where the source or destination are illegal, assume
797 // the operation will get scalarized.
798 unsigned Num = Dst->getVectorNumElements();
799 unsigned Cost = static_cast<T *>(this)->getCastInstrCost(
800 Opcode, Dst->getScalarType(), Src->getScalarType(), I);
801
802 // Return the cost of multiple scalar invocation plus the cost of
803 // inserting and extracting the values.
804 return getScalarizationOverhead(Dst, true, true) + Num * Cost;
805 }
806
807 // We already handled vector-to-vector and scalar-to-scalar conversions.
808 // This is where we handle bitcast between vectors
809 // and scalars. We need to assume that the
810 // conversion is scalarized in one way or another.
811 if (Opcode == Instruction::BitCast)
812 // Illegal bitcasts are done by storing and loading from a stack slot.
813 return (Src->isVectorTy() ? getScalarizationOverhead(Src, false, true)
814 : 0) +
815 (Dst->isVectorTy() ? getScalarizationOverhead(Dst, true, false)
816 : 0);
817
818 llvm_unreachable("Unhandled cast");
819 }
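One branch above deserves a worked example: when either side legalizes by splitting, the cost is the split (counted as 1, per the comment above) plus twice the cost of casting the half-width vectors, applied recursively. A cast whose vectors must be halved twice before reaching a legal width, with a legal-width cast cost of 1, therefore costs 1 + 2*(1 + 2*1) = 7. The recursion as a standalone sketch:

// Recursive model of the "legalize by splitting" branch above: each level
// pays one split plus two casts on vectors of half the width.
unsigned splitCastCost(unsigned SplitLevels, unsigned LegalCastCost) {
  if (SplitLevels == 0)
    return LegalCastCost;          // the vector type is already legal
  return 1 /* split */ + 2 * splitCastCost(SplitLevels - 1, LegalCastCost);
}
// splitCastCost(2, 1) == 1 + 2 * (1 + 2 * 1) == 7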
820
821 unsigned getExtractWithExtendCost(unsigned Opcode, Type *Dst,
822 VectorType *VecTy, unsigned Index) {
823 return static_cast<T *>(this)->getVectorInstrCost(
824 Instruction::ExtractElement, VecTy, Index) +
825 static_cast<T *>(this)->getCastInstrCost(Opcode, Dst,
826 VecTy->getElementType());
827 }
828
829 unsigned getCFInstrCost(unsigned Opcode) {
830 // Branches are assumed to be predicted.
831 return 0;
832 }
833
834 unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
835 const Instruction *I) {
836 const TargetLoweringBase *TLI = getTLI();
837 int ISD = TLI->InstructionOpcodeToISD(Opcode);
838 assert(ISD && "Invalid opcode");
839
840 // Selects on vectors are actually vector selects.
841 if (ISD == ISD::SELECT) {
842 assert(CondTy && "CondTy must exist");
843 if (CondTy->isVectorTy())
844 ISD = ISD::VSELECT;
845 }
846 std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
847
848 if (!(ValTy->isVectorTy() && !LT.second.isVector()) &&
849 !TLI->isOperationExpand(ISD, LT.second)) {
850 // The operation is legal. Assume it costs 1. Multiply
851 // by the type-legalization overhead.
852 return LT.first * 1;
853 }
854
855 // Otherwise, assume that the cast is scalarized.
856 // TODO: If one of the types get legalized by splitting, handle this
857 // similarly to what getCastInstrCost() does.
858 if (ValTy->isVectorTy()) {
859 unsigned Num = ValTy->getVectorNumElements();
860 if (CondTy)
861 CondTy = CondTy->getScalarType();
862 unsigned Cost = static_cast<T *>(this)->getCmpSelInstrCost(
863 Opcode, ValTy->getScalarType(), CondTy, I);
864
865 // Return the cost of multiple scalar invocation plus the cost of
866 // inserting and extracting the values.
867 return getScalarizationOverhead(ValTy, true, false) + Num * Cost;
868 }
869
870 // Unknown scalar opcode.
871 return 1;
872 }
873
874 unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
875 std::pair<unsigned, MVT> LT =
876 getTLI()->getTypeLegalizationCost(DL, Val->getScalarType());
877
878 return LT.first;
879 }
880
881 unsigned getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
882 unsigned AddressSpace,
883 const Instruction *I = nullptr) {
884 assert(!Src->isVoidTy() && "Invalid type");
885 std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(DL, Src);
886
887 // Assuming that all loads of legal types cost 1.
888 unsigned Cost = LT.first;
889
890 if (Src->isVectorTy() &&
891 Src->getPrimitiveSizeInBits() < LT.second.getSizeInBits()) {
892 // This is a vector load that legalizes to a larger type than the vector
893 // itself. Unless the corresponding extending load or truncating store is
894 // legal, then this will scalarize.
895 TargetLowering::LegalizeAction LA = TargetLowering::Expand;
896 EVT MemVT = getTLI()->getValueType(DL, Src);
897 if (Opcode == Instruction::Store)
898 LA = getTLI()->getTruncStoreAction(LT.second, MemVT);
899 else
900 LA = getTLI()->getLoadExtAction(ISD::EXTLOAD, LT.second, MemVT);
901
902 if (LA != TargetLowering::Legal && LA != TargetLowering::Custom) {
903 // This is a vector load/store for some illegal type that is scalarized.
904 // We must account for the cost of building or decomposing the vector.
905 Cost += getScalarizationOverhead(Src, Opcode != Instruction::Store,
906 Opcode == Instruction::Store);
907 }
908 }
909
910 return Cost;
911 }
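The extra term above only applies to vector accesses whose in-memory type is narrower than its legalized register type and for which no extending load or truncating store exists; such a vector has to be built (for loads) or decomposed (for stores) element by element. A compact standalone sketch, with LT.first and the per-element cost passed in instead of being queried:

// Simplified memory-op cost: base cost of the legalized accesses plus a
// per-element rebuild penalty when a narrow vector type has no matching
// extending load / truncating store. Loads pay one insert per element and
// stores one extract per element, i.e. NumElts extra units either way here.
unsigned memoryOpCost(unsigned NumLegalizedParts, bool NeedsScalarization,
                      unsigned NumElts, unsigned PerEltCost = 1) {
  unsigned Cost = NumLegalizedParts;  // one unit per legal load/store part
  if (NeedsScalarization)
    Cost += NumElts * PerEltCost;     // build or decompose the vector
  return Cost;
}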
912
913 unsigned getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
914 unsigned Factor,
915 ArrayRef<unsigned> Indices,
916 unsigned Alignment, unsigned AddressSpace,
917 bool UseMaskForCond = false,
918 bool UseMaskForGaps = false) {
919 VectorType *VT = dyn_cast<VectorType>(VecTy);
920 assert(VT && "Expect a vector type for interleaved memory op");
921
922 unsigned NumElts = VT->getNumElements();
923 assert(Factor > 1 && NumElts % Factor == 0 && "Invalid interleave factor");
924
925 unsigned NumSubElts = NumElts / Factor;
926 VectorType *SubVT = VectorType::get(VT->getElementType(), NumSubElts);
927
928 // Firstly, the cost of load/store operation.
929 unsigned Cost;
930 if (UseMaskForCond || UseMaskForGaps)
931 Cost = static_cast<T *>(this)->getMaskedMemoryOpCost(
932 Opcode, VecTy, Alignment, AddressSpace);
933 else
934 Cost = static_cast<T *>(this)->getMemoryOpCost(
935 Opcode, VecTy, MaybeAlign(Alignment), AddressSpace);
936
937 // Legalize the vector type, and get the legalized and unlegalized type
938 // sizes.
939 MVT VecTyLT = getTLI()->getTypeLegalizationCost(DL, VecTy).second;
940 unsigned VecTySize =
941 static_cast<T *>(this)->getDataLayout().getTypeStoreSize(VecTy);
942 unsigned VecTyLTSize = VecTyLT.getStoreSize();
943
944 // Return the ceiling of dividing A by B.
945 auto ceil = [](unsigned A, unsigned B) { return (A + B - 1) / B; };
946
947 // Scale the cost of the memory operation by the fraction of legalized
948 // instructions that will actually be used. We shouldn't account for the
949 // cost of dead instructions since they will be removed.
950 //
951 // E.g., An interleaved load of factor 8:
952 // %vec = load <16 x i64>, <16 x i64>* %ptr
953 // %v0 = shufflevector %vec, undef, <0, 8>
954 //
955 // If <16 x i64> is legalized to 8 v2i64 loads, only 2 of the loads will be
956 // used (those corresponding to elements [0:1] and [8:9] of the unlegalized
957 // type). The other loads are unused.
958 //
959 // We only scale the cost of loads since interleaved store groups aren't
960 // allowed to have gaps.
961 if (Opcode == Instruction::Load && VecTySize > VecTyLTSize) {
962 // The number of loads of a legal type it will take to represent a load
963 // of the unlegalized vector type.
964 unsigned NumLegalInsts = ceil(VecTySize, VecTyLTSize);
965
966 // The number of elements of the unlegalized type that correspond to a
967 // single legal instruction.
968 unsigned NumEltsPerLegalInst = ceil(NumElts, NumLegalInsts);
969
970 // Determine which legal instructions will be used.
971 BitVector UsedInsts(NumLegalInsts, false);
972 for (unsigned Index : Indices)
973 for (unsigned Elt = 0; Elt < NumSubElts; ++Elt)
974 UsedInsts.set((Index + Elt * Factor) / NumEltsPerLegalInst);
975
976 // Scale the cost of the load by the fraction of legal instructions that
977 // will be used.
978 Cost *= UsedInsts.count() / NumLegalInsts;
979 }
980
981 // Then add the cost of the interleave operation.
982 if (Opcode == Instruction::Load) {
983 // The interleave cost is similar to extract sub vectors' elements
984 // from the wide vector, and insert them into sub vectors.
985 //
986 // E.g. An interleaved load of factor 2 (with one member of index 0):
987 // %vec = load <8 x i32>, <8 x i32>* %ptr
988 // %v0 = shuffle %vec, undef, <0, 2, 4, 6> ; Index 0
989 // The cost is estimated as extract elements at 0, 2, 4, 6 from the
990 // <8 x i32> vector and insert them into a <4 x i32> vector.
991
992 assert(Indices.size() <= Factor &&
993 "Interleaved memory op has too many members");
994
995 for (unsigned Index : Indices) {
996 assert(Index < Factor && "Invalid index for interleaved memory op");
997
998 // Extract elements from loaded vector for each sub vector.
999 for (unsigned i = 0; i < NumSubElts; i++)
1000 Cost += static_cast<T *>(this)->getVectorInstrCost(
1001 Instruction::ExtractElement, VT, Index + i * Factor);
1002 }
1003
1004 unsigned InsSubCost = 0;
1005 for (unsigned i = 0; i < NumSubElts; i++)
1006 InsSubCost += static_cast<T *>(this)->getVectorInstrCost(
1007 Instruction::InsertElement, SubVT, i);
1008
1009 Cost += Indices.size() * InsSubCost;
1010 } else {
1011 // The interleave cost is extract all elements from sub vectors, and
1012 // insert them into the wide vector.
1013 //
1014 // E.g. An interleaved store of factor 2:
1015 // %v0_v1 = shuffle %v0, %v1, <0, 4, 1, 5, 2, 6, 3, 7>
1016 // store <8 x i32> %interleaved.vec, <8 x i32>* %ptr
1017 // The cost is estimated as extract all elements from both <4 x i32>
1018 // vectors and insert into the <8 x i32> vector.
1019
1020 unsigned ExtSubCost = 0;
1021 for (unsigned i = 0; i < NumSubElts; i++)
1022 ExtSubCost += static_cast<T *>(this)->getVectorInstrCost(
1023 Instruction::ExtractElement, SubVT, i);
1024 Cost += ExtSubCost * Factor;
1025
1026 for (unsigned i = 0; i < NumElts; i++)
1027 Cost += static_cast<T *>(this)
1028 ->getVectorInstrCost(Instruction::InsertElement, VT, i);
1029 }
1030
1031 if (!UseMaskForCond)
1032 return Cost;
1033
1034 Type *I8Type = Type::getInt8Ty(VT->getContext());
1035 VectorType *MaskVT = VectorType::get(I8Type, NumElts);
1036 SubVT = VectorType::get(I8Type, NumSubElts);
1037
1038 // The Mask shuffling cost is extract all the elements of the Mask
1039 // and insert each of them Factor times into the wide vector:
1040 //
1041 // E.g. an interleaved group with factor 3:
1042 // %mask = icmp ult <8 x i32> %vec1, %vec2
1043 // %interleaved.mask = shufflevector <8 x i1> %mask, <8 x i1> undef,
1044 // <24 x i32> <0,0,0,1,1,1,2,2,2,3,3,3,4,4,4,5,5,5,6,6,6,7,7,7>
1045 // The cost is estimated as extract all mask elements from the <8xi1> mask
1046 // vector and insert them factor times into the <24xi1> shuffled mask
1047 // vector.
1048 for (unsigned i = 0; i < NumSubElts; i++)
1049 Cost += static_cast<T *>(this)->getVectorInstrCost(
1050 Instruction::ExtractElement, SubVT, i);
1051
1052 for (unsigned i = 0; i < NumElts; i++)
1053 Cost += static_cast<T *>(this)->getVectorInstrCost(
1054 Instruction::InsertElement, MaskVT, i);
1055
1056 // The Gaps mask is invariant and created outside the loop, therefore the
1057 // cost of creating it is not accounted for here. However if we have both
1058 // a MaskForGaps and some other mask that guards the execution of the
1059 // memory access, we need to account for the cost of And-ing the two masks
1060 // inside the loop.
1061 if (UseMaskForGaps)
1062 Cost += static_cast<T *>(this)->getArithmeticInstrCost(
1063 BinaryOperator::And, MaskVT);
1064
1065 return Cost;
1066 }
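The scaling step in the middle of this function is easiest to see with the factor-8 example from the comment: a <16 x i64> load legalized to eight v2i64 loads, accessed through a single member at index 0, touches only legal loads 0 and 4, and the memory cost is then multiplied by UsedInsts.count() / NumLegalInsts (an integer division, so a group that uses only a small minority of the legal loads can zero out the memory component). A standalone sketch of the used-load computation:

#include <set>
#include <vector>

// Which of the legalized loads does an interleaved group actually touch?
// Mirrors the UsedInsts computation above.
unsigned countUsedLegalLoads(unsigned NumElts, unsigned Factor,
                             unsigned NumLegalInsts,
                             const std::vector<unsigned> &Indices) {
  unsigned NumSubElts = NumElts / Factor;
  // Elements of the unlegalized vector covered by one legal load (ceiling).
  unsigned NumEltsPerLegalInst = (NumElts + NumLegalInsts - 1) / NumLegalInsts;
  std::set<unsigned> Used;
  for (unsigned Index : Indices)
    for (unsigned Elt = 0; Elt < NumSubElts; ++Elt)
      Used.insert((Index + Elt * Factor) / NumEltsPerLegalInst);
  // countUsedLegalLoads(16, 8, 8, {0}) == 2: only legal loads 0 and 4.
  return Used.size();
}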
1067
1068 /// Get intrinsic cost based on arguments.
1069 unsigned getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
1070 ArrayRef<Value *> Args, FastMathFlags FMF,
1071 unsigned VF = 1) {
1072 unsigned RetVF = (RetTy->isVectorTy() ? RetTy->getVectorNumElements() : 1);
1073 assert((RetVF == 1 || VF == 1) && "VF > 1 and RetVF is a vector type");
1074 auto *ConcreteTTI = static_cast<T *>(this);
1075
1076 switch (IID) {
1077 default: {
1078 // Assume that we need to scalarize this intrinsic.
1079 SmallVector<Type *, 4> Types;
1080 for (Value *Op : Args) {
1081 Type *OpTy = Op->getType();
1082 assert(VF == 1 || !OpTy->isVectorTy());
1083 Types.push_back(VF == 1 ? OpTy : VectorType::get(OpTy, VF));
1084 }
1085
1086 if (VF > 1 && !RetTy->isVoidTy())
1087 RetTy = VectorType::get(RetTy, VF);
1088
1089 // Compute the scalarization overhead based on Args for a vector
1090 // intrinsic. A vectorizer will pass a scalar RetTy and VF > 1, while
1091 // CostModel will pass a vector RetTy and VF is 1.
1092 unsigned ScalarizationCost = std::numeric_limits<unsigned>::max();
1093 if (RetVF > 1 || VF > 1) {
1094 ScalarizationCost = 0;
1095 if (!RetTy->isVoidTy())
1096 ScalarizationCost += getScalarizationOverhead(RetTy, true, false);
1097 ScalarizationCost += getOperandsScalarizationOverhead(Args, VF);
1098 }
1099
1100 return ConcreteTTI->getIntrinsicInstrCost(IID, RetTy, Types, FMF,
1101 ScalarizationCost);
1102 }
1103 case Intrinsic::masked_scatter: {
1104 assert(VF == 1 && "Can't vectorize types here.");
1105 Value *Mask = Args[3];
1106 bool VarMask = !isa<Constant>(Mask);
1107 unsigned Alignment = cast<ConstantInt>(Args[2])->getZExtValue();
1108 return ConcreteTTI->getGatherScatterOpCost(
1109 Instruction::Store, Args[0]->getType(), Args[1], VarMask, Alignment);
1110 }
1111 case Intrinsic::masked_gather: {
1112 assert(VF == 1 && "Can't vectorize types here.");
1113 Value *Mask = Args[2];
1114 bool VarMask = !isa<Constant>(Mask);
1115 unsigned Alignment = cast<ConstantInt>(Args[1])->getZExtValue();
1116 return ConcreteTTI->getGatherScatterOpCost(Instruction::Load, RetTy,
1117 Args[0], VarMask, Alignment);
1118 }
1119 case Intrinsic::experimental_vector_reduce_add:
1120 case Intrinsic::experimental_vector_reduce_mul:
1121 case Intrinsic::experimental_vector_reduce_and:
1122 case Intrinsic::experimental_vector_reduce_or:
1123 case Intrinsic::experimental_vector_reduce_xor:
1124 case Intrinsic::experimental_vector_reduce_v2_fadd:
1125 case Intrinsic::experimental_vector_reduce_v2_fmul:
1126 case Intrinsic::experimental_vector_reduce_smax:
1127 case Intrinsic::experimental_vector_reduce_smin:
1128 case Intrinsic::experimental_vector_reduce_fmax:
1129 case Intrinsic::experimental_vector_reduce_fmin:
1130 case Intrinsic::experimental_vector_reduce_umax:
1131 case Intrinsic::experimental_vector_reduce_umin:
1132 return getIntrinsicInstrCost(IID, RetTy, Args[0]->getType(), FMF);
1133 case Intrinsic::fshl:
1134 case Intrinsic::fshr: {
1135 Value *X = Args[0];
1136 Value *Y = Args[1];
1137 Value *Z = Args[2];
1138 TTI::OperandValueProperties OpPropsX, OpPropsY, OpPropsZ, OpPropsBW;
1139 TTI::OperandValueKind OpKindX = TTI::getOperandInfo(X, OpPropsX);
1140 TTI::OperandValueKind OpKindY = TTI::getOperandInfo(Y, OpPropsY);
1141 TTI::OperandValueKind OpKindZ = TTI::getOperandInfo(Z, OpPropsZ);
1142 TTI::OperandValueKind OpKindBW = TTI::OK_UniformConstantValue;
1143 OpPropsBW = isPowerOf2_32(RetTy->getScalarSizeInBits()) ? TTI::OP_PowerOf2
1144 : TTI::OP_None;
1145 // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
1146 // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
1147 unsigned Cost = 0;
1148 Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::Or, RetTy);
1149 Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::Sub, RetTy);
1150 Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::Shl, RetTy,
1151 OpKindX, OpKindZ, OpPropsX);
1152 Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::LShr, RetTy,
1153 OpKindY, OpKindZ, OpPropsY);
1154 // Non-constant shift amounts require a modulo.
1155 if (OpKindZ != TTI::OK_UniformConstantValue &&
1156 OpKindZ != TTI::OK_NonUniformConstantValue)
1157 Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::URem, RetTy,
1158 OpKindZ, OpKindBW, OpPropsZ,
1159 OpPropsBW);
1160 // For non-rotates (X != Y) we must add shift-by-zero handling costs.
1161 if (X != Y) {
1162 Type *CondTy = RetTy->getWithNewBitWidth(1);
1163 Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy,
1164 CondTy, nullptr);
1165 Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::Select, RetTy,
1166 CondTy, nullptr);
1167 }
1168 return Cost;
1169 }
1170 }
1171 }
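The funnel-shift case shows how these expansions are priced: the formula in the comment, (X << (Z % BW)) | (Y >> (BW - (Z % BW))), is charged as one Or, one Sub, one Shl and one LShr, plus a URem when the shift amount is not constant, plus a compare and a select for shift-by-zero handling when X != Y. A standalone tally of that composition with unit per-operation costs (the real code asks the target for each one):

// Cost of the funnel-shift expansion above, with each component operation
// assumed to cost one unit. Flags select the optional pieces.
unsigned funnelShiftCost(bool ShiftAmountIsConstant, bool IsRotate) {
  unsigned Cost = 0;
  Cost += 1; // Or
  Cost += 1; // Sub: BW - (Z % BW)
  Cost += 1; // Shl
  Cost += 1; // LShr
  if (!ShiftAmountIsConstant)
    Cost += 1; // URem to reduce Z modulo the bit width
  if (!IsRotate)
    Cost += 2; // ICmp + Select for the shift-by-zero case
  return Cost; // constant-amount rotate: 4; general funnel shift: 7
}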
1172
1173 /// Get intrinsic cost based on argument types.
1174 /// If ScalarizationCostPassed is std::numeric_limits<unsigned>::max(), the
1175 /// cost of scalarizing the arguments and the return value will be computed
1176 /// based on types.
1177 unsigned getIntrinsicInstrCost(
1178 Intrinsic::ID IID, Type *RetTy, ArrayRef<Type *> Tys, FastMathFlags FMF,
1179 unsigned ScalarizationCostPassed = std::numeric_limits<unsigned>::max()) {
1180 auto *ConcreteTTI = static_cast<T *>(this);
1181
1182 SmallVector<unsigned, 2> ISDs;
1183 unsigned SingleCallCost = 10; // Library call cost. Make it expensive.
1184 switch (IID) {
1185 default: {
1186 // Assume that we need to scalarize this intrinsic.
1187 unsigned ScalarizationCost = ScalarizationCostPassed;
1188 unsigned ScalarCalls = 1;
1189 Type *ScalarRetTy = RetTy;
1190 if (RetTy->isVectorTy()) {
1191 if (ScalarizationCostPassed == std::numeric_limits<unsigned>::max())
1192 ScalarizationCost = getScalarizationOverhead(RetTy, true, false);
1193 ScalarCalls = std::max(ScalarCalls, RetTy->getVectorNumElements());
1194 ScalarRetTy = RetTy->getScalarType();
1195 }
1196 SmallVector<Type *, 4> ScalarTys;
1197 for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) {
1198 Type *Ty = Tys[i];
1199 if (Ty->isVectorTy()) {
1200 if (ScalarizationCostPassed == std::numeric_limits<unsigned>::max())
1201 ScalarizationCost += getScalarizationOverhead(Ty, false, true);
1202 ScalarCalls = std::max(ScalarCalls, Ty->getVectorNumElements());
1203 Ty = Ty->getScalarType();
1204 }
1205 ScalarTys.push_back(Ty);
1206 }
1207 if (ScalarCalls == 1)
1208 return 1; // Return cost of a scalar intrinsic. Assume it to be cheap.
1209
1210 unsigned ScalarCost =
1211 ConcreteTTI->getIntrinsicInstrCost(IID, ScalarRetTy, ScalarTys, FMF);
1212
1213 return ScalarCalls * ScalarCost + ScalarizationCost;
1214 }
1215 // Look for intrinsics that can be lowered directly or turned into a scalar
1216 // intrinsic call.
1217 case Intrinsic::sqrt:
1218 ISDs.push_back(ISD::FSQRT);
1219 break;
1220 case Intrinsic::sin:
1221 ISDs.push_back(ISD::FSIN);
1222 break;
1223 case Intrinsic::cos:
1224 ISDs.push_back(ISD::FCOS);
1225 break;
1226 case Intrinsic::exp:
1227 ISDs.push_back(ISD::FEXP);
1228 break;
1229 case Intrinsic::exp2:
1230 ISDs.push_back(ISD::FEXP2);
1231 break;
1232 case Intrinsic::log:
1233 ISDs.push_back(ISD::FLOG);
1234 break;
1235 case Intrinsic::log10:
1236 ISDs.push_back(ISD::FLOG10);
1237 break;
1238 case Intrinsic::log2:
1239 ISDs.push_back(ISD::FLOG2);
1240 break;
1241 case Intrinsic::fabs:
1242 ISDs.push_back(ISD::FABS);
1243 break;
1244 case Intrinsic::canonicalize:
1245 ISDs.push_back(ISD::FCANONICALIZE);
1246 break;
1247 case Intrinsic::minnum:
1248 ISDs.push_back(ISD::FMINNUM);
1249 if (FMF.noNaNs())
1250 ISDs.push_back(ISD::FMINIMUM);
1251 break;
1252 case Intrinsic::maxnum:
1253 ISDs.push_back(ISD::FMAXNUM);
1254 if (FMF.noNaNs())
1255 ISDs.push_back(ISD::FMAXIMUM);
1256 break;
1257 case Intrinsic::copysign:
1258 ISDs.push_back(ISD::FCOPYSIGN);
1259 break;
1260 case Intrinsic::floor:
1261 ISDs.push_back(ISD::FFLOOR);
1262 break;
1263 case Intrinsic::ceil:
1264 ISDs.push_back(ISD::FCEIL);
1265 break;
1266 case Intrinsic::trunc:
1267 ISDs.push_back(ISD::FTRUNC);
1268 break;
1269 case Intrinsic::nearbyint:
1270 ISDs.push_back(ISD::FNEARBYINT);
1271 break;
1272 case Intrinsic::rint:
1273 ISDs.push_back(ISD::FRINT);
1274 break;
1275 case Intrinsic::round:
1276 ISDs.push_back(ISD::FROUND);
1277 break;
1278 case Intrinsic::pow:
1279 ISDs.push_back(ISD::FPOW);
1280 break;
1281 case Intrinsic::fma:
1282 ISDs.push_back(ISD::FMA);
1283 break;
1284 case Intrinsic::fmuladd:
1285 ISDs.push_back(ISD::FMA);
1286 break;
1287 // FIXME: We should return 0 whenever getIntrinsicCost == TCC_Free.
1288 case Intrinsic::lifetime_start:
1289 case Intrinsic::lifetime_end:
1290 case Intrinsic::sideeffect:
1291 return 0;
1292 case Intrinsic::masked_store:
1293 return ConcreteTTI->getMaskedMemoryOpCost(Instruction::Store, Tys[0], 0,
1294 0);
1295 case Intrinsic::masked_load:
1296 return ConcreteTTI->getMaskedMemoryOpCost(Instruction::Load, RetTy, 0, 0);
1297 case Intrinsic::experimental_vector_reduce_add:
1298 return ConcreteTTI->getArithmeticReductionCost(Instruction::Add, Tys[0],
1299 /*IsPairwiseForm=*/false);
1300 case Intrinsic::experimental_vector_reduce_mul:
1301 return ConcreteTTI->getArithmeticReductionCost(Instruction::Mul, Tys[0],
1302 /*IsPairwiseForm=*/false);
1303 case Intrinsic::experimental_vector_reduce_and:
1304 return ConcreteTTI->getArithmeticReductionCost(Instruction::And, Tys[0],
1305 /*IsPairwiseForm=*/false);
1306 case Intrinsic::experimental_vector_reduce_or:
1307 return ConcreteTTI->getArithmeticReductionCost(Instruction::Or, Tys[0],
1308 /*IsPairwiseForm=*/false);
1309 case Intrinsic::experimental_vector_reduce_xor:
1310 return ConcreteTTI->getArithmeticReductionCost(Instruction::Xor, Tys[0],
1311 /*IsPairwiseForm=*/false);
1312 case Intrinsic::experimental_vector_reduce_v2_fadd:
1313 return ConcreteTTI->getArithmeticReductionCost(
1314 Instruction::FAdd, Tys[0],
1315 /*IsPairwiseForm=*/false); // FIXME: Add new flag for cost of strict
1316 // reductions.
1317 case Intrinsic::experimental_vector_reduce_v2_fmul:
1318 return ConcreteTTI->getArithmeticReductionCost(
1319 Instruction::FMul, Tys[0],
1320 /*IsPairwiseForm=*/false); // FIXME: Add new flag for cost of strict
1321 // reductions.
1322 case Intrinsic::experimental_vector_reduce_smax:
1323 case Intrinsic::experimental_vector_reduce_smin:
1324 case Intrinsic::experimental_vector_reduce_fmax:
1325 case Intrinsic::experimental_vector_reduce_fmin:
1326 return ConcreteTTI->getMinMaxReductionCost(
1327 Tys[0], CmpInst::makeCmpResultType(Tys[0]), /*IsPairwiseForm=*/false,
1328 /*IsUnsigned=*/true);
1329 case Intrinsic::experimental_vector_reduce_umax:
1330 case Intrinsic::experimental_vector_reduce_umin:
1331 return ConcreteTTI->getMinMaxReductionCost(
1332 Tys[0], CmpInst::makeCmpResultType(Tys[0]), /*IsPairwiseForm=*/false,
1333 /*IsUnsigned=*/false);
1334 case Intrinsic::sadd_sat:
1335 case Intrinsic::ssub_sat: {
1336 Type *CondTy = RetTy->getWithNewBitWidth(1);
1337
1338 Type *OpTy = StructType::create({RetTy, CondTy});
1339 Intrinsic::ID OverflowOp = IID == Intrinsic::sadd_sat
1340 ? Intrinsic::sadd_with_overflow
1341 : Intrinsic::ssub_with_overflow;
1342
1343 // SatMax -> Overflow && SumDiff < 0
1344 // SatMin -> Overflow && SumDiff >= 0
1345 unsigned Cost = 0;
1346 Cost += ConcreteTTI->getIntrinsicInstrCost(
1347 OverflowOp, OpTy, {RetTy, RetTy}, FMF, ScalarizationCostPassed);
1348 Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy,
1349 CondTy, nullptr);
1350 Cost += 2 * ConcreteTTI->getCmpSelInstrCost(BinaryOperator::Select, RetTy,
1351 CondTy, nullptr);
1352 return Cost;
1353 }
1354 case Intrinsic::uadd_sat:
1355 case Intrinsic::usub_sat: {
1356 Type *CondTy = RetTy->getWithNewBitWidth(1);
1357
1358 Type *OpTy = StructType::create({RetTy, CondTy});
1359 Intrinsic::ID OverflowOp = IID == Intrinsic::uadd_sat
1360 ? Intrinsic::uadd_with_overflow
1361 : Intrinsic::usub_with_overflow;
1362
1363 unsigned Cost = 0;
1364 Cost += ConcreteTTI->getIntrinsicInstrCost(
1365 OverflowOp, OpTy, {RetTy, RetTy}, FMF, ScalarizationCostPassed);
1366 Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::Select, RetTy,
1367 CondTy, nullptr);
1368 return Cost;
1369 }
1370 case Intrinsic::smul_fix:
1371 case Intrinsic::umul_fix: {
1372 unsigned ExtSize = RetTy->getScalarSizeInBits() * 2;
1373 Type *ExtTy = RetTy->getWithNewBitWidth(ExtSize);
1374
1375 unsigned ExtOp =
1376 IID == Intrinsic::smul_fix ? Instruction::SExt : Instruction::ZExt;
1377
1378 unsigned Cost = 0;
1379 Cost += 2 * ConcreteTTI->getCastInstrCost(ExtOp, ExtTy, RetTy);
1380 Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Mul, ExtTy);
1381 Cost +=
1382 2 * ConcreteTTI->getCastInstrCost(Instruction::Trunc, RetTy, ExtTy);
1383 Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::LShr, RetTy,
1384 TTI::OK_AnyValue,
1385 TTI::OK_UniformConstantValue);
1386 Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Shl, RetTy,
1387 TTI::OK_AnyValue,
1388 TTI::OK_UniformConstantValue);
1389 Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Or, RetTy);
1390 return Cost;
1391 }
1392 case Intrinsic::sadd_with_overflow:
1393 case Intrinsic::ssub_with_overflow: {
1394 Type *SumTy = RetTy->getContainedType(0);
1395 Type *OverflowTy = RetTy->getContainedType(1);
1396 unsigned Opcode = IID == Intrinsic::sadd_with_overflow
1397 ? BinaryOperator::Add
1398 : BinaryOperator::Sub;
1399
1400 // LHSSign -> LHS >= 0
1401 // RHSSign -> RHS >= 0
1402 // SumSign -> Sum >= 0
1403 //
1404 // Add:
1405 // Overflow -> (LHSSign == RHSSign) && (LHSSign != SumSign)
1406 // Sub:
1407 // Overflow -> (LHSSign != RHSSign) && (LHSSign != SumSign)
1408 unsigned Cost = 0;
1409 Cost += ConcreteTTI->getArithmeticInstrCost(Opcode, SumTy);
1410 Cost += 3 * ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, SumTy,
1411 OverflowTy, nullptr);
1412 Cost += 2 * ConcreteTTI->getCmpSelInstrCost(
1413 BinaryOperator::ICmp, OverflowTy, OverflowTy, nullptr);
1414 Cost +=
1415 ConcreteTTI->getArithmeticInstrCost(BinaryOperator::And, OverflowTy);
1416 return Cost;
1417 }
1418 case Intrinsic::uadd_with_overflow:
1419 case Intrinsic::usub_with_overflow: {
1420 Type *SumTy = RetTy->getContainedType(0);
1421 Type *OverflowTy = RetTy->getContainedType(1);
1422 unsigned Opcode = IID == Intrinsic::uadd_with_overflow
1423 ? BinaryOperator::Add
1424 : BinaryOperator::Sub;
1425
1426 unsigned Cost = 0;
1427 Cost += ConcreteTTI->getArithmeticInstrCost(Opcode, SumTy);
1428 Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, SumTy,
1429 OverflowTy, nullptr);
1430 return Cost;
1431 }
1432 case Intrinsic::smul_with_overflow:
1433 case Intrinsic::umul_with_overflow: {
1434 Type *MulTy = RetTy->getContainedType(0);
1435 Type *OverflowTy = RetTy->getContainedType(1);
1436 unsigned ExtSize = MulTy->getScalarSizeInBits() * 2;
1437 Type *ExtTy = MulTy->getWithNewBitWidth(ExtSize);
1438
1439 unsigned ExtOp =
1440 IID == Intrinsic::smul_fix ? Instruction::SExt : Instruction::ZExt;
1441
1442 unsigned Cost = 0;
1443 Cost += 2 * ConcreteTTI->getCastInstrCost(ExtOp, ExtTy, MulTy);
1444 Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Mul, ExtTy);
1445 Cost +=
1446 2 * ConcreteTTI->getCastInstrCost(Instruction::Trunc, MulTy, ExtTy);
1447 Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::LShr, MulTy,
1448 TTI::OK_AnyValue,
1449 TTI::OK_UniformConstantValue);
1450
1451 if (IID == Intrinsic::smul_with_overflow)
1452 Cost += ConcreteTTI->getArithmeticInstrCost(
1453 Instruction::AShr, MulTy, TTI::OK_AnyValue,
1454 TTI::OK_UniformConstantValue);
1455
1456 Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, MulTy,
1457 OverflowTy, nullptr);
1458 return Cost;
1459 }
1460 case Intrinsic::ctpop:
1461 ISDs.push_back(ISD::CTPOP);
1462 // In case of legalization use TCC_Expensive. This is cheaper than a
1463 // library call but still not a cheap instruction.
1464 SingleCallCost = TargetTransformInfo::TCC_Expensive;
1465 break;
1466 // FIXME: ctlz, cttz, ...
1467 }
1468
1469 const TargetLoweringBase *TLI = getTLI();
1470 std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, RetTy);
1471
1472 SmallVector<unsigned, 2> LegalCost;
1473 SmallVector<unsigned, 2> CustomCost;
1474 for (unsigned ISD : ISDs) {
1475 if (TLI->isOperationLegalOrPromote(ISD, LT.second)) {
1476 if (IID == Intrinsic::fabs && LT.second.isFloatingPoint() &&
1477 TLI->isFAbsFree(LT.second)) {
1478 return 0;
1479 }
1480
1481 // The operation is legal. Assume it costs 1.
1482 // If the type is split to multiple registers, assume that there is some
1483 // overhead to this.
1484 // TODO: Once we have extract/insert subvector cost we need to use them.
1485 if (LT.first > 1)
1486 LegalCost.push_back(LT.first * 2);
1487 else
1488 LegalCost.push_back(LT.first * 1);
1489 } else if (!TLI->isOperationExpand(ISD, LT.second)) {
1490 // If the operation is custom lowered then assume
1491 // that the code is twice as expensive.
1492 CustomCost.push_back(LT.first * 2);
1493 }
1494 }
1495
1496 auto MinLegalCostI = std::min_element(LegalCost.begin(), LegalCost.end());
1497 if (MinLegalCostI != LegalCost.end())
1498 return *MinLegalCostI;
1499
1500 auto MinCustomCostI =
1501 std::min_element(CustomCost.begin(), CustomCost.end());
1502 if (MinCustomCostI != CustomCost.end())
1503 return *MinCustomCostI;
1504
1505 // If we can't lower fmuladd into an FMA estimate the cost as a floating
1506 // point mul followed by an add.
1507 if (IID == Intrinsic::fmuladd)
1508 return ConcreteTTI->getArithmeticInstrCost(BinaryOperator::FMul, RetTy) +
1509 ConcreteTTI->getArithmeticInstrCost(BinaryOperator::FAdd, RetTy);
1510
1511 // Else, assume that we need to scalarize this intrinsic. For math builtins
1512 // this will emit a costly libcall, adding call overhead and spills. Make it
1513 // very expensive.
1514 if (RetTy->isVectorTy()) {
1515 unsigned ScalarizationCost =
1516 ((ScalarizationCostPassed != std::numeric_limits<unsigned>::max())
1517 ? ScalarizationCostPassed
1518 : getScalarizationOverhead(RetTy, true, false));
1519 unsigned ScalarCalls = RetTy->getVectorNumElements();
1520 SmallVector<Type *, 4> ScalarTys;
1521 for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) {
1522 Type *Ty = Tys[i];
1523 if (Ty->isVectorTy())
1524 Ty = Ty->getScalarType();
1525 ScalarTys.push_back(Ty);
1526 }
1527 unsigned ScalarCost = ConcreteTTI->getIntrinsicInstrCost(
1528 IID, RetTy->getScalarType(), ScalarTys, FMF);
1529 for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) {
1530 if (Tys[i]->isVectorTy()) {
1531 if (ScalarizationCostPassed == std::numeric_limits<unsigned>::max())
1532 ScalarizationCost += getScalarizationOverhead(Tys[i], false, true);
1533 ScalarCalls = std::max(ScalarCalls, Tys[i]->getVectorNumElements());
1534 }
1535 }
1536
1537 return ScalarCalls * ScalarCost + ScalarizationCost;
1538 }
1539
1540 // This is going to be turned into a library call, make it expensive.
1541 return SingleCallCost;
1542 }
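After the per-intrinsic switch, this function falls through a fixed priority: cheapest legal lowering, then cheapest custom lowering, then (omitted here) the fabs-free and fmuladd special cases, then scalarization for vector returns, and finally the flat library-call guess. That ladder as a standalone sketch, with the candidate cost lists standing in for LegalCost/CustomCost above:

#include <algorithm>
#include <vector>

// Fallback ladder for an intrinsic with no special case above: prefer the
// cheapest legal lowering, then the cheapest custom one, then scalarize a
// vector call into per-lane scalar calls, else charge a flat library call.
unsigned intrinsicFallbackCost(const std::vector<unsigned> &LegalCosts,
                               const std::vector<unsigned> &CustomCosts,
                               bool RetIsVector, unsigned ScalarCalls,
                               unsigned ScalarCost, unsigned ScalarizationCost,
                               unsigned SingleCallCost = 10) {
  if (!LegalCosts.empty())
    return *std::min_element(LegalCosts.begin(), LegalCosts.end());
  if (!CustomCosts.empty())
    return *std::min_element(CustomCosts.begin(), CustomCosts.end());
  if (RetIsVector)                      // one scalar call per lane plus the
    return ScalarCalls * ScalarCost +   // cost of unpacking/repacking
           ScalarizationCost;
  return SingleCallCost;                // scalar libcall: expensive flat guess
}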
1543
1544 /// Compute a cost of the given call instruction.
1545 ///
1546 /// Compute the cost of calling function F with return type RetTy and
1547 /// argument types Tys. F might be nullptr, in this case the cost of an
1548 /// arbitrary call with the specified signature will be returned.
1549 /// This is used, for instance, when we estimate call of a vector
1550 /// counterpart of the given function.
1551 /// \param F Called function, might be nullptr.
1552 /// \param RetTy Return value types.
1553 /// \param Tys Argument types.
1554 /// \returns The cost of Call instruction.
1555 unsigned getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys) {
1556 return 10;
1557 }
1558
1559 unsigned getNumberOfParts(Type *Tp) {
1560 std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(DL, Tp);
1561 return LT.first;
1562 }
1563
1564 unsigned getAddressComputationCost(Type *Ty, ScalarEvolution *,
1565 const SCEV *) {
1566 return 0;
1567 }
1568
1569 /// Try to calculate arithmetic and shuffle op costs for reduction operations.
1570 /// We're assuming that reduction operation are performing the following way:
1571 /// 1. Non-pairwise reduction
1572 /// %val1 = shufflevector<n x t> %val, <n x t> %undef,
1573 /// <n x i32> <i32 n/2, i32 n/2 + 1, ..., i32 n, i32 undef, ..., i32 undef>
1574 /// \----------------v-------------/ \----------v------------/
1575 /// n/2 elements n/2 elements
1576 /// %red1 = op <n x t> %val, <n x t> val1
1577 /// After this operation we have a vector %red1 where only the first n/2
1578 /// elements are meaningful, the second n/2 elements are undefined and can be
1579 /// dropped. All other operations are actually working with the vector of
1580 /// length n/2, not n, though the real vector length is still n.
1581 /// %val2 = shufflevector<n x t> %red1, <n x t> %undef,
1582 /// <n x i32> <i32 n/4, i32 n/4 + 1, ..., i32 n/2, i32 undef, ..., i32 undef>
1583 /// \----------------v-------------/ \----------v------------/
1584 /// n/4 elements 3*n/4 elements
1585 /// %red2 = op <n x t> %red1, <n x t> val2 - working with the vector of
1586 /// length n/2, the resulting vector has length n/4 etc.
1587 /// 2. Pairwise reduction:
1588 /// Everything is the same except for an additional shuffle operation which
1589 /// is used to produce operands for pairwise kind of reductions.
1590 /// %val1 = shufflevector<n x t> %val, <n x t> %undef,
1591 /// <n x i32> <i32 0, i32 2, ..., i32 n-2, i32 undef, ..., i32 undef>
1592 /// \-------------v----------/ \----------v------------/
1593 /// n/2 elements n/2 elements
1594 /// %val2 = shufflevector<n x t> %val, <n x t> %undef,
1595 /// <n x i32> <i32 1, i32 3, ..., i32 n-1, i32 undef, ..., i32 undef>
1596 /// \-------------v----------/ \----------v------------/
1597 /// n/2 elements n/2 elements
1598 /// %red1 = op <n x t> %val1, <n x t> val2
1599 /// Again, the operation is performed on <n x t> vector, but the resulting
1600 /// vector %red1 is <n/2 x t> vector.
1601 ///
1602 /// The cost model should take into account that the actual length of the
1603 /// vector is reduced on each iteration.
1604 unsigned getArithmeticReductionCost(unsigned Opcode, Type *Ty,
1605 bool IsPairwise) {
1606 assert(Ty->isVectorTy() && "Expect a vector type");
1607 Type *ScalarTy = Ty->getVectorElementType();
1608 unsigned NumVecElts = Ty->getVectorNumElements();
1609 unsigned NumReduxLevels = Log2_32(NumVecElts);
1610 unsigned ArithCost = 0;
1611 unsigned ShuffleCost = 0;
1612 auto *ConcreteTTI = static_cast<T *>(this);
1613 std::pair<unsigned, MVT> LT =
1614 ConcreteTTI->getTLI()->getTypeLegalizationCost(DL, Ty);
1615 unsigned LongVectorCount = 0;
1616 unsigned MVTLen =
1617 LT.second.isVector() ? LT.second.getVectorNumElements() : 1;
1618 while (NumVecElts > MVTLen) {
1619 NumVecElts /= 2;
1620 Type *SubTy = VectorType::get(ScalarTy, NumVecElts);
1621 // Assume the pairwise shuffles add a cost.
1622 ShuffleCost += (IsPairwise + 1) *
1623 ConcreteTTI->getShuffleCost(TTI::SK_ExtractSubvector, Ty,
1624 NumVecElts, SubTy);
1625 ArithCost += ConcreteTTI->getArithmeticInstrCost(Opcode, SubTy);
1626 Ty = SubTy;
1627 ++LongVectorCount;
1628 }
1629
1630 NumReduxLevels -= LongVectorCount;
1631
1632 // The minimal length of the vector is limited by the real length of vector
1633 // operations performed on the current platform. That's why several final
1634 // reduction operations are performed on the vectors with the same
1635 // architecture-dependent length.
1636
1637 // Non pairwise reductions need one shuffle per reduction level. Pairwise
1638 // reductions need two shuffles on every level, but the last one. On that
1639 // level one of the shuffles is <0, u, u, ...> which is identity.
1640 unsigned NumShuffles = NumReduxLevels;
1641 if (IsPairwise && NumReduxLevels >= 1)
1642 NumShuffles += NumReduxLevels - 1;
1643 ShuffleCost += NumShuffles *
1644 ConcreteTTI->getShuffleCost(TTI::SK_PermuteSingleSrc, Ty,
1645 0, Ty);
1646 ArithCost += NumReduxLevels *
1647 ConcreteTTI->getArithmeticInstrCost(Opcode, Ty);
1648 return ShuffleCost + ArithCost +
1649 ConcreteTTI->getVectorInstrCost(Instruction::ExtractElement, Ty, 0);
1650 }
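Concretely, a non-pairwise add reduction of <8 x i32> on a target whose widest legal vector is <4 x i32> pays one extract-subvector shuffle and one <4 x i32> add to fold the upper half in, then two more levels of permute shuffle plus add on the legal type, and a final extract of lane 0: with unit costs that is 3 + 3 + 1 = 7. The same tally as a standalone sketch of the non-pairwise path (unit costs are an assumption):

// Unit-cost model of the non-pairwise reduction above: lanes are halved until
// they fit the widest legal vector (LegalElts), the remaining levels run on
// the legal type, and lane 0 is extracted at the end.
unsigned reductionCost(unsigned NumElts, unsigned LegalElts,
                       unsigned ShuffleCost = 1, unsigned ArithCost = 1,
                       unsigned ExtractCost = 1) {
  unsigned Levels = 0;
  for (unsigned N = NumElts; N > 1; N /= 2)
    ++Levels;                                 // log2(NumElts) reduction levels
  unsigned Cost = 0;
  while (NumElts > LegalElts) {               // fold illegal upper halves in
    NumElts /= 2;
    Cost += ShuffleCost + ArithCost;
    --Levels;
  }
  Cost += Levels * (ShuffleCost + ArithCost); // levels on the legal type
  // reductionCost(8, 4) == 7
  return Cost + ExtractCost;                  // read the scalar result out
}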
1651
1652 /// Try to calculate op costs for min/max reduction operations.
1653 /// \param CondTy Conditional type for the Select instruction.
1654 unsigned getMinMaxReductionCost(Type *Ty, Type *CondTy, bool IsPairwise,
1655 bool) {
1656 assert(Ty->isVectorTy() && "Expect a vector type");
1657 Type *ScalarTy = Ty->getVectorElementType();
1658 Type *ScalarCondTy = CondTy->getVectorElementType();
1659 unsigned NumVecElts = Ty->getVectorNumElements();
1660 unsigned NumReduxLevels = Log2_32(NumVecElts);
1661 unsigned CmpOpcode;
1662 if (Ty->isFPOrFPVectorTy()) {
1663 CmpOpcode = Instruction::FCmp;
1664 } else {
1665 assert(Ty->isIntOrIntVectorTy() &&
1666 "expecting floating point or integer type for min/max reduction");
1667 CmpOpcode = Instruction::ICmp;
1668 }
1669 unsigned MinMaxCost = 0;
1670 unsigned ShuffleCost = 0;
1671 auto *ConcreteTTI = static_cast<T *>(this);
1672 std::pair<unsigned, MVT> LT =
1673 ConcreteTTI->getTLI()->getTypeLegalizationCost(DL, Ty);
1674 unsigned LongVectorCount = 0;
1675 unsigned MVTLen =
1676 LT.second.isVector() ? LT.second.getVectorNumElements() : 1;
1677 while (NumVecElts > MVTLen) {
1678 NumVecElts /= 2;
1679 Type *SubTy = VectorType::get(ScalarTy, NumVecElts);
1680 CondTy = VectorType::get(ScalarCondTy, NumVecElts);
1681
1682 // Assume the pairwise shuffles add a cost.
1683 ShuffleCost += (IsPairwise + 1) *
1684 ConcreteTTI->getShuffleCost(TTI::SK_ExtractSubvector, Ty,
1685 NumVecElts, SubTy);
1686 MinMaxCost +=
1687 ConcreteTTI->getCmpSelInstrCost(CmpOpcode, SubTy, CondTy, nullptr) +
1688 ConcreteTTI->getCmpSelInstrCost(Instruction::Select, SubTy, CondTy,
1689 nullptr);
1690 Ty = SubTy;
1691 ++LongVectorCount;
1692 }
1693
1694 NumReduxLevels -= LongVectorCount;
1695
1696 // The minimal length of the vector is limited by the real length of vector
1697 // operations performed on the current platform. That's why several final
1698 // reduction operations are performed on the vectors with the same
1699 // architecture-dependent length.
1700
1701 // Non-pairwise reductions need one shuffle per reduction level. Pairwise
1702 // reductions need two shuffles on every level except the last one. On that
1703 // level one of the shuffles is <0, u, u, ...>, which is an identity shuffle.
1704 unsigned NumShuffles = NumReduxLevels;
1705 if (IsPairwise && NumReduxLevels >= 1)
1706 NumShuffles += NumReduxLevels - 1;
1707 ShuffleCost += NumShuffles *
1708 ConcreteTTI->getShuffleCost(TTI::SK_PermuteSingleSrc, Ty,
1709 0, Ty);
1710 MinMaxCost +=
1711 NumReduxLevels *
1712 (ConcreteTTI->getCmpSelInstrCost(CmpOpcode, Ty, CondTy, nullptr) +
1713 ConcreteTTI->getCmpSelInstrCost(Instruction::Select, Ty, CondTy,
1714 nullptr));
1715 // The last min/max should be in vector registers and we counted it above.
1716 // So just need a single extractelement.
1717 return ShuffleCost + MinMaxCost +
1718 ConcreteTTI->getVectorInstrCost(Instruction::ExtractElement, Ty, 0);
1719 }
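The min/max variant follows the same split-then-reduce shape; the only difference in the accounting is that each remaining reduction level is charged a compare plus a select instead of a single arithmetic instruction. A minimal sketch of that per-level term, again under assumed unit costs:

// Illustrative sketch only: the per-level cost of a min/max reduction under
// assumed unit costs of 1 for the compare and 1 for the select.
unsigned sketchMinMaxLevelCost(unsigned NumReduxLevels) {
  const unsigned CmpCost = 1;   // assumed icmp/fcmp cost
  const unsigned SelCost = 1;   // assumed select cost
  return NumReduxLevels * (CmpCost + SelCost);
}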
1720
1721 unsigned getVectorSplitCost() { return 1; }
1722
1723 /// @}
1724};
1725
1726/// Concrete BasicTTIImpl that can be used if no further customization
1727/// is needed.
1728class BasicTTIImpl : public BasicTTIImplBase<BasicTTIImpl> {
1729 using BaseT = BasicTTIImplBase<BasicTTIImpl>;
1730
1731 friend class BasicTTIImplBase<BasicTTIImpl>;
1732
1733 const TargetSubtargetInfo *ST;
1734 const TargetLoweringBase *TLI;
1735
1736 const TargetSubtargetInfo *getST() const { return ST; }
1737 const TargetLoweringBase *getTLI() const { return TLI; }
1738
1739public:
1740 explicit BasicTTIImpl(const TargetMachine *TM, const Function &F);
1741};
1742
1743} // end namespace llvm
1744
1745#endif // LLVM_CODEGEN_BASICTTIIMPL_H
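The static_cast<T *>(this) calls in the cost hooks above depend on this CRTP arrangement: the concrete class names itself as the template argument and supplies getST()/getTLI() for the base to call. A stripped-down, hypothetical illustration of that shape (CostModelBase and MyCostModel are invented names for the example, not LLVM API):

// Hypothetical CRTP illustration: the base class reaches back into the
// derived class for the target-specific pieces, much as BasicTTIImplBase<T>
// does via static_cast<T *>(this).
template <typename Derived> struct CostModelBase {
  unsigned getReductionCost(unsigned NumLevels) {
    auto *Self = static_cast<Derived *>(this);
    return NumLevels * Self->getOpCost();  // base logic, derived hook
  }
};

struct MyCostModel : CostModelBase<MyCostModel> {
  unsigned getOpCost() const { return 2; } // target-specific override point
};

With these invented numbers, MyCostModel().getReductionCost(3) evaluates to 6.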