Bug Summary

File: llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
Warning: line 76, column 25
Called C++ object pointer is null
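The path the analyzer reports starts in GCNTTIImpl::getUserCost in AMDGPUTargetTransformInfo.cpp (line 760 below): because the User is not an Instruction, the call is forwarded to TargetTransformInfoImplCRTPBase::getUserCost, and from there (through path steps not all reproduced in this report) control reaches the IntToPtr case of getOperationCost in TargetTransformInfoImpl.h, which dereferences OpTy at line 76. The standalone sketch below only models that shape; it is not LLVM code, and the assumption that the base getUserCost forwards a null operand type when the user does not have exactly one operand is inferred from the analyzer's path notes rather than taken from source shown here.

// Minimal standalone model of the reported defect (sketch under the stated
// assumptions, not LLVM code): the caller supplies an operand type only in
// some cases, but the IntToPtr branch dereferences OpTy unconditionally.
#include <cstdio>

struct Type {
  unsigned Bits;
  unsigned getScalarSizeInBits() const { return Bits; }
};

enum Opcode { IntToPtr, Other };

unsigned getOperationCost(Opcode Op, Type * /*Ty*/, Type *OpTy) {
  switch (Op) {
  case IntToPtr:
    // Mirrors line 76 of TargetTransformInfoImpl.h: no null check on OpTy.
    return OpTy->getScalarSizeInBits() <= 64 ? 0 : 1;
  default:
    return 1;
  }
}

unsigned getUserCost(Opcode Op, Type *Ty, unsigned NumOperands, Type *Op0Ty) {
  // Assumed caller pattern (taken from the analyzer path, not shown above):
  // the operand type is forwarded only when there is exactly one operand.
  return getOperationCost(Op, Ty, NumOperands == 1 ? Op0Ty : nullptr);
}

int main() {
  Type I64{64};
  std::printf("%u\n", getUserCost(IntToPtr, &I64, 1, &I64)); // fine: OpTy set
  // getUserCost(IntToPtr, &I64, 2, nullptr) would dereference a null OpTy,
  // which is the defect this report flags.
  return 0;
}

Compiling and running the model prints the cost of the safe call; enabling the commented-out call reproduces the null dereference the analyzer warns about.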

Annotated Source Code


clang -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name AMDGPUTargetTransformInfo.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mthread-model posix -mframe-pointer=none -fmath-errno -fno-rounding-math -masm-verbose -mconstructor-aliases -munwind-tables -target-cpu x86-64 -dwarf-column-info -fno-split-dwarf-inlining -debugger-tuning=gdb -ffunction-sections -fdata-sections -resource-dir /usr/lib/llvm-10/lib/clang/10.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I /build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/build-llvm/lib/Target/AMDGPU -I /build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/llvm/lib/Target/AMDGPU -I /build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/build-llvm/include -I /build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/llvm/include -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0/backward -internal-isystem /usr/local/include -internal-isystem /usr/lib/llvm-10/lib/clang/10.0.0/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir /build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/build-llvm/lib/Target/AMDGPU -fdebug-prefix-map=/build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd=. -ferror-limit 19 -fmessage-length 0 -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -fobjc-runtime=gcc -fdiagnostics-show-option -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -o /tmp/scan-build-2020-01-13-084841-49055-1 -x c++ /build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp

/build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp

1//===- AMDGPUTargetTransformInfo.cpp - AMDGPU specific TTI pass -----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// \file
10// This file implements a TargetTransformInfo analysis pass specific to the
11// AMDGPU target machine. It uses the target's detailed information to provide
12// more precise answers to certain TTI queries, while letting the target
13// independent and default TTI implementations handle the rest.
14//
15//===----------------------------------------------------------------------===//
16
17#include "AMDGPUTargetTransformInfo.h"
18#include "AMDGPUSubtarget.h"
19#include "Utils/AMDGPUBaseInfo.h"
20#include "llvm/ADT/STLExtras.h"
21#include "llvm/Analysis/LoopInfo.h"
22#include "llvm/Analysis/TargetTransformInfo.h"
23#include "llvm/Analysis/ValueTracking.h"
24#include "llvm/CodeGen/ISDOpcodes.h"
25#include "llvm/CodeGen/ValueTypes.h"
26#include "llvm/IR/Argument.h"
27#include "llvm/IR/Attributes.h"
28#include "llvm/IR/BasicBlock.h"
29#include "llvm/IR/CallingConv.h"
30#include "llvm/IR/DataLayout.h"
31#include "llvm/IR/DerivedTypes.h"
32#include "llvm/IR/Function.h"
33#include "llvm/IR/Instruction.h"
34#include "llvm/IR/Instructions.h"
35#include "llvm/IR/IntrinsicInst.h"
36#include "llvm/IR/Module.h"
37#include "llvm/IR/PatternMatch.h"
38#include "llvm/IR/Type.h"
39#include "llvm/IR/Value.h"
40#include "llvm/MC/SubtargetFeature.h"
41#include "llvm/Support/Casting.h"
42#include "llvm/Support/CommandLine.h"
43#include "llvm/Support/Debug.h"
44#include "llvm/Support/ErrorHandling.h"
45#include "llvm/Support/MachineValueType.h"
46#include "llvm/Support/raw_ostream.h"
47#include "llvm/Target/TargetMachine.h"
48#include <algorithm>
49#include <cassert>
50#include <limits>
51#include <utility>
52
53using namespace llvm;
54
55#define DEBUG_TYPE "AMDGPUtti"
56
57static cl::opt<unsigned> UnrollThresholdPrivate(
58 "amdgpu-unroll-threshold-private",
59 cl::desc("Unroll threshold for AMDGPU if private memory used in a loop"),
60 cl::init(2700), cl::Hidden);
61
62static cl::opt<unsigned> UnrollThresholdLocal(
63 "amdgpu-unroll-threshold-local",
64 cl::desc("Unroll threshold for AMDGPU if local memory used in a loop"),
65 cl::init(1000), cl::Hidden);
66
67static cl::opt<unsigned> UnrollThresholdIf(
68 "amdgpu-unroll-threshold-if",
69 cl::desc("Unroll threshold increment for AMDGPU for each if statement inside loop"),
70 cl::init(150), cl::Hidden);
71
72static bool dependsOnLocalPhi(const Loop *L, const Value *Cond,
73 unsigned Depth = 0) {
74 const Instruction *I = dyn_cast<Instruction>(Cond);
75 if (!I)
76 return false;
77
78 for (const Value *V : I->operand_values()) {
79 if (!L->contains(I))
80 continue;
81 if (const PHINode *PHI = dyn_cast<PHINode>(V)) {
82 if (llvm::none_of(L->getSubLoops(), [PHI](const Loop* SubLoop) {
83 return SubLoop->contains(PHI); }))
84 return true;
85 } else if (Depth < 10 && dependsOnLocalPhi(L, V, Depth+1))
86 return true;
87 }
88 return false;
89}
90
91void AMDGPUTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
92 TTI::UnrollingPreferences &UP) {
93 const Function &F = *L->getHeader()->getParent();
94 UP.Threshold = AMDGPU::getIntegerAttribute(F, "amdgpu-unroll-threshold", 300);
95 UP.MaxCount = std::numeric_limits<unsigned>::max();
96 UP.Partial = true;
97
98 // TODO: Do we want runtime unrolling?
99
100 // Maximum alloca size that can fit in registers. Reserve 16 registers.
101 const unsigned MaxAlloca = (256 - 16) * 4;
102 unsigned ThresholdPrivate = UnrollThresholdPrivate;
103 unsigned ThresholdLocal = UnrollThresholdLocal;
104 unsigned MaxBoost = std::max(ThresholdPrivate, ThresholdLocal);
105 for (const BasicBlock *BB : L->getBlocks()) {
106 const DataLayout &DL = BB->getModule()->getDataLayout();
107 unsigned LocalGEPsSeen = 0;
108
109 if (llvm::any_of(L->getSubLoops(), [BB](const Loop* SubLoop) {
110 return SubLoop->contains(BB); }))
111 continue; // Block belongs to an inner loop.
112
113 for (const Instruction &I : *BB) {
114 // Unroll a loop which contains an "if" statement whose condition is
115 // defined by a PHI belonging to the loop. This may help to eliminate the
116 // if region and potentially even the PHI itself, saving on both divergence
117 // and registers used for the PHI.
118 // Add a small bonus for each such "if" statement.
119 if (const BranchInst *Br = dyn_cast<BranchInst>(&I)) {
120 if (UP.Threshold < MaxBoost && Br->isConditional()) {
121 BasicBlock *Succ0 = Br->getSuccessor(0);
122 BasicBlock *Succ1 = Br->getSuccessor(1);
123 if ((L->contains(Succ0) && L->isLoopExiting(Succ0)) ||
124 (L->contains(Succ1) && L->isLoopExiting(Succ1)))
125 continue;
126 if (dependsOnLocalPhi(L, Br->getCondition())) {
127 UP.Threshold += UnrollThresholdIf;
128 LLVM_DEBUG(dbgs() << "Set unroll threshold " << UP.Threshold
129 << " for loop:\n"
130 << *L << " due to " << *Br << '\n');
131 if (UP.Threshold >= MaxBoost)
132 return;
133 }
134 }
135 continue;
136 }
137
138 const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&I);
139 if (!GEP)
140 continue;
141
142 unsigned AS = GEP->getAddressSpace();
143 unsigned Threshold = 0;
144 if (AS == AMDGPUAS::PRIVATE_ADDRESS)
145 Threshold = ThresholdPrivate;
146 else if (AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS)
147 Threshold = ThresholdLocal;
148 else
149 continue;
150
151 if (UP.Threshold >= Threshold)
152 continue;
153
154 if (AS == AMDGPUAS::PRIVATE_ADDRESS) {
155 const Value *Ptr = GEP->getPointerOperand();
156 const AllocaInst *Alloca =
157 dyn_cast<AllocaInst>(GetUnderlyingObject(Ptr, DL));
158 if (!Alloca || !Alloca->isStaticAlloca())
159 continue;
160 Type *Ty = Alloca->getAllocatedType();
161 unsigned AllocaSize = Ty->isSized() ? DL.getTypeAllocSize(Ty) : 0;
162 if (AllocaSize > MaxAlloca)
163 continue;
164 } else if (AS == AMDGPUAS::LOCAL_ADDRESS ||
165 AS == AMDGPUAS::REGION_ADDRESS) {
166 LocalGEPsSeen++;
167 // Inhibit unrolling for local memory if we have seen addressing that is
168 // not to a variable; most likely we will be unable to combine it.
169 // Do not unroll too-deep inner loops for local memory, to give an outer
170 // loop a chance to be unrolled for a more important reason.
171 if (LocalGEPsSeen > 1 || L->getLoopDepth() > 2 ||
172 (!isa<GlobalVariable>(GEP->getPointerOperand()) &&
173 !isa<Argument>(GEP->getPointerOperand())))
174 continue;
175 }
176
177 // Check if GEP depends on a value defined by this loop itself.
178 bool HasLoopDef = false;
179 for (const Value *Op : GEP->operands()) {
180 const Instruction *Inst = dyn_cast<Instruction>(Op);
181 if (!Inst || L->isLoopInvariant(Op))
182 continue;
183
184 if (llvm::any_of(L->getSubLoops(), [Inst](const Loop* SubLoop) {
185 return SubLoop->contains(Inst); }))
186 continue;
187 HasLoopDef = true;
188 break;
189 }
190 if (!HasLoopDef)
191 continue;
192
193 // We want to do whatever we can to limit the number of alloca
194 // instructions that make it through to the code generator. allocas
195 // require us to use indirect addressing, which is slow and prone to
196 // compiler bugs. If this loop does an address calculation on an
197 // alloca ptr, then we want to use a higher than normal loop unroll
198 // threshold. This will give SROA a better chance to eliminate these
199 // allocas.
200 //
201 // We also want to have more unrolling for local memory to let ds
202 // instructions with different offsets combine.
203 //
204 // Don't use the maximum allowed value here as it will make some
205 // programs way too big.
206 UP.Threshold = Threshold;
207 LLVM_DEBUG(dbgs() << "Set unroll threshold " << Threshold
208 << " for loop:\n"
209 << *L << " due to " << *GEP << '\n');
210 if (UP.Threshold >= MaxBoost)
211 return;
212 }
213 }
214}
215
216unsigned GCNTTIImpl::getHardwareNumberOfRegisters(bool Vec) const {
217 // The concept of vector registers doesn't really exist. Some packed vector
218 // operations operate on the normal 32-bit registers.
219 return 256;
220}
221
222unsigned GCNTTIImpl::getNumberOfRegisters(bool Vec) const {
223 // This is really the number of registers to fill when vectorizing /
224 // interleaving loops, so we lie to avoid trying to use all registers.
225 return getHardwareNumberOfRegisters(Vec) >> 3;
226}
227
228unsigned GCNTTIImpl::getRegisterBitWidth(bool Vector) const {
229 return 32;
230}
231
232unsigned GCNTTIImpl::getMinVectorRegisterBitWidth() const {
233 return 32;
234}
235
236unsigned GCNTTIImpl::getLoadVectorFactor(unsigned VF, unsigned LoadSize,
237 unsigned ChainSizeInBytes,
238 VectorType *VecTy) const {
239 unsigned VecRegBitWidth = VF * LoadSize;
240 if (VecRegBitWidth > 128 && VecTy->getScalarSizeInBits() < 32)
241 // TODO: Support element-size less than 32bit?
242 return 128 / LoadSize;
243
244 return VF;
245}
246
247unsigned GCNTTIImpl::getStoreVectorFactor(unsigned VF, unsigned StoreSize,
248 unsigned ChainSizeInBytes,
249 VectorType *VecTy) const {
250 unsigned VecRegBitWidth = VF * StoreSize;
251 if (VecRegBitWidth > 128)
252 return 128 / StoreSize;
253
254 return VF;
255}
256
257unsigned GCNTTIImpl::getLoadStoreVecRegBitWidth(unsigned AddrSpace) const {
258 if (AddrSpace == AMDGPUAS::GLOBAL_ADDRESS ||
259 AddrSpace == AMDGPUAS::CONSTANT_ADDRESS ||
260 AddrSpace == AMDGPUAS::CONSTANT_ADDRESS_32BIT ||
261 AddrSpace == AMDGPUAS::BUFFER_FAT_POINTER) {
262 return 512;
263 }
264
265 if (AddrSpace == AMDGPUAS::FLAT_ADDRESS ||
266 AddrSpace == AMDGPUAS::LOCAL_ADDRESS ||
267 AddrSpace == AMDGPUAS::REGION_ADDRESS)
268 return 128;
269
270 if (AddrSpace == AMDGPUAS::PRIVATE_ADDRESS)
271 return 8 * ST->getMaxPrivateElementSize();
272
273 llvm_unreachable("unhandled address space");
274}
275
276bool GCNTTIImpl::isLegalToVectorizeMemChain(unsigned ChainSizeInBytes,
277 unsigned Alignment,
278 unsigned AddrSpace) const {
279 // We allow vectorization of flat stores, even though we may need to decompose
280 // them later if they may access private memory. We don't have enough context
281 // here, and legalization can handle it.
282 if (AddrSpace == AMDGPUAS::PRIVATE_ADDRESS) {
283 return (Alignment >= 4 || ST->hasUnalignedScratchAccess()) &&
284 ChainSizeInBytes <= ST->getMaxPrivateElementSize();
285 }
286 return true;
287}
288
289bool GCNTTIImpl::isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
290 unsigned Alignment,
291 unsigned AddrSpace) const {
292 return isLegalToVectorizeMemChain(ChainSizeInBytes, Alignment, AddrSpace);
293}
294
295bool GCNTTIImpl::isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
296 unsigned Alignment,
297 unsigned AddrSpace) const {
298 return isLegalToVectorizeMemChain(ChainSizeInBytes, Alignment, AddrSpace);
299}
300
301unsigned GCNTTIImpl::getMaxInterleaveFactor(unsigned VF) {
302 // Disable unrolling if the loop is not vectorized.
303 // TODO: Enable this again.
304 if (VF == 1)
305 return 1;
306
307 return 8;
308}
309
310bool GCNTTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst,
311 MemIntrinsicInfo &Info) const {
312 switch (Inst->getIntrinsicID()) {
313 case Intrinsic::amdgcn_atomic_inc:
314 case Intrinsic::amdgcn_atomic_dec:
315 case Intrinsic::amdgcn_ds_ordered_add:
316 case Intrinsic::amdgcn_ds_ordered_swap:
317 case Intrinsic::amdgcn_ds_fadd:
318 case Intrinsic::amdgcn_ds_fmin:
319 case Intrinsic::amdgcn_ds_fmax: {
320 auto *Ordering = dyn_cast<ConstantInt>(Inst->getArgOperand(2));
321 auto *Volatile = dyn_cast<ConstantInt>(Inst->getArgOperand(4));
322 if (!Ordering || !Volatile)
323 return false; // Invalid.
324
325 unsigned OrderingVal = Ordering->getZExtValue();
326 if (OrderingVal > static_cast<unsigned>(AtomicOrdering::SequentiallyConsistent))
327 return false;
328
329 Info.PtrVal = Inst->getArgOperand(0);
330 Info.Ordering = static_cast<AtomicOrdering>(OrderingVal);
331 Info.ReadMem = true;
332 Info.WriteMem = true;
333 Info.IsVolatile = !Volatile->isNullValue();
334 return true;
335 }
336 default:
337 return false;
338 }
339}
340
341int GCNTTIImpl::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
342 TTI::OperandValueKind Opd1Info,
343 TTI::OperandValueKind Opd2Info,
344 TTI::OperandValueProperties Opd1PropInfo,
345 TTI::OperandValueProperties Opd2PropInfo,
346 ArrayRef<const Value *> Args,
347 const Instruction *CxtI) {
348 EVT OrigTy = TLI->getValueType(DL, Ty);
349 if (!OrigTy.isSimple()) {
350 return BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
351 Opd1PropInfo, Opd2PropInfo);
352 }
353
354 // Legalize the type.
355 std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
356 int ISD = TLI->InstructionOpcodeToISD(Opcode);
357
358 // Because we don't have any legal vector operations, only legal vector
359 // types, we need to account for split vectors.
360 unsigned NElts = LT.second.isVector() ?
361 LT.second.getVectorNumElements() : 1;
362
363 MVT::SimpleValueType SLT = LT.second.getScalarType().SimpleTy;
364
365 switch (ISD) {
366 case ISD::SHL:
367 case ISD::SRL:
368 case ISD::SRA:
369 if (SLT == MVT::i64)
370 return get64BitInstrCost() * LT.first * NElts;
371
372 if (ST->has16BitInsts() && SLT == MVT::i16)
373 NElts = (NElts + 1) / 2;
374
375 // i32
376 return getFullRateInstrCost() * LT.first * NElts;
377 case ISD::ADD:
378 case ISD::SUB:
379 case ISD::AND:
380 case ISD::OR:
381 case ISD::XOR:
382 if (SLT == MVT::i64) {
383 // and, or and xor are typically split into 2 VALU instructions.
384 return 2 * getFullRateInstrCost() * LT.first * NElts;
385 }
386
387 if (ST->has16BitInsts() && SLT == MVT::i16)
388 NElts = (NElts + 1) / 2;
389
390 return LT.first * NElts * getFullRateInstrCost();
391 case ISD::MUL: {
392 const int QuarterRateCost = getQuarterRateInstrCost();
393 if (SLT == MVT::i64) {
394 const int FullRateCost = getFullRateInstrCost();
395 return (4 * QuarterRateCost + (2 * 2) * FullRateCost) * LT.first * NElts;
396 }
397
398 if (ST->has16BitInsts() && SLT == MVT::i16)
399 NElts = (NElts + 1) / 2;
400
401 // i32
402 return QuarterRateCost * NElts * LT.first;
403 }
404 case ISD::FADD:
405 case ISD::FSUB:
406 case ISD::FMUL:
407 if (SLT == MVT::f64)
408 return LT.first * NElts * get64BitInstrCost();
409
410 if (ST->has16BitInsts() && SLT == MVT::f16)
411 NElts = (NElts + 1) / 2;
412
413 if (SLT == MVT::f32 || SLT == MVT::f16)
414 return LT.first * NElts * getFullRateInstrCost();
415 break;
416 case ISD::FDIV:
417 case ISD::FREM:
418 // FIXME: frem should be handled separately. The fdiv in it is most of it,
419 // but the current lowering is also not entirely correct.
420 if (SLT == MVT::f64) {
421 int Cost = 4 * get64BitInstrCost() + 7 * getQuarterRateInstrCost();
422 // Add cost of workaround.
423 if (!ST->hasUsableDivScaleConditionOutput())
424 Cost += 3 * getFullRateInstrCost();
425
426 return LT.first * Cost * NElts;
427 }
428
429 if (!Args.empty() && match(Args[0], PatternMatch::m_FPOne())) {
430 // TODO: This is more complicated, unsafe flags etc.
431 if ((SLT == MVT::f32 && !HasFP32Denormals) ||
432 (SLT == MVT::f16 && ST->has16BitInsts())) {
433 return LT.first * getQuarterRateInstrCost() * NElts;
434 }
435 }
436
437 if (SLT == MVT::f16 && ST->has16BitInsts()) {
438 // 2 x v_cvt_f32_f16
439 // f32 rcp
440 // f32 fmul
441 // v_cvt_f16_f32
442 // f16 div_fixup
443 int Cost = 4 * getFullRateInstrCost() + 2 * getQuarterRateInstrCost();
444 return LT.first * Cost * NElts;
445 }
446
447 if (SLT == MVT::f32 || SLT == MVT::f16) {
448 int Cost = 7 * getFullRateInstrCost() + 1 * getQuarterRateInstrCost();
449
450 if (!HasFP32Denormals) {
451 // FP mode switches.
452 Cost += 2 * getFullRateInstrCost();
453 }
454
455 return LT.first * NElts * Cost;
456 }
457 break;
458 default:
459 break;
460 }
461
462 return BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
463 Opd1PropInfo, Opd2PropInfo);
464}
465
466template <typename T>
467int GCNTTIImpl::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
468 ArrayRef<T *> Args,
469 FastMathFlags FMF, unsigned VF) {
470 if (ID != Intrinsic::fma)
471 return BaseT::getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF);
472
473 EVT OrigTy = TLI->getValueType(DL, RetTy);
474 if (!OrigTy.isSimple()) {
475 return BaseT::getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF);
476 }
477
478 // Legalize the type.
479 std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, RetTy);
480
481 unsigned NElts = LT.second.isVector() ?
482 LT.second.getVectorNumElements() : 1;
483
484 MVT::SimpleValueType SLT = LT.second.getScalarType().SimpleTy;
485
486 if (SLT == MVT::f64)
487 return LT.first * NElts * get64BitInstrCost();
488
489 if (ST->has16BitInsts() && SLT == MVT::f16)
490 NElts = (NElts + 1) / 2;
491
492 return LT.first * NElts * (ST->hasFastFMAF32() ? getHalfRateInstrCost()
493 : getQuarterRateInstrCost());
494}
495
496int GCNTTIImpl::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
497 ArrayRef<Value*> Args, FastMathFlags FMF,
498 unsigned VF) {
499 return getIntrinsicInstrCost<Value>(ID, RetTy, Args, FMF, VF);
500}
501
502int GCNTTIImpl::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
503 ArrayRef<Type *> Tys, FastMathFlags FMF,
504 unsigned ScalarizationCostPassed) {
505 return getIntrinsicInstrCost<Type>(ID, RetTy, Tys, FMF,
506 ScalarizationCostPassed);
507}
508
509unsigned GCNTTIImpl::getCFInstrCost(unsigned Opcode) {
510 // XXX - For some reason this isn't called for switch.
511 switch (Opcode) {
512 case Instruction::Br:
513 case Instruction::Ret:
514 return 10;
515 default:
516 return BaseT::getCFInstrCost(Opcode);
517 }
518}
519
520int GCNTTIImpl::getArithmeticReductionCost(unsigned Opcode, Type *Ty,
521 bool IsPairwise) {
522 EVT OrigTy = TLI->getValueType(DL, Ty);
523
524 // Computes cost on targets that have packed math instructions (which
525 // support 16-bit types only).
526 if (IsPairwise ||
527 !ST->hasVOP3PInsts() ||
528 OrigTy.getScalarSizeInBits() != 16)
529 return BaseT::getArithmeticReductionCost(Opcode, Ty, IsPairwise);
530
531 std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
532 return LT.first * getFullRateInstrCost();
533}
534
535int GCNTTIImpl::getMinMaxReductionCost(Type *Ty, Type *CondTy,
536 bool IsPairwise,
537 bool IsUnsigned) {
538 EVT OrigTy = TLI->getValueType(DL, Ty);
539
540 // Computes cost on targets that have packed math instructions (which
541 // support 16-bit types only).
542 if (IsPairwise ||
543 !ST->hasVOP3PInsts() ||
544 OrigTy.getScalarSizeInBits() != 16)
545 return BaseT::getMinMaxReductionCost(Ty, CondTy, IsPairwise, IsUnsigned);
546
547 std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
548 return LT.first * getHalfRateInstrCost();
549}
550
551int GCNTTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy,
552 unsigned Index) {
553 switch (Opcode) {
554 case Instruction::ExtractElement:
555 case Instruction::InsertElement: {
556 unsigned EltSize
557 = DL.getTypeSizeInBits(cast<VectorType>(ValTy)->getElementType());
558 if (EltSize < 32) {
559 if (EltSize == 16 && Index == 0 && ST->has16BitInsts())
560 return 0;
561 return BaseT::getVectorInstrCost(Opcode, ValTy, Index);
562 }
563
564 // Extracts are just reads of a subregister, so are free. Inserts are
565 // considered free because we don't want to have any cost for scalarizing
566 // operations, and we don't have to copy into a different register class.
567
568 // Dynamic indexing isn't free and is best avoided.
569 return Index == ~0u ? 2 : 0;
570 }
571 default:
572 return BaseT::getVectorInstrCost(Opcode, ValTy, Index);
573 }
574}
575
576
577
578static bool isArgPassedInSGPR(const Argument *A) {
579 const Function *F = A->getParent();
580
581 // Arguments to compute shaders are never a source of divergence.
582 CallingConv::ID CC = F->getCallingConv();
583 switch (CC) {
584 case CallingConv::AMDGPU_KERNEL:
585 case CallingConv::SPIR_KERNEL:
586 return true;
587 case CallingConv::AMDGPU_VS:
588 case CallingConv::AMDGPU_LS:
589 case CallingConv::AMDGPU_HS:
590 case CallingConv::AMDGPU_ES:
591 case CallingConv::AMDGPU_GS:
592 case CallingConv::AMDGPU_PS:
593 case CallingConv::AMDGPU_CS:
594 // For non-compute shaders, SGPR inputs are marked with either inreg or byval.
595 // Everything else is in VGPRs.
596 return F->getAttributes().hasParamAttribute(A->getArgNo(), Attribute::InReg) ||
597 F->getAttributes().hasParamAttribute(A->getArgNo(), Attribute::ByVal);
598 default:
599 // TODO: Should calls support inreg for SGPR inputs?
600 return false;
601 }
602}
603
604/// \returns true if the result of the value could potentially be
605/// different across workitems in a wavefront.
606bool GCNTTIImpl::isSourceOfDivergence(const Value *V) const {
607 if (const Argument *A = dyn_cast<Argument>(V))
608 return !isArgPassedInSGPR(A);
609
610 // Loads from the private and flat address spaces are divergent, because
611 // threads can execute the load instruction with the same inputs and get
612 // different results.
613 //
614 // All other loads are not divergent, because if threads issue loads with the
615 // same arguments, they will always get the same result.
616 if (const LoadInst *Load = dyn_cast<LoadInst>(V))
617 return Load->getPointerAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS ||
618 Load->getPointerAddressSpace() == AMDGPUAS::FLAT_ADDRESS;
619
620 // Atomics are divergent because they are executed sequentially: when an
621 // atomic operation refers to the same address in each thread, then each
622 // thread after the first sees the value written by the previous thread as
623 // the original value.
624 if (isa<AtomicRMWInst>(V) || isa<AtomicCmpXchgInst>(V))
625 return true;
626
627 if (const IntrinsicInst *Intrinsic = dyn_cast<IntrinsicInst>(V))
628 return AMDGPU::isIntrinsicSourceOfDivergence(Intrinsic->getIntrinsicID());
629
630 // Assume all function calls are a source of divergence.
631 if (isa<CallInst>(V) || isa<InvokeInst>(V))
632 return true;
633
634 return false;
635}
636
637bool GCNTTIImpl::isAlwaysUniform(const Value *V) const {
638 if (const IntrinsicInst *Intrinsic = dyn_cast<IntrinsicInst>(V)) {
639 switch (Intrinsic->getIntrinsicID()) {
640 default:
641 return false;
642 case Intrinsic::amdgcn_readfirstlane:
643 case Intrinsic::amdgcn_readlane:
644 case Intrinsic::amdgcn_icmp:
645 case Intrinsic::amdgcn_fcmp:
646 return true;
647 }
648 }
649 return false;
650}
651
652bool GCNTTIImpl::collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
653 Intrinsic::ID IID) const {
654 switch (IID) {
655 case Intrinsic::amdgcn_atomic_inc:
656 case Intrinsic::amdgcn_atomic_dec:
657 case Intrinsic::amdgcn_ds_fadd:
658 case Intrinsic::amdgcn_ds_fmin:
659 case Intrinsic::amdgcn_ds_fmax:
660 case Intrinsic::amdgcn_is_shared:
661 case Intrinsic::amdgcn_is_private:
662 OpIndexes.push_back(0);
663 return true;
664 default:
665 return false;
666 }
667}
668
669bool GCNTTIImpl::rewriteIntrinsicWithAddressSpace(
670 IntrinsicInst *II, Value *OldV, Value *NewV) const {
671 auto IntrID = II->getIntrinsicID();
672 switch (IntrID) {
673 case Intrinsic::amdgcn_atomic_inc:
674 case Intrinsic::amdgcn_atomic_dec:
675 case Intrinsic::amdgcn_ds_fadd:
676 case Intrinsic::amdgcn_ds_fmin:
677 case Intrinsic::amdgcn_ds_fmax: {
678 const ConstantInt *IsVolatile = cast<ConstantInt>(II->getArgOperand(4));
679 if (!IsVolatile->isZero())
680 return false;
681 Module *M = II->getParent()->getParent()->getParent();
682 Type *DestTy = II->getType();
683 Type *SrcTy = NewV->getType();
684 Function *NewDecl =
685 Intrinsic::getDeclaration(M, II->getIntrinsicID(), {DestTy, SrcTy});
686 II->setArgOperand(0, NewV);
687 II->setCalledFunction(NewDecl);
688 return true;
689 }
690 case Intrinsic::amdgcn_is_shared:
691 case Intrinsic::amdgcn_is_private: {
692 unsigned TrueAS = IntrID == Intrinsic::amdgcn_is_shared ?
693 AMDGPUAS::LOCAL_ADDRESS : AMDGPUAS::PRIVATE_ADDRESS;
694 unsigned NewAS = NewV->getType()->getPointerAddressSpace();
695 LLVMContext &Ctx = NewV->getType()->getContext();
696 ConstantInt *NewVal = (TrueAS == NewAS) ?
697 ConstantInt::getTrue(Ctx) : ConstantInt::getFalse(Ctx);
698 II->replaceAllUsesWith(NewVal);
699 II->eraseFromParent();
700 return true;
701 }
702 default:
703 return false;
704 }
705}
706
707unsigned GCNTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
708 Type *SubTp) {
709 if (ST->hasVOP3PInsts()) {
710 VectorType *VT = cast<VectorType>(Tp);
711 if (VT->getNumElements() == 2 &&
712 DL.getTypeSizeInBits(VT->getElementType()) == 16) {
713 // With op_sel, VOP3P instructions can freely access the low half or high
714 // half of a register, so any swizzle is free.
715
716 switch (Kind) {
717 case TTI::SK_Broadcast:
718 case TTI::SK_Reverse:
719 case TTI::SK_PermuteSingleSrc:
720 return 0;
721 default:
722 break;
723 }
724 }
725 }
726
727 return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
728}
729
730bool GCNTTIImpl::areInlineCompatible(const Function *Caller,
731 const Function *Callee) const {
732 const TargetMachine &TM = getTLI()->getTargetMachine();
733 const GCNSubtarget *CallerST
734 = static_cast<const GCNSubtarget *>(TM.getSubtargetImpl(*Caller));
735 const GCNSubtarget *CalleeST
736 = static_cast<const GCNSubtarget *>(TM.getSubtargetImpl(*Callee));
737
738 const FeatureBitset &CallerBits = CallerST->getFeatureBits();
739 const FeatureBitset &CalleeBits = CalleeST->getFeatureBits();
740
741 FeatureBitset RealCallerBits = CallerBits & ~InlineFeatureIgnoreList;
742 FeatureBitset RealCalleeBits = CalleeBits & ~InlineFeatureIgnoreList;
743 if ((RealCallerBits & RealCalleeBits) != RealCalleeBits)
744 return false;
745
746 // FIXME: dx10_clamp can just take the caller setting, but there seems to be
747 // no way to support merge for backend defined attributes.
748 AMDGPU::SIModeRegisterDefaults CallerMode(*Caller, *CallerST);
749 AMDGPU::SIModeRegisterDefaults CalleeMode(*Callee, *CalleeST);
750 return CallerMode.isInlineCompatible(CalleeMode);
751}
752
753void GCNTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
754 TTI::UnrollingPreferences &UP) {
755 CommonTTI.getUnrollingPreferences(L, SE, UP);
756}
757
758unsigned GCNTTIImpl::getUserCost(const User *U,
759 ArrayRef<const Value *> Operands) {
760 const Instruction *I = dyn_cast<Instruction>(U);
1
Assuming 'U' is not a 'Instruction'
761 if (!I)
1.1
'I' is null
2
Taking true branch
762 return BaseT::getUserCost(U, Operands);
3
Calling 'TargetTransformInfoImplCRTPBase::getUserCost'
763
764 // Estimate different operations to be optimized out
765 switch (I->getOpcode()) {
766 case Instruction::ExtractElement: {
767 ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1));
768 unsigned Idx = -1;
769 if (CI)
770 Idx = CI->getZExtValue();
771 return getVectorInstrCost(I->getOpcode(), I->getOperand(0)->getType(), Idx);
772 }
773 case Instruction::InsertElement: {
774 ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(2));
775 unsigned Idx = -1;
776 if (CI)
777 Idx = CI->getZExtValue();
778 return getVectorInstrCost(I->getOpcode(), I->getType(), Idx);
779 }
780 case Instruction::Call: {
781 if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) {
782 SmallVector<Value *, 4> Args(II->arg_operands());
783 FastMathFlags FMF;
784 if (auto *FPMO = dyn_cast<FPMathOperator>(II))
785 FMF = FPMO->getFastMathFlags();
786 return getIntrinsicInstrCost(II->getIntrinsicID(), II->getType(), Args,
787 FMF);
788 } else {
789 return BaseT::getUserCost(U, Operands);
790 }
791 }
792 case Instruction::ShuffleVector: {
793 const ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(I);
794 Type *Ty = Shuffle->getType();
795 Type *SrcTy = Shuffle->getOperand(0)->getType();
796
797 // TODO: Identify and add costs for insert subvector, etc.
798 int SubIndex;
799 if (Shuffle->isExtractSubvectorMask(SubIndex))
800 return getShuffleCost(TTI::SK_ExtractSubvector, SrcTy, SubIndex, Ty);
801
802 if (Shuffle->changesLength())
803 return BaseT::getUserCost(U, Operands);
804
805 if (Shuffle->isIdentity())
806 return 0;
807
808 if (Shuffle->isReverse())
809 return getShuffleCost(TTI::SK_Reverse, Ty, 0, nullptr);
810
811 if (Shuffle->isSelect())
812 return getShuffleCost(TTI::SK_Select, Ty, 0, nullptr);
813
814 if (Shuffle->isTranspose())
815 return getShuffleCost(TTI::SK_Transpose, Ty, 0, nullptr);
816
817 if (Shuffle->isZeroEltSplat())
818 return getShuffleCost(TTI::SK_Broadcast, Ty, 0, nullptr);
819
820 if (Shuffle->isSingleSource())
821 return getShuffleCost(TTI::SK_PermuteSingleSrc, Ty, 0, nullptr);
822
823 return getShuffleCost(TTI::SK_PermuteTwoSrc, Ty, 0, nullptr);
824 }
825 case Instruction::ZExt:
826 case Instruction::SExt:
827 case Instruction::FPToUI:
828 case Instruction::FPToSI:
829 case Instruction::FPExt:
830 case Instruction::PtrToInt:
831 case Instruction::IntToPtr:
832 case Instruction::SIToFP:
833 case Instruction::UIToFP:
834 case Instruction::Trunc:
835 case Instruction::FPTrunc:
836 case Instruction::BitCast:
837 case Instruction::AddrSpaceCast: {
838 return getCastInstrCost(I->getOpcode(), I->getType(),
839 I->getOperand(0)->getType(), I);
840 }
841 case Instruction::Add:
842 case Instruction::FAdd:
843 case Instruction::Sub:
844 case Instruction::FSub:
845 case Instruction::Mul:
846 case Instruction::FMul:
847 case Instruction::UDiv:
848 case Instruction::SDiv:
849 case Instruction::FDiv:
850 case Instruction::URem:
851 case Instruction::SRem:
852 case Instruction::FRem:
853 case Instruction::Shl:
854 case Instruction::LShr:
855 case Instruction::AShr:
856 case Instruction::And:
857 case Instruction::Or:
858 case Instruction::Xor:
859 case Instruction::FNeg: {
860 return getArithmeticInstrCost(I->getOpcode(), I->getType(),
861 TTI::OK_AnyValue, TTI::OK_AnyValue,
862 TTI::OP_None, TTI::OP_None, Operands, I);
863 }
864 default:
865 break;
866 }
867
868 return BaseT::getUserCost(U, Operands);
869}
870
871unsigned R600TTIImpl::getHardwareNumberOfRegisters(bool Vec) const {
872 return 4 * 128; // XXX - 4 channels. Should these count as vector instead?
873}
874
875unsigned R600TTIImpl::getNumberOfRegisters(bool Vec) const {
876 return getHardwareNumberOfRegisters(Vec);
877}
878
879unsigned R600TTIImpl::getRegisterBitWidth(bool Vector) const {
880 return 32;
881}
882
883unsigned R600TTIImpl::getMinVectorRegisterBitWidth() const {
884 return 32;
885}
886
887unsigned R600TTIImpl::getLoadStoreVecRegBitWidth(unsigned AddrSpace) const {
888 if (AddrSpace == AMDGPUAS::GLOBAL_ADDRESS ||
889 AddrSpace == AMDGPUAS::CONSTANT_ADDRESS)
890 return 128;
891 if (AddrSpace == AMDGPUAS::LOCAL_ADDRESS ||
892 AddrSpace == AMDGPUAS::REGION_ADDRESS)
893 return 64;
894 if (AddrSpace == AMDGPUAS::PRIVATE_ADDRESS)
895 return 32;
896
897 if ((AddrSpace == AMDGPUAS::PARAM_D_ADDRESS ||
898 AddrSpace == AMDGPUAS::PARAM_I_ADDRESS ||
899 (AddrSpace >= AMDGPUAS::CONSTANT_BUFFER_0 &&
900 AddrSpace <= AMDGPUAS::CONSTANT_BUFFER_15)))
901 return 128;
902 llvm_unreachable("unhandled address space");
903}
904
905bool R600TTIImpl::isLegalToVectorizeMemChain(unsigned ChainSizeInBytes,
906 unsigned Alignment,
907 unsigned AddrSpace) const {
908 // We allow vectorization of flat stores, even though we may need to decompose
909 // them later if they may access private memory. We don't have enough context
910 // here, and legalization can handle it.
911 return (AddrSpace != AMDGPUAS::PRIVATE_ADDRESS);
912}
913
914bool R600TTIImpl::isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
915 unsigned Alignment,
916 unsigned AddrSpace) const {
917 return isLegalToVectorizeMemChain(ChainSizeInBytes, Alignment, AddrSpace);
918}
919
920bool R600TTIImpl::isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
921 unsigned Alignment,
922 unsigned AddrSpace) const {
923 return isLegalToVectorizeMemChain(ChainSizeInBytes, Alignment, AddrSpace);
924}
925
926unsigned R600TTIImpl::getMaxInterleaveFactor(unsigned VF) {
927 // Disable unrolling if the loop is not vectorized.
928 // TODO: Enable this again.
929 if (VF == 1)
930 return 1;
931
932 return 8;
933}
934
935unsigned R600TTIImpl::getCFInstrCost(unsigned Opcode) {
936 // XXX - For some reason this isn't called for switch.
937 switch (Opcode) {
938 case Instruction::Br:
939 case Instruction::Ret:
940 return 10;
941 default:
942 return BaseT::getCFInstrCost(Opcode);
943 }
944}
945
946int R600TTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy,
947 unsigned Index) {
948 switch (Opcode) {
949 case Instruction::ExtractElement:
950 case Instruction::InsertElement: {
951 unsigned EltSize
952 = DL.getTypeSizeInBits(cast<VectorType>(ValTy)->getElementType());
953 if (EltSize < 32) {
954 return BaseT::getVectorInstrCost(Opcode, ValTy, Index);
955 }
956
957 // Extracts are just reads of a subregister, so are free. Inserts are
958 // considered free because we don't want to have any cost for scalarizing
959 // operations, and we don't have to copy into a different register class.
960
961 // Dynamic indexing isn't free and is best avoided.
962 return Index == ~0u ? 2 : 0;
963 }
964 default:
965 return BaseT::getVectorInstrCost(Opcode, ValTy, Index);
966 }
967}
968
969void R600TTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
970 TTI::UnrollingPreferences &UP) {
971 CommonTTI.getUnrollingPreferences(L, SE, UP);
972}

/build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h

1//===- TargetTransformInfoImpl.h --------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9/// This file provides helpers for the implementation of
10/// a TargetTransformInfo-conforming class.
11///
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFOIMPL_H
15#define LLVM_ANALYSIS_TARGETTRANSFORMINFOIMPL_H
16
17#include "llvm/Analysis/ScalarEvolutionExpressions.h"
18#include "llvm/Analysis/TargetTransformInfo.h"
19#include "llvm/Analysis/VectorUtils.h"
20#include "llvm/IR/CallSite.h"
21#include "llvm/IR/DataLayout.h"
22#include "llvm/IR/Function.h"
23#include "llvm/IR/GetElementPtrTypeIterator.h"
24#include "llvm/IR/Operator.h"
25#include "llvm/IR/Type.h"
26
27namespace llvm {
28
29/// Base class for use as a mix-in that aids implementing
30/// a TargetTransformInfo-compatible class.
31class TargetTransformInfoImplBase {
32protected:
33 typedef TargetTransformInfo TTI;
34
35 const DataLayout &DL;
36
37 explicit TargetTransformInfoImplBase(const DataLayout &DL) : DL(DL) {}
38
39public:
40 // Provide value semantics. MSVC requires that we spell all of these out.
41 TargetTransformInfoImplBase(const TargetTransformInfoImplBase &Arg)
42 : DL(Arg.DL) {}
43 TargetTransformInfoImplBase(TargetTransformInfoImplBase &&Arg) : DL(Arg.DL) {}
44
45 const DataLayout &getDataLayout() const { return DL; }
46
47 unsigned getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) {
48 switch (Opcode) {
47
Control jumps to 'case IntToPtr:' at line 73
49 default:
50 // By default, just classify everything as 'basic'.
51 return TTI::TCC_Basic;
52
53 case Instruction::GetElementPtr:
54 llvm_unreachable("Use getGEPCost for GEP operations!");
55
56 case Instruction::BitCast:
57 assert(OpTy && "Cast instructions must provide the operand type");
58 if (Ty == OpTy || (Ty->isPointerTy() && OpTy->isPointerTy()))
59 // Identity and pointer-to-pointer casts are free.
60 return TTI::TCC_Free;
61
62 // Otherwise, the default basic cost is used.
63 return TTI::TCC_Basic;
64
65 case Instruction::FDiv:
66 case Instruction::FRem:
67 case Instruction::SDiv:
68 case Instruction::SRem:
69 case Instruction::UDiv:
70 case Instruction::URem:
71 return TTI::TCC_Expensive;
72
73 case Instruction::IntToPtr: {
74 // An inttoptr cast is free so long as the input is a legal integer type
75 // which doesn't contain values outside the range of a pointer.
76 unsigned OpSize = OpTy->getScalarSizeInBits();
48
Called C++ object pointer is null
77 if (DL.isLegalInteger(OpSize) &&
78 OpSize <= DL.getPointerTypeSizeInBits(Ty))
79 return TTI::TCC_Free;
80
81 // Otherwise it's not a no-op.
82 return TTI::TCC_Basic;
83 }
84 case Instruction::PtrToInt: {
85 // A ptrtoint cast is free so long as the result is large enough to store
86 // the pointer, and a legal integer type.
87 unsigned DestSize = Ty->getScalarSizeInBits();
88 if (DL.isLegalInteger(DestSize) &&
89 DestSize >= DL.getPointerTypeSizeInBits(OpTy))
90 return TTI::TCC_Free;
91
92 // Otherwise it's not a no-op.
93 return TTI::TCC_Basic;
94 }
95 case Instruction::Trunc:
96 // trunc to a native type is free (assuming the target has compare and
97 // shift-right of the same width).
98 if (DL.isLegalInteger(DL.getTypeSizeInBits(Ty)))
99 return TTI::TCC_Free;
100
101 return TTI::TCC_Basic;
102 }
103 }
104
105 int getGEPCost(Type *PointeeType, const Value *Ptr,
106 ArrayRef<const Value *> Operands) {
107 // In the basic model, we just assume that all-constant GEPs will be folded
108 // into their uses via addressing modes.
109 for (unsigned Idx = 0, Size = Operands.size(); Idx != Size; ++Idx)
110 if (!isa<Constant>(Operands[Idx]))
111 return TTI::TCC_Basic;
112
113 return TTI::TCC_Free;
114 }
115
116 unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
117 unsigned &JTSize,
118 ProfileSummaryInfo *PSI,
119 BlockFrequencyInfo *BFI) {
120 (void)PSI;
121 (void)BFI;
122 JTSize = 0;
123 return SI.getNumCases();
124 }
125
126 int getExtCost(const Instruction *I, const Value *Src) {
127 return TTI::TCC_Basic;
128 }
129
130 unsigned getCallCost(FunctionType *FTy, int NumArgs, const User *U) {
131 assert(FTy && "FunctionType must be provided to this routine.");
132
133 // The target-independent implementation just measures the size of the
134 // function by approximating that each argument will take on average one
135 // instruction to prepare.
136
137 if (NumArgs < 0)
138 // Set the argument number to the number of explicit arguments in the
139 // function.
140 NumArgs = FTy->getNumParams();
141
142 return TTI::TCC_Basic * (NumArgs + 1);
143 }
144
145 unsigned getInliningThresholdMultiplier() { return 1; }
146
147 int getInlinerVectorBonusPercent() { return 150; }
148
149 unsigned getMemcpyCost(const Instruction *I) {
150 return TTI::TCC_Expensive;
151 }
152
153 bool hasBranchDivergence() { return false; }
154
155 bool isSourceOfDivergence(const Value *V) { return false; }
156
157 bool isAlwaysUniform(const Value *V) { return false; }
158
159 unsigned getFlatAddressSpace () {
160 return -1;
161 }
162
163 bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
164 Intrinsic::ID IID) const {
165 return false;
166 }
167
168 bool rewriteIntrinsicWithAddressSpace(IntrinsicInst *II,
169 Value *OldV, Value *NewV) const {
170 return false;
171 }
172
173 bool isLoweredToCall(const Function *F) {
174 assert(F && "A concrete function must be provided to this routine.");
175
176 // FIXME: These should almost certainly not be handled here, and instead
177 // handled with the help of TLI or the target itself. This was largely
178 // ported from existing analysis heuristics here so that such refactorings
179 // can take place in the future.
180
181 if (F->isIntrinsic())
182 return false;
183
184 if (F->hasLocalLinkage() || !F->hasName())
185 return true;
186
187 StringRef Name = F->getName();
188
189 // These will all likely lower to a single selection DAG node.
190 if (Name == "copysign" || Name == "copysignf" || Name == "copysignl" ||
191 Name == "fabs" || Name == "fabsf" || Name == "fabsl" || Name == "sin" ||
192 Name == "fmin" || Name == "fminf" || Name == "fminl" ||
193 Name == "fmax" || Name == "fmaxf" || Name == "fmaxl" ||
194 Name == "sinf" || Name == "sinl" || Name == "cos" || Name == "cosf" ||
195 Name == "cosl" || Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl")
196 return false;
197
198 // These are all likely to be optimized into something smaller.
199 if (Name == "pow" || Name == "powf" || Name == "powl" || Name == "exp2" ||
200 Name == "exp2l" || Name == "exp2f" || Name == "floor" ||
201 Name == "floorf" || Name == "ceil" || Name == "round" ||
202 Name == "ffs" || Name == "ffsl" || Name == "abs" || Name == "labs" ||
203 Name == "llabs")
204 return false;
205
206 return true;
207 }
208
209 bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
210 AssumptionCache &AC,
211 TargetLibraryInfo *LibInfo,
212 HardwareLoopInfo &HWLoopInfo) {
213 return false;
214 }
215
216 bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE,
217 AssumptionCache &AC, TargetLibraryInfo *TLI,
218 DominatorTree *DT,
219 const LoopAccessInfo *LAI) const {
220 return false;
221 }
222
223 void getUnrollingPreferences(Loop *, ScalarEvolution &,
224 TTI::UnrollingPreferences &) {}
225
226 bool isLegalAddImmediate(int64_t Imm) { return false; }
227
228 bool isLegalICmpImmediate(int64_t Imm) { return false; }
229
230 bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
231 bool HasBaseReg, int64_t Scale,
232 unsigned AddrSpace, Instruction *I = nullptr) {
233 // Guess that only reg and reg+reg addressing is allowed. This heuristic is
234 // taken from the implementation of LSR.
235 return !BaseGV && BaseOffset == 0 && (Scale == 0 || Scale == 1);
236 }
237
238 bool isLSRCostLess(TTI::LSRCost &C1, TTI::LSRCost &C2) {
239 return std::tie(C1.NumRegs, C1.AddRecCost, C1.NumIVMuls, C1.NumBaseAdds,
240 C1.ScaleCost, C1.ImmCost, C1.SetupCost) <
241 std::tie(C2.NumRegs, C2.AddRecCost, C2.NumIVMuls, C2.NumBaseAdds,
242 C2.ScaleCost, C2.ImmCost, C2.SetupCost);
243 }
244
245 bool canMacroFuseCmp() { return false; }
246
247 bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI,
248 DominatorTree *DT, AssumptionCache *AC,
249 TargetLibraryInfo *LibInfo) {
250 return false;
251 }
252
253 bool shouldFavorPostInc() const { return false; }
254
255 bool shouldFavorBackedgeIndex(const Loop *L) const { return false; }
256
257 bool isLegalMaskedStore(Type *DataType, MaybeAlign Alignment) { return false; }
258
259 bool isLegalMaskedLoad(Type *DataType, MaybeAlign Alignment) { return false; }
260
261 bool isLegalNTStore(Type *DataType, Align Alignment) {
262 // By default, assume nontemporal memory stores are available for stores
263 // that are aligned and have a size that is a power of 2.
264 unsigned DataSize = DL.getTypeStoreSize(DataType);
265 return Alignment >= DataSize && isPowerOf2_32(DataSize);
266 }
267
268 bool isLegalNTLoad(Type *DataType, Align Alignment) {
269 // By default, assume nontemporal memory loads are available for loads that
270 // are aligned and have a size that is a power of 2.
271 unsigned DataSize = DL.getTypeStoreSize(DataType);
272 return Alignment >= DataSize && isPowerOf2_32(DataSize);
273 }
274
275 bool isLegalMaskedScatter(Type *DataType, MaybeAlign Alignment) {
276 return false;
277 }
278
279 bool isLegalMaskedGather(Type *DataType, MaybeAlign Alignment) {
280 return false;
281 }
282
283 bool isLegalMaskedCompressStore(Type *DataType) { return false; }
284
285 bool isLegalMaskedExpandLoad(Type *DataType) { return false; }
286
287 bool hasDivRemOp(Type *DataType, bool IsSigned) { return false; }
288
289 bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) { return false; }
290
291 bool prefersVectorizedAddressing() { return true; }
292
293 int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
294 bool HasBaseReg, int64_t Scale, unsigned AddrSpace) {
295 // Guess that all legal addressing mode are free.
296 if (isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg,
297 Scale, AddrSpace))
298 return 0;
299 return -1;
300 }
301
302 bool LSRWithInstrQueries() { return false; }
303
304 bool isTruncateFree(Type *Ty1, Type *Ty2) { return false; }
305
306 bool isProfitableToHoist(Instruction *I) { return true; }
307
308 bool useAA() { return false; }
309
310 bool isTypeLegal(Type *Ty) { return false; }
311
312 bool shouldBuildLookupTables() { return true; }
313 bool shouldBuildLookupTablesForConstant(Constant *C) { return true; }
314
315 bool useColdCCForColdCall(Function &F) { return false; }
316
317 unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) {
318 return 0;
319 }
320
321 unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
322 unsigned VF) { return 0; }
323
324 bool supportsEfficientVectorElementLoadStore() { return false; }
325
326 bool enableAggressiveInterleaving(bool LoopHasReductions) { return false; }
327
328 TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
329 bool IsZeroCmp) const {
330 return {};
331 }
332
333 bool enableInterleavedAccessVectorization() { return false; }
334
335 bool enableMaskedInterleavedAccessVectorization() { return false; }
336
337 bool isFPVectorizationPotentiallyUnsafe() { return false; }
338
339 bool allowsMisalignedMemoryAccesses(LLVMContext &Context,
340 unsigned BitWidth,
341 unsigned AddressSpace,
342 unsigned Alignment,
343 bool *Fast) { return false; }
344
345 TTI::PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) {
346 return TTI::PSK_Software;
347 }
348
349 bool haveFastSqrt(Type *Ty) { return false; }
350
351 bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) { return true; }
352
353 unsigned getFPOpCost(Type *Ty) { return TargetTransformInfo::TCC_Basic; }
354
355 int getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
356 Type *Ty) {
357 return 0;
358 }
359
360 unsigned getIntImmCost(const APInt &Imm, Type *Ty) { return TTI::TCC_Basic; }
361
362 unsigned getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm,
363 Type *Ty) {
364 return TTI::TCC_Free;
365 }
366
367 unsigned getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
368 const APInt &Imm, Type *Ty) {
369 return TTI::TCC_Free;
370 }
371
372 unsigned getNumberOfRegisters(unsigned ClassID) const { return 8; }
373
374 unsigned getRegisterClassForType(bool Vector, Type *Ty = nullptr) const {
375 return Vector ? 1 : 0;
376 };
377
378 const char* getRegisterClassName(unsigned ClassID) const {
379 switch (ClassID) {
380 default:
381 return "Generic::Unknown Register Class";
382 case 0: return "Generic::ScalarRC";
383 case 1: return "Generic::VectorRC";
384 }
385 }
386
387 unsigned getRegisterBitWidth(bool Vector) const { return 32; }
388
389 unsigned getMinVectorRegisterBitWidth() { return 128; }
390
391 bool shouldMaximizeVectorBandwidth(bool OptSize) const { return false; }
392
393 unsigned getMinimumVF(unsigned ElemWidth) const { return 0; }
394
395 bool
396 shouldConsiderAddressTypePromotion(const Instruction &I,
397 bool &AllowPromotionWithoutCommonHeader) {
398 AllowPromotionWithoutCommonHeader = false;
399 return false;
400 }
401
402 unsigned getCacheLineSize() const { return 0; }
403
404 llvm::Optional<unsigned> getCacheSize(TargetTransformInfo::CacheLevel Level) const {
405 switch (Level) {
406 case TargetTransformInfo::CacheLevel::L1D:
407 LLVM_FALLTHROUGH;
408 case TargetTransformInfo::CacheLevel::L2D:
409 return llvm::Optional<unsigned>();
410 }
411 llvm_unreachable("Unknown TargetTransformInfo::CacheLevel");
412 }
413
414 llvm::Optional<unsigned> getCacheAssociativity(
415 TargetTransformInfo::CacheLevel Level) const {
416 switch (Level) {
417 case TargetTransformInfo::CacheLevel::L1D:
418 LLVM_FALLTHROUGH;
419 case TargetTransformInfo::CacheLevel::L2D:
420 return llvm::Optional<unsigned>();
421 }
422
423 llvm_unreachable("Unknown TargetTransformInfo::CacheLevel");
424 }
425
426 unsigned getPrefetchDistance() const { return 0; }
427 unsigned getMinPrefetchStride() const { return 1; }
428 unsigned getMaxPrefetchIterationsAhead() const { return UINT_MAX; }
429
430 unsigned getMaxInterleaveFactor(unsigned VF) { return 1; }
431
432 unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty,
433 TTI::OperandValueKind Opd1Info,
434 TTI::OperandValueKind Opd2Info,
435 TTI::OperandValueProperties Opd1PropInfo,
436 TTI::OperandValueProperties Opd2PropInfo,
437 ArrayRef<const Value *> Args,
438 const Instruction *CxtI = nullptr) {
439 return 1;
440 }
441
442 unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Ty, int Index,
443 Type *SubTp) {
444 return 1;
445 }
446
447 unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
448 const Instruction *I) { return 1; }
449
450 unsigned getExtractWithExtendCost(unsigned Opcode, Type *Dst,
451 VectorType *VecTy, unsigned Index) {
452 return 1;
453 }
454
455 unsigned getCFInstrCost(unsigned Opcode) { return 1; }
456
457 unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
458 const Instruction *I) {
459 return 1;
460 }
461
462 unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
463 return 1;
464 }
465
466 unsigned getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
467 unsigned AddressSpace, const Instruction *I) {
468 return 1;
469 }
470
471 unsigned getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
472 unsigned AddressSpace) {
473 return 1;
474 }
475
476 unsigned getGatherScatterOpCost(unsigned Opcode, Type *DataTy, Value *Ptr,
477 bool VariableMask,
478 unsigned Alignment) {
479 return 1;
480 }
481
482 unsigned getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
483 unsigned Factor,
484 ArrayRef<unsigned> Indices,
485 unsigned Alignment, unsigned AddressSpace,
486 bool UseMaskForCond = false,
487 bool UseMaskForGaps = false) {
488 return 1;
489 }
490
491 unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
492 ArrayRef<Type *> Tys, FastMathFlags FMF,
493 unsigned ScalarizationCostPassed) {
494 return 1;
495 }
496 unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
497 ArrayRef<Value *> Args, FastMathFlags FMF, unsigned VF) {
498 return 1;
499 }
500
501 unsigned getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys) {
502 return 1;
503 }
504
505 unsigned getNumberOfParts(Type *Tp) { return 0; }
506
507 unsigned getAddressComputationCost(Type *Tp, ScalarEvolution *,
508 const SCEV *) {
509 return 0;
510 }
511
512 unsigned getArithmeticReductionCost(unsigned, Type *, bool) { return 1; }
513
514 unsigned getMinMaxReductionCost(Type *, Type *, bool, bool) { return 1; }
515
516 unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) { return 0; }
517
518 bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) {
519 return false;
520 }
521
522 unsigned getAtomicMemIntrinsicMaxElementSize() const {
523 // Note for overrides: You must ensure for all element unordered-atomic
524 // memory intrinsics that all power-of-2 element sizes up to, and
525 // including, the return value of this method have a corresponding
526 // runtime lib call. These runtime lib call definitions can be found
527 // in RuntimeLibcalls.h
528 return 0;
529 }
530
531 Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
532 Type *ExpectedType) {
533 return nullptr;
534 }
535
536 Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
537 unsigned SrcAlign, unsigned DestAlign) const {
538 return Type::getInt8Ty(Context);
539 }
540
541 void getMemcpyLoopResidualLoweringType(SmallVectorImpl<Type *> &OpsOut,
542 LLVMContext &Context,
543 unsigned RemainingBytes,
544 unsigned SrcAlign,
545 unsigned DestAlign) const {
546 for (unsigned i = 0; i != RemainingBytes; ++i)
547 OpsOut.push_back(Type::getInt8Ty(Context));
548 }
549
550 bool areInlineCompatible(const Function *Caller,
551 const Function *Callee) const {
552 return (Caller->getFnAttribute("target-cpu") ==
553 Callee->getFnAttribute("target-cpu")) &&
554 (Caller->getFnAttribute("target-features") ==
555 Callee->getFnAttribute("target-features"));
556 }
557
558 bool areFunctionArgsABICompatible(const Function *Caller, const Function *Callee,
559 SmallPtrSetImpl<Argument *> &Args) const {
560 return (Caller->getFnAttribute("target-cpu") ==
561 Callee->getFnAttribute("target-cpu")) &&
562 (Caller->getFnAttribute("target-features") ==
563 Callee->getFnAttribute("target-features"));
564 }
565
566 bool isIndexedLoadLegal(TTI::MemIndexedMode Mode, Type *Ty,
567 const DataLayout &DL) const {
568 return false;
569 }
570
571 bool isIndexedStoreLegal(TTI::MemIndexedMode Mode, Type *Ty,
572 const DataLayout &DL) const {
573 return false;
574 }
575
576 unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const { return 128; }
577
578 bool isLegalToVectorizeLoad(LoadInst *LI) const { return true; }
579
580 bool isLegalToVectorizeStore(StoreInst *SI) const { return true; }
581
582 bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
583 unsigned Alignment,
584 unsigned AddrSpace) const {
585 return true;
586 }
587
588 bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
589 unsigned Alignment,
590 unsigned AddrSpace) const {
591 return true;
592 }
593
594 unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
595 unsigned ChainSizeInBytes,
596 VectorType *VecTy) const {
597 return VF;
598 }
599
600 unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
601 unsigned ChainSizeInBytes,
602 VectorType *VecTy) const {
603 return VF;
604 }
605
606 bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
607 TTI::ReductionFlags Flags) const {
608 return false;
609 }
610
611 bool shouldExpandReduction(const IntrinsicInst *II) const {
612 return true;
613 }
614
615 unsigned getGISelRematGlobalCost() const {
616 return 1;
617 }
618
619protected:
620 // Obtain the minimum required size to hold the value (without the sign)
621 // In case of a vector it returns the min required size for one element.
622 unsigned minRequiredElementSize(const Value* Val, bool &isSigned) {
623 if (isa<ConstantDataVector>(Val) || isa<ConstantVector>(Val)) {
624 const auto* VectorValue = cast<Constant>(Val);
625
626 // In case of a vector need to pick the max between the min
627 // required size for each element
628 auto *VT = cast<VectorType>(Val->getType());
629
630 // Assume unsigned elements
631 isSigned = false;
632
633 // The max required size is the total vector width divided by num
634 // of elements in the vector
635 unsigned MaxRequiredSize = VT->getBitWidth() / VT->getNumElements();
636
637 unsigned MinRequiredSize = 0;
638 for(unsigned i = 0, e = VT->getNumElements(); i < e; ++i) {
639 if (auto* IntElement =
640 dyn_cast<ConstantInt>(VectorValue->getAggregateElement(i))) {
641 bool signedElement = IntElement->getValue().isNegative();
642 // Get the element min required size.
643 unsigned ElementMinRequiredSize =
644 IntElement->getValue().getMinSignedBits() - 1;
645 // In case one element is signed then all the vector is signed.
646 isSigned |= signedElement;
647 // Save the max required bit size between all the elements.
648 MinRequiredSize = std::max(MinRequiredSize, ElementMinRequiredSize);
649 }
650 else {
651 // not an int constant element
652 return MaxRequiredSize;
653 }
654 }
655 return MinRequiredSize;
656 }
657
658 if (const auto* CI = dyn_cast<ConstantInt>(Val)) {
659 isSigned = CI->getValue().isNegative();
660 return CI->getValue().getMinSignedBits() - 1;
661 }
662
663 if (const auto* Cast = dyn_cast<SExtInst>(Val)) {
664 isSigned = true;
665 return Cast->getSrcTy()->getScalarSizeInBits() - 1;
666 }
667
668 if (const auto* Cast = dyn_cast<ZExtInst>(Val)) {
669 isSigned = false;
670 return Cast->getSrcTy()->getScalarSizeInBits();
671 }
672
673 isSigned = false;
674 return Val->getType()->getScalarSizeInBits();
675 }
676
677 bool isStridedAccess(const SCEV *Ptr) {
678 return Ptr && isa<SCEVAddRecExpr>(Ptr);
679 }
680
681 const SCEVConstant *getConstantStrideStep(ScalarEvolution *SE,
682 const SCEV *Ptr) {
683 if (!isStridedAccess(Ptr))
684 return nullptr;
685 const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ptr);
686 return dyn_cast<SCEVConstant>(AddRec->getStepRecurrence(*SE));
687 }
688
689 bool isConstantStridedAccessLessThan(ScalarEvolution *SE, const SCEV *Ptr,
690 int64_t MergeDistance) {
691 const SCEVConstant *Step = getConstantStrideStep(SE, Ptr);
692 if (!Step)
693 return false;
694 APInt StrideVal = Step->getAPInt();
695 if (StrideVal.getBitWidth() > 64)
696 return false;
697 // FIXME: Need to take absolute value for negative stride case.
698 return StrideVal.getSExtValue() < MergeDistance;
699 }
700};
701
702/// CRTP base class for use as a mix-in that aids implementing
703/// a TargetTransformInfo-compatible class.
704template <typename T>
705class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase {
706private:
707 typedef TargetTransformInfoImplBase BaseT;
708
709protected:
710 explicit TargetTransformInfoImplCRTPBase(const DataLayout &DL) : BaseT(DL) {}
711
712public:
713 using BaseT::getCallCost;
714
715 unsigned getCallCost(const Function *F, int NumArgs, const User *U) {
716 assert(F && "A concrete function must be provided to this routine.")((F && "A concrete function must be provided to this routine."
) ? static_cast<void> (0) : __assert_fail ("F && \"A concrete function must be provided to this routine.\""
, "/build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h"
, 716, __PRETTY_FUNCTION__))
;
717
718 if (NumArgs < 0)
719 // Set the argument number to the number of explicit arguments in the
720 // function.
721 NumArgs = F->arg_size();
722
723 if (Intrinsic::ID IID = F->getIntrinsicID()) {
724 FunctionType *FTy = F->getFunctionType();
725 SmallVector<Type *, 8> ParamTys(FTy->param_begin(), FTy->param_end());
726 return static_cast<T *>(this)
727 ->getIntrinsicCost(IID, FTy->getReturnType(), ParamTys, U);
728 }
729
730 if (!static_cast<T *>(this)->isLoweredToCall(F))
731 return TTI::TCC_Basic; // Give a basic cost if it will be lowered
732 // directly.
733
734 return static_cast<T *>(this)->getCallCost(F->getFunctionType(), NumArgs, U);
735 }
736
737 unsigned getCallCost(const Function *F, ArrayRef<const Value *> Arguments,
738 const User *U) {
739 // Simply delegate to generic handling of the call.
740 // FIXME: We should use instsimplify or something else to catch calls which
741 // will constant fold with these arguments.
742 return static_cast<T *>(this)->getCallCost(F, Arguments.size(), U);
743 }
744
745 using BaseT::getGEPCost;
746
747 int getGEPCost(Type *PointeeType, const Value *Ptr,
748 ArrayRef<const Value *> Operands) {
749 assert(PointeeType && Ptr && "can't get GEPCost of nullptr");
750 // TODO: will remove this when pointers have an opaque type.
751 assert(Ptr->getType()->getScalarType()->getPointerElementType() ==
752 PointeeType &&
753 "explicit pointee type doesn't match operand's pointee type");
754 auto *BaseGV = dyn_cast<GlobalValue>(Ptr->stripPointerCasts());
755 bool HasBaseReg = (BaseGV == nullptr);
756
757 auto PtrSizeBits = DL.getPointerTypeSizeInBits(Ptr->getType());
758 APInt BaseOffset(PtrSizeBits, 0);
759 int64_t Scale = 0;
760
761 auto GTI = gep_type_begin(PointeeType, Operands);
762 Type *TargetType = nullptr;
763
764 // Handle the case where the GEP instruction has a single operand,
765 // the basis, therefore TargetType is a nullptr.
766 if (Operands.empty())
767 return !BaseGV ? TTI::TCC_Free : TTI::TCC_Basic;
768
769 for (auto I = Operands.begin(); I != Operands.end(); ++I, ++GTI) {
770 TargetType = GTI.getIndexedType();
771 // We assume that the cost of Scalar GEP with constant index and the
772 // cost of Vector GEP with splat constant index are the same.
773 const ConstantInt *ConstIdx = dyn_cast<ConstantInt>(*I);
774 if (!ConstIdx)
775 if (auto Splat = getSplatValue(*I))
776 ConstIdx = dyn_cast<ConstantInt>(Splat);
777 if (StructType *STy = GTI.getStructTypeOrNull()) {
778 // For structures the index is always splat or scalar constant
779 assert(ConstIdx && "Unexpected GEP index");
780 uint64_t Field = ConstIdx->getZExtValue();
781 BaseOffset += DL.getStructLayout(STy)->getElementOffset(Field);
782 } else {
783 int64_t ElementSize = DL.getTypeAllocSize(GTI.getIndexedType());
784 if (ConstIdx) {
785 BaseOffset +=
786 ConstIdx->getValue().sextOrTrunc(PtrSizeBits) * ElementSize;
787 } else {
788 // Needs scale register.
789 if (Scale != 0)
790 // No addressing mode takes two scale registers.
791 return TTI::TCC_Basic;
792 Scale = ElementSize;
793 }
794 }
795 }
796
797 if (static_cast<T *>(this)->isLegalAddressingMode(
798 TargetType, const_cast<GlobalValue *>(BaseGV),
799 BaseOffset.sextOrTrunc(64).getSExtValue(), HasBaseReg, Scale,
800 Ptr->getType()->getPointerAddressSpace()))
801 return TTI::TCC_Free;
802 return TTI::TCC_Basic;
803 }
804
805 unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
806 ArrayRef<Type *> ParamTys, const User *U) {
807 switch (IID) {
808 default:
809 // Intrinsics rarely (if ever) have normal argument setup constraints.
810 // Model them as having a basic instruction cost.
811 return TTI::TCC_Basic;
812
813 // TODO: other libc intrinsics.
814 case Intrinsic::memcpy:
815 return static_cast<T *>(this)->getMemcpyCost(dyn_cast<Instruction>(U));
816
817 case Intrinsic::annotation:
818 case Intrinsic::assume:
819 case Intrinsic::sideeffect:
820 case Intrinsic::dbg_declare:
821 case Intrinsic::dbg_value:
822 case Intrinsic::dbg_label:
823 case Intrinsic::invariant_start:
824 case Intrinsic::invariant_end:
825 case Intrinsic::launder_invariant_group:
826 case Intrinsic::strip_invariant_group:
827 case Intrinsic::is_constant:
828 case Intrinsic::lifetime_start:
829 case Intrinsic::lifetime_end:
830 case Intrinsic::objectsize:
831 case Intrinsic::ptr_annotation:
832 case Intrinsic::var_annotation:
833 case Intrinsic::experimental_gc_result:
834 case Intrinsic::experimental_gc_relocate:
835 case Intrinsic::coro_alloc:
836 case Intrinsic::coro_begin:
837 case Intrinsic::coro_free:
838 case Intrinsic::coro_end:
839 case Intrinsic::coro_frame:
840 case Intrinsic::coro_size:
841 case Intrinsic::coro_suspend:
842 case Intrinsic::coro_param:
843 case Intrinsic::coro_subfn_addr:
844 // These intrinsics don't actually represent code after lowering.
845 return TTI::TCC_Free;
846 }
847 }
848
849 unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
850 ArrayRef<const Value *> Arguments, const User *U) {
851 // Delegate to the generic intrinsic handling code. This mostly provides an
852 // opportunity for targets to (for example) special case the cost of
853 // certain intrinsics based on constants used as arguments.
854 SmallVector<Type *, 8> ParamTys;
855 ParamTys.reserve(Arguments.size());
856 for (unsigned Idx = 0, Size = Arguments.size(); Idx != Size; ++Idx)
857 ParamTys.push_back(Arguments[Idx]->getType());
858 return static_cast<T *>(this)->getIntrinsicCost(IID, RetTy, ParamTys, U);
859 }
860
861 unsigned getUserCost(const User *U, ArrayRef<const Value *> Operands) {
862 if (isa<PHINode>(U))
4
Assuming 'U' is not a 'PHINode'
5
Taking false branch
863 return TTI::TCC_Free; // Model all PHI nodes as free.
864
865 if (isa<ExtractValueInst>(U))
6
Assuming 'U' is not a 'ExtractValueInst'
7
Taking false branch
866 return TTI::TCC_Free; // Model all ExtractValue nodes as free.
867
868 // Static alloca doesn't generate target instructions.
869 if (auto *A = dyn_cast<AllocaInst>(U))
8.1
'A' is null
8
Assuming 'U' is not a 'AllocaInst'
9
Taking false branch
870 if (A->isStaticAlloca())
871 return TTI::TCC_Free;
872
873 if (const GEPOperator *GEP = dyn_cast<GEPOperator>(U)) {
10.1
'GEP' is null
10
Assuming 'U' is not a 'GEPOperator'
11
Taking false branch
874 return static_cast<T *>(this)->getGEPCost(GEP->getSourceElementType(),
875 GEP->getPointerOperand(),
876 Operands.drop_front());
877 }
878
879 if (auto CS = ImmutableCallSite(U)) {
12
Calling 'CallSiteBase::operator bool'
26
Returning from 'CallSiteBase::operator bool'
27
Taking false branch
880 const Function *F = CS.getCalledFunction();
881 if (!F) {
882 // Just use the called value type.
883 Type *FTy = CS.getCalledValue()->getType()->getPointerElementType();
884 return static_cast<T *>(this)
885 ->getCallCost(cast<FunctionType>(FTy), CS.arg_size(), U);
886 }
887
888 SmallVector<const Value *, 8> Arguments(CS.arg_begin(), CS.arg_end());
889 return static_cast<T *>(this)->getCallCost(F, Arguments, U);
890 }
891
892 if (isa<SExtInst>(U) || isa<ZExtInst>(U) || isa<FPExtInst>(U))
28
Assuming 'U' is not a 'SExtInst'
29
Assuming 'U' is not a 'ZExtInst'
30
Assuming 'U' is not a 'FPExtInst'
31
Taking false branch
893 // The old behaviour of generally treating extensions of icmp to be free
894 // has been removed. A target that needs it should override getUserCost().
895 return static_cast<T *>(this)->getExtCost(cast<Instruction>(U),
896 Operands.back());
897
898 return static_cast<T *>(this)->getOperationCost(
42
Calling 'BasicTTIImplBase::getOperationCost'
899 Operator::getOpcode(U), U->getType(),
32
Calling 'Operator::getOpcode'
38
Returning from 'Operator::getOpcode'
900 U->getNumOperands() == 1 ? U->getOperand(0)->getType() : nullptr);
39
Assuming the condition is false
40
'?' condition is false
41
Passing null pointer value via 3rd parameter 'OpTy'
901 }
902
903 int getInstructionLatency(const Instruction *I) {
904 SmallVector<const Value *, 4> Operands(I->value_op_begin(),
905 I->value_op_end());
906 if (getUserCost(I, Operands) == TTI::TCC_Free)
907 return 0;
908
909 if (isa<LoadInst>(I))
910 return 4;
911
912 Type *DstTy = I->getType();
913
914 // Usually an intrinsic is a simple instruction.
915 // A real function call is much slower.
916 if (auto *CI = dyn_cast<CallInst>(I)) {
917 const Function *F = CI->getCalledFunction();
918 if (!F || static_cast<T *>(this)->isLoweredToCall(F))
919 return 40;
920 // Some intrinsics return a value and a flag, we use the value type
921 // to decide its latency.
922 if (StructType* StructTy = dyn_cast<StructType>(DstTy))
923 DstTy = StructTy->getElementType(0);
924 // Fall through to simple instructions.
925 }
926
927 if (VectorType *VectorTy = dyn_cast<VectorType>(DstTy))
928 DstTy = VectorTy->getElementType();
929 if (DstTy->isFloatingPointTy())
930 return 3;
931
932 return 1;
933 }
934};
935}
936
937#endif
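
The diagnosed path bottoms out in the call at lines 898-900 above: when the user has more than one operand, getUserCost forwards a null pointer as the third argument (OpTy) to getOperationCost, and the warning quoted at the top of this report points at a dereference inside that callee. The snippet below is a minimal standalone sketch of that shape only, not LLVM code; the struct and function names are hypothetical stand-ins, and the guarded callee shows the kind of null check the path requires.

// Illustrative sketch only (hypothetical names, not LLVM's classes): a caller that
// forwards a null operand type when the user has more than one operand, and a
// callee that therefore has to test OpTy before using it.
#include <cstddef>
#include <iostream>

struct Type {
  bool IsVector = false;
  bool isVectorTy() const { return IsVector; }
};

struct User {
  std::size_t NumOperands = 2;       // more than one operand, as on the reported path
  Type *FirstOperandTy = nullptr;
  std::size_t getNumOperands() const { return NumOperands; }
  Type *getOperandType(std::size_t) const { return FirstOperandTy; }
};

// Stand-in for getOperationCost(Opcode, Ty, OpTy): OpTy may legitimately be null,
// so it is checked before use here.
unsigned getOperationCost(unsigned /*Opcode*/, Type * /*Ty*/, Type *OpTy) {
  if (OpTy && OpTy->isVectorTy())
    return 2;
  return 1;
}

// Stand-in for getUserCost: mirrors the ternary at line 900 of the listing above.
unsigned getUserCost(const User &U, Type *ResultTy) {
  return getOperationCost(/*Opcode=*/0, ResultTy,
                          U.getNumOperands() == 1 ? U.getOperandType(0) : nullptr);
}

int main() {
  User U;
  Type ResultTy;
  std::cout << getUserCost(U, &ResultTy) << '\n'; // prints 1; OpTy was null here
  return 0;
}

Either guarding OpTy in the callee or never passing the nullptr argument from the caller would break the flow the analyzer reports.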

/build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/llvm/include/llvm/IR/CallSite.h

1//===- CallSite.h - Abstract Call & Invoke instrs ---------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the CallSite class, which is a handy wrapper for code that
10// wants to treat Call, Invoke and CallBr instructions in a generic way. When
11// in non-mutation context (e.g. an analysis) ImmutableCallSite should be used.
12// Finally, when some degree of customization is necessary between these two
13// extremes, CallSiteBase<> can be supplied with fine-tuned parameters.
14//
15// NOTE: These classes are supposed to have "value semantics". So they should be
16// passed by value, not by reference; they should not be "new"ed or "delete"d.
17// They are efficiently copyable, assignable and constructable, with cost
18// equivalent to copying a pointer (notice that they have only a single data
19// member). The internal representation carries a flag which indicates which of
20// the three variants is enclosed. This allows for cheaper checks when various
21// accessors of CallSite are employed.
22//
23//===----------------------------------------------------------------------===//
24
25#ifndef LLVM_IR_CALLSITE_H
26#define LLVM_IR_CALLSITE_H
27
28#include "llvm/ADT/Optional.h"
29#include "llvm/ADT/PointerIntPair.h"
30#include "llvm/ADT/iterator_range.h"
31#include "llvm/IR/Attributes.h"
32#include "llvm/IR/CallingConv.h"
33#include "llvm/IR/Function.h"
34#include "llvm/IR/InstrTypes.h"
35#include "llvm/IR/Instruction.h"
36#include "llvm/IR/Instructions.h"
37#include "llvm/IR/Use.h"
38#include "llvm/IR/User.h"
39#include "llvm/IR/Value.h"
40#include "llvm/Support/Casting.h"
41#include <cassert>
42#include <cstdint>
43#include <iterator>
44
45namespace llvm {
46
47namespace Intrinsic {
48typedef unsigned ID;
49}
50
51template <typename FunTy = const Function, typename BBTy = const BasicBlock,
52 typename ValTy = const Value, typename UserTy = const User,
53 typename UseTy = const Use, typename InstrTy = const Instruction,
54 typename CallTy = const CallInst,
55 typename InvokeTy = const InvokeInst,
56 typename CallBrTy = const CallBrInst,
57 typename IterTy = User::const_op_iterator>
58class CallSiteBase {
59protected:
60 PointerIntPair<InstrTy *, 2, int> I;
61
62 CallSiteBase() = default;
63 CallSiteBase(CallTy *CI) : I(CI, 1) { assert(CI); }
64 CallSiteBase(InvokeTy *II) : I(II, 0) { assert(II); }
65 CallSiteBase(CallBrTy *CBI) : I(CBI, 2) { assert(CBI); }
66 explicit CallSiteBase(ValTy *II) { *this = get(II); }
67
68private:
69 /// This static method is like a constructor. It will create an appropriate
70 /// call site for a Call, Invoke or CallBr instruction, but it can also create
71 /// a null initialized CallSiteBase object for something which is NOT a call
72 /// site.
73 static CallSiteBase get(ValTy *V) {
74 if (InstrTy *II = dyn_cast<InstrTy>(V)) {
75 if (II->getOpcode() == Instruction::Call)
76 return CallSiteBase(static_cast<CallTy*>(II));
77 if (II->getOpcode() == Instruction::Invoke)
78 return CallSiteBase(static_cast<InvokeTy*>(II));
79 if (II->getOpcode() == Instruction::CallBr)
80 return CallSiteBase(static_cast<CallBrTy *>(II));
81 }
82 return CallSiteBase();
83 }
84
85public:
86 /// Return true if a CallInst is enclosed.
87 bool isCall() const { return I.getInt() == 1; }
88
89 /// Return true if a InvokeInst is enclosed. !I.getInt() may also signify a
90 /// NULL instruction pointer, so check that.
91 bool isInvoke() const { return getInstruction() && I.getInt() == 0; }
92
93 /// Return true if a CallBrInst is enclosed.
94 bool isCallBr() const { return I.getInt() == 2; }
95
96 InstrTy *getInstruction() const { return I.getPointer(); }
97 InstrTy *operator->() const { return I.getPointer(); }
98 explicit operator bool() const { return I.getPointer(); }
13
Calling 'PointerIntPair::getPointer'
24
Returning from 'PointerIntPair::getPointer'
25
Returning zero, which participates in a condition later
99
100 /// Get the basic block containing the call site.
101 BBTy* getParent() const { return getInstruction()->getParent(); }
102
103 /// Return the pointer to function that is being called.
104 ValTy *getCalledValue() const {
105 assert(getInstruction() && "Not a call, invoke or callbr instruction!");
106 return *getCallee();
107 }
108
109 /// Return the function being called if this is a direct call, otherwise
110 /// return null (if it's an indirect call).
111 FunTy *getCalledFunction() const {
112 return dyn_cast<FunTy>(getCalledValue());
113 }
114
115 /// Return true if the callsite is an indirect call.
116 bool isIndirectCall() const {
117 const Value *V = getCalledValue();
118 if (!V)
119 return false;
120 if (isa<FunTy>(V) || isa<Constant>(V))
121 return false;
122 if (const CallBase *CB = dyn_cast<CallBase>(getInstruction()))
123 if (CB->isInlineAsm())
124 return false;
125 return true;
126 }
127
128 /// Set the callee to the specified value. Unlike the function of the same
129 /// name on CallBase, does not modify the type!
130 void setCalledFunction(Value *V) {
131 assert(getInstruction() && "Not a call, callbr, or invoke instruction!");
132 assert(cast<PointerType>(V->getType())->getElementType() ==
133 cast<CallBase>(getInstruction())->getFunctionType() &&
134 "New callee type does not match FunctionType on call");
135 *getCallee() = V;
136 }
137
138 /// Return the intrinsic ID of the intrinsic called by this CallSite,
139 /// or Intrinsic::not_intrinsic if the called function is not an
140 /// intrinsic, or if this CallSite is an indirect call.
141 Intrinsic::ID getIntrinsicID() const {
142 if (auto *F = getCalledFunction())
143 return F->getIntrinsicID();
144 // Don't use Intrinsic::not_intrinsic, as it will require pulling
145 // Intrinsics.h into every header that uses CallSite.
146 return static_cast<Intrinsic::ID>(0);
147 }
148
149 /// Determine whether the passed iterator points to the callee operand's Use.
150 bool isCallee(Value::const_user_iterator UI) const {
151 return isCallee(&UI.getUse());
152 }
153
154 /// Determine whether this Use is the callee operand's Use.
155 bool isCallee(const Use *U) const { return getCallee() == U; }
156
157 /// Determine whether the passed iterator points to an argument operand.
158 bool isArgOperand(Value::const_user_iterator UI) const {
159 return isArgOperand(&UI.getUse());
160 }
161
162 /// Determine whether the passed use points to an argument operand.
163 bool isArgOperand(const Use *U) const {
164 assert(getInstruction() == U->getUser());
165 return arg_begin() <= U && U < arg_end();
166 }
167
168 /// Determine whether the passed iterator points to a bundle operand.
169 bool isBundleOperand(Value::const_user_iterator UI) const {
170 return isBundleOperand(&UI.getUse());
171 }
172
173 /// Determine whether the passed use points to a bundle operand.
174 bool isBundleOperand(const Use *U) const {
175 assert(getInstruction() == U->getUser());
176 if (!hasOperandBundles())
177 return false;
178 unsigned OperandNo = U - (*this)->op_begin();
179 return getBundleOperandsStartIndex() <= OperandNo &&
180 OperandNo < getBundleOperandsEndIndex();
181 }
182
183 /// Determine whether the passed iterator points to a data operand.
184 bool isDataOperand(Value::const_user_iterator UI) const {
185 return isDataOperand(&UI.getUse());
186 }
187
188 /// Determine whether the passed use points to a data operand.
189 bool isDataOperand(const Use *U) const {
190 return data_operands_begin() <= U && U < data_operands_end();
191 }
192
193 ValTy *getArgument(unsigned ArgNo) const {
194 assert(arg_begin() + ArgNo < arg_end() && "Argument # out of range!");
195 return *(arg_begin() + ArgNo);
196 }
197
198 void setArgument(unsigned ArgNo, Value* newVal) {
199 assert(getInstruction() && "Not a call, invoke or callbr instruction!");
200 assert(arg_begin() + ArgNo < arg_end() && "Argument # out of range!");
201 getInstruction()->setOperand(ArgNo, newVal);
202 }
203
204 /// Given a value use iterator, returns the argument that corresponds to it.
205 /// Iterator must actually correspond to an argument.
206 unsigned getArgumentNo(Value::const_user_iterator I) const {
207 return getArgumentNo(&I.getUse());
208 }
209
210 /// Given a use for an argument, get the argument number that corresponds to
211 /// it.
212 unsigned getArgumentNo(const Use *U) const {
213 assert(getInstruction() && "Not a call, invoke or callbr instruction!");
214 assert(isArgOperand(U) && "Argument # out of range!");
215 return U - arg_begin();
216 }
217
218 /// The type of iterator to use when looping over actual arguments at this
219 /// call site.
220 using arg_iterator = IterTy;
221
222 iterator_range<IterTy> args() const {
223 return make_range(arg_begin(), arg_end());
224 }
225 bool arg_empty() const { return arg_end() == arg_begin(); }
226 unsigned arg_size() const { return unsigned(arg_end() - arg_begin()); }
227
228 /// Given a value use iterator, return the data operand corresponding to it.
229 /// Iterator must actually correspond to a data operand.
230 unsigned getDataOperandNo(Value::const_user_iterator UI) const {
231 return getDataOperandNo(&UI.getUse());
232 }
233
234 /// Given a use for a data operand, get the data operand number that
235 /// corresponds to it.
236 unsigned getDataOperandNo(const Use *U) const {
237 assert(getInstruction() && "Not a call, invoke or callbr instruction!");
238 assert(isDataOperand(U) && "Data operand # out of range!");
239 return U - data_operands_begin();
240 }
241
242 /// Type of iterator to use when looping over data operands at this call site
243 /// (see below).
244 using data_operand_iterator = IterTy;
245
246 /// data_operands_begin/data_operands_end - Return iterators iterating over
247 /// the call / invoke / callbr argument list and bundle operands. For invokes,
248 /// this is the set of instruction operands except the invoke target and the
249 /// two successor blocks; for calls this is the set of instruction operands
250 /// except the call target; for callbrs the number of labels to skip must be
251 /// determined first.
252
253 IterTy data_operands_begin() const {
254 assert(getInstruction() && "Not a call or invoke instruction!");
255 return cast<CallBase>(getInstruction())->data_operands_begin();
256 }
257 IterTy data_operands_end() const {
258 assert(getInstruction() && "Not a call or invoke instruction!");
259 return cast<CallBase>(getInstruction())->data_operands_end();
260 }
261 iterator_range<IterTy> data_ops() const {
262 return make_range(data_operands_begin(), data_operands_end());
263 }
264 bool data_operands_empty() const {
265 return data_operands_end() == data_operands_begin();
266 }
267 unsigned data_operands_size() const {
268 return std::distance(data_operands_begin(), data_operands_end());
269 }
270
271 /// Return the type of the instruction that generated this call site.
272 Type *getType() const { return (*this)->getType(); }
273
274 /// Return the caller function for this call site.
275 FunTy *getCaller() const { return (*this)->getParent()->getParent(); }
276
277 /// Tests if this call site must be tail call optimized. Only a CallInst can
278 /// be tail call optimized.
279 bool isMustTailCall() const {
280 return isCall() && cast<CallInst>(getInstruction())->isMustTailCall();
281 }
282
283 /// Tests if this call site is marked as a tail call.
284 bool isTailCall() const {
285 return isCall() && cast<CallInst>(getInstruction())->isTailCall();
286 }
287
288#define CALLSITE_DELEGATE_GETTER(METHOD) \
289 InstrTy *II = getInstruction(); \
290 return isCall() ? cast<CallInst>(II)->METHOD \
291 : isCallBr() ? cast<CallBrInst>(II)->METHOD \
292 : cast<InvokeInst>(II)->METHOD
293
294#define CALLSITE_DELEGATE_SETTER(METHOD) \
295 InstrTy *II = getInstruction(); \
296 if (isCall()) \
297 cast<CallInst>(II)->METHOD; \
298 else if (isCallBr()) \
299 cast<CallBrInst>(II)->METHOD; \
300 else \
301 cast<InvokeInst>(II)->METHOD
302
303 unsigned getNumArgOperands() const {
304 CALLSITE_DELEGATE_GETTER(getNumArgOperands());
305 }
306
307 ValTy *getArgOperand(unsigned i) const {
308 CALLSITE_DELEGATE_GETTER(getArgOperand(i));
309 }
310
311 ValTy *getReturnedArgOperand() const {
312 CALLSITE_DELEGATE_GETTER(getReturnedArgOperand());
313 }
314
315 bool isInlineAsm() const {
316 return cast<CallBase>(getInstruction())->isInlineAsm();
317 }
318
319 /// Get the calling convention of the call.
320 CallingConv::ID getCallingConv() const {
321 CALLSITE_DELEGATE_GETTER(getCallingConv());
322 }
323 /// Set the calling convention of the call.
324 void setCallingConv(CallingConv::ID CC) {
325 CALLSITE_DELEGATE_SETTER(setCallingConv(CC));
326 }
327
328 FunctionType *getFunctionType() const {
329 CALLSITE_DELEGATE_GETTER(getFunctionType());
330 }
331
332 void mutateFunctionType(FunctionType *Ty) const {
333 CALLSITE_DELEGATE_SETTER(mutateFunctionType(Ty));
334 }
335
336 /// Get the parameter attributes of the call.
337 AttributeList getAttributes() const {
338 CALLSITE_DELEGATE_GETTER(getAttributes());
339 }
340 /// Set the parameter attributes of the call.
341 void setAttributes(AttributeList PAL) {
342 CALLSITE_DELEGATE_SETTER(setAttributes(PAL));
343 }
344
345 void addAttribute(unsigned i, Attribute::AttrKind Kind) {
346 CALLSITE_DELEGATE_SETTER(addAttribute(i, Kind));
347 }
348
349 void addAttribute(unsigned i, Attribute Attr) {
350 CALLSITE_DELEGATE_SETTER(addAttribute(i, Attr));
351 }
352
353 void addParamAttr(unsigned ArgNo, Attribute::AttrKind Kind) {
354 CALLSITE_DELEGATE_SETTER(addParamAttr(ArgNo, Kind));
355 }
356
357 void removeAttribute(unsigned i, Attribute::AttrKind Kind) {
358 CALLSITE_DELEGATE_SETTER(removeAttribute(i, Kind));
359 }
360
361 void removeAttribute(unsigned i, StringRef Kind) {
362 CALLSITE_DELEGATE_SETTER(removeAttribute(i, Kind));
363 }
364
365 void removeParamAttr(unsigned ArgNo, Attribute::AttrKind Kind) {
366 CALLSITE_DELEGATE_SETTER(removeParamAttr(ArgNo, Kind));
367 }
368
369 /// Return true if this function has the given attribute.
370 bool hasFnAttr(Attribute::AttrKind Kind) const {
371 CALLSITE_DELEGATE_GETTER(hasFnAttr(Kind));
372 }
373
374 /// Return true if this function has the given attribute.
375 bool hasFnAttr(StringRef Kind) const {
376 CALLSITE_DELEGATE_GETTER(hasFnAttr(Kind));
377 }
378
379 /// Return true if this return value has the given attribute.
380 bool hasRetAttr(Attribute::AttrKind Kind) const {
381 CALLSITE_DELEGATE_GETTER(hasRetAttr(Kind));
382 }
383
384 /// Return true if the call or the callee has the given attribute.
385 bool paramHasAttr(unsigned ArgNo, Attribute::AttrKind Kind) const {
386 CALLSITE_DELEGATE_GETTER(paramHasAttr(ArgNo, Kind));
387 }
388
389 Attribute getAttribute(unsigned i, Attribute::AttrKind Kind) const {
390 CALLSITE_DELEGATE_GETTER(getAttribute(i, Kind));
391 }
392
393 Attribute getAttribute(unsigned i, StringRef Kind) const {
394 CALLSITE_DELEGATE_GETTER(getAttribute(i, Kind));
395 }
396
397 /// Return true if the data operand at index \p i directly or indirectly has
398 /// the attribute \p A.
399 ///
400 /// Normal call, invoke or callbr arguments have per operand attributes, as
401 /// specified in the attribute set attached to this instruction, while operand
402 /// bundle operands may have some attributes implied by the type of its
403 /// containing operand bundle.
404 bool dataOperandHasImpliedAttr(unsigned i, Attribute::AttrKind Kind) const {
405 CALLSITE_DELEGATE_GETTER(dataOperandHasImpliedAttr(i, Kind));
406 }
407
408 /// Extract the alignment of the return value.
409 unsigned getRetAlignment() const {
410 CALLSITE_DELEGATE_GETTER(getRetAlignment());
411 }
412
413 /// Extract the alignment for a call or parameter (0=unknown).
414 unsigned getParamAlignment(unsigned ArgNo) const {
415 CALLSITE_DELEGATE_GETTER(getParamAlignment(ArgNo));
416 }
417
418 /// Extract the byval type for a call or parameter (nullptr=unknown).
419 Type *getParamByValType(unsigned ArgNo) const {
420 CALLSITE_DELEGATE_GETTER(getParamByValType(ArgNo));
421 }
422
423 /// Extract the number of dereferenceable bytes for a call or parameter
424 /// (0=unknown).
425 uint64_t getDereferenceableBytes(unsigned i) const {
426 CALLSITE_DELEGATE_GETTER(getDereferenceableBytes(i));
427 }
428
429 /// Extract the number of dereferenceable_or_null bytes for a call or
430 /// parameter (0=unknown).
431 uint64_t getDereferenceableOrNullBytes(unsigned i) const {
432 CALLSITE_DELEGATE_GETTER(getDereferenceableOrNullBytes(i));
433 }
434
435 /// Determine if the return value is marked with NoAlias attribute.
436 bool returnDoesNotAlias() const {
437 CALLSITE_DELEGATE_GETTER(returnDoesNotAlias());
438 }
439
440 /// Return true if the call should not be treated as a call to a builtin.
441 bool isNoBuiltin() const {
442 CALLSITE_DELEGATE_GETTER(isNoBuiltin());
443 }
444
445 /// Return true if the call requires strict floating point semantics.
446 bool isStrictFP() const {
447 CALLSITE_DELEGATE_GETTER(isStrictFP());
448 }
449
450 /// Return true if the call should not be inlined.
451 bool isNoInline() const {
452 CALLSITE_DELEGATE_GETTER(isNoInline());
453 }
454 void setIsNoInline(bool Value = true) {
455 CALLSITE_DELEGATE_SETTER(setIsNoInline(Value));
456 }
457
458 /// Determine if the call does not access memory.
459 bool doesNotAccessMemory() const {
460 CALLSITE_DELEGATE_GETTER(doesNotAccessMemory());
461 }
462 void setDoesNotAccessMemory() {
463 CALLSITE_DELEGATE_SETTER(setDoesNotAccessMemory());
464 }
465
466 /// Determine if the call does not access or only reads memory.
467 bool onlyReadsMemory() const {
468 CALLSITE_DELEGATE_GETTER(onlyReadsMemory());
469 }
470 void setOnlyReadsMemory() {
471 CALLSITE_DELEGATE_SETTER(setOnlyReadsMemory());
472 }
473
474 /// Determine if the call does not access or only writes memory.
475 bool doesNotReadMemory() const {
476 CALLSITE_DELEGATE_GETTER(doesNotReadMemory());
477 }
478 void setDoesNotReadMemory() {
479 CALLSITE_DELEGATE_SETTER(setDoesNotReadMemory());
480 }
481
482 /// Determine if the call can access memory only using pointers based
483 /// on its arguments.
484 bool onlyAccessesArgMemory() const {
485 CALLSITE_DELEGATE_GETTER(onlyAccessesArgMemory());
486 }
487 void setOnlyAccessesArgMemory() {
488 CALLSITE_DELEGATE_SETTER(setOnlyAccessesArgMemory());
489 }
490
491 /// Determine if the function may only access memory that is
492 /// inaccessible from the IR.
493 bool onlyAccessesInaccessibleMemory() const {
494 CALLSITE_DELEGATE_GETTER(onlyAccessesInaccessibleMemory());
495 }
496 void setOnlyAccessesInaccessibleMemory() {
497 CALLSITE_DELEGATE_SETTER(setOnlyAccessesInaccessibleMemory());
498 }
499
500 /// Determine if the function may only access memory that is
501 /// either inaccessible from the IR or pointed to by its arguments.
502 bool onlyAccessesInaccessibleMemOrArgMem() const {
503 CALLSITE_DELEGATE_GETTER(onlyAccessesInaccessibleMemOrArgMem());
504 }
505 void setOnlyAccessesInaccessibleMemOrArgMem() {
506 CALLSITE_DELEGATE_SETTER(setOnlyAccessesInaccessibleMemOrArgMem());
507 }
508
509 /// Determine if the call cannot return.
510 bool doesNotReturn() const {
511 CALLSITE_DELEGATE_GETTER(doesNotReturn());
512 }
513 void setDoesNotReturn() {
514 CALLSITE_DELEGATE_SETTER(setDoesNotReturn());
515 }
516
517 /// Determine if the call cannot unwind.
518 bool doesNotThrow() const {
519 CALLSITE_DELEGATE_GETTER(doesNotThrow());
520 }
521 void setDoesNotThrow() {
522 CALLSITE_DELEGATE_SETTER(setDoesNotThrow());
523 }
524
525 /// Determine if the call can be duplicated.
526 bool cannotDuplicate() const {
527 CALLSITE_DELEGATE_GETTER(cannotDuplicate());
528 }
529 void setCannotDuplicate() {
530 CALLSITE_DELEGATE_SETTER(setCannotDuplicate());
531 }
532
533 /// Determine if the call is convergent.
534 bool isConvergent() const {
535 CALLSITE_DELEGATE_GETTER(isConvergent());
536 }
537 void setConvergent() {
538 CALLSITE_DELEGATE_SETTER(setConvergent());
539 }
540 void setNotConvergent() {
541 CALLSITE_DELEGATE_SETTER(setNotConvergent());
542 }
543
544 unsigned getNumOperandBundles() const {
545 CALLSITE_DELEGATE_GETTER(getNumOperandBundles());
546 }
547
548 bool hasOperandBundles() const {
549 CALLSITE_DELEGATE_GETTER(hasOperandBundles());
550 }
551
552 unsigned getBundleOperandsStartIndex() const {
553 CALLSITE_DELEGATE_GETTER(getBundleOperandsStartIndex());
554 }
555
556 unsigned getBundleOperandsEndIndex() const {
557 CALLSITE_DELEGATE_GETTER(getBundleOperandsEndIndex());
558 }
559
560 unsigned getNumTotalBundleOperands() const {
561 CALLSITE_DELEGATE_GETTER(getNumTotalBundleOperands());
562 }
563
564 OperandBundleUse getOperandBundleAt(unsigned Index) const {
565 CALLSITE_DELEGATE_GETTER(getOperandBundleAt(Index));
566 }
567
568 Optional<OperandBundleUse> getOperandBundle(StringRef Name) const {
569 CALLSITE_DELEGATE_GETTER(getOperandBundle(Name));
570 }
571
572 Optional<OperandBundleUse> getOperandBundle(uint32_t ID) const {
573 CALLSITE_DELEGATE_GETTER(getOperandBundle(ID));
574 }
575
576 unsigned countOperandBundlesOfType(uint32_t ID) const {
577 CALLSITE_DELEGATE_GETTER(countOperandBundlesOfType(ID));
578 }
579
580 bool isBundleOperand(unsigned Idx) const {
581 CALLSITE_DELEGATE_GETTER(isBundleOperand(Idx));
582 }
583
584 IterTy arg_begin() const {
585 CALLSITE_DELEGATE_GETTER(arg_begin());
586 }
587
588 IterTy arg_end() const {
589 CALLSITE_DELEGATE_GETTER(arg_end());
590 }
591
592#undef CALLSITE_DELEGATE_GETTER
593#undef CALLSITE_DELEGATE_SETTER
594
595 void getOperandBundlesAsDefs(SmallVectorImpl<OperandBundleDef> &Defs) const {
596 // Since this is actually a getter that "looks like" a setter, don't use the
597 // above macros to avoid confusion.
598 cast<CallBase>(getInstruction())->getOperandBundlesAsDefs(Defs);
599 }
600
601 /// Determine whether this data operand is not captured.
602 bool doesNotCapture(unsigned OpNo) const {
603 return dataOperandHasImpliedAttr(OpNo + 1, Attribute::NoCapture);
604 }
605
606 /// Determine whether this argument is passed by value.
607 bool isByValArgument(unsigned ArgNo) const {
608 return paramHasAttr(ArgNo, Attribute::ByVal);
609 }
610
611 /// Determine whether this argument is passed in an alloca.
612 bool isInAllocaArgument(unsigned ArgNo) const {
613 return paramHasAttr(ArgNo, Attribute::InAlloca);
614 }
615
616 /// Determine whether this argument is passed by value or in an alloca.
617 bool isByValOrInAllocaArgument(unsigned ArgNo) const {
618 return paramHasAttr(ArgNo, Attribute::ByVal) ||
619 paramHasAttr(ArgNo, Attribute::InAlloca);
620 }
621
622 /// Determine if there is an inalloca argument. Only the last argument can
623 /// have the inalloca attribute.
624 bool hasInAllocaArgument() const {
625 return !arg_empty() && paramHasAttr(arg_size() - 1, Attribute::InAlloca);
626 }
627
628 bool doesNotAccessMemory(unsigned OpNo) const {
629 return dataOperandHasImpliedAttr(OpNo + 1, Attribute::ReadNone);
630 }
631
632 bool onlyReadsMemory(unsigned OpNo) const {
633 return dataOperandHasImpliedAttr(OpNo + 1, Attribute::ReadOnly) ||
634 dataOperandHasImpliedAttr(OpNo + 1, Attribute::ReadNone);
635 }
636
637 bool doesNotReadMemory(unsigned OpNo) const {
638 return dataOperandHasImpliedAttr(OpNo + 1, Attribute::WriteOnly) ||
639 dataOperandHasImpliedAttr(OpNo + 1, Attribute::ReadNone);
640 }
641
642 /// Return true if the return value is known to be not null.
643 /// This may be because it has the nonnull attribute, or because at least
644 /// one byte is dereferenceable and the pointer is in addrspace(0).
645 bool isReturnNonNull() const {
646 if (hasRetAttr(Attribute::NonNull))
647 return true;
648 else if (getDereferenceableBytes(AttributeList::ReturnIndex) > 0 &&
649 !NullPointerIsDefined(getCaller(),
650 getType()->getPointerAddressSpace()))
651 return true;
652
653 return false;
654 }
655
656 /// Returns true if this CallSite passes the given Value* as an argument to
657 /// the called function.
658 bool hasArgument(const Value *Arg) const {
659 for (arg_iterator AI = this->arg_begin(), E = this->arg_end(); AI != E;
660 ++AI)
661 if (AI->get() == Arg)
662 return true;
663 return false;
664 }
665
666private:
667 IterTy getCallee() const {
668 return cast<CallBase>(getInstruction())->op_end() - 1;
669 }
670};
671
672class CallSite : public CallSiteBase<Function, BasicBlock, Value, User, Use,
673 Instruction, CallInst, InvokeInst,
674 CallBrInst, User::op_iterator> {
675public:
676 CallSite() = default;
677 CallSite(CallSiteBase B) : CallSiteBase(B) {}
678 CallSite(CallInst *CI) : CallSiteBase(CI) {}
679 CallSite(InvokeInst *II) : CallSiteBase(II) {}
680 CallSite(CallBrInst *CBI) : CallSiteBase(CBI) {}
681 explicit CallSite(Instruction *II) : CallSiteBase(II) {}
682 explicit CallSite(Value *V) : CallSiteBase(V) {}
683
684 bool operator==(const CallSite &CS) const { return I == CS.I; }
685 bool operator!=(const CallSite &CS) const { return I != CS.I; }
686 bool operator<(const CallSite &CS) const {
687 return getInstruction() < CS.getInstruction();
688 }
689
690private:
691 friend struct DenseMapInfo<CallSite>;
692
693 User::op_iterator getCallee() const;
694};
695
696/// Establish a view to a call site for examination.
697class ImmutableCallSite : public CallSiteBase<> {
698public:
699 ImmutableCallSite() = default;
700 ImmutableCallSite(const CallInst *CI) : CallSiteBase(CI) {}
701 ImmutableCallSite(const InvokeInst *II) : CallSiteBase(II) {}
702 ImmutableCallSite(const CallBrInst *CBI) : CallSiteBase(CBI) {}
703 explicit ImmutableCallSite(const Instruction *II) : CallSiteBase(II) {}
704 explicit ImmutableCallSite(const Value *V) : CallSiteBase(V) {}
705 ImmutableCallSite(CallSite CS) : CallSiteBase(CS.getInstruction()) {}
706};
707
708/// AbstractCallSite
709///
710/// An abstract call site is a wrapper that allows treating direct,
711/// indirect, and callback calls the same. If an abstract call site
712/// represents a direct or indirect call site it behaves like a stripped
713/// down version of a normal call site object. The abstract call site can
714/// also represent a callback call, that is, the case where the initially
715/// called function (=broker) may invoke a third one (=callback callee).
716/// In this case, the abstract call site hides the middle man, namely the
717/// broker function. The result is a representation of the callback call,
718/// inside the broker, but in the context of the original call to the broker.
719///
720/// There are up to three functions involved when we talk about callback call
721/// sites. The caller (1), which invokes the broker function. The broker
722/// function (2), that will invoke the callee zero or more times. And finally
723/// the callee (3), which is the target of the callback call.
724///
725/// The abstract call site will handle the mapping from parameters to arguments
726/// depending on the semantic of the broker function. However, it is important
727/// to note that the mapping is often partial. Thus, some arguments of the
728/// call/invoke instruction are mapped to parameters of the callee while others
729/// are not.
730class AbstractCallSite {
731public:
732
733 /// The encoding of a callback with regards to the underlying instruction.
734 struct CallbackInfo {
735
736 /// For direct/indirect calls the parameter encoding is empty. If it is not,
737 /// the abstract call site represents a callback. In that case, the first
738 /// element of the encoding vector represents which argument of the call
739 /// site CS is the callback callee. The remaining elements map parameters
740 /// (identified by their position) to the arguments that will be passed
741 /// through (also identified by position but in the call site instruction).
742 ///
743 /// NOTE that we use LLVM argument numbers (starting at 0) and not
744 /// clang/source argument numbers (starting at 1). The -1 entries represent
745 /// unknown values that are passed to the callee.
746 using ParameterEncodingTy = SmallVector<int, 0>;
747 ParameterEncodingTy ParameterEncoding;
748
749 };
750
751private:
752
753 /// The underlying call site:
754 /// caller -> callee, if this is a direct or indirect call site
755 /// caller -> broker function, if this is a callback call site
756 CallSite CS;
757
758 /// The encoding of a callback with regards to the underlying instruction.
759 CallbackInfo CI;
760
761public:
762 /// Sole constructor for abstract call sites (ACS).
763 ///
764 /// An abstract call site can only be constructed through a llvm::Use because
765 /// each operand (=use) of an instruction could potentially be a different
766 /// abstract call site. Furthermore, even if the value of the llvm::Use is the
767 /// same, and the user is as well, the abstract call sites might not be.
768 ///
769 /// If a use is not associated with an abstract call site the constructed ACS
770 /// will evaluate to false if converted to a boolean.
771 ///
772 /// If the use is the callee use of a call or invoke instruction, the
773 /// constructed abstract call site will behave as a llvm::CallSite would.
774 ///
775 /// If the use is not a callee use of a call or invoke instruction, the
776 /// callback metadata is used to determine the argument <-> parameter mapping
777 /// as well as the callee of the abstract call site.
778 AbstractCallSite(const Use *U);
779
780 /// Add operand uses of \p ICS that represent callback uses into \p CBUses.
781 ///
782 /// All uses added to \p CBUses can be used to create abstract call sites for
783 /// which AbstractCallSite::isCallbackCall() will return true.
784 static void getCallbackUses(ImmutableCallSite ICS,
785 SmallVectorImpl<const Use *> &CBUses);
786
787 /// Conversion operator to conveniently check for a valid/initialized ACS.
788 explicit operator bool() const { return (bool)CS; }
789
790 /// Return the underlying instruction.
791 Instruction *getInstruction() const { return CS.getInstruction(); }
792
793 /// Return the call site abstraction for the underlying instruction.
794 CallSite getCallSite() const { return CS; }
795
796 /// Return true if this ACS represents a direct call.
797 bool isDirectCall() const {
798 return !isCallbackCall() && !CS.isIndirectCall();
799 }
800
801 /// Return true if this ACS represents an indirect call.
802 bool isIndirectCall() const {
803 return !isCallbackCall() && CS.isIndirectCall();
804 }
805
806 /// Return true if this ACS represents a callback call.
807 bool isCallbackCall() const {
808 // For a callback call site the callee is ALWAYS stored first in the
809 // transitive values vector. Thus, a non-empty vector indicates a callback.
810 return !CI.ParameterEncoding.empty();
811 }
812
813 /// Return true if @p UI is the use that defines the callee of this ACS.
814 bool isCallee(Value::const_user_iterator UI) const {
815 return isCallee(&UI.getUse());
816 }
817
818 /// Return true if @p U is the use that defines the callee of this ACS.
819 bool isCallee(const Use *U) const {
820 if (isDirectCall())
821 return CS.isCallee(U);
822
823 assert(!CI.ParameterEncoding.empty() &&
824 "Callback without parameter encoding!");
825
826 return (int)CS.getArgumentNo(U) == CI.ParameterEncoding[0];
827 }
828
829 /// Return the number of parameters of the callee.
830 unsigned getNumArgOperands() const {
831 if (isDirectCall())
832 return CS.getNumArgOperands();
833 // Subtract 1 for the callee encoding.
834 return CI.ParameterEncoding.size() - 1;
835 }
836
837 /// Return the operand index of the underlying instruction associated with @p
838 /// Arg.
839 int getCallArgOperandNo(Argument &Arg) const {
840 return getCallArgOperandNo(Arg.getArgNo());
841 }
842
843 /// Return the operand index of the underlying instruction associated with
844 /// the function parameter number @p ArgNo or -1 if there is none.
845 int getCallArgOperandNo(unsigned ArgNo) const {
846 if (isDirectCall())
847 return ArgNo;
848 // Add 1 for the callee encoding.
849 return CI.ParameterEncoding[ArgNo + 1];
850 }
851
852 /// Return the operand of the underlying instruction associated with @p Arg.
853 Value *getCallArgOperand(Argument &Arg) const {
854 return getCallArgOperand(Arg.getArgNo());
855 }
856
857 /// Return the operand of the underlying instruction associated with the
858 /// function parameter number @p ArgNo or nullptr if there is none.
859 Value *getCallArgOperand(unsigned ArgNo) const {
860 if (isDirectCall())
861 return CS.getArgOperand(ArgNo);
862 // Add 1 for the callee encoding.
863 return CI.ParameterEncoding[ArgNo + 1] >= 0
864 ? CS.getArgOperand(CI.ParameterEncoding[ArgNo + 1])
865 : nullptr;
866 }
867
868 /// Return the operand index of the underlying instruction associated with the
869 /// callee of this ACS. Only valid for callback calls!
870 int getCallArgOperandNoForCallee() const {
871    assert(isCallbackCall());
872    assert(CI.ParameterEncoding.size() && CI.ParameterEncoding[0] >= 0);
873 return CI.ParameterEncoding[0];
874 }
875
876 /// Return the use of the callee value in the underlying instruction. Only
877 /// valid for callback calls!
878 const Use &getCalleeUseForCallback() const {
879 int CalleeArgIdx = getCallArgOperandNoForCallee();
880    assert(CalleeArgIdx >= 0 &&
881           unsigned(CalleeArgIdx) < getInstruction()->getNumOperands());
882 return getInstruction()->getOperandUse(CalleeArgIdx);
883 }
884
885 /// Return the pointer to function that is being called.
886 Value *getCalledValue() const {
887 if (isDirectCall())
888 return CS.getCalledValue();
889 return CS.getArgOperand(getCallArgOperandNoForCallee());
890 }
891
892 /// Return the function being called if this is a direct call, otherwise
893 /// return null (if it's an indirect call).
894 Function *getCalledFunction() const {
895 Value *V = getCalledValue();
896 return V ? dyn_cast<Function>(V->stripPointerCasts()) : nullptr;
897 }
898};
899
900template <> struct DenseMapInfo<CallSite> {
901 using BaseInfo = DenseMapInfo<decltype(CallSite::I)>;
902
903 static CallSite getEmptyKey() {
904 CallSite CS;
905 CS.I = BaseInfo::getEmptyKey();
906 return CS;
907 }
908
909 static CallSite getTombstoneKey() {
910 CallSite CS;
911 CS.I = BaseInfo::getTombstoneKey();
912 return CS;
913 }
914
915 static unsigned getHashValue(const CallSite &CS) {
916 return BaseInfo::getHashValue(CS.I);
917 }
918
919 static bool isEqual(const CallSite &LHS, const CallSite &RHS) {
920 return LHS == RHS;
921 }
922};
923
924} // end namespace llvm
925
926#endif // LLVM_IR_CALLSITE_H
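For orientation, here is a minimal sketch of how the accessors listed above are typically driven (not part of the analyzed source; the helper name visitCallers is invented, and it assumes this header's AbstractCallSite, which in LLVM 10 is constructible from a Use):

#include "llvm/IR/CallSite.h"          // AbstractCallSite (LLVM 10)
#include "llvm/IR/Function.h"
#include "llvm/Support/raw_ostream.h"

// Walk all uses of F and treat each one that forms a call site -- direct,
// indirect, or callback -- uniformly through AbstractCallSite.
static void visitCallers(llvm::Function &F) {
  for (const llvm::Use &U : F.uses()) {
    llvm::AbstractCallSite ACS(&U);
    if (!ACS)                          // operator bool(): this use is not a call site
      continue;
    // For callback calls, getCallArgOperand() maps callee parameter numbers to
    // broker-call operands via ParameterEncoding and may return null.
    for (unsigned i = 0, e = ACS.getNumArgOperands(); i != e; ++i)
      if (llvm::Value *Op = ACS.getCallArgOperand(i))
        Op->print(llvm::errs());
  }
}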

/build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/llvm/include/llvm/ADT/PointerIntPair.h

1//===- llvm/ADT/PointerIntPair.h - Pair for pointer and int -----*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the PointerIntPair class.
10//
11//===----------------------------------------------------------------------===//
12
13#ifndef LLVM_ADT_POINTERINTPAIR_H
14#define LLVM_ADT_POINTERINTPAIR_H
15
16#include "llvm/Support/Compiler.h"
17#include "llvm/Support/PointerLikeTypeTraits.h"
18#include "llvm/Support/type_traits.h"
19#include <cassert>
20#include <cstdint>
21#include <limits>
22
23namespace llvm {
24
25template <typename T> struct DenseMapInfo;
26template <typename PointerT, unsigned IntBits, typename PtrTraits>
27struct PointerIntPairInfo;
28
29/// PointerIntPair - This class implements a pair of a pointer and small
30/// integer. It is designed to represent this in the space required by one
31/// pointer by bitmangling the integer into the low part of the pointer. This
32/// can only be done for small integers: typically up to 3 bits, but it depends
33/// on the number of bits available according to PointerLikeTypeTraits for the
34/// type.
35///
36/// Note that PointerIntPair always puts the IntVal part in the highest bits
37/// possible. For example, PointerIntPair<void*, 1, bool> will put the bit for
38/// the bool into bit #2, not bit #0, which allows the low two bits to be used
39/// for something else. For example, this allows:
40/// PointerIntPair<PointerIntPair<void*, 1, bool>, 1, bool>
41/// ... and the two bools will land in different bits.
42template <typename PointerTy, unsigned IntBits, typename IntType = unsigned,
43 typename PtrTraits = PointerLikeTypeTraits<PointerTy>,
44 typename Info = PointerIntPairInfo<PointerTy, IntBits, PtrTraits>>
45class PointerIntPair {
46 // Used by MSVC visualizer and generally helpful for debugging/visualizing.
47 using InfoTy = Info;
48 intptr_t Value = 0;
49
50public:
51 constexpr PointerIntPair() = default;
52
53 PointerIntPair(PointerTy PtrVal, IntType IntVal) {
54 setPointerAndInt(PtrVal, IntVal);
55 }
56
57 explicit PointerIntPair(PointerTy PtrVal) { initWithPointer(PtrVal); }
58
59 PointerTy getPointer() const { return Info::getPointer(Value); }
  14. Calling 'PointerIntPairInfo::getPointer'
  22. Returning from 'PointerIntPairInfo::getPointer'
  23. Returning null pointer, which participates in a condition later
60
61 IntType getInt() const { return (IntType)Info::getInt(Value); }
62
63    void setPointer(PointerTy PtrVal) LLVM_LVALUE_FUNCTION {
64 Value = Info::updatePointer(Value, PtrVal);
65 }
66
67    void setInt(IntType IntVal) LLVM_LVALUE_FUNCTION {
68 Value = Info::updateInt(Value, static_cast<intptr_t>(IntVal));
69 }
70
71    void initWithPointer(PointerTy PtrVal) LLVM_LVALUE_FUNCTION {
72 Value = Info::updatePointer(0, PtrVal);
73 }
74
75    void setPointerAndInt(PointerTy PtrVal, IntType IntVal) LLVM_LVALUE_FUNCTION {
76 Value = Info::updateInt(Info::updatePointer(0, PtrVal),
77 static_cast<intptr_t>(IntVal));
78 }
79
80 PointerTy const *getAddrOfPointer() const {
81 return const_cast<PointerIntPair *>(this)->getAddrOfPointer();
82 }
83
84 PointerTy *getAddrOfPointer() {
85     assert(Value == reinterpret_cast<intptr_t>(getPointer()) &&
86            "Can only return the address if IntBits is cleared and "
87            "PtrTraits doesn't change the pointer");
88 return reinterpret_cast<PointerTy *>(&Value);
89 }
90
91 void *getOpaqueValue() const { return reinterpret_cast<void *>(Value); }
92
93    void setFromOpaqueValue(void *Val) LLVM_LVALUE_FUNCTION {
94 Value = reinterpret_cast<intptr_t>(Val);
95 }
96
97 static PointerIntPair getFromOpaqueValue(void *V) {
98 PointerIntPair P;
99 P.setFromOpaqueValue(V);
100 return P;
101 }
102
103 // Allow PointerIntPairs to be created from const void * if and only if the
104 // pointer type could be created from a const void *.
105 static PointerIntPair getFromOpaqueValue(const void *V) {
106 (void)PtrTraits::getFromVoidPointer(V);
107 return getFromOpaqueValue(const_cast<void *>(V));
108 }
109
110 bool operator==(const PointerIntPair &RHS) const {
111 return Value == RHS.Value;
112 }
113
114 bool operator!=(const PointerIntPair &RHS) const {
115 return Value != RHS.Value;
116 }
117
118 bool operator<(const PointerIntPair &RHS) const { return Value < RHS.Value; }
119 bool operator>(const PointerIntPair &RHS) const { return Value > RHS.Value; }
120
121 bool operator<=(const PointerIntPair &RHS) const {
122 return Value <= RHS.Value;
123 }
124
125 bool operator>=(const PointerIntPair &RHS) const {
126 return Value >= RHS.Value;
127 }
128};
129
130// Specialize is_trivially_copyable to avoid limitation of llvm::is_trivially_copyable
131// when compiled with gcc 4.9.
132template <typename PointerTy, unsigned IntBits, typename IntType,
133 typename PtrTraits,
134 typename Info>
135struct is_trivially_copyable<PointerIntPair<PointerTy, IntBits, IntType, PtrTraits, Info>> : std::true_type {
136#ifdef HAVE_STD_IS_TRIVIALLY_COPYABLE
137 static_assert(std::is_trivially_copyable<PointerIntPair<PointerTy, IntBits, IntType, PtrTraits, Info>>::value,
138 "inconsistent behavior between llvm:: and std:: implementation of is_trivially_copyable");
139#endif
140};
141
142
143template <typename PointerT, unsigned IntBits, typename PtrTraits>
144struct PointerIntPairInfo {
145 static_assert(PtrTraits::NumLowBitsAvailable <
146 std::numeric_limits<uintptr_t>::digits,
147 "cannot use a pointer type that has all bits free");
148 static_assert(IntBits <= PtrTraits::NumLowBitsAvailable,
149 "PointerIntPair with integer size too large for pointer");
150 enum : uintptr_t {
151 /// PointerBitMask - The bits that come from the pointer.
152 PointerBitMask =
153 ~(uintptr_t)(((intptr_t)1 << PtrTraits::NumLowBitsAvailable) - 1),
154
155 /// IntShift - The number of low bits that we reserve for other uses, and
156 /// keep zero.
157 IntShift = (uintptr_t)PtrTraits::NumLowBitsAvailable - IntBits,
158
159 /// IntMask - This is the unshifted mask for valid bits of the int type.
160 IntMask = (uintptr_t)(((intptr_t)1 << IntBits) - 1),
161
162 // ShiftedIntMask - This is the bits for the integer shifted in place.
163 ShiftedIntMask = (uintptr_t)(IntMask << IntShift)
164 };
165
166 static PointerT getPointer(intptr_t Value) {
167 return PtrTraits::getFromVoidPointer(
  15. Calling 'PointerLikeTypeTraits::getFromVoidPointer'
  20. Returning from 'PointerLikeTypeTraits::getFromVoidPointer'
  21. Returning null pointer, which participates in a condition later
168 reinterpret_cast<void *>(Value & PointerBitMask));
169 }
170
171 static intptr_t getInt(intptr_t Value) {
172 return (Value >> IntShift) & IntMask;
173 }
174
175 static intptr_t updatePointer(intptr_t OrigValue, PointerT Ptr) {
176 intptr_t PtrWord =
177 reinterpret_cast<intptr_t>(PtrTraits::getAsVoidPointer(Ptr));
178    assert((PtrWord & ~PointerBitMask) == 0 &&
179           "Pointer is not sufficiently aligned");
180 // Preserve all low bits, just update the pointer.
181 return PtrWord | (OrigValue & ~PointerBitMask);
182 }
183
184 static intptr_t updateInt(intptr_t OrigValue, intptr_t Int) {
185 intptr_t IntWord = static_cast<intptr_t>(Int);
186    assert((IntWord & ~IntMask) == 0 && "Integer too large for field");
187
188 // Preserve all bits other than the ones we are updating.
189 return (OrigValue & ~ShiftedIntMask) | IntWord << IntShift;
190 }
191};
192
193// Provide specialization of DenseMapInfo for PointerIntPair.
194template <typename PointerTy, unsigned IntBits, typename IntType>
195struct DenseMapInfo<PointerIntPair<PointerTy, IntBits, IntType>> {
196 using Ty = PointerIntPair<PointerTy, IntBits, IntType>;
197
198 static Ty getEmptyKey() {
199 uintptr_t Val = static_cast<uintptr_t>(-1);
200 Val <<= PointerLikeTypeTraits<Ty>::NumLowBitsAvailable;
201 return Ty::getFromOpaqueValue(reinterpret_cast<void *>(Val));
202 }
203
204 static Ty getTombstoneKey() {
205 uintptr_t Val = static_cast<uintptr_t>(-2);
206 Val <<= PointerLikeTypeTraits<PointerTy>::NumLowBitsAvailable;
207 return Ty::getFromOpaqueValue(reinterpret_cast<void *>(Val));
208 }
209
210 static unsigned getHashValue(Ty V) {
211 uintptr_t IV = reinterpret_cast<uintptr_t>(V.getOpaqueValue());
212 return unsigned(IV) ^ unsigned(IV >> 9);
213 }
214
215 static bool isEqual(const Ty &LHS, const Ty &RHS) { return LHS == RHS; }
216};
217
218// Teach SmallPtrSet that PointerIntPair is "basically a pointer".
219template <typename PointerTy, unsigned IntBits, typename IntType,
220 typename PtrTraits>
221struct PointerLikeTypeTraits<
222 PointerIntPair<PointerTy, IntBits, IntType, PtrTraits>> {
223 static inline void *
224 getAsVoidPointer(const PointerIntPair<PointerTy, IntBits, IntType> &P) {
225 return P.getOpaqueValue();
226 }
227
228 static inline PointerIntPair<PointerTy, IntBits, IntType>
229 getFromVoidPointer(void *P) {
230 return PointerIntPair<PointerTy, IntBits, IntType>::getFromOpaqueValue(P);
231 }
232
233 static inline PointerIntPair<PointerTy, IntBits, IntType>
234 getFromVoidPointer(const void *P) {
235 return PointerIntPair<PointerTy, IntBits, IntType>::getFromOpaqueValue(P);
236 }
237
238 enum { NumLowBitsAvailable = PtrTraits::NumLowBitsAvailable - IntBits };
239};
240
241} // end namespace llvm
242
243#endif // LLVM_ADT_POINTERINTPAIR_H
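As a quick illustration of the class above (a sketch, not part of the analyzed source; the function and variable names are invented), a pointer and a one-bit tag can be packed into one word, assuming the pointee type leaves at least one low bit free per PointerLikeTypeTraits:

#include "llvm/ADT/PointerIntPair.h"
#include <cassert>

void pointerIntPairSketch() {
  static int Data = 42;                        // int is 4-byte aligned, so 2 low bits are free
  llvm::PointerIntPair<int *, 1, bool> P(&Data, true);

  assert(P.getPointer() == &Data && P.getInt());
  P.setInt(false);                             // rewrites only the tag bits
  P.setPointer(&Data);                         // rewrites only the pointer bits
  assert(!P.getInt() && P.getPointer() == &Data);
}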

/build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/llvm/include/llvm/Support/PointerLikeTypeTraits.h

1//===- llvm/Support/PointerLikeTypeTraits.h - Pointer Traits ----*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the PointerLikeTypeTraits class. This allows data
10// structures to reason about pointers and other things that are pointer sized.
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_SUPPORT_POINTERLIKETYPETRAITS_H
15#define LLVM_SUPPORT_POINTERLIKETYPETRAITS_H
16
17#include "llvm/Support/DataTypes.h"
18#include <assert.h>
19#include <type_traits>
20
21namespace llvm {
22
23/// A traits type that is used to handle pointer types and things that are just
24/// wrappers for pointers as a uniform entity.
25template <typename T> struct PointerLikeTypeTraits;
26
27namespace detail {
28/// A tiny meta function to compute the log2 of a compile time constant.
29template <size_t N>
30struct ConstantLog2
31 : std::integral_constant<size_t, ConstantLog2<N / 2>::value + 1> {};
32template <> struct ConstantLog2<1> : std::integral_constant<size_t, 0> {};
33
34// Provide a trait to check if T is pointer-like.
35template <typename T, typename U = void> struct HasPointerLikeTypeTraits {
36 static const bool value = false;
37};
38
39// sizeof(T) is valid only for a complete T.
40template <typename T> struct HasPointerLikeTypeTraits<
41 T, decltype((sizeof(PointerLikeTypeTraits<T>) + sizeof(T)), void())> {
42 static const bool value = true;
43};
44
45template <typename T> struct IsPointerLike {
46 static const bool value = HasPointerLikeTypeTraits<T>::value;
47};
48
49template <typename T> struct IsPointerLike<T *> {
50 static const bool value = true;
51};
52} // namespace detail
53
54// Provide PointerLikeTypeTraits for non-cvr pointers.
55template <typename T> struct PointerLikeTypeTraits<T *> {
56 static inline void *getAsVoidPointer(T *P) { return P; }
57 static inline T *getFromVoidPointer(void *P) { return static_cast<T *>(P); }
  17. Returning null pointer (loaded from 'P'), which participates in a condition later
58
59 enum { NumLowBitsAvailable = detail::ConstantLog2<alignof(T)>::value };
60};
61
62template <> struct PointerLikeTypeTraits<void *> {
63 static inline void *getAsVoidPointer(void *P) { return P; }
64 static inline void *getFromVoidPointer(void *P) { return P; }
65
66 /// Note, we assume here that void* is related to raw malloc'ed memory and
67 /// that malloc returns objects at least 4-byte aligned. However, this may be
68 /// wrong, or pointers may be from something other than malloc. In this case,
69 /// you should specify a real typed pointer or avoid this template.
70 ///
71 /// All clients should use assertions to do a run-time check to ensure that
72 /// this is actually true.
73 enum { NumLowBitsAvailable = 2 };
74};
75
76// Provide PointerLikeTypeTraits for const things.
77template <typename T> struct PointerLikeTypeTraits<const T> {
78 typedef PointerLikeTypeTraits<T> NonConst;
79
80 static inline const void *getAsVoidPointer(const T P) {
81 return NonConst::getAsVoidPointer(P);
82 }
83 static inline const T getFromVoidPointer(const void *P) {
84 return NonConst::getFromVoidPointer(const_cast<void *>(P));
85 }
86 enum { NumLowBitsAvailable = NonConst::NumLowBitsAvailable };
87};
88
89// Provide PointerLikeTypeTraits for const pointers.
90template <typename T> struct PointerLikeTypeTraits<const T *> {
91 typedef PointerLikeTypeTraits<T *> NonConst;
92
93 static inline const void *getAsVoidPointer(const T *P) {
94 return NonConst::getAsVoidPointer(const_cast<T *>(P));
95 }
96 static inline const T *getFromVoidPointer(const void *P) {
97 return NonConst::getFromVoidPointer(const_cast<void *>(P));
  16. Calling 'PointerLikeTypeTraits::getFromVoidPointer'
  18. Returning from 'PointerLikeTypeTraits::getFromVoidPointer'
  19. Returning null pointer, which participates in a condition later
98 }
99 enum { NumLowBitsAvailable = NonConst::NumLowBitsAvailable };
100};
101
102// Provide PointerLikeTypeTraits for uintptr_t.
103template <> struct PointerLikeTypeTraits<uintptr_t> {
104 static inline void *getAsVoidPointer(uintptr_t P) {
105 return reinterpret_cast<void *>(P);
106 }
107 static inline uintptr_t getFromVoidPointer(void *P) {
108 return reinterpret_cast<uintptr_t>(P);
109 }
110 // No bits are available!
111 enum { NumLowBitsAvailable = 0 };
112};
113
114/// Provide suitable custom traits struct for function pointers.
115///
116/// Function pointers can't be directly given these traits as functions can't
117/// have their alignment computed with `alignof` and we need different casting.
118///
119/// To rely on higher alignment for a specialized use, you can provide a
120/// customized form of this template explicitly with higher alignment, and
121/// potentially use alignment attributes on functions to satisfy that.
122template <int Alignment, typename FunctionPointerT>
123struct FunctionPointerLikeTypeTraits {
124 enum { NumLowBitsAvailable = detail::ConstantLog2<Alignment>::value };
125 static inline void *getAsVoidPointer(FunctionPointerT P) {
126    assert((reinterpret_cast<uintptr_t>(P) &
127            ~((uintptr_t)-1 << NumLowBitsAvailable)) == 0 &&
128           "Alignment not satisfied for an actual function pointer!");
129 return reinterpret_cast<void *>(P);
130 }
131 static inline FunctionPointerT getFromVoidPointer(void *P) {
132 return reinterpret_cast<FunctionPointerT>(P);
133 }
134};
135
136/// Provide a default specialization for function pointers that assumes 4-byte
137/// alignment.
138///
139/// We assume here that functions used with this are always at least 4-byte
140/// aligned. This means that, for example, thumb functions won't work or systems
141/// with weird unaligned function pointers won't work. But all practical systems
142/// we support satisfy this requirement.
143template <typename ReturnT, typename... ParamTs>
144struct PointerLikeTypeTraits<ReturnT (*)(ParamTs...)>
145 : FunctionPointerLikeTypeTraits<4, ReturnT (*)(ParamTs...)> {};
146
147} // end namespace llvm
148
149#endif
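To show how these traits are consumed (a hypothetical sketch, not from the analyzed source; the types Impl and MyHandle are invented), a pointer-like wrapper can be made usable with PointerIntPair and similar containers by specializing PointerLikeTypeTraits and advertising only the low bits its pointers are guaranteed to keep clear:

#include "llvm/Support/PointerLikeTypeTraits.h"

struct Impl;                     // hypothetical pointee, assumed at least 4-byte aligned
struct MyHandle { Impl *Ptr; };  // hypothetical pointer-like wrapper

namespace llvm {
template <> struct PointerLikeTypeTraits<MyHandle> {
  static void *getAsVoidPointer(MyHandle H) { return H.Ptr; }
  static MyHandle getFromVoidPointer(void *P) {
    return MyHandle{static_cast<Impl *>(P)};
  }
  // Claim no more free low bits than the underlying allocation guarantees.
  enum { NumLowBitsAvailable = 2 };
};
} // end namespace llvm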

/build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/llvm/include/llvm/IR/Operator.h

1//===-- llvm/Operator.h - Operator utility subclass -------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines various classes for working with Instructions and
10// ConstantExprs.
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_IR_OPERATOR_H
15#define LLVM_IR_OPERATOR_H
16
17#include "llvm/ADT/None.h"
18#include "llvm/ADT/Optional.h"
19#include "llvm/IR/Constants.h"
20#include "llvm/IR/Instruction.h"
21#include "llvm/IR/Type.h"
22#include "llvm/IR/Value.h"
23#include "llvm/Support/Casting.h"
24#include <cstddef>
25
26namespace llvm {
27
28/// This is a utility class that provides an abstraction for the common
29/// functionality between Instructions and ConstantExprs.
30class Operator : public User {
31public:
32 // The Operator class is intended to be used as a utility, and is never itself
33 // instantiated.
34 Operator() = delete;
35 ~Operator() = delete;
36
37 void *operator new(size_t s) = delete;
38
39 /// Return the opcode for this Instruction or ConstantExpr.
40 unsigned getOpcode() const {
41 if (const Instruction *I = dyn_cast<Instruction>(this))
42 return I->getOpcode();
43 return cast<ConstantExpr>(this)->getOpcode();
44 }
45
46 /// If V is an Instruction or ConstantExpr, return its opcode.
47 /// Otherwise return UserOp1.
48 static unsigned getOpcode(const Value *V) {
49    if (const Instruction *I = dyn_cast<Instruction>(V))
  33. Assuming 'V' is not a 'Instruction'
  33.1. 'I' is null
  34. Taking false branch
50      return I->getOpcode();
51    if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
  35. Assuming 'V' is a 'ConstantExpr'
  35.1. 'CE' is non-null
  36. Taking true branch
52      return CE->getOpcode();
  37. Returning value, which participates in a condition later
53 return Instruction::UserOp1;
54 }
55
56 static bool classof(const Instruction *) { return true; }
57 static bool classof(const ConstantExpr *) { return true; }
58 static bool classof(const Value *V) {
59 return isa<Instruction>(V) || isa<ConstantExpr>(V);
60 }
61};
62
63/// Utility class for integer operators which may exhibit overflow - Add, Sub,
64/// Mul, and Shl. It does not include SDiv, despite that operator having the
65/// potential for overflow.
66class OverflowingBinaryOperator : public Operator {
67public:
68 enum {
69 AnyWrap = 0,
70 NoUnsignedWrap = (1 << 0),
71 NoSignedWrap = (1 << 1)
72 };
73
74private:
75 friend class Instruction;
76 friend class ConstantExpr;
77
78 void setHasNoUnsignedWrap(bool B) {
79 SubclassOptionalData =
80 (SubclassOptionalData & ~NoUnsignedWrap) | (B * NoUnsignedWrap);
81 }
82 void setHasNoSignedWrap(bool B) {
83 SubclassOptionalData =
84 (SubclassOptionalData & ~NoSignedWrap) | (B * NoSignedWrap);
85 }
86
87public:
88 /// Test whether this operation is known to never
89 /// undergo unsigned overflow, aka the nuw property.
90 bool hasNoUnsignedWrap() const {
91 return SubclassOptionalData & NoUnsignedWrap;
92 }
93
94 /// Test whether this operation is known to never
95 /// undergo signed overflow, aka the nsw property.
96 bool hasNoSignedWrap() const {
97 return (SubclassOptionalData & NoSignedWrap) != 0;
98 }
99
100 static bool classof(const Instruction *I) {
101 return I->getOpcode() == Instruction::Add ||
102 I->getOpcode() == Instruction::Sub ||
103 I->getOpcode() == Instruction::Mul ||
104 I->getOpcode() == Instruction::Shl;
105 }
106 static bool classof(const ConstantExpr *CE) {
107 return CE->getOpcode() == Instruction::Add ||
108 CE->getOpcode() == Instruction::Sub ||
109 CE->getOpcode() == Instruction::Mul ||
110 CE->getOpcode() == Instruction::Shl;
111 }
112 static bool classof(const Value *V) {
113 return (isa<Instruction>(V) && classof(cast<Instruction>(V))) ||
114 (isa<ConstantExpr>(V) && classof(cast<ConstantExpr>(V)));
115 }
116};
117
118/// A udiv or sdiv instruction, which can be marked as "exact",
119/// indicating that no bits are destroyed.
120class PossiblyExactOperator : public Operator {
121public:
122 enum {
123 IsExact = (1 << 0)
124 };
125
126private:
127 friend class Instruction;
128 friend class ConstantExpr;
129
130 void setIsExact(bool B) {
131 SubclassOptionalData = (SubclassOptionalData & ~IsExact) | (B * IsExact);
132 }
133
134public:
135 /// Test whether this division is known to be exact, with zero remainder.
136 bool isExact() const {
137 return SubclassOptionalData & IsExact;
138 }
139
140 static bool isPossiblyExactOpcode(unsigned OpC) {
141 return OpC == Instruction::SDiv ||
142 OpC == Instruction::UDiv ||
143 OpC == Instruction::AShr ||
144 OpC == Instruction::LShr;
145 }
146
147 static bool classof(const ConstantExpr *CE) {
148 return isPossiblyExactOpcode(CE->getOpcode());
149 }
150 static bool classof(const Instruction *I) {
151 return isPossiblyExactOpcode(I->getOpcode());
152 }
153 static bool classof(const Value *V) {
154 return (isa<Instruction>(V) && classof(cast<Instruction>(V))) ||
155 (isa<ConstantExpr>(V) && classof(cast<ConstantExpr>(V)));
156 }
157};
158
159/// Convenience struct for specifying and reasoning about fast-math flags.
160class FastMathFlags {
161private:
162 friend class FPMathOperator;
163
164 unsigned Flags = 0;
165
166 FastMathFlags(unsigned F) {
167 // If all 7 bits are set, turn this into -1. If the number of bits grows,
168 // this must be updated. This is intended to provide some forward binary
169 // compatibility insurance for the meaning of 'fast' in case bits are added.
170 if (F == 0x7F) Flags = ~0U;
171 else Flags = F;
172 }
173
174public:
175 // This is how the bits are used in Value::SubclassOptionalData so they
176 // should fit there too.
177 // WARNING: We're out of space. SubclassOptionalData only has 7 bits. New
178 // functionality will require a change in how this information is stored.
179 enum {
180 AllowReassoc = (1 << 0),
181 NoNaNs = (1 << 1),
182 NoInfs = (1 << 2),
183 NoSignedZeros = (1 << 3),
184 AllowReciprocal = (1 << 4),
185 AllowContract = (1 << 5),
186 ApproxFunc = (1 << 6)
187 };
188
189 FastMathFlags() = default;
190
191 static FastMathFlags getFast() {
192 FastMathFlags FMF;
193 FMF.setFast();
194 return FMF;
195 }
196
197 bool any() const { return Flags != 0; }
198 bool none() const { return Flags == 0; }
199 bool all() const { return Flags == ~0U; }
200
201 void clear() { Flags = 0; }
202 void set() { Flags = ~0U; }
203
204 /// Flag queries
205 bool allowReassoc() const { return 0 != (Flags & AllowReassoc); }
206 bool noNaNs() const { return 0 != (Flags & NoNaNs); }
207 bool noInfs() const { return 0 != (Flags & NoInfs); }
208 bool noSignedZeros() const { return 0 != (Flags & NoSignedZeros); }
209 bool allowReciprocal() const { return 0 != (Flags & AllowReciprocal); }
210 bool allowContract() const { return 0 != (Flags & AllowContract); }
211 bool approxFunc() const { return 0 != (Flags & ApproxFunc); }
212 /// 'Fast' means all bits are set.
213 bool isFast() const { return all(); }
214
215 /// Flag setters
216 void setAllowReassoc(bool B = true) {
217 Flags = (Flags & ~AllowReassoc) | B * AllowReassoc;
218 }
219 void setNoNaNs(bool B = true) {
220 Flags = (Flags & ~NoNaNs) | B * NoNaNs;
221 }
222 void setNoInfs(bool B = true) {
223 Flags = (Flags & ~NoInfs) | B * NoInfs;
224 }
225 void setNoSignedZeros(bool B = true) {
226 Flags = (Flags & ~NoSignedZeros) | B * NoSignedZeros;
227 }
228 void setAllowReciprocal(bool B = true) {
229 Flags = (Flags & ~AllowReciprocal) | B * AllowReciprocal;
230 }
231 void setAllowContract(bool B = true) {
232 Flags = (Flags & ~AllowContract) | B * AllowContract;
233 }
234 void setApproxFunc(bool B = true) {
235 Flags = (Flags & ~ApproxFunc) | B * ApproxFunc;
236 }
237 void setFast(bool B = true) { B ? set() : clear(); }
238
239 void operator&=(const FastMathFlags &OtherFlags) {
240 Flags &= OtherFlags.Flags;
241 }
242};
243
244/// Utility class for floating point operations which can have
245/// information about relaxed accuracy requirements attached to them.
246class FPMathOperator : public Operator {
247private:
248 friend class Instruction;
249
250 /// 'Fast' means all bits are set.
251 void setFast(bool B) {
252 setHasAllowReassoc(B);
253 setHasNoNaNs(B);
254 setHasNoInfs(B);
255 setHasNoSignedZeros(B);
256 setHasAllowReciprocal(B);
257 setHasAllowContract(B);
258 setHasApproxFunc(B);
259 }
260
261 void setHasAllowReassoc(bool B) {
262 SubclassOptionalData =
263 (SubclassOptionalData & ~FastMathFlags::AllowReassoc) |
264 (B * FastMathFlags::AllowReassoc);
265 }
266
267 void setHasNoNaNs(bool B) {
268 SubclassOptionalData =
269 (SubclassOptionalData & ~FastMathFlags::NoNaNs) |
270 (B * FastMathFlags::NoNaNs);
271 }
272
273 void setHasNoInfs(bool B) {
274 SubclassOptionalData =
275 (SubclassOptionalData & ~FastMathFlags::NoInfs) |
276 (B * FastMathFlags::NoInfs);
277 }
278
279 void setHasNoSignedZeros(bool B) {
280 SubclassOptionalData =
281 (SubclassOptionalData & ~FastMathFlags::NoSignedZeros) |
282 (B * FastMathFlags::NoSignedZeros);
283 }
284
285 void setHasAllowReciprocal(bool B) {
286 SubclassOptionalData =
287 (SubclassOptionalData & ~FastMathFlags::AllowReciprocal) |
288 (B * FastMathFlags::AllowReciprocal);
289 }
290
291 void setHasAllowContract(bool B) {
292 SubclassOptionalData =
293 (SubclassOptionalData & ~FastMathFlags::AllowContract) |
294 (B * FastMathFlags::AllowContract);
295 }
296
297 void setHasApproxFunc(bool B) {
298 SubclassOptionalData =
299 (SubclassOptionalData & ~FastMathFlags::ApproxFunc) |
300 (B * FastMathFlags::ApproxFunc);
301 }
302
303 /// Convenience function for setting multiple fast-math flags.
304 /// FMF is a mask of the bits to set.
305 void setFastMathFlags(FastMathFlags FMF) {
306 SubclassOptionalData |= FMF.Flags;
307 }
308
309 /// Convenience function for copying all fast-math flags.
310 /// All values in FMF are transferred to this operator.
311 void copyFastMathFlags(FastMathFlags FMF) {
312 SubclassOptionalData = FMF.Flags;
313 }
314
315public:
316 /// Test if this operation allows all non-strict floating-point transforms.
317 bool isFast() const {
318 return ((SubclassOptionalData & FastMathFlags::AllowReassoc) != 0 &&
319 (SubclassOptionalData & FastMathFlags::NoNaNs) != 0 &&
320 (SubclassOptionalData & FastMathFlags::NoInfs) != 0 &&
321 (SubclassOptionalData & FastMathFlags::NoSignedZeros) != 0 &&
322 (SubclassOptionalData & FastMathFlags::AllowReciprocal) != 0 &&
323 (SubclassOptionalData & FastMathFlags::AllowContract) != 0 &&
324 (SubclassOptionalData & FastMathFlags::ApproxFunc) != 0);
325 }
326
327 /// Test if this operation may be simplified with reassociative transforms.
328 bool hasAllowReassoc() const {
329 return (SubclassOptionalData & FastMathFlags::AllowReassoc) != 0;
330 }
331
332 /// Test if this operation's arguments and results are assumed not-NaN.
333 bool hasNoNaNs() const {
334 return (SubclassOptionalData & FastMathFlags::NoNaNs) != 0;
335 }
336
337 /// Test if this operation's arguments and results are assumed not-infinite.
338 bool hasNoInfs() const {
339 return (SubclassOptionalData & FastMathFlags::NoInfs) != 0;
340 }
341
342 /// Test if this operation can ignore the sign of zero.
343 bool hasNoSignedZeros() const {
344 return (SubclassOptionalData & FastMathFlags::NoSignedZeros) != 0;
345 }
346
347 /// Test if this operation can use reciprocal multiply instead of division.
348 bool hasAllowReciprocal() const {
349 return (SubclassOptionalData & FastMathFlags::AllowReciprocal) != 0;
350 }
351
352 /// Test if this operation can be floating-point contracted (FMA).
353 bool hasAllowContract() const {
354 return (SubclassOptionalData & FastMathFlags::AllowContract) != 0;
355 }
356
357 /// Test if this operation allows approximations of math library functions or
358 /// intrinsics.
359 bool hasApproxFunc() const {
360 return (SubclassOptionalData & FastMathFlags::ApproxFunc) != 0;
361 }
362
363 /// Convenience function for getting all the fast-math flags
364 FastMathFlags getFastMathFlags() const {
365 return FastMathFlags(SubclassOptionalData);
366 }
367
368 /// Get the maximum error permitted by this operation in ULPs. An accuracy of
369 /// 0.0 means that the operation should be performed with the default
370 /// precision.
371 float getFPAccuracy() const;
372
373 static bool classof(const Value *V) {
374 unsigned Opcode;
375 if (auto *I = dyn_cast<Instruction>(V))
376 Opcode = I->getOpcode();
377 else if (auto *CE = dyn_cast<ConstantExpr>(V))
378 Opcode = CE->getOpcode();
379 else
380 return false;
381
382 switch (Opcode) {
383 case Instruction::FNeg:
384 case Instruction::FAdd:
385 case Instruction::FSub:
386 case Instruction::FMul:
387 case Instruction::FDiv:
388 case Instruction::FRem:
389 // FIXME: To clean up and correct the semantics of fast-math-flags, FCmp
390 // should not be treated as a math op, but the other opcodes should.
391 // This would make things consistent with Select/PHI (FP value type
392 // determines whether they are math ops and, therefore, capable of
393 // having fast-math-flags).
394 case Instruction::FCmp:
395 return true;
396 case Instruction::PHI:
397 case Instruction::Select:
398 case Instruction::Call: {
399 Type *Ty = V->getType();
400 while (ArrayType *ArrTy = dyn_cast<ArrayType>(Ty))
401 Ty = ArrTy->getElementType();
402 return Ty->isFPOrFPVectorTy();
403 }
404 default:
405 return false;
406 }
407 }
408};
409
410/// A helper template for defining operators for individual opcodes.
411template<typename SuperClass, unsigned Opc>
412class ConcreteOperator : public SuperClass {
413public:
414 static bool classof(const Instruction *I) {
415 return I->getOpcode() == Opc;
416 }
417 static bool classof(const ConstantExpr *CE) {
418 return CE->getOpcode() == Opc;
419 }
420 static bool classof(const Value *V) {
421 return (isa<Instruction>(V) && classof(cast<Instruction>(V))) ||
422 (isa<ConstantExpr>(V) && classof(cast<ConstantExpr>(V)));
423 }
424};
425
426class AddOperator
427 : public ConcreteOperator<OverflowingBinaryOperator, Instruction::Add> {
428};
429class SubOperator
430 : public ConcreteOperator<OverflowingBinaryOperator, Instruction::Sub> {
431};
432class MulOperator
433 : public ConcreteOperator<OverflowingBinaryOperator, Instruction::Mul> {
434};
435class ShlOperator
436 : public ConcreteOperator<OverflowingBinaryOperator, Instruction::Shl> {
437};
438
439class SDivOperator
440 : public ConcreteOperator<PossiblyExactOperator, Instruction::SDiv> {
441};
442class UDivOperator
443 : public ConcreteOperator<PossiblyExactOperator, Instruction::UDiv> {
444};
445class AShrOperator
446 : public ConcreteOperator<PossiblyExactOperator, Instruction::AShr> {
447};
448class LShrOperator
449 : public ConcreteOperator<PossiblyExactOperator, Instruction::LShr> {
450};
451
452class ZExtOperator : public ConcreteOperator<Operator, Instruction::ZExt> {};
453
454class GEPOperator
455 : public ConcreteOperator<Operator, Instruction::GetElementPtr> {
456 friend class GetElementPtrInst;
457 friend class ConstantExpr;
458
459 enum {
460 IsInBounds = (1 << 0),
461 // InRangeIndex: bits 1-6
462 };
463
464 void setIsInBounds(bool B) {
465 SubclassOptionalData =
466 (SubclassOptionalData & ~IsInBounds) | (B * IsInBounds);
467 }
468
469public:
470 /// Test whether this is an inbounds GEP, as defined by LangRef.html.
471 bool isInBounds() const {
472 return SubclassOptionalData & IsInBounds;
473 }
474
475 /// Returns the offset of the index with an inrange attachment, or None if
476 /// none.
477 Optional<unsigned> getInRangeIndex() const {
478 if (SubclassOptionalData >> 1 == 0) return None;
479 return (SubclassOptionalData >> 1) - 1;
480 }
481
482 inline op_iterator idx_begin() { return op_begin()+1; }
483 inline const_op_iterator idx_begin() const { return op_begin()+1; }
484 inline op_iterator idx_end() { return op_end(); }
485 inline const_op_iterator idx_end() const { return op_end(); }
486
487 Value *getPointerOperand() {
488 return getOperand(0);
489 }
490 const Value *getPointerOperand() const {
491 return getOperand(0);
492 }
493 static unsigned getPointerOperandIndex() {
494 return 0U; // get index for modifying correct operand
495 }
496
497 /// Method to return the pointer operand as a PointerType.
498 Type *getPointerOperandType() const {
499 return getPointerOperand()->getType();
500 }
501
502 Type *getSourceElementType() const;
503 Type *getResultElementType() const;
504
505 /// Method to return the address space of the pointer operand.
506 unsigned getPointerAddressSpace() const {
507 return getPointerOperandType()->getPointerAddressSpace();
508 }
509
510 unsigned getNumIndices() const { // Note: always non-negative
511 return getNumOperands() - 1;
512 }
513
514 bool hasIndices() const {
515 return getNumOperands() > 1;
516 }
517
518 /// Return true if all of the indices of this GEP are zeros.
519 /// If so, the result pointer and the first operand have the same
520 /// value, just potentially different types.
521 bool hasAllZeroIndices() const {
522 for (const_op_iterator I = idx_begin(), E = idx_end(); I != E; ++I) {
523 if (ConstantInt *C = dyn_cast<ConstantInt>(I))
524 if (C->isZero())
525 continue;
526 return false;
527 }
528 return true;
529 }
530
531 /// Return true if all of the indices of this GEP are constant integers.
532 /// If so, the result pointer and the first operand have
533 /// a constant offset between them.
534 bool hasAllConstantIndices() const {
535 for (const_op_iterator I = idx_begin(), E = idx_end(); I != E; ++I) {
536 if (!isa<ConstantInt>(I))
537 return false;
538 }
539 return true;
540 }
541
542 unsigned countNonConstantIndices() const {
543 return count_if(make_range(idx_begin(), idx_end()), [](const Use& use) {
544 return !isa<ConstantInt>(*use);
545 });
546 }
547
548 /// Accumulate the constant address offset of this GEP if possible.
549 ///
550 /// This routine accepts an APInt into which it will accumulate the constant
551 /// offset of this GEP if the GEP is in fact constant. If the GEP is not
552 /// all-constant, it returns false and the value of the offset APInt is
553 /// undefined (it is *not* preserved!). The APInt passed into this routine
554 /// must be at exactly as wide as the IntPtr type for the address space of the
555 /// base GEP pointer.
556 bool accumulateConstantOffset(const DataLayout &DL, APInt &Offset) const;
557};
558
559class PtrToIntOperator
560 : public ConcreteOperator<Operator, Instruction::PtrToInt> {
561 friend class PtrToInt;
562 friend class ConstantExpr;
563
564public:
565 Value *getPointerOperand() {
566 return getOperand(0);
567 }
568 const Value *getPointerOperand() const {
569 return getOperand(0);
570 }
571
572 static unsigned getPointerOperandIndex() {
573 return 0U; // get index for modifying correct operand
574 }
575
576 /// Method to return the pointer operand as a PointerType.
577 Type *getPointerOperandType() const {
578 return getPointerOperand()->getType();
579 }
580
581 /// Method to return the address space of the pointer operand.
582 unsigned getPointerAddressSpace() const {
583 return cast<PointerType>(getPointerOperandType())->getAddressSpace();
584 }
585};
586
587class BitCastOperator
588 : public ConcreteOperator<Operator, Instruction::BitCast> {
589 friend class BitCastInst;
590 friend class ConstantExpr;
591
592public:
593 Type *getSrcTy() const {
594 return getOperand(0)->getType();
595 }
596
597 Type *getDestTy() const {
598 return getType();
599 }
600};
601
602} // end namespace llvm
603
604#endif // LLVM_IR_OPERATOR_H
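For context, a short sketch of how the Operator hierarchy above is normally queried (not part of the analyzed source; the helper names are invented). The point of routing through Operator is that the same flag checks work whether the value is an Instruction or a ConstantExpr:

#include "llvm/IR/Operator.h"

// True for an 'add nuw', whether it appears as an Instruction or a ConstantExpr.
static bool isAddWithNUW(const llvm::Value *V) {
  if (auto *OBO = llvm::dyn_cast<llvm::OverflowingBinaryOperator>(V))
    return llvm::Operator::getOpcode(V) == llvm::Instruction::Add &&
           OBO->hasNoUnsignedWrap();
  return false;
}

// True if a floating-point operation carries the 'reassoc' fast-math flag.
static bool allowsReassociation(const llvm::Value *V) {
  if (auto *FPOp = llvm::dyn_cast<llvm::FPMathOperator>(V))
    return FPOp->getFastMathFlags().allowReassoc();
  return false;
}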

/build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/llvm/include/llvm/CodeGen/BasicTTIImpl.h

1//===- BasicTTIImpl.h -------------------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// This file provides a helper that implements much of the TTI interface in
11/// terms of the target-independent code generator and TargetLowering
12/// interfaces.
13//
14//===----------------------------------------------------------------------===//
15
16#ifndef LLVM_CODEGEN_BASICTTIIMPL_H
17#define LLVM_CODEGEN_BASICTTIIMPL_H
18
19#include "llvm/ADT/APInt.h"
20#include "llvm/ADT/ArrayRef.h"
21#include "llvm/ADT/BitVector.h"
22#include "llvm/ADT/SmallPtrSet.h"
23#include "llvm/ADT/SmallVector.h"
24#include "llvm/Analysis/LoopInfo.h"
25#include "llvm/Analysis/TargetTransformInfo.h"
26#include "llvm/Analysis/TargetTransformInfoImpl.h"
27#include "llvm/CodeGen/ISDOpcodes.h"
28#include "llvm/CodeGen/TargetLowering.h"
29#include "llvm/CodeGen/TargetSubtargetInfo.h"
30#include "llvm/CodeGen/ValueTypes.h"
31#include "llvm/IR/BasicBlock.h"
32#include "llvm/IR/CallSite.h"
33#include "llvm/IR/Constant.h"
34#include "llvm/IR/Constants.h"
35#include "llvm/IR/DataLayout.h"
36#include "llvm/IR/DerivedTypes.h"
37#include "llvm/IR/InstrTypes.h"
38#include "llvm/IR/Instruction.h"
39#include "llvm/IR/Instructions.h"
40#include "llvm/IR/Intrinsics.h"
41#include "llvm/IR/Operator.h"
42#include "llvm/IR/Type.h"
43#include "llvm/IR/Value.h"
44#include "llvm/MC/MCSchedule.h"
45#include "llvm/Support/Casting.h"
46#include "llvm/Support/CommandLine.h"
47#include "llvm/Support/ErrorHandling.h"
48#include "llvm/Support/MachineValueType.h"
49#include "llvm/Support/MathExtras.h"
50#include <algorithm>
51#include <cassert>
52#include <cstdint>
53#include <limits>
54#include <utility>
55
56namespace llvm {
57
58class Function;
59class GlobalValue;
60class LLVMContext;
61class ScalarEvolution;
62class SCEV;
63class TargetMachine;
64
65extern cl::opt<unsigned> PartialUnrollingThreshold;
66
67/// Base class which can be used to help build a TTI implementation.
68///
69/// This class provides as much implementation of the TTI interface as is
70/// possible using the target independent parts of the code generator.
71///
72/// In order to subclass it, your class must implement a getST() method to
73/// return the subtarget, and a getTLI() method to return the target lowering.
74/// We need these methods implemented in the derived class so that this class
75/// doesn't have to duplicate storage for them.
76template <typename T>
77class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
78private:
79 using BaseT = TargetTransformInfoImplCRTPBase<T>;
80 using TTI = TargetTransformInfo;
81
82 /// Estimate a cost of Broadcast as an extract and sequence of insert
83 /// operations.
84 unsigned getBroadcastShuffleOverhead(Type *Ty) {
85     assert(Ty->isVectorTy() && "Can only shuffle vectors");
86 unsigned Cost = 0;
87 // Broadcast cost is equal to the cost of extracting the zero'th element
88 // plus the cost of inserting it into every element of the result vector.
89 Cost += static_cast<T *>(this)->getVectorInstrCost(
90 Instruction::ExtractElement, Ty, 0);
91
92 for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) {
93 Cost += static_cast<T *>(this)->getVectorInstrCost(
94 Instruction::InsertElement, Ty, i);
95 }
96 return Cost;
97 }
98
99 /// Estimate a cost of shuffle as a sequence of extract and insert
100 /// operations.
101 unsigned getPermuteShuffleOverhead(Type *Ty) {
102    assert(Ty->isVectorTy() && "Can only shuffle vectors");
103 unsigned Cost = 0;
104 // Shuffle cost is equal to the cost of extracting element from its argument
105 // plus the cost of inserting them onto the result vector.
106
107    // e.g. <4 x float> has a mask of <0,5,2,7>, i.e. we need to extract from
108    // index 0 of first vector, index 1 of second vector, index 2 of first
109 // vector and finally index 3 of second vector and insert them at index
110 // <0,1,2,3> of result vector.
111 for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) {
112 Cost += static_cast<T *>(this)
113 ->getVectorInstrCost(Instruction::InsertElement, Ty, i);
114 Cost += static_cast<T *>(this)
115 ->getVectorInstrCost(Instruction::ExtractElement, Ty, i);
116 }
117 return Cost;
118 }
119
120 /// Estimate a cost of subvector extraction as a sequence of extract and
121 /// insert operations.
122 unsigned getExtractSubvectorOverhead(Type *Ty, int Index, Type *SubTy) {
123    assert(Ty && Ty->isVectorTy() && SubTy && SubTy->isVectorTy() &&
124           "Can only extract subvectors from vectors");
125 int NumSubElts = SubTy->getVectorNumElements();
126    assert((Index + NumSubElts) <= (int)Ty->getVectorNumElements() &&
127           "SK_ExtractSubvector index out of range");
128
129 unsigned Cost = 0;
130 // Subvector extraction cost is equal to the cost of extracting element from
131 // the source type plus the cost of inserting them into the result vector
132 // type.
133 for (int i = 0; i != NumSubElts; ++i) {
134 Cost += static_cast<T *>(this)->getVectorInstrCost(
135 Instruction::ExtractElement, Ty, i + Index);
136 Cost += static_cast<T *>(this)->getVectorInstrCost(
137 Instruction::InsertElement, SubTy, i);
138 }
139 return Cost;
140 }
141
142 /// Estimate a cost of subvector insertion as a sequence of extract and
143 /// insert operations.
144 unsigned getInsertSubvectorOverhead(Type *Ty, int Index, Type *SubTy) {
145    assert(Ty && Ty->isVectorTy() && SubTy && SubTy->isVectorTy() &&
146           "Can only insert subvectors into vectors");
147 int NumSubElts = SubTy->getVectorNumElements();
148    assert((Index + NumSubElts) <= (int)Ty->getVectorNumElements() &&
149           "SK_InsertSubvector index out of range");
150
151 unsigned Cost = 0;
152 // Subvector insertion cost is equal to the cost of extracting element from
153 // the source type plus the cost of inserting them into the result vector
154 // type.
155 for (int i = 0; i != NumSubElts; ++i) {
156 Cost += static_cast<T *>(this)->getVectorInstrCost(
157 Instruction::ExtractElement, SubTy, i);
158 Cost += static_cast<T *>(this)->getVectorInstrCost(
159 Instruction::InsertElement, Ty, i + Index);
160 }
161 return Cost;
162 }
163
164 /// Local query method delegates up to T which *must* implement this!
165 const TargetSubtargetInfo *getST() const {
166 return static_cast<const T *>(this)->getST();
167 }
168
169 /// Local query method delegates up to T which *must* implement this!
170 const TargetLoweringBase *getTLI() const {
171 return static_cast<const T *>(this)->getTLI();
172 }
173
174 static ISD::MemIndexedMode getISDIndexedMode(TTI::MemIndexedMode M) {
175 switch (M) {
176 case TTI::MIM_Unindexed:
177 return ISD::UNINDEXED;
178 case TTI::MIM_PreInc:
179 return ISD::PRE_INC;
180 case TTI::MIM_PreDec:
181 return ISD::PRE_DEC;
182 case TTI::MIM_PostInc:
183 return ISD::POST_INC;
184 case TTI::MIM_PostDec:
185 return ISD::POST_DEC;
186 }
187    llvm_unreachable("Unexpected MemIndexedMode");
188 }
189
190protected:
191 explicit BasicTTIImplBase(const TargetMachine *TM, const DataLayout &DL)
192 : BaseT(DL) {}
193 virtual ~BasicTTIImplBase() = default;
194
195 using TargetTransformInfoImplBase::DL;
196
197public:
198 /// \name Scalar TTI Implementations
199 /// @{
200 bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth,
201 unsigned AddressSpace, unsigned Alignment,
202 bool *Fast) const {
203 EVT E = EVT::getIntegerVT(Context, BitWidth);
204 return getTLI()->allowsMisalignedMemoryAccesses(
205 E, AddressSpace, Alignment, MachineMemOperand::MONone, Fast);
206 }
207
208 bool hasBranchDivergence() { return false; }
209
210 bool isSourceOfDivergence(const Value *V) { return false; }
211
212 bool isAlwaysUniform(const Value *V) { return false; }
213
214 unsigned getFlatAddressSpace() {
215 // Return an invalid address space.
216 return -1;
217 }
218
219 bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
220 Intrinsic::ID IID) const {
221 return false;
222 }
223
224 bool rewriteIntrinsicWithAddressSpace(IntrinsicInst *II,
225 Value *OldV, Value *NewV) const {
226 return false;
227 }
228
229 bool isLegalAddImmediate(int64_t imm) {
230 return getTLI()->isLegalAddImmediate(imm);
231 }
232
233 bool isLegalICmpImmediate(int64_t imm) {
234 return getTLI()->isLegalICmpImmediate(imm);
235 }
236
237 bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
238 bool HasBaseReg, int64_t Scale,
239 unsigned AddrSpace, Instruction *I = nullptr) {
240 TargetLoweringBase::AddrMode AM;
241 AM.BaseGV = BaseGV;
242 AM.BaseOffs = BaseOffset;
243 AM.HasBaseReg = HasBaseReg;
244 AM.Scale = Scale;
245 return getTLI()->isLegalAddressingMode(DL, AM, Ty, AddrSpace, I);
246 }
247
248 bool isIndexedLoadLegal(TTI::MemIndexedMode M, Type *Ty,
249 const DataLayout &DL) const {
250 EVT VT = getTLI()->getValueType(DL, Ty);
251 return getTLI()->isIndexedLoadLegal(getISDIndexedMode(M), VT);
252 }
253
254 bool isIndexedStoreLegal(TTI::MemIndexedMode M, Type *Ty,
255 const DataLayout &DL) const {
256 EVT VT = getTLI()->getValueType(DL, Ty);
257 return getTLI()->isIndexedStoreLegal(getISDIndexedMode(M), VT);
258 }
259
260 bool isLSRCostLess(TTI::LSRCost C1, TTI::LSRCost C2) {
261 return TargetTransformInfoImplBase::isLSRCostLess(C1, C2);
262 }
263
264 int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
265 bool HasBaseReg, int64_t Scale, unsigned AddrSpace) {
266 TargetLoweringBase::AddrMode AM;
267 AM.BaseGV = BaseGV;
268 AM.BaseOffs = BaseOffset;
269 AM.HasBaseReg = HasBaseReg;
270 AM.Scale = Scale;
271 return getTLI()->getScalingFactorCost(DL, AM, Ty, AddrSpace);
272 }
273
274 bool isTruncateFree(Type *Ty1, Type *Ty2) {
275 return getTLI()->isTruncateFree(Ty1, Ty2);
276 }
277
278 bool isProfitableToHoist(Instruction *I) {
279 return getTLI()->isProfitableToHoist(I);
280 }
281
282 bool useAA() const { return getST()->useAA(); }
283
284 bool isTypeLegal(Type *Ty) {
285 EVT VT = getTLI()->getValueType(DL, Ty);
286 return getTLI()->isTypeLegal(VT);
287 }
288
289 int getGEPCost(Type *PointeeType, const Value *Ptr,
290 ArrayRef<const Value *> Operands) {
291 return BaseT::getGEPCost(PointeeType, Ptr, Operands);
292 }
293
294 int getExtCost(const Instruction *I, const Value *Src) {
295 if (getTLI()->isExtFree(I))
296 return TargetTransformInfo::TCC_Free;
297
298 if (isa<ZExtInst>(I) || isa<SExtInst>(I))
299 if (const LoadInst *LI = dyn_cast<LoadInst>(Src))
300 if (getTLI()->isExtLoad(LI, I, DL))
301 return TargetTransformInfo::TCC_Free;
302
303 return TargetTransformInfo::TCC_Basic;
304 }
305
306 unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
307 ArrayRef<const Value *> Arguments, const User *U) {
308 return BaseT::getIntrinsicCost(IID, RetTy, Arguments, U);
309 }
310
311 unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
312 ArrayRef<Type *> ParamTys, const User *U) {
313 if (IID == Intrinsic::cttz) {
314 if (getTLI()->isCheapToSpeculateCttz())
315 return TargetTransformInfo::TCC_Basic;
316 return TargetTransformInfo::TCC_Expensive;
317 }
318
319 if (IID == Intrinsic::ctlz) {
320 if (getTLI()->isCheapToSpeculateCtlz())
321 return TargetTransformInfo::TCC_Basic;
322 return TargetTransformInfo::TCC_Expensive;
323 }
324
325 return BaseT::getIntrinsicCost(IID, RetTy, ParamTys, U);
326 }
327
328 unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
329 unsigned &JumpTableSize,
330 ProfileSummaryInfo *PSI,
331 BlockFrequencyInfo *BFI) {
332 /// Try to find the estimated number of clusters. Note that the number of
333 /// clusters identified in this function could be different from the actual
334 /// numbers found in lowering. This function ignores switches that are
335 /// lowered with a mix of jump table / bit test / BTree. This function was
336 /// initially intended to be used when estimating the cost of a switch in
337 /// the inline cost heuristic, but it's a generic cost model to be used in
338 /// other places (e.g., in loop unrolling).
339 unsigned N = SI.getNumCases();
340 const TargetLoweringBase *TLI = getTLI();
341 const DataLayout &DL = this->getDataLayout();
342
343 JumpTableSize = 0;
344 bool IsJTAllowed = TLI->areJTsAllowed(SI.getParent()->getParent());
345
346 // Early exit if neither a jump table nor a bit test is allowed.
347 if (N < 1 || (!IsJTAllowed && DL.getIndexSizeInBits(0u) < N))
348 return N;
349
350 APInt MaxCaseVal = SI.case_begin()->getCaseValue()->getValue();
351 APInt MinCaseVal = MaxCaseVal;
352 for (auto CI : SI.cases()) {
353 const APInt &CaseVal = CI.getCaseValue()->getValue();
354 if (CaseVal.sgt(MaxCaseVal))
355 MaxCaseVal = CaseVal;
356 if (CaseVal.slt(MinCaseVal))
357 MinCaseVal = CaseVal;
358 }
359
360 // Check if suitable for a bit test
361 if (N <= DL.getIndexSizeInBits(0u)) {
362 SmallPtrSet<const BasicBlock *, 4> Dests;
363 for (auto I : SI.cases())
364 Dests.insert(I.getCaseSuccessor());
365
366 if (TLI->isSuitableForBitTests(Dests.size(), N, MinCaseVal, MaxCaseVal,
367 DL))
368 return 1;
369 }
370
371 // Check if suitable for a jump table.
372 if (IsJTAllowed) {
373 if (N < 2 || N < TLI->getMinimumJumpTableEntries())
374 return N;
375 uint64_t Range =
376 (MaxCaseVal - MinCaseVal)
377 .getLimitedValue(std::numeric_limits<uint64_t>::max() - 1) + 1;
378 // Check whether a range of clusters is dense enough for a jump table
379 if (TLI->isSuitableForJumpTable(&SI, N, Range, PSI, BFI)) {
380 JumpTableSize = Range;
381 return 1;
382 }
383 }
384 return N;
385 }
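As a rough illustration of the estimate above, here is a standalone sketch (not LLVM code; the case values, the 64-bit index width, and the 40% density threshold are made-up assumptions) of how the bit-test and jump-table checks reduce to comparing the number of cases and the case-value range against the index width and a density bound:

#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <vector>

int main() {
  // Hypothetical switch: 5 cases with these values, 64-bit index type.
  std::vector<int64_t> CaseVals = {0, 2, 4, 6, 100};
  unsigned N = CaseVals.size();
  unsigned IndexBits = 64;

  int64_t MinCase = *std::min_element(CaseVals.begin(), CaseVals.end());
  int64_t MaxCase = *std::max_element(CaseVals.begin(), CaseVals.end());

  // Bit-test style check: all cases must fit into one index-sized mask.
  bool BitTestable = N <= IndexBits &&
                     (uint64_t)(MaxCase - MinCase) < IndexBits;

  // Jump-table style check: the range must be dense enough (here: at least
  // 40% of the table entries populated, a made-up threshold).
  uint64_t Range = (uint64_t)(MaxCase - MinCase) + 1;
  bool JumpTableDense = N >= 2 && Range * 40 <= (uint64_t)N * 100;

  // One "cluster" if either lowering applies, otherwise N clusters.
  unsigned Clusters = (BitTestable || JumpTableDense) ? 1 : N;
  std::printf("range=%llu clusters=%u\n", (unsigned long long)Range, Clusters);
}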
386
387 bool shouldBuildLookupTables() {
388 const TargetLoweringBase *TLI = getTLI();
389 return TLI->isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) ||
390 TLI->isOperationLegalOrCustom(ISD::BRIND, MVT::Other);
391 }
392
393 bool haveFastSqrt(Type *Ty) {
394 const TargetLoweringBase *TLI = getTLI();
395 EVT VT = TLI->getValueType(DL, Ty);
396 return TLI->isTypeLegal(VT) &&
397 TLI->isOperationLegalOrCustom(ISD::FSQRT, VT);
398 }
399
400 bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) {
401 return true;
402 }
403
404 unsigned getFPOpCost(Type *Ty) {
405 // Check whether FADD is available, as a proxy for floating-point in
406 // general.
407 const TargetLoweringBase *TLI = getTLI();
408 EVT VT = TLI->getValueType(DL, Ty);
409 if (TLI->isOperationLegalOrCustomOrPromote(ISD::FADD, VT))
410 return TargetTransformInfo::TCC_Basic;
411 return TargetTransformInfo::TCC_Expensive;
412 }
413
414 unsigned getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) {
415 const TargetLoweringBase *TLI = getTLI();
416 switch (Opcode) {
43. Control jumps to the 'default' case at line 417
417 default: break;
44. Execution continues on line 434
418 case Instruction::Trunc:
419 if (TLI->isTruncateFree(OpTy, Ty))
420 return TargetTransformInfo::TCC_Free;
421 return TargetTransformInfo::TCC_Basic;
422 case Instruction::ZExt:
423 if (TLI->isZExtFree(OpTy, Ty))
424 return TargetTransformInfo::TCC_Free;
425 return TargetTransformInfo::TCC_Basic;
426
427 case Instruction::AddrSpaceCast:
428 if (TLI->isFreeAddrSpaceCast(OpTy->getPointerAddressSpace(),
429 Ty->getPointerAddressSpace()))
430 return TargetTransformInfo::TCC_Free;
431 return TargetTransformInfo::TCC_Basic;
432 }
433
434 return BaseT::getOperationCost(Opcode, Ty, OpTy);
45. Passing null pointer value via 3rd parameter 'OpTy'
46. Calling 'TargetTransformInfoImplBase::getOperationCost'
435 }
436
437 unsigned getInliningThresholdMultiplier() { return 1; }
438
439 int getInlinerVectorBonusPercent() { return 150; }
440
441 void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
442 TTI::UnrollingPreferences &UP) {
443 // This unrolling functionality is target independent, but to provide some
444 // motivation for its intended use, for x86:
445
446 // According to the Intel 64 and IA-32 Architectures Optimization Reference
447 // Manual, Intel Core models and later have a loop stream detector (and
448 // associated uop queue) that can benefit from partial unrolling.
449 // The relevant requirements are:
450 // - The loop must have no more than 4 (8 for Nehalem and later) branches
451 // taken, and none of them may be calls.
452 // - The loop can have no more than 18 (28 for Nehalem and later) uops.
453
454 // According to the Software Optimization Guide for AMD Family 15h
455 // Processors, models 30h-4fh (Steamroller and later) have a loop predictor
456 // and loop buffer which can benefit from partial unrolling.
457 // The relevant requirements are:
458 // - The loop must have fewer than 16 branches
459 // - The loop must have less than 40 uops in all executed loop branches
460
461 // The number of taken branches in a loop is hard to estimate here, and
462 // benchmarking has revealed that it is better not to be conservative when
463 // estimating the branch count. As a result, we'll ignore the branch limits
464 // until someone finds a case where it matters in practice.
465
466 unsigned MaxOps;
467 const TargetSubtargetInfo *ST = getST();
468 if (PartialUnrollingThreshold.getNumOccurrences() > 0)
469 MaxOps = PartialUnrollingThreshold;
470 else if (ST->getSchedModel().LoopMicroOpBufferSize > 0)
471 MaxOps = ST->getSchedModel().LoopMicroOpBufferSize;
472 else
473 return;
474
475 // Scan the loop: don't unroll loops with calls.
476 for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); I != E;
477 ++I) {
478 BasicBlock *BB = *I;
479
480 for (BasicBlock::iterator J = BB->begin(), JE = BB->end(); J != JE; ++J)
481 if (isa<CallInst>(J) || isa<InvokeInst>(J)) {
482 ImmutableCallSite CS(&*J);
483 if (const Function *F = CS.getCalledFunction()) {
484 if (!static_cast<T *>(this)->isLoweredToCall(F))
485 continue;
486 }
487
488 return;
489 }
490 }
491
492 // Enable runtime and partial unrolling up to the specified size.
493 // Enable using trip count upper bound to unroll loops.
494 UP.Partial = UP.Runtime = UP.UpperBound = true;
495 UP.PartialThreshold = MaxOps;
496
497 // Avoid unrolling when optimizing for size.
498 UP.OptSizeThreshold = 0;
499 UP.PartialOptSizeThreshold = 0;
500
501 // Set number of instructions optimized when "back edge"
502 // becomes "fall through" to default value of 2.
503 UP.BEInsns = 2;
504 }
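A minimal standalone sketch of the decision above (not the LLVM API; the opcode list and buffer size are made-up): partial/runtime unrolling is only enabled when a micro-op buffer size is known and the loop body contains no calls that would be lowered to real calls.

#include <cstdio>
#include <string>
#include <vector>

struct UnrollPrefs {
  bool Partial = false, Runtime = false, UpperBound = false;
  unsigned PartialThreshold = 0;
};

int main() {
  unsigned LoopMicroOpBufferSize = 28;          // hypothetical subtarget value
  std::vector<std::string> BodyOpcodes =        // hypothetical loop body
      {"load", "add", "store", "br"};

  UnrollPrefs UP;
  if (LoopMicroOpBufferSize == 0)
    return 0;                                   // nothing to base a threshold on

  for (const std::string &Op : BodyOpcodes)
    if (Op == "call" || Op == "invoke")
      return 0;                                 // calls defeat the loop buffer

  // Mirrors the assignments at the end of getUnrollingPreferences().
  UP.Partial = UP.Runtime = UP.UpperBound = true;
  UP.PartialThreshold = LoopMicroOpBufferSize;
  std::printf("PartialThreshold=%u\n", UP.PartialThreshold);
}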
505
506 bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
507 AssumptionCache &AC,
508 TargetLibraryInfo *LibInfo,
509 HardwareLoopInfo &HWLoopInfo) {
510 return BaseT::isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo);
511 }
512
513 bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE,
514 AssumptionCache &AC, TargetLibraryInfo *TLI,
515 DominatorTree *DT,
516 const LoopAccessInfo *LAI) {
517 return BaseT::preferPredicateOverEpilogue(L, LI, SE, AC, TLI, DT, LAI);
518 }
519
520 int getInstructionLatency(const Instruction *I) {
521 if (isa<LoadInst>(I))
522 return getST()->getSchedModel().DefaultLoadLatency;
523
524 return BaseT::getInstructionLatency(I);
525 }
526
527 virtual Optional<unsigned>
528 getCacheSize(TargetTransformInfo::CacheLevel Level) const {
529 return Optional<unsigned>(
530 getST()->getCacheSize(static_cast<unsigned>(Level)));
531 }
532
533 virtual Optional<unsigned>
534 getCacheAssociativity(TargetTransformInfo::CacheLevel Level) const {
535 Optional<unsigned> TargetResult =
536 getST()->getCacheAssociativity(static_cast<unsigned>(Level));
537
538 if (TargetResult)
539 return TargetResult;
540
541 return BaseT::getCacheAssociativity(Level);
542 }
543
544 virtual unsigned getCacheLineSize() const {
545 return getST()->getCacheLineSize();
546 }
547
548 virtual unsigned getPrefetchDistance() const {
549 return getST()->getPrefetchDistance();
550 }
551
552 virtual unsigned getMinPrefetchStride() const {
553 return getST()->getMinPrefetchStride();
554 }
555
556 virtual unsigned getMaxPrefetchIterationsAhead() const {
557 return getST()->getMaxPrefetchIterationsAhead();
558 }
559
560 /// @}
561
562 /// \name Vector TTI Implementations
563 /// @{
564
565 unsigned getRegisterBitWidth(bool Vector) const { return 32; }
566
567 /// Estimate the overhead of scalarizing an instruction. Insert and Extract
568 /// are set if the result needs to be inserted and/or extracted from vectors.
569 unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) {
570 assert(Ty->isVectorTy() && "Can only scalarize vectors");
571 unsigned Cost = 0;
572
573 for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) {
574 if (Insert)
575 Cost += static_cast<T *>(this)
576 ->getVectorInstrCost(Instruction::InsertElement, Ty, i);
577 if (Extract)
578 Cost += static_cast<T *>(this)
579 ->getVectorInstrCost(Instruction::ExtractElement, Ty, i);
580 }
581
582 return Cost;
583 }
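For a concrete feel of the loop above, here is a standalone sketch with a made-up per-element cost of 1: scalarizing a <4 x i32> value that must be both built up and torn down costs 4 inserts plus 4 extracts.

#include <cstdio>

// Standalone mirror of getScalarizationOverhead(Ty, Insert, Extract),
// assuming every insertelement/extractelement costs 1.
unsigned scalarizationOverhead(unsigned NumElts, bool Insert, bool Extract) {
  unsigned Cost = 0;
  for (unsigned i = 0; i < NumElts; ++i) {
    if (Insert)
      Cost += 1;   // insertelement of element i
    if (Extract)
      Cost += 1;   // extractelement of element i
  }
  return Cost;
}

int main() {
  std::printf("<4 x i32>, insert+extract: %u\n",
              scalarizationOverhead(4, true, true));   // prints 8
}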
584
585 /// Estimate the overhead of scalarizing an instruction's unique
586 /// non-constant operands. The types of the arguments are ordinarily
587 /// scalar, in which case the costs are multiplied by VF.
588 unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
589 unsigned VF) {
590 unsigned Cost = 0;
591 SmallPtrSet<const Value*, 4> UniqueOperands;
592 for (const Value *A : Args) {
593 if (!isa<Constant>(A) && UniqueOperands.insert(A).second) {
594 Type *VecTy = nullptr;
595 if (A->getType()->isVectorTy()) {
596 VecTy = A->getType();
597 // If A is a vector operand, VF should be 1 or correspond to A.
598 assert((VF == 1 || VF == VecTy->getVectorNumElements()) &&
599 "Vector argument does not match VF");
600 }
601 else
602 VecTy = VectorType::get(A->getType(), VF);
603
604 Cost += getScalarizationOverhead(VecTy, false, true);
605 }
606 }
607
608 return Cost;
609 }
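A standalone sketch of the operand walk above (made-up operands, per-element extract cost of 1): constants are skipped, repeated operands are counted once, and each remaining scalar operand is treated as a VF-wide vector that has to be decomposed.

#include <cstdio>
#include <set>
#include <string>
#include <vector>

int main() {
  unsigned VF = 4;                                     // hypothetical vector factor
  // Hypothetical operand list: %a appears twice, 7 is a constant.
  std::vector<std::string> Args = {"%a", "%b", "%a", "const:7"};

  std::set<std::string> Unique;
  unsigned Cost = 0;
  for (const std::string &A : Args) {
    if (A.rfind("const:", 0) == 0)
      continue;                                        // constants cost nothing
    if (!Unique.insert(A).second)
      continue;                                        // already counted
    Cost += VF;                                        // extract VF scalar elements
  }
  std::printf("operand scalarization cost: %u\n", Cost);  // 2 operands * 4 = 8
}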
610
611 unsigned getScalarizationOverhead(Type *VecTy, ArrayRef<const Value *> Args) {
612 assert(VecTy->isVectorTy());
613
614 unsigned Cost = 0;
615
616 Cost += getScalarizationOverhead(VecTy, true, false);
617 if (!Args.empty())
618 Cost += getOperandsScalarizationOverhead(Args,
619 VecTy->getVectorNumElements());
620 else
621 // When no information on arguments is provided, we add the cost
622 // associated with one argument as a heuristic.
623 Cost += getScalarizationOverhead(VecTy, false, true);
624
625 return Cost;
626 }
627
628 unsigned getMaxInterleaveFactor(unsigned VF) { return 1; }
629
630 unsigned getArithmeticInstrCost(
631 unsigned Opcode, Type *Ty,
632 TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
633 TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
634 TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
635 TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
636 ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
637 const Instruction *CxtI = nullptr) {
638 // Check if any of the operands are vector operands.
639 const TargetLoweringBase *TLI = getTLI();
640 int ISD = TLI->InstructionOpcodeToISD(Opcode);
641 assert(ISD && "Invalid opcode");
642
643 std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
644
645 bool IsFloat = Ty->isFPOrFPVectorTy();
646 // Assume that floating point arithmetic operations cost twice as much as
647 // integer operations.
648 unsigned OpCost = (IsFloat ? 2 : 1);
649
650 if (TLI->isOperationLegalOrPromote(ISD, LT.second)) {
651 // The operation is legal. Assume it costs 1.
652 // TODO: Once we have extract/insert subvector cost we need to use them.
653 return LT.first * OpCost;
654 }
655
656 if (!TLI->isOperationExpand(ISD, LT.second)) {
657 // If the operation is custom lowered, then assume that the code is twice
658 // as expensive.
659 return LT.first * 2 * OpCost;
660 }
661
662 // Else, assume that we need to scalarize this op.
663 // TODO: If one of the types get legalized by splitting, handle this
664 // similarly to what getCastInstrCost() does.
665 if (Ty->isVectorTy()) {
666 unsigned Num = Ty->getVectorNumElements();
667 unsigned Cost = static_cast<T *>(this)
668 ->getArithmeticInstrCost(Opcode, Ty->getScalarType());
669 // Return the cost of multiple scalar invocation plus the cost of
670 // inserting and extracting the values.
671 return getScalarizationOverhead(Ty, Args) + Num * Cost;
672 }
673
674 // We don't know anything about this scalar instruction.
675 return OpCost;
676 }
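The cost ladder above can be summarized with a standalone sketch (the legalization factor, float multiplier, and vector width are made-up inputs): legal ops cost LT.first, custom-lowered ops cost twice that, and anything that would be expanded on a vector type is priced as per-element scalar ops plus the insert/extract overhead.

#include <cstdio>

enum class Action { Legal, Custom, Expand };

// Standalone mirror of the getArithmeticInstrCost() ladder, assuming the
// scalar form of an expanded vector op is itself legal (cost OpCost).
unsigned arithCost(Action A, unsigned LegalizationFactor, bool IsFloat,
                   unsigned NumElts) {
  unsigned OpCost = IsFloat ? 2 : 1;
  if (A == Action::Legal)
    return LegalizationFactor * OpCost;
  if (A == Action::Custom)
    return LegalizationFactor * 2 * OpCost;
  // Expand: scalarize. Per-element op cost plus insert+extract of each lane.
  unsigned ScalarizationOverhead = 2 * NumElts;   // made-up: 1 per insert/extract
  return ScalarizationOverhead + NumElts * OpCost;
}

int main() {
  std::printf("legal v4f32 fadd:  %u\n", arithCost(Action::Legal, 1, true, 4));
  std::printf("expand v4i64 mul:  %u\n", arithCost(Action::Expand, 1, false, 4));
}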
677
678 unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
679 Type *SubTp) {
680 switch (Kind) {
681 case TTI::SK_Broadcast:
682 return getBroadcastShuffleOverhead(Tp);
683 case TTI::SK_Select:
684 case TTI::SK_Reverse:
685 case TTI::SK_Transpose:
686 case TTI::SK_PermuteSingleSrc:
687 case TTI::SK_PermuteTwoSrc:
688 return getPermuteShuffleOverhead(Tp);
689 case TTI::SK_ExtractSubvector:
690 return getExtractSubvectorOverhead(Tp, Index, SubTp);
691 case TTI::SK_InsertSubvector:
692 return getInsertSubvectorOverhead(Tp, Index, SubTp);
693 }
694 llvm_unreachable("Unknown TTI::ShuffleKind");
695 }
696
697 unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
698 const Instruction *I = nullptr) {
699 const TargetLoweringBase *TLI = getTLI();
700 int ISD = TLI->InstructionOpcodeToISD(Opcode);
701 assert(ISD && "Invalid opcode");
702 std::pair<unsigned, MVT> SrcLT = TLI->getTypeLegalizationCost(DL, Src);
703 std::pair<unsigned, MVT> DstLT = TLI->getTypeLegalizationCost(DL, Dst);
704
705 // Check for NOOP conversions.
706 if (SrcLT.first == DstLT.first &&
707 SrcLT.second.getSizeInBits() == DstLT.second.getSizeInBits()) {
708
709 // Bitcasts between types that are legalized to the same type are free.
710 if (Opcode == Instruction::BitCast || Opcode == Instruction::Trunc)
711 return 0;
712 }
713
714 if (Opcode == Instruction::Trunc &&
715 TLI->isTruncateFree(SrcLT.second, DstLT.second))
716 return 0;
717
718 if (Opcode == Instruction::ZExt &&
719 TLI->isZExtFree(SrcLT.second, DstLT.second))
720 return 0;
721
722 if (Opcode == Instruction::AddrSpaceCast &&
723 TLI->isFreeAddrSpaceCast(Src->getPointerAddressSpace(),
724 Dst->getPointerAddressSpace()))
725 return 0;
726
727 // If this is a zext/sext of a load, return 0 if the corresponding
728 // extending load exists on target.
729 if ((Opcode == Instruction::ZExt || Opcode == Instruction::SExt) &&
730 I && isa<LoadInst>(I->getOperand(0))) {
731 EVT ExtVT = EVT::getEVT(Dst);
732 EVT LoadVT = EVT::getEVT(Src);
733 unsigned LType =
734 ((Opcode == Instruction::ZExt) ? ISD::ZEXTLOAD : ISD::SEXTLOAD);
735 if (TLI->isLoadExtLegal(LType, ExtVT, LoadVT))
736 return 0;
737 }
738
739 // If the cast is marked as legal (or promote) then assume low cost.
740 if (SrcLT.first == DstLT.first &&
741 TLI->isOperationLegalOrPromote(ISD, DstLT.second))
742 return 1;
743
744 // Handle scalar conversions.
745 if (!Src->isVectorTy() && !Dst->isVectorTy()) {
746 // Scalar bitcasts are usually free.
747 if (Opcode == Instruction::BitCast)
748 return 0;
749
750 // Just check the op cost. If the operation is legal then assume it costs
751 // 1.
752 if (!TLI->isOperationExpand(ISD, DstLT.second))
753 return 1;
754
755 // Assume that illegal scalar instructions are expensive.
756 return 4;
757 }
758
759 // Check vector-to-vector casts.
760 if (Dst->isVectorTy() && Src->isVectorTy()) {
761 // If the cast is between same-sized registers, then the check is simple.
762 if (SrcLT.first == DstLT.first &&
763 SrcLT.second.getSizeInBits() == DstLT.second.getSizeInBits()) {
764
765 // Assume that Zext is done using AND.
766 if (Opcode == Instruction::ZExt)
767 return 1;
768
769 // Assume that sext is done using SHL and SRA.
770 if (Opcode == Instruction::SExt)
771 return 2;
772
773 // Just check the op cost. If the operation is legal then assume it
774 // costs
775 // 1 and multiply by the type-legalization overhead.
776 if (!TLI->isOperationExpand(ISD, DstLT.second))
777 return SrcLT.first * 1;
778 }
779
780 // If we are legalizing by splitting, query the concrete TTI for the cost
781 // of casting the original vector twice. We also need to factor in the
782 // cost of the split itself. Count that as 1, to be consistent with
783 // TLI->getTypeLegalizationCost().
784 if ((TLI->getTypeAction(Src->getContext(), TLI->getValueType(DL, Src)) ==
785 TargetLowering::TypeSplitVector ||
786 TLI->getTypeAction(Dst->getContext(), TLI->getValueType(DL, Dst)) ==
787 TargetLowering::TypeSplitVector) &&
788 Src->getVectorNumElements() > 1 && Dst->getVectorNumElements() > 1) {
789 Type *SplitDst = VectorType::get(Dst->getVectorElementType(),
790 Dst->getVectorNumElements() / 2);
791 Type *SplitSrc = VectorType::get(Src->getVectorElementType(),
792 Src->getVectorNumElements() / 2);
793 T *TTI = static_cast<T *>(this);
794 return TTI->getVectorSplitCost() +
795 (2 * TTI->getCastInstrCost(Opcode, SplitDst, SplitSrc, I));
796 }
797
798 // In other cases where the source or destination are illegal, assume
799 // the operation will get scalarized.
800 unsigned Num = Dst->getVectorNumElements();
801 unsigned Cost = static_cast<T *>(this)->getCastInstrCost(
802 Opcode, Dst->getScalarType(), Src->getScalarType(), I);
803
804 // Return the cost of multiple scalar invocation plus the cost of
805 // inserting and extracting the values.
806 return getScalarizationOverhead(Dst, true, true) + Num * Cost;
807 }
808
809 // We already handled vector-to-vector and scalar-to-scalar conversions.
810 // This
811 // is where we handle bitcasts between vectors and scalars. We need to assume
812 // that the conversion is scalarized in one way or another.
813 if (Opcode == Instruction::BitCast)
814 // Illegal bitcasts are done by storing and loading from a stack slot.
815 return (Src->isVectorTy() ? getScalarizationOverhead(Src, false, true)
816 : 0) +
817 (Dst->isVectorTy() ? getScalarizationOverhead(Dst, true, false)
818 : 0);
819
820 llvm_unreachable("Unhandled cast");
821 }
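One branch worth a worked number is the split-vector path above. Here is a hedged standalone sketch (the split cost of 1 and the "legal after legalization" cost of 1 are assumptions): a cast whose operands get type-split recurses on half-width vectors and adds one unit for the split itself, so the cost roughly doubles per level until the type is legal.

#include <cstdio>

// Standalone mirror of the split-vector recursion in getCastInstrCost(),
// assuming a vector of LegalWidth lanes is legal once legalized.
unsigned castCost(unsigned NumElts, unsigned LegalWidth) {
  if (NumElts <= LegalWidth)
    return 1;                                  // legal after legalization
  unsigned VectorSplitCost = 1;                // count the split itself as 1
  return VectorSplitCost + 2 * castCost(NumElts / 2, LegalWidth);
}

int main() {
  // A 16-lane cast on a target whose legal vectors are 4 lanes wide:
  // 16 -> 8 -> 4, two split levels: 1 + 2*(1 + 2*1) = 7.
  std::printf("cast cost: %u\n", castCost(16, 4));
}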
822
823 unsigned getExtractWithExtendCost(unsigned Opcode, Type *Dst,
824 VectorType *VecTy, unsigned Index) {
825 return static_cast<T *>(this)->getVectorInstrCost(
826 Instruction::ExtractElement, VecTy, Index) +
827 static_cast<T *>(this)->getCastInstrCost(Opcode, Dst,
828 VecTy->getElementType());
829 }
830
831 unsigned getCFInstrCost(unsigned Opcode) {
832 // Branches are assumed to be predicted.
833 return 0;
834 }
835
836 unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
837 const Instruction *I) {
838 const TargetLoweringBase *TLI = getTLI();
839 int ISD = TLI->InstructionOpcodeToISD(Opcode);
840 assert(ISD && "Invalid opcode");
841
842 // Selects on vectors are actually vector selects.
843 if (ISD == ISD::SELECT) {
844 assert(CondTy && "CondTy must exist");
845 if (CondTy->isVectorTy())
846 ISD = ISD::VSELECT;
847 }
848 std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
849
850 if (!(ValTy->isVectorTy() && !LT.second.isVector()) &&
851 !TLI->isOperationExpand(ISD, LT.second)) {
852 // The operation is legal. Assume it costs 1. Multiply
853 // by the type-legalization overhead.
854 return LT.first * 1;
855 }
856
857 // Otherwise, assume that the cast is scalarized.
858 // TODO: If one of the types get legalized by splitting, handle this
859 // similarly to what getCastInstrCost() does.
860 if (ValTy->isVectorTy()) {
861 unsigned Num = ValTy->getVectorNumElements();
862 if (CondTy)
863 CondTy = CondTy->getScalarType();
864 unsigned Cost = static_cast<T *>(this)->getCmpSelInstrCost(
865 Opcode, ValTy->getScalarType(), CondTy, I);
866
867 // Return the cost of multiple scalar invocation plus the cost of
868 // inserting and extracting the values.
869 return getScalarizationOverhead(ValTy, true, false) + Num * Cost;
870 }
871
872 // Unknown scalar opcode.
873 return 1;
874 }
875
876 unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
877 std::pair<unsigned, MVT> LT =
878 getTLI()->getTypeLegalizationCost(DL, Val->getScalarType());
879
880 return LT.first;
881 }
882
883 unsigned getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
884 unsigned AddressSpace,
885 const Instruction *I = nullptr) {
886 assert(!Src->isVoidTy() && "Invalid type");
887 std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(DL, Src);
888
889 // Assuming that all loads of legal types cost 1.
890 unsigned Cost = LT.first;
891
892 if (Src->isVectorTy() &&
893 Src->getPrimitiveSizeInBits() < LT.second.getSizeInBits()) {
894 // This is a vector load that legalizes to a larger type than the vector
895 // itself. Unless the corresponding extending load or truncating store is
896 // legal, then this will scalarize.
897 TargetLowering::LegalizeAction LA = TargetLowering::Expand;
898 EVT MemVT = getTLI()->getValueType(DL, Src);
899 if (Opcode == Instruction::Store)
900 LA = getTLI()->getTruncStoreAction(LT.second, MemVT);
901 else
902 LA = getTLI()->getLoadExtAction(ISD::EXTLOAD, LT.second, MemVT);
903
904 if (LA != TargetLowering::Legal && LA != TargetLowering::Custom) {
905 // This is a vector load/store for some illegal type that is scalarized.
906 // We must account for the cost of building or decomposing the vector.
907 Cost += getScalarizationOverhead(Src, Opcode != Instruction::Store,
908 Opcode == Instruction::Store);
909 }
910 }
911
912 return Cost;
913 }
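A short standalone illustration of the branch above (made-up numbers): a small vector load that legalizes to a wider type with no matching extending load pays the base cost plus the overhead of rebuilding the vector element by element.

#include <cstdio>

int main() {
  bool IsStore = false;
  bool ExtOrTruncLegal = false;   // no matching extload/truncstore (assumption)
  unsigned LegalizationFactor = 1;
  unsigned NumElts = 4;

  unsigned Cost = LegalizationFactor;          // base: one legal load
  if (!ExtOrTruncLegal)
    // Loads rebuild the vector (inserts); stores decompose it (extracts).
    Cost += NumElts * 1;                       // 1 per insert/extract, assumed
  std::printf("%s cost: %u\n", IsStore ? "store" : "load", Cost);   // 5
}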
914
915 unsigned getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
916 unsigned Factor,
917 ArrayRef<unsigned> Indices,
918 unsigned Alignment, unsigned AddressSpace,
919 bool UseMaskForCond = false,
920 bool UseMaskForGaps = false) {
921 VectorType *VT = dyn_cast<VectorType>(VecTy);
922 assert(VT && "Expect a vector type for interleaved memory op");
923
924 unsigned NumElts = VT->getNumElements();
925 assert(Factor > 1 && NumElts % Factor == 0 && "Invalid interleave factor");
926
927 unsigned NumSubElts = NumElts / Factor;
928 VectorType *SubVT = VectorType::get(VT->getElementType(), NumSubElts);
929
930 // Firstly, the cost of load/store operation.
931 unsigned Cost;
932 if (UseMaskForCond || UseMaskForGaps)
933 Cost = static_cast<T *>(this)->getMaskedMemoryOpCost(
934 Opcode, VecTy, Alignment, AddressSpace);
935 else
936 Cost = static_cast<T *>(this)->getMemoryOpCost(
937 Opcode, VecTy, MaybeAlign(Alignment), AddressSpace);
938
939 // Legalize the vector type, and get the legalized and unlegalized type
940 // sizes.
941 MVT VecTyLT = getTLI()->getTypeLegalizationCost(DL, VecTy).second;
942 unsigned VecTySize =
943 static_cast<T *>(this)->getDataLayout().getTypeStoreSize(VecTy);
944 unsigned VecTyLTSize = VecTyLT.getStoreSize();
945
946 // Return the ceiling of dividing A by B.
947 auto ceil = [](unsigned A, unsigned B) { return (A + B - 1) / B; };
948
949 // Scale the cost of the memory operation by the fraction of legalized
950 // instructions that will actually be used. We shouldn't account for the
951 // cost of dead instructions since they will be removed.
952 //
953 // E.g., An interleaved load of factor 8:
954 // %vec = load <16 x i64>, <16 x i64>* %ptr
955 // %v0 = shufflevector %vec, undef, <0, 8>
956 //
957 // If <16 x i64> is legalized to 8 v2i64 loads, only 2 of the loads will be
958 // used (those corresponding to elements [0:1] and [8:9] of the unlegalized
959 // type). The other loads are unused.
960 //
961 // We only scale the cost of loads since interleaved store groups aren't
962 // allowed to have gaps.
963 if (Opcode == Instruction::Load && VecTySize > VecTyLTSize) {
964 // The number of loads of a legal type it will take to represent a load
965 // of the unlegalized vector type.
966 unsigned NumLegalInsts = ceil(VecTySize, VecTyLTSize);
967
968 // The number of elements of the unlegalized type that correspond to a
969 // single legal instruction.
970 unsigned NumEltsPerLegalInst = ceil(NumElts, NumLegalInsts);
971
972 // Determine which legal instructions will be used.
973 BitVector UsedInsts(NumLegalInsts, false);
974 for (unsigned Index : Indices)
975 for (unsigned Elt = 0; Elt < NumSubElts; ++Elt)
976 UsedInsts.set((Index + Elt * Factor) / NumEltsPerLegalInst);
977
978 // Scale the cost of the load by the fraction of legal instructions that
979 // will be used.
980 Cost *= UsedInsts.count() / NumLegalInsts;
981 }
982
983 // Then add the cost of the interleave operation.
984 if (Opcode == Instruction::Load) {
985 // The interleave cost is similar to extracting the sub vectors' elements
986 // from the wide vector and inserting them into the sub vectors.
987 //
988 // E.g. An interleaved load of factor 2 (with one member of index 0):
989 // %vec = load <8 x i32>, <8 x i32>* %ptr
990 // %v0 = shuffle %vec, undef, <0, 2, 4, 6> ; Index 0
991 // The cost is estimated as extract elements at 0, 2, 4, 6 from the
992 // <8 x i32> vector and insert them into a <4 x i32> vector.
993
994 assert(Indices.size() <= Factor &&
995 "Interleaved memory op has too many members");
996
997 for (unsigned Index : Indices) {
998 assert(Index < Factor && "Invalid index for interleaved memory op");
999
1000 // Extract elements from loaded vector for each sub vector.
1001 for (unsigned i = 0; i < NumSubElts; i++)
1002 Cost += static_cast<T *>(this)->getVectorInstrCost(
1003 Instruction::ExtractElement, VT, Index + i * Factor);
1004 }
1005
1006 unsigned InsSubCost = 0;
1007 for (unsigned i = 0; i < NumSubElts; i++)
1008 InsSubCost += static_cast<T *>(this)->getVectorInstrCost(
1009 Instruction::InsertElement, SubVT, i);
1010
1011 Cost += Indices.size() * InsSubCost;
1012 } else {
1013 // The interleave cost is that of extracting all elements from the sub
1014 // vectors and inserting them into the wide vector.
1015 //
1016 // E.g. An interleaved store of factor 2:
1017 // %v0_v1 = shuffle %v0, %v1, <0, 4, 1, 5, 2, 6, 3, 7>
1018 // store <8 x i32> %interleaved.vec, <8 x i32>* %ptr
1019 // The cost is estimated as extract all elements from both <4 x i32>
1020 // vectors and insert into the <8 x i32> vector.
1021
1022 unsigned ExtSubCost = 0;
1023 for (unsigned i = 0; i < NumSubElts; i++)
1024 ExtSubCost += static_cast<T *>(this)->getVectorInstrCost(
1025 Instruction::ExtractElement, SubVT, i);
1026 Cost += ExtSubCost * Factor;
1027
1028 for (unsigned i = 0; i < NumElts; i++)
1029 Cost += static_cast<T *>(this)
1030 ->getVectorInstrCost(Instruction::InsertElement, VT, i);
1031 }
1032
1033 if (!UseMaskForCond)
1034 return Cost;
1035
1036 Type *I8Type = Type::getInt8Ty(VT->getContext());
1037 VectorType *MaskVT = VectorType::get(I8Type, NumElts);
1038 SubVT = VectorType::get(I8Type, NumSubElts);
1039
1040 // The Mask shuffling cost is extract all the elements of the Mask
1041 // and insert each of them Factor times into the wide vector:
1042 //
1043 // E.g. an interleaved group with factor 3:
1044 // %mask = icmp ult <8 x i32> %vec1, %vec2
1045 // %interleaved.mask = shufflevector <8 x i1> %mask, <8 x i1> undef,
1046 // <24 x i32> <0,0,0,1,1,1,2,2,2,3,3,3,4,4,4,5,5,5,6,6,6,7,7,7>
1047 // The cost is estimated as extract all mask elements from the <8xi1> mask
1048 // vector and insert them factor times into the <24xi1> shuffled mask
1049 // vector.
1050 for (unsigned i = 0; i < NumSubElts; i++)
1051 Cost += static_cast<T *>(this)->getVectorInstrCost(
1052 Instruction::ExtractElement, SubVT, i);
1053
1054 for (unsigned i = 0; i < NumElts; i++)
1055 Cost += static_cast<T *>(this)->getVectorInstrCost(
1056 Instruction::InsertElement, MaskVT, i);
1057
1058 // The Gaps mask is invariant and created outside the loop, therefore the
1059 // cost of creating it is not accounted for here. However if we have both
1060 // a MaskForGaps and some other mask that guards the execution of the
1061 // memory access, we need to account for the cost of And-ing the two masks
1062 // inside the loop.
1063 if (UseMaskForGaps)
1064 Cost += static_cast<T *>(this)->getArithmeticInstrCost(
1065 BinaryOperator::And, MaskVT);
1066
1067 return Cost;
1068 }
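Plugging the factor-8 example from the comment above into the scaling step gives a quick sanity check (a standalone sketch, not LLVM code): a <16 x i64> load legalized into 8 two-element loads, with a single interleave member at index 0, touches only 2 of the 8 legal loads.

#include <cstdio>
#include <vector>

int main() {
  unsigned NumElts = 16, Factor = 8;            // <16 x i64>, interleave factor 8
  unsigned NumSubElts = NumElts / Factor;       // 2 elements per member
  unsigned NumLegalInsts = 8;                   // legalized into 8 x v2i64 loads
  unsigned NumEltsPerLegalInst = NumElts / NumLegalInsts;   // 2
  std::vector<unsigned> Indices = {0};          // only member 0 is used

  std::vector<bool> UsedInsts(NumLegalInsts, false);
  for (unsigned Index : Indices)
    for (unsigned Elt = 0; Elt < NumSubElts; ++Elt)
      UsedInsts[(Index + Elt * Factor) / NumEltsPerLegalInst] = true;

  unsigned Used = 0;
  for (bool B : UsedInsts)
    Used += B;
  // Elements 0 and 8 of the wide vector map to legal loads 0 and 4.
  std::printf("%u of %u legal loads used\n", Used, NumLegalInsts);   // 2 of 8
}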
1069
1070 /// Get intrinsic cost based on arguments.
1071 unsigned getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
1072 ArrayRef<Value *> Args, FastMathFlags FMF,
1073 unsigned VF = 1) {
1074 unsigned RetVF = (RetTy->isVectorTy() ? RetTy->getVectorNumElements() : 1);
1075 assert((RetVF == 1 || VF == 1) && "VF > 1 and RetVF is a vector type");
1076 auto *ConcreteTTI = static_cast<T *>(this);
1077
1078 switch (IID) {
1079 default: {
1080 // Assume that we need to scalarize this intrinsic.
1081 SmallVector<Type *, 4> Types;
1082 for (Value *Op : Args) {
1083 Type *OpTy = Op->getType();
1084 assert(VF == 1 || !OpTy->isVectorTy());
1085 Types.push_back(VF == 1 ? OpTy : VectorType::get(OpTy, VF));
1086 }
1087
1088 if (VF > 1 && !RetTy->isVoidTy())
1089 RetTy = VectorType::get(RetTy, VF);
1090
1091 // Compute the scalarization overhead based on Args for a vector
1092 // intrinsic. A vectorizer will pass a scalar RetTy and VF > 1, while
1093 // CostModel will pass a vector RetTy and VF is 1.
1094 unsigned ScalarizationCost = std::numeric_limits<unsigned>::max();
1095 if (RetVF > 1 || VF > 1) {
1096 ScalarizationCost = 0;
1097 if (!RetTy->isVoidTy())
1098 ScalarizationCost += getScalarizationOverhead(RetTy, true, false);
1099 ScalarizationCost += getOperandsScalarizationOverhead(Args, VF);
1100 }
1101
1102 return ConcreteTTI->getIntrinsicInstrCost(IID, RetTy, Types, FMF,
1103 ScalarizationCost);
1104 }
1105 case Intrinsic::masked_scatter: {
1106 assert(VF == 1 && "Can't vectorize types here.");
1107 Value *Mask = Args[3];
1108 bool VarMask = !isa<Constant>(Mask);
1109 unsigned Alignment = cast<ConstantInt>(Args[2])->getZExtValue();
1110 return ConcreteTTI->getGatherScatterOpCost(
1111 Instruction::Store, Args[0]->getType(), Args[1], VarMask, Alignment);
1112 }
1113 case Intrinsic::masked_gather: {
1114 assert(VF == 1 && "Can't vectorize types here.");
1115 Value *Mask = Args[2];
1116 bool VarMask = !isa<Constant>(Mask);
1117 unsigned Alignment = cast<ConstantInt>(Args[1])->getZExtValue();
1118 return ConcreteTTI->getGatherScatterOpCost(Instruction::Load, RetTy,
1119 Args[0], VarMask, Alignment);
1120 }
1121 case Intrinsic::experimental_vector_reduce_add:
1122 case Intrinsic::experimental_vector_reduce_mul:
1123 case Intrinsic::experimental_vector_reduce_and:
1124 case Intrinsic::experimental_vector_reduce_or:
1125 case Intrinsic::experimental_vector_reduce_xor:
1126 case Intrinsic::experimental_vector_reduce_v2_fadd:
1127 case Intrinsic::experimental_vector_reduce_v2_fmul:
1128 case Intrinsic::experimental_vector_reduce_smax:
1129 case Intrinsic::experimental_vector_reduce_smin:
1130 case Intrinsic::experimental_vector_reduce_fmax:
1131 case Intrinsic::experimental_vector_reduce_fmin:
1132 case Intrinsic::experimental_vector_reduce_umax:
1133 case Intrinsic::experimental_vector_reduce_umin:
1134 return getIntrinsicInstrCost(IID, RetTy, Args[0]->getType(), FMF);
1135 case Intrinsic::fshl:
1136 case Intrinsic::fshr: {
1137 Value *X = Args[0];
1138 Value *Y = Args[1];
1139 Value *Z = Args[2];
1140 TTI::OperandValueProperties OpPropsX, OpPropsY, OpPropsZ, OpPropsBW;
1141 TTI::OperandValueKind OpKindX = TTI::getOperandInfo(X, OpPropsX);
1142 TTI::OperandValueKind OpKindY = TTI::getOperandInfo(Y, OpPropsY);
1143 TTI::OperandValueKind OpKindZ = TTI::getOperandInfo(Z, OpPropsZ);
1144 TTI::OperandValueKind OpKindBW = TTI::OK_UniformConstantValue;
1145 OpPropsBW = isPowerOf2_32(RetTy->getScalarSizeInBits()) ? TTI::OP_PowerOf2
1146 : TTI::OP_None;
1147 // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
1148 // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
1149 unsigned Cost = 0;
1150 Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::Or, RetTy);
1151 Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::Sub, RetTy);
1152 Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::Shl, RetTy,
1153 OpKindX, OpKindZ, OpPropsX);
1154 Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::LShr, RetTy,
1155 OpKindY, OpKindZ, OpPropsY);
1156 // Non-constant shift amounts require a modulo.
1157 if (OpKindZ != TTI::OK_UniformConstantValue &&
1158 OpKindZ != TTI::OK_NonUniformConstantValue)
1159 Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::URem, RetTy,
1160 OpKindZ, OpKindBW, OpPropsZ,
1161 OpPropsBW);
1162 // For non-rotates (X != Y) we must add shift-by-zero handling costs.
1163 if (X != Y) {
1164 Type *CondTy = RetTy->getWithNewBitWidth(1);
1165 Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy,
1166 CondTy, nullptr);
1167 Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::Select, RetTy,
1168 CondTy, nullptr);
1169 }
1170 return Cost;
1171 }
1172 }
1173 }
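The funnel-shift pricing above adds up to a small fixed sum; here is that arithmetic as a standalone sketch (every component op is assumed to cost 1, which is a simplification of what the concrete TTI would return):

#include <cstdio>

// Standalone mirror of the fshl/fshr pricing, assuming each component
// instruction (or, sub, shl, lshr, urem, icmp, select) costs 1.
unsigned funnelShiftCost(bool ShiftAmountIsConstant, bool IsRotate) {
  unsigned Cost = 0;
  Cost += 1;                       // or
  Cost += 1;                       // sub (BW - (Z % BW))
  Cost += 1;                       // shl
  Cost += 1;                       // lshr
  if (!ShiftAmountIsConstant)
    Cost += 1;                     // urem to reduce the shift amount mod BW
  if (!IsRotate)
    Cost += 2;                     // icmp + select for the shift-by-zero case
  return Cost;
}

int main() {
  std::printf("rotate by constant: %u\n", funnelShiftCost(true, true));    // 4
  std::printf("fshl, variable amt: %u\n", funnelShiftCost(false, false));  // 7
}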
1174
1175 /// Get intrinsic cost based on argument types.
1176 /// If ScalarizationCostPassed is std::numeric_limits<unsigned>::max(), the
1177 /// cost of scalarizing the arguments and the return value will be computed
1178 /// based on types.
1179 unsigned getIntrinsicInstrCost(
1180 Intrinsic::ID IID, Type *RetTy, ArrayRef<Type *> Tys, FastMathFlags FMF,
1181 unsigned ScalarizationCostPassed = std::numeric_limits<unsigned>::max()) {
1182 auto *ConcreteTTI = static_cast<T *>(this);
1183
1184 SmallVector<unsigned, 2> ISDs;
1185 unsigned SingleCallCost = 10; // Library call cost. Make it expensive.
1186 switch (IID) {
1187 default: {
1188 // Assume that we need to scalarize this intrinsic.
1189 unsigned ScalarizationCost = ScalarizationCostPassed;
1190 unsigned ScalarCalls = 1;
1191 Type *ScalarRetTy = RetTy;
1192 if (RetTy->isVectorTy()) {
1193 if (ScalarizationCostPassed == std::numeric_limits<unsigned>::max())
1194 ScalarizationCost = getScalarizationOverhead(RetTy, true, false);
1195 ScalarCalls = std::max(ScalarCalls, RetTy->getVectorNumElements());
1196 ScalarRetTy = RetTy->getScalarType();
1197 }
1198 SmallVector<Type *, 4> ScalarTys;
1199 for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) {
1200 Type *Ty = Tys[i];
1201 if (Ty->isVectorTy()) {
1202 if (ScalarizationCostPassed == std::numeric_limits<unsigned>::max())
1203 ScalarizationCost += getScalarizationOverhead(Ty, false, true);
1204 ScalarCalls = std::max(ScalarCalls, Ty->getVectorNumElements());
1205 Ty = Ty->getScalarType();
1206 }
1207 ScalarTys.push_back(Ty);
1208 }
1209 if (ScalarCalls == 1)
1210 return 1; // Return cost of a scalar intrinsic. Assume it to be cheap.
1211
1212 unsigned ScalarCost =
1213 ConcreteTTI->getIntrinsicInstrCost(IID, ScalarRetTy, ScalarTys, FMF);
1214
1215 return ScalarCalls * ScalarCost + ScalarizationCost;
1216 }
1217 // Look for intrinsics that can be lowered directly or turned into a scalar
1218 // intrinsic call.
1219 case Intrinsic::sqrt:
1220 ISDs.push_back(ISD::FSQRT);
1221 break;
1222 case Intrinsic::sin:
1223 ISDs.push_back(ISD::FSIN);
1224 break;
1225 case Intrinsic::cos:
1226 ISDs.push_back(ISD::FCOS);
1227 break;
1228 case Intrinsic::exp:
1229 ISDs.push_back(ISD::FEXP);
1230 break;
1231 case Intrinsic::exp2:
1232 ISDs.push_back(ISD::FEXP2);
1233 break;
1234 case Intrinsic::log:
1235 ISDs.push_back(ISD::FLOG);
1236 break;
1237 case Intrinsic::log10:
1238 ISDs.push_back(ISD::FLOG10);
1239 break;
1240 case Intrinsic::log2:
1241 ISDs.push_back(ISD::FLOG2);
1242 break;
1243 case Intrinsic::fabs:
1244 ISDs.push_back(ISD::FABS);
1245 break;
1246 case Intrinsic::canonicalize:
1247 ISDs.push_back(ISD::FCANONICALIZE);
1248 break;
1249 case Intrinsic::minnum:
1250 ISDs.push_back(ISD::FMINNUM);
1251 if (FMF.noNaNs())
1252 ISDs.push_back(ISD::FMINIMUM);
1253 break;
1254 case Intrinsic::maxnum:
1255 ISDs.push_back(ISD::FMAXNUM);
1256 if (FMF.noNaNs())
1257 ISDs.push_back(ISD::FMAXIMUM);
1258 break;
1259 case Intrinsic::copysign:
1260 ISDs.push_back(ISD::FCOPYSIGN);
1261 break;
1262 case Intrinsic::floor:
1263 ISDs.push_back(ISD::FFLOOR);
1264 break;
1265 case Intrinsic::ceil:
1266 ISDs.push_back(ISD::FCEIL);
1267 break;
1268 case Intrinsic::trunc:
1269 ISDs.push_back(ISD::FTRUNC);
1270 break;
1271 case Intrinsic::nearbyint:
1272 ISDs.push_back(ISD::FNEARBYINT);
1273 break;
1274 case Intrinsic::rint:
1275 ISDs.push_back(ISD::FRINT);
1276 break;
1277 case Intrinsic::round:
1278 ISDs.push_back(ISD::FROUND);
1279 break;
1280 case Intrinsic::pow:
1281 ISDs.push_back(ISD::FPOW);
1282 break;
1283 case Intrinsic::fma:
1284 ISDs.push_back(ISD::FMA);
1285 break;
1286 case Intrinsic::fmuladd:
1287 ISDs.push_back(ISD::FMA);
1288 break;
1289 // FIXME: We should return 0 whenever getIntrinsicCost == TCC_Free.
1290 case Intrinsic::lifetime_start:
1291 case Intrinsic::lifetime_end:
1292 case Intrinsic::sideeffect:
1293 return 0;
1294 case Intrinsic::masked_store:
1295 return ConcreteTTI->getMaskedMemoryOpCost(Instruction::Store, Tys[0], 0,
1296 0);
1297 case Intrinsic::masked_load:
1298 return ConcreteTTI->getMaskedMemoryOpCost(Instruction::Load, RetTy, 0, 0);
1299 case Intrinsic::experimental_vector_reduce_add:
1300 return ConcreteTTI->getArithmeticReductionCost(Instruction::Add, Tys[0],
1301 /*IsPairwiseForm=*/false);
1302 case Intrinsic::experimental_vector_reduce_mul:
1303 return ConcreteTTI->getArithmeticReductionCost(Instruction::Mul, Tys[0],
1304 /*IsPairwiseForm=*/false);
1305 case Intrinsic::experimental_vector_reduce_and:
1306 return ConcreteTTI->getArithmeticReductionCost(Instruction::And, Tys[0],
1307 /*IsPairwiseForm=*/false);
1308 case Intrinsic::experimental_vector_reduce_or:
1309 return ConcreteTTI->getArithmeticReductionCost(Instruction::Or, Tys[0],
1310 /*IsPairwiseForm=*/false);
1311 case Intrinsic::experimental_vector_reduce_xor:
1312 return ConcreteTTI->getArithmeticReductionCost(Instruction::Xor, Tys[0],
1313 /*IsPairwiseForm=*/false);
1314 case Intrinsic::experimental_vector_reduce_v2_fadd:
1315 return ConcreteTTI->getArithmeticReductionCost(
1316 Instruction::FAdd, Tys[0],
1317 /*IsPairwiseForm=*/false); // FIXME: Add new flag for cost of strict
1318 // reductions.
1319 case Intrinsic::experimental_vector_reduce_v2_fmul:
1320 return ConcreteTTI->getArithmeticReductionCost(
1321 Instruction::FMul, Tys[0],
1322 /*IsPairwiseForm=*/false); // FIXME: Add new flag for cost of strict
1323 // reductions.
1324 case Intrinsic::experimental_vector_reduce_smax:
1325 case Intrinsic::experimental_vector_reduce_smin:
1326 case Intrinsic::experimental_vector_reduce_fmax:
1327 case Intrinsic::experimental_vector_reduce_fmin:
1328 return ConcreteTTI->getMinMaxReductionCost(
1329 Tys[0], CmpInst::makeCmpResultType(Tys[0]), /*IsPairwiseForm=*/false,
1330 /*IsUnsigned=*/true);
1331 case Intrinsic::experimental_vector_reduce_umax:
1332 case Intrinsic::experimental_vector_reduce_umin:
1333 return ConcreteTTI->getMinMaxReductionCost(
1334 Tys[0], CmpInst::makeCmpResultType(Tys[0]), /*IsPairwiseForm=*/false,
1335 /*IsUnsigned=*/false);
1336 case Intrinsic::sadd_sat:
1337 case Intrinsic::ssub_sat: {
1338 Type *CondTy = RetTy->getWithNewBitWidth(1);
1339
1340 Type *OpTy = StructType::create({RetTy, CondTy});
1341 Intrinsic::ID OverflowOp = IID == Intrinsic::sadd_sat
1342 ? Intrinsic::sadd_with_overflow
1343 : Intrinsic::ssub_with_overflow;
1344
1345 // SatMax -> Overflow && SumDiff < 0
1346 // SatMin -> Overflow && SumDiff >= 0
1347 unsigned Cost = 0;
1348 Cost += ConcreteTTI->getIntrinsicInstrCost(
1349 OverflowOp, OpTy, {RetTy, RetTy}, FMF, ScalarizationCostPassed);
1350 Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy,
1351 CondTy, nullptr);
1352 Cost += 2 * ConcreteTTI->getCmpSelInstrCost(BinaryOperator::Select, RetTy,
1353 CondTy, nullptr);
1354 return Cost;
1355 }
1356 case Intrinsic::uadd_sat:
1357 case Intrinsic::usub_sat: {
1358 Type *CondTy = RetTy->getWithNewBitWidth(1);
1359
1360 Type *OpTy = StructType::create({RetTy, CondTy});
1361 Intrinsic::ID OverflowOp = IID == Intrinsic::uadd_sat
1362 ? Intrinsic::uadd_with_overflow
1363 : Intrinsic::usub_with_overflow;
1364
1365 unsigned Cost = 0;
1366 Cost += ConcreteTTI->getIntrinsicInstrCost(
1367 OverflowOp, OpTy, {RetTy, RetTy}, FMF, ScalarizationCostPassed);
1368 Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::Select, RetTy,
1369 CondTy, nullptr);
1370 return Cost;
1371 }
1372 case Intrinsic::smul_fix:
1373 case Intrinsic::umul_fix: {
1374 unsigned ExtSize = RetTy->getScalarSizeInBits() * 2;
1375 Type *ExtTy = RetTy->getWithNewBitWidth(ExtSize);
1376
1377 unsigned ExtOp =
1378 IID == Intrinsic::smul_fix ? Instruction::SExt : Instruction::ZExt;
1379
1380 unsigned Cost = 0;
1381 Cost += 2 * ConcreteTTI->getCastInstrCost(ExtOp, ExtTy, RetTy);
1382 Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Mul, ExtTy);
1383 Cost +=
1384 2 * ConcreteTTI->getCastInstrCost(Instruction::Trunc, RetTy, ExtTy);
1385 Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::LShr, RetTy,
1386 TTI::OK_AnyValue,
1387 TTI::OK_UniformConstantValue);
1388 Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Shl, RetTy,
1389 TTI::OK_AnyValue,
1390 TTI::OK_UniformConstantValue);
1391 Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Or, RetTy);
1392 return Cost;
1393 }
1394 case Intrinsic::sadd_with_overflow:
1395 case Intrinsic::ssub_with_overflow: {
1396 Type *SumTy = RetTy->getContainedType(0);
1397 Type *OverflowTy = RetTy->getContainedType(1);
1398 unsigned Opcode = IID == Intrinsic::sadd_with_overflow
1399 ? BinaryOperator::Add
1400 : BinaryOperator::Sub;
1401
1402 // LHSSign -> LHS >= 0
1403 // RHSSign -> RHS >= 0
1404 // SumSign -> Sum >= 0
1405 //
1406 // Add:
1407 // Overflow -> (LHSSign == RHSSign) && (LHSSign != SumSign)
1408 // Sub:
1409 // Overflow -> (LHSSign != RHSSign) && (LHSSign != SumSign)
1410 unsigned Cost = 0;
1411 Cost += ConcreteTTI->getArithmeticInstrCost(Opcode, SumTy);
1412 Cost += 3 * ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, SumTy,
1413 OverflowTy, nullptr);
1414 Cost += 2 * ConcreteTTI->getCmpSelInstrCost(
1415 BinaryOperator::ICmp, OverflowTy, OverflowTy, nullptr);
1416 Cost +=
1417 ConcreteTTI->getArithmeticInstrCost(BinaryOperator::And, OverflowTy);
1418 return Cost;
1419 }
1420 case Intrinsic::uadd_with_overflow:
1421 case Intrinsic::usub_with_overflow: {
1422 Type *SumTy = RetTy->getContainedType(0);
1423 Type *OverflowTy = RetTy->getContainedType(1);
1424 unsigned Opcode = IID == Intrinsic::uadd_with_overflow
1425 ? BinaryOperator::Add
1426 : BinaryOperator::Sub;
1427
1428 unsigned Cost = 0;
1429 Cost += ConcreteTTI->getArithmeticInstrCost(Opcode, SumTy);
1430 Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, SumTy,
1431 OverflowTy, nullptr);
1432 return Cost;
1433 }
1434 case Intrinsic::smul_with_overflow:
1435 case Intrinsic::umul_with_overflow: {
1436 Type *MulTy = RetTy->getContainedType(0);
1437 Type *OverflowTy = RetTy->getContainedType(1);
1438 unsigned ExtSize = MulTy->getScalarSizeInBits() * 2;
1439 Type *ExtTy = MulTy->getWithNewBitWidth(ExtSize);
1440
1441 unsigned ExtOp =
1442 IID == Intrinsic::smul_fix ? Instruction::SExt : Instruction::ZExt;
1443
1444 unsigned Cost = 0;
1445 Cost += 2 * ConcreteTTI->getCastInstrCost(ExtOp, ExtTy, MulTy);
1446 Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Mul, ExtTy);
1447 Cost +=
1448 2 * ConcreteTTI->getCastInstrCost(Instruction::Trunc, MulTy, ExtTy);
1449 Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::LShr, MulTy,
1450 TTI::OK_AnyValue,
1451 TTI::OK_UniformConstantValue);
1452
1453 if (IID == Intrinsic::smul_with_overflow)
1454 Cost += ConcreteTTI->getArithmeticInstrCost(
1455 Instruction::AShr, MulTy, TTI::OK_AnyValue,
1456 TTI::OK_UniformConstantValue);
1457
1458 Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, MulTy,
1459 OverflowTy, nullptr);
1460 return Cost;
1461 }
1462 case Intrinsic::ctpop:
1463 ISDs.push_back(ISD::CTPOP);
1464 // In case of legalization use TCC_Expensive. This is cheaper than a
1465 // library call but still not a cheap instruction.
1466 SingleCallCost = TargetTransformInfo::TCC_Expensive;
1467 break;
1468 // FIXME: ctlz, cttz, ...
1469 }
1470
1471 const TargetLoweringBase *TLI = getTLI();
1472 std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, RetTy);
1473
1474 SmallVector<unsigned, 2> LegalCost;
1475 SmallVector<unsigned, 2> CustomCost;
1476 for (unsigned ISD : ISDs) {
1477 if (TLI->isOperationLegalOrPromote(ISD, LT.second)) {
1478 if (IID == Intrinsic::fabs && LT.second.isFloatingPoint() &&
1479 TLI->isFAbsFree(LT.second)) {
1480 return 0;
1481 }
1482
1483 // The operation is legal. Assume it costs 1.
1484 // If the type is split to multiple registers, assume that there is some
1485 // overhead to this.
1486 // TODO: Once we have extract/insert subvector cost we need to use them.
1487 if (LT.first > 1)
1488 LegalCost.push_back(LT.first * 2);
1489 else
1490 LegalCost.push_back(LT.first * 1);
1491 } else if (!TLI->isOperationExpand(ISD, LT.second)) {
1492 // If the operation is custom lowered then assume
1493 // that the code is twice as expensive.
1494 CustomCost.push_back(LT.first * 2);
1495 }
1496 }
1497
1498 auto MinLegalCostI = std::min_element(LegalCost.begin(), LegalCost.end());
1499 if (MinLegalCostI != LegalCost.end())
1500 return *MinLegalCostI;
1501
1502 auto MinCustomCostI =
1503 std::min_element(CustomCost.begin(), CustomCost.end());
1504 if (MinCustomCostI != CustomCost.end())
1505 return *MinCustomCostI;
1506
1507 // If we can't lower fmuladd into an FMA estimate the cost as a floating
1508 // point mul followed by an add.
1509 if (IID == Intrinsic::fmuladd)
1510 return ConcreteTTI->getArithmeticInstrCost(BinaryOperator::FMul, RetTy) +
1511 ConcreteTTI->getArithmeticInstrCost(BinaryOperator::FAdd, RetTy);
1512
1513 // Else, assume that we need to scalarize this intrinsic. For math builtins
1514 // this will emit a costly libcall, adding call overhead and spills. Make it
1515 // very expensive.
1516 if (RetTy->isVectorTy()) {
1517 unsigned ScalarizationCost =
1518 ((ScalarizationCostPassed != std::numeric_limits<unsigned>::max())
1519 ? ScalarizationCostPassed
1520 : getScalarizationOverhead(RetTy, true, false));
1521 unsigned ScalarCalls = RetTy->getVectorNumElements();
1522 SmallVector<Type *, 4> ScalarTys;
1523 for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) {
1524 Type *Ty = Tys[i];
1525 if (Ty->isVectorTy())
1526 Ty = Ty->getScalarType();
1527 ScalarTys.push_back(Ty);
1528 }
1529 unsigned ScalarCost = ConcreteTTI->getIntrinsicInstrCost(
1530 IID, RetTy->getScalarType(), ScalarTys, FMF);
1531 for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) {
1532 if (Tys[i]->isVectorTy()) {
1533 if (ScalarizationCostPassed == std::numeric_limits<unsigned>::max())
1534 ScalarizationCost += getScalarizationOverhead(Tys[i], false, true);
1535 ScalarCalls = std::max(ScalarCalls, Tys[i]->getVectorNumElements());
1536 }
1537 }
1538
1539 return ScalarCalls * ScalarCost + ScalarizationCost;
1540 }
1541
1542 // This is going to be turned into a library call, make it expensive.
1543 return SingleCallCost;
1544 }
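As a worked instance of the saturating-add expansion above (a sketch under the assumption that the overflow intrinsic itself costs 3 and each compare/select costs 1; the real values come from the concrete TTI): sadd.sat is priced as the overflow op, one icmp on the sum's sign, and two selects.

#include <cstdio>

int main() {
  // Assumed component costs; the real values come from the concrete TTI.
  unsigned OverflowIntrinsicCost = 3;   // sadd.with.overflow (assumption)
  unsigned CmpCost = 1, SelCost = 1;

  // Mirrors the sadd_sat/ssub_sat case above:
  //   SatMax -> Overflow && SumDiff <  0
  //   SatMin -> Overflow && SumDiff >= 0
  unsigned Cost = 0;
  Cost += OverflowIntrinsicCost;        // compute sum and overflow bit
  Cost += CmpCost;                      // icmp on the sign of the sum
  Cost += 2 * SelCost;                  // pick SatMax/SatMin, then the result
  std::printf("sadd.sat cost: %u\n", Cost);   // 6
}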
1545
1546 /// Compute a cost of the given call instruction.
1547 ///
1548 /// Compute the cost of calling function F with return type RetTy and
1549 /// argument types Tys. F might be nullptr, in this case the cost of an
1550 /// arbitrary call with the specified signature will be returned.
1551 /// This is used, for instance, when we estimate call of a vector
1552 /// counterpart of the given function.
1553 /// \param F Called function, might be nullptr.
1554 /// \param RetTy Return value types.
1555 /// \param Tys Argument types.
1556 /// \returns The cost of Call instruction.
1557 unsigned getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys) {
1558 return 10;
1559 }
1560
1561 unsigned getNumberOfParts(Type *Tp) {
1562 std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(DL, Tp);
1563 return LT.first;
1564 }
1565
1566 unsigned getAddressComputationCost(Type *Ty, ScalarEvolution *,
1567 const SCEV *) {
1568 return 0;
1569 }
1570
1571 /// Try to calculate arithmetic and shuffle op costs for reduction operations.
1572 /// We're assuming that reduction operations are performed in the following way:
1573 /// 1. Non-pairwise reduction
1574 /// %val1 = shufflevector<n x t> %val, <n x t> %undef,
1575 /// <n x i32> <i32 n/2, i32 n/2 + 1, ..., i32 n, i32 undef, ..., i32 undef>
1576 /// \----------------v-------------/ \----------v------------/
1577 /// n/2 elements n/2 elements
1578 /// %red1 = op <n x t> %val, <n x t> val1
1579 /// After this operation we have a vector %red1 where only the first n/2
1580 /// elements are meaningful, the second n/2 elements are undefined and can be
1581 /// dropped. All other operations are actually working with the vector of
1582 /// length n/2, not n, though the real vector length is still n.
1583 /// %val2 = shufflevector<n x t> %red1, <n x t> %undef,
1584 /// <n x i32> <i32 n/4, i32 n/4 + 1, ..., i32 n/2, i32 undef, ..., i32 undef>
1585 /// \----------------v-------------/ \----------v------------/
1586 /// n/4 elements 3*n/4 elements
1587 /// %red2 = op <n x t> %red1, <n x t> val2 - working with the vector of
1588 /// length n/2, the resulting vector has length n/4 etc.
1589 /// 2. Pairwise reduction:
1590 /// Everything is the same except for an additional shuffle operation which
1591 /// is used to produce operands for pairwise kind of reductions.
1592 /// %val1 = shufflevector<n x t> %val, <n x t> %undef,
1593 /// <n x i32> <i32 0, i32 2, ..., i32 n-2, i32 undef, ..., i32 undef>
1594 /// \-------------v----------/ \----------v------------/
1595 /// n/2 elements n/2 elements
1596 /// %val2 = shufflevector<n x t> %val, <n x t> %undef,
1597 /// <n x i32> <i32 1, i32 3, ..., i32 n-1, i32 undef, ..., i32 undef>
1598 /// \-------------v----------/ \----------v------------/
1599 /// n/2 elements n/2 elements
1600 /// %red1 = op <n x t> %val1, <n x t> val2
1601 /// Again, the operation is performed on <n x t> vector, but the resulting
1602 /// vector %red1 is <n/2 x t> vector.
1603 ///
1604 /// The cost model should take into account that the actual length of the
1605 /// vector is reduced on each iteration.
1606 unsigned getArithmeticReductionCost(unsigned Opcode, Type *Ty,
1607 bool IsPairwise) {
1608 assert(Ty->isVectorTy() && "Expect a vector type");
1609 Type *ScalarTy = Ty->getVectorElementType();
1610 unsigned NumVecElts = Ty->getVectorNumElements();
1611 unsigned NumReduxLevels = Log2_32(NumVecElts);
1612 unsigned ArithCost = 0;
1613 unsigned ShuffleCost = 0;
1614 auto *ConcreteTTI = static_cast<T *>(this);
1615 std::pair<unsigned, MVT> LT =
1616 ConcreteTTI->getTLI()->getTypeLegalizationCost(DL, Ty);
1617 unsigned LongVectorCount = 0;
1618 unsigned MVTLen =
1619 LT.second.isVector() ? LT.second.getVectorNumElements() : 1;
1620 while (NumVecElts > MVTLen) {
1621 NumVecElts /= 2;
1622 Type *SubTy = VectorType::get(ScalarTy, NumVecElts);
1623 // Assume the pairwise shuffles add a cost.
1624 ShuffleCost += (IsPairwise + 1) *
1625 ConcreteTTI->getShuffleCost(TTI::SK_ExtractSubvector, Ty,
1626 NumVecElts, SubTy);
1627 ArithCost += ConcreteTTI->getArithmeticInstrCost(Opcode, SubTy);
1628 Ty = SubTy;
1629 ++LongVectorCount;
1630 }
1631
1632 NumReduxLevels -= LongVectorCount;
1633
1634 // The minimal length of the vector is limited by the real length of vector
1635 // operations supported by the current platform. That's why several final
1636 // reduction operations are performed on vectors of the same
1637 // architecture-dependent length.
1638
1639 // Non-pairwise reductions need one shuffle per reduction level. Pairwise
1640 // reductions need two shuffles on every level except the last one; on that
1641 // level one of the shuffles is <0, u, u, ...>, which is the identity.
1642 unsigned NumShuffles = NumReduxLevels;
1643 if (IsPairwise && NumReduxLevels >= 1)
1644 NumShuffles += NumReduxLevels - 1;
1645 ShuffleCost += NumShuffles *
1646 ConcreteTTI->getShuffleCost(TTI::SK_PermuteSingleSrc, Ty,
1647 0, Ty);
1648 ArithCost += NumReduxLevels *
1649 ConcreteTTI->getArithmeticInstrCost(Opcode, Ty);
1650 return ShuffleCost + ArithCost +
1651 ConcreteTTI->getVectorInstrCost(Instruction::ExtractElement, Ty, 0);
1652 }
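// Worked sketch (hypothetical target, placeholder per-op costs of 1;
// Log2_32 comes from llvm/Support/MathExtras.h): how the level counting above
// plays out for a non-pairwise fadd reduction of <8 x float> when the widest
// legal vector is <4 x float>, i.e. MVTLen == 4.
//   Split phase:  8 -> 4 elements gives 1 shuffle + 1 fadd, LongVectorCount == 1
//   Legal phase:  NumReduxLevels == Log2(8) - 1 == 2, so 2 shuffles + 2 fadds
//   Final value:  one extractelement of lane 0
static unsigned reductionCostSketch(unsigned NumVecElts, unsigned MVTLen,
                                    bool IsPairwise) {
  unsigned ReduxLevels = Log2_32(NumVecElts);
  unsigned Cost = 0, LongVectorCount = 0;
  while (NumVecElts > MVTLen) {               // split down to the legal width
    NumVecElts /= 2;
    Cost += (IsPairwise + 1) /*shuffle(s)*/ + 1 /*arith op*/;
    ++LongVectorCount;
  }
  ReduxLevels -= LongVectorCount;
  unsigned NumShuffles = ReduxLevels;
  if (IsPairwise && ReduxLevels >= 1)
    NumShuffles += ReduxLevels - 1;
  Cost += NumShuffles + ReduxLevels /*arith ops*/ + 1 /*extractelement*/;
  return Cost;
}
// reductionCostSketch(8, 4, /*IsPairwise=*/false) == 2 + 2 + 2 + 1 == 7.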
1653
1654 /// Try to calculate op costs for min/max reduction operations.
1655 /// \param CondTy Conditional type for the Select instruction.
1656 unsigned getMinMaxReductionCost(Type *Ty, Type *CondTy, bool IsPairwise,
1657 bool) {
1658 assert(Ty->isVectorTy() && "Expect a vector type");
1659 Type *ScalarTy = Ty->getVectorElementType();
1660 Type *ScalarCondTy = CondTy->getVectorElementType();
1661 unsigned NumVecElts = Ty->getVectorNumElements();
1662 unsigned NumReduxLevels = Log2_32(NumVecElts);
1663 unsigned CmpOpcode;
1664 if (Ty->isFPOrFPVectorTy()) {
1665 CmpOpcode = Instruction::FCmp;
1666 } else {
1667 assert(Ty->isIntOrIntVectorTy() &&
1668 "expecting floating point or integer type for min/max reduction");
1669 CmpOpcode = Instruction::ICmp;
1670 }
1671 unsigned MinMaxCost = 0;
1672 unsigned ShuffleCost = 0;
1673 auto *ConcreteTTI = static_cast<T *>(this);
1674 std::pair<unsigned, MVT> LT =
1675 ConcreteTTI->getTLI()->getTypeLegalizationCost(DL, Ty);
1676 unsigned LongVectorCount = 0;
1677 unsigned MVTLen =
1678 LT.second.isVector() ? LT.second.getVectorNumElements() : 1;
1679 while (NumVecElts > MVTLen) {
1680 NumVecElts /= 2;
1681 Type *SubTy = VectorType::get(ScalarTy, NumVecElts);
1682 CondTy = VectorType::get(ScalarCondTy, NumVecElts);
1683
1684 // Assume the pairwise shuffles add a cost.
1685 ShuffleCost += (IsPairwise + 1) *
1686 ConcreteTTI->getShuffleCost(TTI::SK_ExtractSubvector, Ty,
1687 NumVecElts, SubTy);
1688 MinMaxCost +=
1689 ConcreteTTI->getCmpSelInstrCost(CmpOpcode, SubTy, CondTy, nullptr) +
1690 ConcreteTTI->getCmpSelInstrCost(Instruction::Select, SubTy, CondTy,
1691 nullptr);
1692 Ty = SubTy;
1693 ++LongVectorCount;
1694 }
1695
1696 NumReduxLevels -= LongVectorCount;
1697
1698 // The minimal length of the vector is limited by the real length of vector
1699 // operations supported by the current platform. That's why several final
1700 // reduction operations are performed on vectors of the same
1701 // architecture-dependent length.
1702
1703 // Non-pairwise reductions need one shuffle per reduction level. Pairwise
1704 // reductions need two shuffles on every level except the last one; on that
1705 // level one of the shuffles is <0, u, u, ...>, which is the identity.
1706 unsigned NumShuffles = NumReduxLevels;
1707 if (IsPairwise && NumReduxLevels >= 1)
1708 NumShuffles += NumReduxLevels - 1;
1709 ShuffleCost += NumShuffles *
1710 ConcreteTTI->getShuffleCost(TTI::SK_PermuteSingleSrc, Ty,
1711 0, Ty);
1712 MinMaxCost +=
1713 NumReduxLevels *
1714 (ConcreteTTI->getCmpSelInstrCost(CmpOpcode, Ty, CondTy, nullptr) +
1715 ConcreteTTI->getCmpSelInstrCost(Instruction::Select, Ty, CondTy,
1716 nullptr));
1717 // The last min/max was already counted above and its result stays in a
1718 // vector register, so we only need a single extractelement.
1719 return ShuffleCost + MinMaxCost +
1720 ConcreteTTI->getVectorInstrCost(Instruction::ExtractElement, Ty, 0);
1721 }
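// Companion sketch to the one above (placeholder costs, not real target
// numbers): min/max reductions follow the same shuffle and level structure,
// but each remaining level pays for a compare *and* a select instead of a
// single arithmetic op; the shuffles and the final extractelement are counted
// as in the arithmetic case.
static unsigned minMaxLevelCostSketch(unsigned ReduxLevels) {
  const unsigned CmpCost = 1, SelCost = 1;   // hypothetical per-op costs
  return ReduxLevels * (CmpCost + SelCost);
}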
1722
1723 unsigned getVectorSplitCost() { return 1; }
1724
1725 /// @}
1726};
1727
1728/// Concrete BasicTTIImpl that can be used if no further customization
1729/// is needed.
1730class BasicTTIImpl : public BasicTTIImplBase<BasicTTIImpl> {
1731 using BaseT = BasicTTIImplBase<BasicTTIImpl>;
1732
1733 friend class BasicTTIImplBase<BasicTTIImpl>;
1734
1735 const TargetSubtargetInfo *ST;
1736 const TargetLoweringBase *TLI;
1737
1738 const TargetSubtargetInfo *getST() const { return ST; }
1739 const TargetLoweringBase *getTLI() const { return TLI; }
1740
1741public:
1742 explicit BasicTTIImpl(const TargetMachine *TM, const Function &F);
1743};
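// Illustrative, self-contained sketch (the class and method names here are
// made up, not real LLVM API) of the CRTP pattern used above: the base calls
// static_cast<T *>(this) so a derived class's override is picked up without
// virtual dispatch, and anything the derived class does not provide falls
// back to the base implementation, exactly as BasicTTIImpl relies on
// BasicTTIImplBase.
template <typename T> class CostModelBase {
public:
  unsigned getWidgetCost() {
    // Dispatch to the most-derived implementation, as BasicTTIImplBase does
    // via ConcreteTTI = static_cast<T *>(this).
    return static_cast<T *>(this)->getWidgetCostImpl();
  }
  unsigned getWidgetCostImpl() { return 0; }   // default cost
};

class MyTargetCostModel : public CostModelBase<MyTargetCostModel> {
public:
  unsigned getWidgetCostImpl() { return 4; }   // target-specific override
};
// MyTargetCostModel().getWidgetCost() returns 4; a derived class that keeps
// the default getWidgetCostImpl() would get 0.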
1744
1745} // end namespace llvm
1746
1747#endif // LLVM_CODEGEN_BASICTTIIMPL_H