LLVM 19.0.0git
NVPTXTargetTransformInfo.cpp
Go to the documentation of this file.
1//===-- NVPTXTargetTransformInfo.cpp - NVPTX specific TTI -----------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
10#include "NVPTXUtilities.h"
17#include "llvm/IR/IntrinsicsNVPTX.h"
18#include "llvm/Support/Debug.h"
19#include <optional>
20using namespace llvm;
21
22#define DEBUG_TYPE "NVPTXtti"
23
24// Whether the given intrinsic reads threadIdx.x/y/z.
25static bool readsThreadIndex(const IntrinsicInst *II) {
26 switch (II->getIntrinsicID()) {
27 default: return false;
28 case Intrinsic::nvvm_read_ptx_sreg_tid_x:
29 case Intrinsic::nvvm_read_ptx_sreg_tid_y:
30 case Intrinsic::nvvm_read_ptx_sreg_tid_z:
31 return true;
32 }
33}
34
35static bool readsLaneId(const IntrinsicInst *II) {
36 return II->getIntrinsicID() == Intrinsic::nvvm_read_ptx_sreg_laneid;
37}
38
39// Whether the given intrinsic is an atomic instruction in PTX.
40static bool isNVVMAtomic(const IntrinsicInst *II) {
41 switch (II->getIntrinsicID()) {
42 default: return false;
43 case Intrinsic::nvvm_atomic_load_inc_32:
44 case Intrinsic::nvvm_atomic_load_dec_32:
45
46 case Intrinsic::nvvm_atomic_add_gen_f_cta:
47 case Intrinsic::nvvm_atomic_add_gen_f_sys:
48 case Intrinsic::nvvm_atomic_add_gen_i_cta:
49 case Intrinsic::nvvm_atomic_add_gen_i_sys:
50 case Intrinsic::nvvm_atomic_and_gen_i_cta:
51 case Intrinsic::nvvm_atomic_and_gen_i_sys:
52 case Intrinsic::nvvm_atomic_cas_gen_i_cta:
53 case Intrinsic::nvvm_atomic_cas_gen_i_sys:
54 case Intrinsic::nvvm_atomic_dec_gen_i_cta:
55 case Intrinsic::nvvm_atomic_dec_gen_i_sys:
56 case Intrinsic::nvvm_atomic_inc_gen_i_cta:
57 case Intrinsic::nvvm_atomic_inc_gen_i_sys:
58 case Intrinsic::nvvm_atomic_max_gen_i_cta:
59 case Intrinsic::nvvm_atomic_max_gen_i_sys:
60 case Intrinsic::nvvm_atomic_min_gen_i_cta:
61 case Intrinsic::nvvm_atomic_min_gen_i_sys:
62 case Intrinsic::nvvm_atomic_or_gen_i_cta:
63 case Intrinsic::nvvm_atomic_or_gen_i_sys:
64 case Intrinsic::nvvm_atomic_exch_gen_i_cta:
65 case Intrinsic::nvvm_atomic_exch_gen_i_sys:
66 case Intrinsic::nvvm_atomic_xor_gen_i_cta:
67 case Intrinsic::nvvm_atomic_xor_gen_i_sys:
68 return true;
69 }
70}
71
73 // Without inter-procedural analysis, we conservatively assume that arguments
74 // to __device__ functions are divergent.
75 if (const Argument *Arg = dyn_cast<Argument>(V))
76 return !isKernelFunction(*Arg->getParent());
77
78 if (const Instruction *I = dyn_cast<Instruction>(V)) {
79 // Without pointer analysis, we conservatively assume values loaded from
80 // generic or local address space are divergent.
81 if (const LoadInst *LI = dyn_cast<LoadInst>(I)) {
82 unsigned AS = LI->getPointerAddressSpace();
83 return AS == ADDRESS_SPACE_GENERIC || AS == ADDRESS_SPACE_LOCAL;
84 }
85 // Atomic instructions may cause divergence. Atomic instructions are
86 // executed sequentially across all threads in a warp. Therefore, an earlier
87 // executed thread may see different memory inputs than a later executed
88 // thread. For example, suppose *a = 0 initially.
89 //
90 // atom.global.add.s32 d, [a], 1
91 //
92 // returns 0 for the first thread that enters the critical region, and 1 for
93 // the second thread.
94 if (I->isAtomic())
95 return true;
96 if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
97 // Instructions that read threadIdx are obviously divergent.
99 return true;
100 // Handle the NVPTX atomic intrinsics that cannot be represented as an
101 // atomic IR instruction.
102 if (isNVVMAtomic(II))
103 return true;
104 }
105 // Conservatively consider the return value of function calls as divergent.
106 // We could analyze callees with bodies more precisely using
107 // inter-procedural analysis.
108 if (isa<CallInst>(I))
109 return true;
110 }
111
112 return false;
113}
114
115// Convert NVVM intrinsics to target-generic LLVM code where possible.
117 // Each NVVM intrinsic we can simplify can be replaced with one of:
118 //
119 // * an LLVM intrinsic,
120 // * an LLVM cast operation,
121 // * an LLVM binary operation, or
122 // * ad-hoc LLVM IR for the particular operation.
123
124 // Some transformations are only valid when the module's
125 // flush-denormals-to-zero (ftz) setting is true/false, whereas other
126 // transformations are valid regardless of the module's ftz setting.
127 enum FtzRequirementTy {
128 FTZ_Any, // Any ftz setting is ok.
129 FTZ_MustBeOn, // Transformation is valid only if ftz is on.
130 FTZ_MustBeOff, // Transformation is valid only if ftz is off.
131 };
132 // Classes of NVVM intrinsics that can't be replaced one-to-one with a
133 // target-generic intrinsic, cast op, or binary op but that we can nonetheless
134 // simplify.
135 enum SpecialCase {
136 SPC_Reciprocal,
137 };
138
139 // SimplifyAction is a poor-man's variant (plus an additional flag) that
140 // represents how to replace an NVVM intrinsic with target-generic LLVM IR.
141 struct SimplifyAction {
142 // Invariant: At most one of these Optionals has a value.
143 std::optional<Intrinsic::ID> IID;
144 std::optional<Instruction::CastOps> CastOp;
145 std::optional<Instruction::BinaryOps> BinaryOp;
146 std::optional<SpecialCase> Special;
147
148 FtzRequirementTy FtzRequirement = FTZ_Any;
149 // Denormal handling is guarded by different attributes depending on the
150 // type (denormal-fp-math vs denormal-fp-math-f32), take note of halfs.
151 bool IsHalfTy = false;
152
153 SimplifyAction() = default;
154
155 SimplifyAction(Intrinsic::ID IID, FtzRequirementTy FtzReq,
156 bool IsHalfTy = false)
157 : IID(IID), FtzRequirement(FtzReq), IsHalfTy(IsHalfTy) {}
158
159 // Cast operations don't have anything to do with FTZ, so we skip that
160 // argument.
161 SimplifyAction(Instruction::CastOps CastOp) : CastOp(CastOp) {}
162
163 SimplifyAction(Instruction::BinaryOps BinaryOp, FtzRequirementTy FtzReq)
164 : BinaryOp(BinaryOp), FtzRequirement(FtzReq) {}
165
166 SimplifyAction(SpecialCase Special, FtzRequirementTy FtzReq)
167 : Special(Special), FtzRequirement(FtzReq) {}
168 };
169
170 // Try to generate a SimplifyAction describing how to replace our
171 // IntrinsicInstr with target-generic LLVM IR.
172 const SimplifyAction Action = [II]() -> SimplifyAction {
173 switch (II->getIntrinsicID()) {
174 // NVVM intrinsics that map directly to LLVM intrinsics.
175 case Intrinsic::nvvm_ceil_d:
176 return {Intrinsic::ceil, FTZ_Any};
177 case Intrinsic::nvvm_ceil_f:
178 return {Intrinsic::ceil, FTZ_MustBeOff};
179 case Intrinsic::nvvm_ceil_ftz_f:
180 return {Intrinsic::ceil, FTZ_MustBeOn};
181 case Intrinsic::nvvm_fabs_d:
182 return {Intrinsic::fabs, FTZ_Any};
183 case Intrinsic::nvvm_floor_d:
184 return {Intrinsic::floor, FTZ_Any};
185 case Intrinsic::nvvm_floor_f:
186 return {Intrinsic::floor, FTZ_MustBeOff};
187 case Intrinsic::nvvm_floor_ftz_f:
188 return {Intrinsic::floor, FTZ_MustBeOn};
189 case Intrinsic::nvvm_fma_rn_d:
190 return {Intrinsic::fma, FTZ_Any};
191 case Intrinsic::nvvm_fma_rn_f:
192 return {Intrinsic::fma, FTZ_MustBeOff};
193 case Intrinsic::nvvm_fma_rn_ftz_f:
194 return {Intrinsic::fma, FTZ_MustBeOn};
195 case Intrinsic::nvvm_fma_rn_f16:
196 return {Intrinsic::fma, FTZ_MustBeOff, true};
197 case Intrinsic::nvvm_fma_rn_ftz_f16:
198 return {Intrinsic::fma, FTZ_MustBeOn, true};
199 case Intrinsic::nvvm_fma_rn_f16x2:
200 return {Intrinsic::fma, FTZ_MustBeOff, true};
201 case Intrinsic::nvvm_fma_rn_ftz_f16x2:
202 return {Intrinsic::fma, FTZ_MustBeOn, true};
203 case Intrinsic::nvvm_fma_rn_bf16:
204 return {Intrinsic::fma, FTZ_MustBeOff, true};
205 case Intrinsic::nvvm_fma_rn_ftz_bf16:
206 return {Intrinsic::fma, FTZ_MustBeOn, true};
207 case Intrinsic::nvvm_fma_rn_bf16x2:
208 return {Intrinsic::fma, FTZ_MustBeOff, true};
209 case Intrinsic::nvvm_fma_rn_ftz_bf16x2:
210 return {Intrinsic::fma, FTZ_MustBeOn, true};
211 case Intrinsic::nvvm_fmax_d:
212 return {Intrinsic::maxnum, FTZ_Any};
213 case Intrinsic::nvvm_fmax_f:
214 return {Intrinsic::maxnum, FTZ_MustBeOff};
215 case Intrinsic::nvvm_fmax_ftz_f:
216 return {Intrinsic::maxnum, FTZ_MustBeOn};
217 case Intrinsic::nvvm_fmax_nan_f:
218 return {Intrinsic::maximum, FTZ_MustBeOff};
219 case Intrinsic::nvvm_fmax_ftz_nan_f:
220 return {Intrinsic::maximum, FTZ_MustBeOn};
221 case Intrinsic::nvvm_fmax_f16:
222 return {Intrinsic::maxnum, FTZ_MustBeOff, true};
223 case Intrinsic::nvvm_fmax_ftz_f16:
224 return {Intrinsic::maxnum, FTZ_MustBeOn, true};
225 case Intrinsic::nvvm_fmax_f16x2:
226 return {Intrinsic::maxnum, FTZ_MustBeOff, true};
227 case Intrinsic::nvvm_fmax_ftz_f16x2:
228 return {Intrinsic::maxnum, FTZ_MustBeOn, true};
229 case Intrinsic::nvvm_fmax_nan_f16:
230 return {Intrinsic::maximum, FTZ_MustBeOff, true};
231 case Intrinsic::nvvm_fmax_ftz_nan_f16:
232 return {Intrinsic::maximum, FTZ_MustBeOn, true};
233 case Intrinsic::nvvm_fmax_nan_f16x2:
234 return {Intrinsic::maximum, FTZ_MustBeOff, true};
235 case Intrinsic::nvvm_fmax_ftz_nan_f16x2:
236 return {Intrinsic::maximum, FTZ_MustBeOn, true};
237 case Intrinsic::nvvm_fmin_d:
238 return {Intrinsic::minnum, FTZ_Any};
239 case Intrinsic::nvvm_fmin_f:
240 return {Intrinsic::minnum, FTZ_MustBeOff};
241 case Intrinsic::nvvm_fmin_ftz_f:
242 return {Intrinsic::minnum, FTZ_MustBeOn};
243 case Intrinsic::nvvm_fmin_nan_f:
244 return {Intrinsic::minimum, FTZ_MustBeOff};
245 case Intrinsic::nvvm_fmin_ftz_nan_f:
246 return {Intrinsic::minimum, FTZ_MustBeOn};
247 case Intrinsic::nvvm_fmin_f16:
248 return {Intrinsic::minnum, FTZ_MustBeOff, true};
249 case Intrinsic::nvvm_fmin_ftz_f16:
250 return {Intrinsic::minnum, FTZ_MustBeOn, true};
251 case Intrinsic::nvvm_fmin_f16x2:
252 return {Intrinsic::minnum, FTZ_MustBeOff, true};
253 case Intrinsic::nvvm_fmin_ftz_f16x2:
254 return {Intrinsic::minnum, FTZ_MustBeOn, true};
255 case Intrinsic::nvvm_fmin_nan_f16:
256 return {Intrinsic::minimum, FTZ_MustBeOff, true};
257 case Intrinsic::nvvm_fmin_ftz_nan_f16:
258 return {Intrinsic::minimum, FTZ_MustBeOn, true};
259 case Intrinsic::nvvm_fmin_nan_f16x2:
260 return {Intrinsic::minimum, FTZ_MustBeOff, true};
261 case Intrinsic::nvvm_fmin_ftz_nan_f16x2:
262 return {Intrinsic::minimum, FTZ_MustBeOn, true};
263 case Intrinsic::nvvm_sqrt_rn_d:
264 return {Intrinsic::sqrt, FTZ_Any};
265 case Intrinsic::nvvm_sqrt_f:
266 // nvvm_sqrt_f is a special case. For most intrinsics, foo_ftz_f is the
267 // ftz version, and foo_f is the non-ftz version. But nvvm_sqrt_f adopts
268 // the ftz-ness of the surrounding code. sqrt_rn_f and sqrt_rn_ftz_f are
269 // the versions with explicit ftz-ness.
270 return {Intrinsic::sqrt, FTZ_Any};
271 case Intrinsic::nvvm_trunc_d:
272 return {Intrinsic::trunc, FTZ_Any};
273 case Intrinsic::nvvm_trunc_f:
274 return {Intrinsic::trunc, FTZ_MustBeOff};
275 case Intrinsic::nvvm_trunc_ftz_f:
276 return {Intrinsic::trunc, FTZ_MustBeOn};
277
278 // NVVM intrinsics that map to LLVM cast operations.
279 //
280 // Note that llvm's target-generic conversion operators correspond to the rz
281 // (round to zero) versions of the nvvm conversion intrinsics, even though
282 // most everything else here uses the rn (round to nearest even) nvvm ops.
283 case Intrinsic::nvvm_d2i_rz:
284 case Intrinsic::nvvm_f2i_rz:
285 case Intrinsic::nvvm_d2ll_rz:
286 case Intrinsic::nvvm_f2ll_rz:
287 return {Instruction::FPToSI};
288 case Intrinsic::nvvm_d2ui_rz:
289 case Intrinsic::nvvm_f2ui_rz:
290 case Intrinsic::nvvm_d2ull_rz:
291 case Intrinsic::nvvm_f2ull_rz:
292 return {Instruction::FPToUI};
293 case Intrinsic::nvvm_i2d_rz:
294 case Intrinsic::nvvm_i2f_rz:
295 case Intrinsic::nvvm_ll2d_rz:
296 case Intrinsic::nvvm_ll2f_rz:
297 return {Instruction::SIToFP};
298 case Intrinsic::nvvm_ui2d_rz:
299 case Intrinsic::nvvm_ui2f_rz:
300 case Intrinsic::nvvm_ull2d_rz:
301 case Intrinsic::nvvm_ull2f_rz:
302 return {Instruction::UIToFP};
303
304 // NVVM intrinsics that map to LLVM binary ops.
305 case Intrinsic::nvvm_div_rn_d:
306 return {Instruction::FDiv, FTZ_Any};
307
308 // The remainder of cases are NVVM intrinsics that map to LLVM idioms, but
309 // need special handling.
310 //
311 // We seem to be missing intrinsics for rcp.approx.{ftz.}f32, which is just
312 // as well.
313 case Intrinsic::nvvm_rcp_rn_d:
314 return {SPC_Reciprocal, FTZ_Any};
315
316 // We do not currently simplify intrinsics that give an approximate
317 // answer. These include:
318 //
319 // - nvvm_cos_approx_{f,ftz_f}
320 // - nvvm_ex2_approx_{d,f,ftz_f}
321 // - nvvm_lg2_approx_{d,f,ftz_f}
322 // - nvvm_sin_approx_{f,ftz_f}
323 // - nvvm_sqrt_approx_{f,ftz_f}
324 // - nvvm_rsqrt_approx_{d,f,ftz_f}
325 // - nvvm_div_approx_{ftz_d,ftz_f,f}
326 // - nvvm_rcp_approx_ftz_d
327 //
328 // Ideally we'd encode them as e.g. "fast call @llvm.cos", where "fast"
329 // means that fastmath is enabled in the intrinsic. Unfortunately only
330 // binary operators (currently) have a fastmath bit in SelectionDAG, so
331 // this information gets lost and we can't select on it.
332 //
333 // TODO: div and rcp are lowered to a binary op, so these we could in
334 // theory lower them to "fast fdiv".
335
336 default:
337 return {};
338 }
339 }();
340
341 // If Action.FtzRequirementTy is not satisfied by the module's ftz state, we
342 // can bail out now. (Notice that in the case that IID is not an NVVM
343 // intrinsic, we don't have to look up any module metadata, as
344 // FtzRequirementTy will be FTZ_Any.)
345 if (Action.FtzRequirement != FTZ_Any) {
346 // FIXME: Broken for f64
347 DenormalMode Mode = II->getFunction()->getDenormalMode(
348 Action.IsHalfTy ? APFloat::IEEEhalf() : APFloat::IEEEsingle());
349 bool FtzEnabled = Mode.Output == DenormalMode::PreserveSign;
350
351 if (FtzEnabled != (Action.FtzRequirement == FTZ_MustBeOn))
352 return nullptr;
353 }
354
355 // Simplify to target-generic intrinsic.
356 if (Action.IID) {
357 SmallVector<Value *, 4> Args(II->args());
358 // All the target-generic intrinsics currently of interest to us have one
359 // type argument, equal to that of the nvvm intrinsic's argument.
360 Type *Tys[] = {II->getArgOperand(0)->getType()};
361 return CallInst::Create(
362 Intrinsic::getDeclaration(II->getModule(), *Action.IID, Tys), Args);
363 }
364
365 // Simplify to target-generic binary op.
366 if (Action.BinaryOp)
367 return BinaryOperator::Create(*Action.BinaryOp, II->getArgOperand(0),
368 II->getArgOperand(1), II->getName());
369
370 // Simplify to target-generic cast op.
371 if (Action.CastOp)
372 return CastInst::Create(*Action.CastOp, II->getArgOperand(0), II->getType(),
373 II->getName());
374
375 // All that's left are the special cases.
376 if (!Action.Special)
377 return nullptr;
378
379 switch (*Action.Special) {
380 case SPC_Reciprocal:
381 // Simplify reciprocal.
383 Instruction::FDiv, ConstantFP::get(II->getArgOperand(0)->getType(), 1),
384 II->getArgOperand(0), II->getName());
385 }
386 llvm_unreachable("All SpecialCase enumerators should be handled in switch.");
387}
388
389std::optional<Instruction *>
391 if (Instruction *I = simplifyNvvmIntrinsic(&II, IC)) {
392 return I;
393 }
394 return std::nullopt;
395}
396
398 unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
401 const Instruction *CxtI) {
402 // Legalize the type.
403 std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Ty);
404
405 int ISD = TLI->InstructionOpcodeToISD(Opcode);
406
407 switch (ISD) {
408 default:
409 return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info,
410 Op2Info);
411 case ISD::ADD:
412 case ISD::MUL:
413 case ISD::XOR:
414 case ISD::OR:
415 case ISD::AND:
416 // The machine code (SASS) simulates an i64 with two i32. Therefore, we
417 // estimate that arithmetic operations on i64 are twice as expensive as
418 // those on types that can fit into one machine register.
419 if (LT.second.SimpleTy == MVT::i64)
420 return 2 * LT.first;
421 // Delegate other cases to the basic TTI.
422 return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info,
423 Op2Info);
424 }
425}
426
430 BaseT::getUnrollingPreferences(L, SE, UP, ORE);
431
432 // Enable partial unrolling and runtime unrolling, but reduce the
433 // threshold. This partially unrolls small loops which are often
434 // unrolled by the PTX to SASS compiler and unrolling earlier can be
435 // beneficial.
436 UP.Partial = UP.Runtime = true;
437 UP.PartialThreshold = UP.Threshold / 4;
438}
439
443}
This file provides a helper that implements much of the TTI interface in terms of the target-independ...
static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))
Cost tables and simple lookup functions.
#define I(x, y, z)
Definition: MD5.cpp:58
static Instruction * simplifyNvvmIntrinsic(IntrinsicInst *II, InstCombiner &IC)
static bool isNVVMAtomic(const IntrinsicInst *II)
static bool readsLaneId(const IntrinsicInst *II)
static bool readsThreadIndex(const IntrinsicInst *II)
This file a TargetTransformInfo::Concept conforming object specific to the NVPTX target machine.
uint64_t IntrinsicInst * II
This file describes how to lower LLVM code to machine code.
This pass exposes codegen information to IR-level passes.
This class represents an incoming formal argument to a Function.
Definition: Argument.h:31
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE)
Definition: BasicTTIImpl.h:588
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP)
Definition: BasicTTIImpl.h:660
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args=std::nullopt, const Instruction *CxtI=nullptr)
Definition: BasicTTIImpl.h:897
std::pair< InstructionCost, MVT > getTypeLegalizationCost(Type *Ty) const
Estimate the cost of type-legalization and the legalized type.
Definition: BasicTTIImpl.h:861
static BinaryOperator * Create(BinaryOps Op, Value *S1, Value *S2, const Twine &Name=Twine(), InsertPosition InsertBefore=nullptr)
Construct a binary instruction, given the opcode and the two operands.
static CallInst * Create(FunctionType *Ty, Value *F, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
static CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
The core instruction combiner logic.
Definition: InstCombiner.h:47
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:48
An instruction for reading from memory.
Definition: Instructions.h:173
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:44
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE)
std::optional< Instruction * > instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP)
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args=std::nullopt, const Instruction *CxtI=nullptr)
bool isSourceOfDivergence(const Value *V)
The optimization diagnostic interface.
The main scalar evolution driver.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
int InstructionOpcodeToISD(unsigned Opcode) const
Get the ISD node that corresponds to the Instruction class opcode.
TargetCostKind
The kind of cost model.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
LLVM Value Representation.
Definition: Value.h:74
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:240
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:688
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=std::nullopt)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1474
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ ADDRESS_SPACE_GENERIC
Definition: NVPTXBaseInfo.h:22
@ ADDRESS_SPACE_LOCAL
Definition: NVPTXBaseInfo.h:26
bool isKernelFunction(const Function &F)
static const fltSemantics & IEEEsingle() LLVM_READNONE
Definition: APFloat.cpp:271
static const fltSemantics & IEEEhalf() LLVM_READNONE
Definition: APFloat.cpp:269
Represent subnormal handling kind for floating point instruction inputs and outputs.
@ PreserveSign
The sign of a flushed-to-zero number is preserved in the sign of 0.
Parameters that control the generic loop unrolling transformation.
unsigned Threshold
The cost threshold for the unrolled loop.
unsigned PartialThreshold
The cost threshold for the unrolled loop, like Threshold, but used for partial/runtime unrolling (set...
bool Runtime
Allow runtime unrolling (unrolling of loops to expand the size of the loop body even when the number ...
bool Partial
Allow partial unrolling (unrolling of loops to expand the size of the loop body, not only to eliminat...