LLVM 23.0.0git
AMDGPUTargetTransformInfo.h
Go to the documentation of this file.
1//===- AMDGPUTargetTransformInfo.h - AMDGPU specific TTI --------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// This file declares a TargetTransformInfoImplBase conforming object specific to the
11/// AMDGPU target machine. It uses the target's detailed information to
12/// provide more precise answers to certain TTI queries, while letting the
13/// target independent and default TTI implementations handle the rest.
14//
15//===----------------------------------------------------------------------===//
16
17#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H
18#define LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H
19
20#include "AMDGPU.h"
23#include <optional>
24
25namespace llvm {
26
28class GCNSubtarget;
29class InstCombiner;
30class Loop;
31class ScalarEvolution;
33class Type;
34class Value;
35
36class AMDGPUTTIImpl final : public BasicTTIImplBase<AMDGPUTTIImpl> {
38 using TTI = TargetTransformInfo;
39
40 friend BaseT;
41
42 Triple TargetTriple;
43
44 const TargetSubtargetInfo *ST;
45 const TargetLoweringBase *TLI;
46
47 const TargetSubtargetInfo *getST() const { return ST; }
48 const TargetLoweringBase *getTLI() const { return TLI; }
49
50public:
51 explicit AMDGPUTTIImpl(const AMDGPUTargetMachine *TM, const Function &F);
52
55 OptimizationRemarkEmitter *ORE) const override;
56
58 TTI::PeelingPreferences &PP) const override;
59
61};
62
63class GCNTTIImpl final : public BasicTTIImplBase<GCNTTIImpl> {
64 using BaseT = BasicTTIImplBase<GCNTTIImpl>;
65 using TTI = TargetTransformInfo;
66
67 friend BaseT;
68
69 const GCNSubtarget *ST;
70 const SITargetLowering *TLI;
71 AMDGPUTTIImpl CommonTTI;
72 bool IsGraphics;
73 bool HasFP32Denormals;
74 bool HasFP64FP16Denormals;
// Value handed back by getInlinerVectorBonusPercent(). It is a percentage, so
// it is typed int to match that function's return type; as a bool, any
// non-zero percentage would silently collapse to 1.
static constexpr int InlinerVectorBonusPercent = 0;
76
77 static const FeatureBitset InlineFeatureIgnoreList;
78
79 const GCNSubtarget *getST() const { return ST; }
80 const SITargetLowering *getTLI() const { return TLI; }
81
82 static inline int getFullRateInstrCost() {
84 }
85
86 static inline int getHalfRateInstrCost(TTI::TargetCostKind CostKind) {
87 return CostKind == TTI::TCK_CodeSize ? 2
89 }
90
91 // TODO: The size is usually 8 bytes, but takes 4x as many cycles. Maybe
92 // should be 2 or 4.
93 static inline int getQuarterRateInstrCost(TTI::TargetCostKind CostKind) {
94 return CostKind == TTI::TCK_CodeSize ? 2
96 }
97
98 int getTransInstrCost(TTI::TargetCostKind CostKind) const;
99
100 // On some parts, normal fp64 operations are half rate, and others
101 // quarter. This also applies to some integer operations.
102 int get64BitInstrCost(TTI::TargetCostKind CostKind) const;
103
104 std::pair<InstructionCost, MVT> getTypeLegalizationCost(Type *Ty) const;
105
106 /// \returns true if V might be divergent even when all of its operands
107 /// are uniform.
108 bool isSourceOfDivergence(const Value *V) const;
109
110 /// Returns true for the target-specific set of operations which produce a
111 /// uniform result even when given non-uniform arguments.
112 bool isAlwaysUniform(const Value *V) const;
113
114public:
115 explicit GCNTTIImpl(const AMDGPUTargetMachine *TM, const Function &F);
116
117 bool hasBranchDivergence(const Function *F = nullptr) const override;
118
121 OptimizationRemarkEmitter *ORE) const override;
122
124 TTI::PeelingPreferences &PP) const override;
125
126 TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth) const override {
127 assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
129 }
130
131 unsigned getNumberOfRegisters(unsigned RCID) const override;
134 unsigned getMinVectorRegisterBitWidth() const override;
135 unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const override;
136 bool preferSLPInstCountCheck() const override;
137 unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
138 unsigned ChainSizeInBytes,
139 VectorType *VecTy) const override;
140 unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
141 unsigned ChainSizeInBytes,
142 VectorType *VecTy) const override;
143 unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const override;
144
145 bool isLegalToVectorizeMemChain(unsigned ChainSizeInBytes, Align Alignment,
146 unsigned AddrSpace) const;
147 bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment,
148 unsigned AddrSpace) const override;
149 bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment,
150 unsigned AddrSpace) const override;
151
154 LLVMContext &Context, Value *Length, unsigned SrcAddrSpace,
155 unsigned DestAddrSpace, Align SrcAlign, Align DestAlign,
156 std::optional<uint32_t> AtomicElementSize) const override;
157
159 SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
160 unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
161 Align SrcAlign, Align DestAlign,
162 std::optional<uint32_t> AtomicCpySize) const override;
164 bool HasUnorderedReductions) const override;
165
167 MemIntrinsicInfo &Info) const override;
168
170 unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
174 const Instruction *CxtI = nullptr) const override;
175
177 const Instruction *I = nullptr) const override;
178
179 bool isInlineAsmSourceOfDivergence(const CallInst *CI,
180 ArrayRef<unsigned> Indices = {}) const;
181
184 getVectorInstrCost(unsigned Opcode, Type *ValTy, TTI::TargetCostKind CostKind,
185 unsigned Index, const Value *Op0, const Value *Op1,
187 TTI::VectorInstrContext::None) const override;
188
189 bool isReadRegisterSourceOfDivergence(const IntrinsicInst *ReadReg) const;
190
/// Reports whether an address space cast from \p FromAS to \p ToAS is valid.
/// \returns false when both address spaces are the same; otherwise the result
/// of AMDGPU::addrspacesMayAlias for the pair.
191 bool isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const override {
192 // Address space casts must cast between different address spaces.
193 if (FromAS == ToAS)
194 return false;
195
196 // Casts between any aliasing address spaces are valid.
197 return AMDGPU::addrspacesMayAlias(FromAS, ToAS);
198 }
199
/// Forwards the aliasing query for \p AS0 and \p AS1 to
/// AMDGPU::addrspacesMayAlias and returns its answer unchanged.
200 bool addrspacesMayAlias(unsigned AS0, unsigned AS1) const override {
201 return AMDGPU::addrspacesMayAlias(AS0, AS1);
202 }
203
204 unsigned getFlatAddressSpace() const override {
205 // Don't bother running InferAddressSpaces pass on graphics shaders which
206 // don't use flat addressing.
207 if (IsGraphics)
208 return -1;
210 }
211
213 Intrinsic::ID IID) const override;
214
215 bool
220
222 Value *NewV) const override;
223
224 bool canSimplifyLegacyMulToMul(const Instruction &I, const Value *Op0,
225 const Value *Op1, InstCombiner &IC) const;
226
228 unsigned LaneAgIdx) const;
229
230 std::optional<Instruction *>
232
235 const APInt &DemandedElts,
236 APInt &UndefElts) const;
237
239 IntrinsicInst &II) const;
240
241 std::optional<Value *> simplifyDemandedVectorEltsIntrinsic(
242 InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
243 APInt &UndefElts2, APInt &UndefElts3,
244 std::function<void(Instruction *, unsigned, APInt, APInt &)>
245 SimplifyAndSetOp) const override;
246
248
252 VectorType *SubTp, ArrayRef<const Value *> Args = {},
253 const Instruction *CxtI = nullptr) const override;
254
255 bool isProfitableToSinkOperands(Instruction *I,
256 SmallVectorImpl<Use *> &Ops) const override;
257
258 bool areInlineCompatible(const Function *Caller,
259 const Function *Callee) const override;
260
261 int getInliningLastCallToStaticBonus() const override;
262 unsigned getInliningThresholdMultiplier() const override { return 11; }
263 unsigned adjustInliningThreshold(const CallBase *CB) const override;
264 unsigned getCallerAllocaCost(const CallBase *CB,
265 const AllocaInst *AI) const override;
266
/// \returns the class-level InlinerVectorBonusPercent constant.
267 int getInlinerVectorBonusPercent() const override {
268 return InlinerVectorBonusPercent;
269 }
270
272 getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
273 std::optional<FastMathFlags> FMF,
274 TTI::TargetCostKind CostKind) const override;
275
277 unsigned Opcode, Type *InputTypeA, Type *InputTypeB, Type *AccumType,
279 TTI::PartialReductionExtendKind OpBExtend, std::optional<unsigned> BinOp,
281 std::optional<FastMathFlags> FMF) const override {
283 }
284
287 TTI::TargetCostKind CostKind) const override;
290 TTI::TargetCostKind CostKind) const override;
291
292 /// Data cache line size for LoopDataPrefetch pass. Has no use before GFX12.
293 unsigned getCacheLineSize() const override { return 128; }
294
295 /// How much before a load we should place the prefetch instruction.
296 /// This is currently measured in number of IR instructions.
297 unsigned getPrefetchDistance() const override;
298
299 /// \return true if the target wants to issue a prefetch in address space \p AS.
300 bool shouldPrefetchAddressSpace(unsigned AS) const override;
302 const Function &F,
303 SmallVectorImpl<std::pair<StringRef, int64_t>> &LB) const override;
304
305 enum class KnownIEEEMode { Unknown, On, Off };
306
307 /// Return KnownIEEEMode::On if we know the use context can assume
308 /// "amdgpu-ieee"="true" and KnownIEEEMode::Off if we can assume
309 /// "amdgpu-ieee"="false".
311
312 /// Account for loads of i8 vector types to have reduced cost. For
313 /// example, the cost of loading four i8 values is the same as the cost of
314 /// loading a single i32 value.
316 unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace,
319 const Instruction *I = nullptr) const override;
320
321 /// When counting parts on AMD GPUs, account for i8s being grouped
322 /// together under a single i32 value. Otherwise fall back to base
323 /// implementation.
324 unsigned getNumberOfParts(Type *Tp) const override;
325
326 ValueUniformity getValueUniformity(const Value *V) const override;
327
328 InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
329 StackOffset BaseOffset, bool HasBaseReg,
330 int64_t Scale,
331 unsigned AddrSpace) const override;
332
333 bool isLSRCostLess(const TTI::LSRCost &A,
334 const TTI::LSRCost &B) const override;
335 bool isNumRegsMajorCostOfLSR() const override;
336 bool shouldDropLSRSolutionIfLessProfitable() const override;
337
338 bool isUniform(const Instruction *I,
339 const SmallBitVector &UniformArgs) const override;
340};
341
342} // end namespace llvm
343
344#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU address space definition.
This file provides a helper that implements much of the TTI interface in terms of the target-independ...
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
uint64_t IntrinsicInst * II
uint64_t getMaxMemIntrinsicInlineSizeThreshold() const override
AMDGPUTTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP) const override
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE) const override
Class for arbitrary precision integers.
Definition APInt.h:78
an instruction to allocate memory on the stack
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, const Value *Op0, const Value *Op1, TTI::VectorInstrContext VIC=TTI::VectorInstrContext::None) const override
BasicTTIImplBase(const TargetMachine *TM, const DataLayout &DL)
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Convenience struct for specifying and reasoning about fast-math flags.
Definition FMF.h:23
Container class for subtarget features.
bool simplifyDemandedLaneMaskArg(InstCombiner &IC, IntrinsicInst &II, unsigned LaneAgIdx) const
Simplify a lane index operand (e.g.
GCNTTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const override
InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, StackOffset BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace) const override
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
Account for loads of i8 vector types to have reduced cost.
std::optional< Instruction * > instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const override
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
void collectKernelLaunchBounds(const Function &F, SmallVectorImpl< std::pair< StringRef, int64_t > > &LB) const override
bool addrspacesMayAlias(unsigned AS0, unsigned AS1) const override
Instruction * hoistLaneIntrinsicThroughOperand(InstCombiner &IC, IntrinsicInst &II) const
bool isUniform(const Instruction *I, const SmallBitVector &UniformArgs) const override
bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment, unsigned AddrSpace) const override
int getInlinerVectorBonusPercent() const override
bool isInlineAsmSourceOfDivergence(const CallInst *CI, ArrayRef< unsigned > Indices={}) const
Analyze if the results of inline asm are divergent.
bool isReadRegisterSourceOfDivergence(const IntrinsicInst *ReadReg) const
unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const override
unsigned getNumberOfRegisters(unsigned RCID) const override
bool canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const override
bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment, unsigned AddrSpace) const override
InstructionCost getPartialReductionCost(unsigned Opcode, Type *InputTypeA, Type *InputTypeB, Type *AccumType, ElementCount VF, TTI::PartialReductionExtendKind OpAExtend, TTI::PartialReductionExtendKind OpBExtend, std::optional< unsigned > BinOp, TTI::TargetCostKind CostKind, std::optional< FastMathFlags > FMF) const override
unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize, unsigned ChainSizeInBytes, VectorType *VecTy) const override
bool isLegalToVectorizeMemChain(unsigned ChainSizeInBytes, Align Alignment, unsigned AddrSpace) const
unsigned getCacheLineSize() const override
Data cache line size for LoopDataPrefetch pass. Has no use before GFX12.
bool isLSRCostLess(const TTI::LSRCost &A, const TTI::LSRCost &B) const override
bool shouldPrefetchAddressSpace(unsigned AS) const override
std::optional< Value * > simplifyDemandedVectorEltsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, std::function< void(Instruction *, unsigned, APInt, APInt &)> SimplifyAndSetOp) const override
InstructionCost getVectorInstrCost(unsigned Opcode, Type *ValTy, TTI::TargetCostKind CostKind, unsigned Index, const Value *Op0, const Value *Op1, TTI::VectorInstrContext VIC=TTI::VectorInstrContext::None) const override
bool hasBranchDivergence(const Function *F=nullptr) const override
Value * rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV, Value *NewV) const override
unsigned getCallerAllocaCost(const CallBase *CB, const AllocaInst *AI) const override
void getMemcpyLoopResidualLoweringType(SmallVectorImpl< Type * > &OpsOut, LLVMContext &Context, unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace, Align SrcAlign, Align DestAlign, std::optional< uint32_t > AtomicCpySize) const override
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const override
Get intrinsic cost based on arguments.
unsigned getInliningThresholdMultiplier() const override
unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize, unsigned ChainSizeInBytes, VectorType *VecTy) const override
unsigned getPrefetchDistance() const override
How much before a load we should place the prefetch instruction.
InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
KnownIEEEMode fpenvIEEEMode(const Instruction &I) const
Return KnownIEEEMode::On if we know if the use context can assume "amdgpu-ieee"="true" and KnownIEEEM...
unsigned adjustInliningThreshold(const CallBase *CB) const override
bool isProfitableToSinkOperands(Instruction *I, SmallVectorImpl< Use * > &Ops) const override
Whether it is profitable to sink the operands of an Instruction I to the basic block of I.
bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const override
bool areInlineCompatible(const Function *Caller, const Function *Callee) const override
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind) const override
Try to calculate op costs for min/max reduction operations.
bool shouldDropLSRSolutionIfLessProfitable() const override
unsigned getMaxInterleaveFactor(ElementCount VF, bool HasUnorderedReductions) const override
int getInliningLastCallToStaticBonus() const override
unsigned getFlatAddressSpace() const override
InstructionCost getVectorSplitCost() const
Value * simplifyAMDGCNLaneIntrinsicDemanded(InstCombiner &IC, IntrinsicInst &II, const APInt &DemandedElts, APInt &UndefElts) const
bool collectFlatAddressOperands(SmallVectorImpl< int > &OpIndexes, Intrinsic::ID IID) const override
TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth) const override
ValueUniformity getValueUniformity(const Value *V) const override
unsigned getNumberOfParts(Type *Tp) const override
When counting parts on AMD GPUs, account for i8s being grouped together under a single i32 value.
bool preferSLPInstCountCheck() const override
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP) const override
bool canSimplifyLegacyMulToMul(const Instruction &I, const Value *Op0, const Value *Op1, InstCombiner &IC) const
unsigned getMinVectorRegisterBitWidth() const override
TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind Vector) const override
bool isNumRegsMajorCostOfLSR() const override
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE) const override
Type * getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length, unsigned SrcAddrSpace, unsigned DestAddrSpace, Align SrcAlign, Align DestAlign, std::optional< uint32_t > AtomicElementSize) const override
uint64_t getMaxMemIntrinsicInlineSizeThreshold() const override
bool isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const override
The core instruction combiner logic.
static InstructionCost getInvalid(CostType Val=0)
A wrapper class for inspecting calls to intrinsic functions.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40
The optimization diagnostic interface.
The main scalar evolution driver.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
This base class for TargetLowering contains the SelectionDAG-independent parts that can be used from ...
TargetSubtargetInfo - Generic base class for all target subtargets.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
VectorInstrContext
Represents a hint about the context in which an insert/extract is used.
@ None
The insert/extract is not used with a load/store.
TargetCostKind
The kind of cost model.
@ TCK_CodeSize
Instruction code size.
PopcntSupportKind
Flags indicating the kind of support for population count.
@ TCC_Basic
The cost of a typical 'add' instruction.
ShuffleKind
The various kinds of shuffle patterns for vector queries.
Triple - Helper class for working with autoconf configuration names.
Definition Triple.h:47
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
LLVM Value Representation.
Definition Value.h:75
Base class of all SIMD vector types.
@ REGION_ADDRESS
Address space for region memory. (GDS)
@ LOCAL_ADDRESS
Address space for local memory.
@ FLAT_ADDRESS
Address space for flat memory.
@ PRIVATE_ADDRESS
Address space for private memory.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
static bool addrspacesMayAlias(unsigned AS1, unsigned AS2)
Definition AMDGPU.h:617
friend class Instruction
Iterator for Instructions in a `BasicBlock.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
@ Length
Definition DWP.cpp:558
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
ArrayRef(const T &OneElt) -> ArrayRef< T >
ValueUniformity
Enum describing how values behave with respect to uniformity and divergence, to answer the question: ...
Definition Uniformity.h:18
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Information about a load/store intrinsic defined by the target.
Parameters that control the generic loop unrolling transformation.