LLVM 20.0.0git
AMDGPULateCodeGenPrepare.cpp
Go to the documentation of this file.
1//===-- AMDGPULateCodeGenPrepare.cpp --------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// This pass does misc. AMDGPU optimizations on IR *just* before instruction
11/// selection.
12//
13//===----------------------------------------------------------------------===//
14
15#include "AMDGPU.h"
16#include "AMDGPUTargetMachine.h"
21#include "llvm/IR/IRBuilder.h"
22#include "llvm/IR/InstVisitor.h"
27
28#define DEBUG_TYPE "amdgpu-late-codegenprepare"
29
30using namespace llvm;
31
32// Scalar load widening needs to run after the load-store-vectorizer, as that
33// pass doesn't handle overlapping cases. In addition, this pass enhances the
34// widening to handle cases where scalar sub-dword loads are only naturally
35// aligned, not dword aligned.
36static cl::opt<bool>
37 WidenLoads("amdgpu-late-codegenprepare-widen-constant-loads",
38 cl::desc("Widen sub-dword constant address space loads in "
39 "AMDGPULateCodeGenPrepare"),
41
42namespace {
43
44class AMDGPULateCodeGenPrepare
45 : public InstVisitor<AMDGPULateCodeGenPrepare, bool> {
46 Module *Mod = nullptr;
47 const DataLayout *DL = nullptr;
48 const GCNSubtarget &ST;
49
50 AssumptionCache *AC = nullptr;
51 UniformityInfo *UA = nullptr;
52
54
55public:
56 AMDGPULateCodeGenPrepare(Module &M, const GCNSubtarget &ST,
58 : Mod(&M), DL(&M.getDataLayout()), ST(ST), AC(AC), UA(UA) {}
59 bool run(Function &F);
60 bool visitInstruction(Instruction &) { return false; }
61
62 // Check if the specified value is at least DWORD aligned.
63 bool isDWORDAligned(const Value *V) const {
64 KnownBits Known = computeKnownBits(V, *DL, 0, AC);
65 return Known.countMinTrailingZeros() >= 2;
66 }
67
68 bool canWidenScalarExtLoad(LoadInst &LI) const;
69 bool visitLoadInst(LoadInst &LI);
70};
71
73
74class LiveRegOptimizer {
75private:
76 Module *Mod = nullptr;
77 const DataLayout *DL = nullptr;
78 const GCNSubtarget *ST;
79 /// The scalar type to convert to
80 Type *ConvertToScalar;
81 /// The set of visited Instructions
83 /// Map of Value -> Converted Value
84 ValueToValueMap ValMap;
85 /// Map containing conversions from Optimal Type -> Original Type per BB.
87
88public:
89 /// Calculate and return the type to convert to, given a problematic \p
90 /// OriginalType. In some instances, we may widen the type (e.g. v2i8 -> i32).
91 Type *calculateConvertType(Type *OriginalType);
92 /// Convert the virtual register defined by \p V to the compatible vector of
93 /// legal type
94 Value *convertToOptType(Instruction *V, BasicBlock::iterator &InstPt);
95 /// Convert the virtual register defined by \p V back to the original type \p
96 /// ConvertType, stripping away the MSBs in cases where there was an imperfect
97 /// fit (e.g. v2i32 -> v7i8)
98 Value *convertFromOptType(Type *ConvertType, Instruction *V,
100 BasicBlock *InsertBlock);
101 /// Check for problematic PHI nodes or cross-bb values based on the value
102 /// defined by \p I, and coerce to legal types if necessary. For problematic
103 /// PHI node, we coerce all incoming values in a single invocation.
104 bool optimizeLiveType(Instruction *I,
106
107 // Whether or not the type should be replaced to avoid inefficient
108 // legalization code
109 bool shouldReplace(Type *ITy) {
110 FixedVectorType *VTy = dyn_cast<FixedVectorType>(ITy);
111 if (!VTy)
112 return false;
113
114 auto TLI = ST->getTargetLowering();
115
116 Type *EltTy = VTy->getElementType();
117 // If the element size is not less than the convert to scalar size, then we
118 // can't do any bit packing
119 if (!EltTy->isIntegerTy() ||
120 EltTy->getScalarSizeInBits() > ConvertToScalar->getScalarSizeInBits())
121 return false;
122
123 // Only coerce illegal types
125 TLI->getTypeConversion(EltTy->getContext(), EVT::getEVT(EltTy, false));
126 return LK.first != TargetLoweringBase::TypeLegal;
127 }
128
129 LiveRegOptimizer(Module *Mod, const GCNSubtarget *ST) : Mod(Mod), ST(ST) {
130 DL = &Mod->getDataLayout();
131 ConvertToScalar = Type::getInt32Ty(Mod->getContext());
132 }
133};
134
135} // end anonymous namespace
136
137bool AMDGPULateCodeGenPrepare::run(Function &F) {
138 // "Optimize" the virtual regs that cross basic block boundaries. When
139 // building the SelectionDAG, vectors of illegal types that cross basic blocks
140 // will be scalarized and widened, with each scalar living in its
141 // own register. To work around this, this optimization converts the
142 // vectors to equivalent vectors of legal type (which are converted back
143 // before uses in subsequent blocks), to pack the bits into fewer physical
144 // registers (used in CopyToReg/CopyFromReg pairs).
145 LiveRegOptimizer LRO(Mod, &ST);
146
147 bool Changed = false;
148
149 bool HasScalarSubwordLoads = ST.hasScalarSubwordLoads();
150
151 for (auto &BB : reverse(F))
153 Changed |= !HasScalarSubwordLoads && visit(I);
154 Changed |= LRO.optimizeLiveType(&I, DeadInsts);
155 }
156
158 return Changed;
159}
160
161Type *LiveRegOptimizer::calculateConvertType(Type *OriginalType) {
162 assert(OriginalType->getScalarSizeInBits() <=
163 ConvertToScalar->getScalarSizeInBits());
164
165 FixedVectorType *VTy = cast<FixedVectorType>(OriginalType);
166
167 TypeSize OriginalSize = DL->getTypeSizeInBits(VTy);
168 TypeSize ConvertScalarSize = DL->getTypeSizeInBits(ConvertToScalar);
169 unsigned ConvertEltCount =
170 (OriginalSize + ConvertScalarSize - 1) / ConvertScalarSize;
171
172 if (OriginalSize <= ConvertScalarSize)
173 return IntegerType::get(Mod->getContext(), ConvertScalarSize);
174
175 return VectorType::get(Type::getIntNTy(Mod->getContext(), ConvertScalarSize),
176 ConvertEltCount, false);
177}
178
179Value *LiveRegOptimizer::convertToOptType(Instruction *V,
180 BasicBlock::iterator &InsertPt) {
181 FixedVectorType *VTy = cast<FixedVectorType>(V->getType());
182 Type *NewTy = calculateConvertType(V->getType());
183
184 TypeSize OriginalSize = DL->getTypeSizeInBits(VTy);
185 TypeSize NewSize = DL->getTypeSizeInBits(NewTy);
186
187 IRBuilder<> Builder(V->getParent(), InsertPt);
188 // If there is a bitsize match, we can fit the old vector into a new vector of
189 // desired type.
190 if (OriginalSize == NewSize)
191 return Builder.CreateBitCast(V, NewTy, V->getName() + ".bc");
192
193 // If there is a bitsize mismatch, we must use a wider vector.
194 assert(NewSize > OriginalSize);
195 uint64_t ExpandedVecElementCount = NewSize / VTy->getScalarSizeInBits();
196
197 SmallVector<int, 8> ShuffleMask;
198 uint64_t OriginalElementCount = VTy->getElementCount().getFixedValue();
199 for (unsigned I = 0; I < OriginalElementCount; I++)
200 ShuffleMask.push_back(I);
201
202 for (uint64_t I = OriginalElementCount; I < ExpandedVecElementCount; I++)
203 ShuffleMask.push_back(OriginalElementCount);
204
205 Value *ExpandedVec = Builder.CreateShuffleVector(V, ShuffleMask);
206 return Builder.CreateBitCast(ExpandedVec, NewTy, V->getName() + ".bc");
207}
208
209Value *LiveRegOptimizer::convertFromOptType(Type *ConvertType, Instruction *V,
210 BasicBlock::iterator &InsertPt,
211 BasicBlock *InsertBB) {
212 FixedVectorType *NewVTy = cast<FixedVectorType>(ConvertType);
213
214 TypeSize OriginalSize = DL->getTypeSizeInBits(V->getType());
215 TypeSize NewSize = DL->getTypeSizeInBits(NewVTy);
216
217 IRBuilder<> Builder(InsertBB, InsertPt);
218 // If there is a bitsize match, we simply convert back to the original type.
219 if (OriginalSize == NewSize)
220 return Builder.CreateBitCast(V, NewVTy, V->getName() + ".bc");
221
222 // If there is a bitsize mismatch, then we must have used a wider value to
223 // hold the bits.
224 assert(OriginalSize > NewSize);
225 // For wide scalars, we can just truncate the value.
226 if (!V->getType()->isVectorTy()) {
227 Instruction *Trunc = cast<Instruction>(
228 Builder.CreateTrunc(V, IntegerType::get(Mod->getContext(), NewSize)));
229 return cast<Instruction>(Builder.CreateBitCast(Trunc, NewVTy));
230 }
231
232 // For wider vectors, we must strip the MSBs to convert back to the original
233 // type.
234 VectorType *ExpandedVT = VectorType::get(
236 (OriginalSize / NewVTy->getScalarSizeInBits()), false);
237 Instruction *Converted =
238 cast<Instruction>(Builder.CreateBitCast(V, ExpandedVT));
239
240 unsigned NarrowElementCount = NewVTy->getElementCount().getFixedValue();
241 SmallVector<int, 8> ShuffleMask(NarrowElementCount);
242 std::iota(ShuffleMask.begin(), ShuffleMask.end(), 0);
243
244 return Builder.CreateShuffleVector(Converted, ShuffleMask);
245}
246
247bool LiveRegOptimizer::optimizeLiveType(
253
254 Worklist.push_back(cast<Instruction>(I));
255 while (!Worklist.empty()) {
256 Instruction *II = Worklist.pop_back_val();
257
258 if (!Visited.insert(II).second)
259 continue;
260
261 if (!shouldReplace(II->getType()))
262 continue;
263
264 if (PHINode *Phi = dyn_cast<PHINode>(II)) {
265 PhiNodes.insert(Phi);
266 // Collect all the incoming values of problematic PHI nodes.
267 for (Value *V : Phi->incoming_values()) {
268 // Repeat the collection process for newly found PHI nodes.
269 if (PHINode *OpPhi = dyn_cast<PHINode>(V)) {
270 if (!PhiNodes.count(OpPhi) && !Visited.count(OpPhi))
271 Worklist.push_back(OpPhi);
272 continue;
273 }
274
275 Instruction *IncInst = dyn_cast<Instruction>(V);
276 // Other incoming value types (e.g. vector literals) are unhandled
277 if (!IncInst && !isa<ConstantAggregateZero>(V))
278 return false;
279
280 // Collect all other incoming values for coercion.
281 if (IncInst)
282 Defs.insert(IncInst);
283 }
284 }
285
286 // Collect all relevant uses.
287 for (User *V : II->users()) {
288 // Repeat the collection process for problematic PHI nodes.
289 if (PHINode *OpPhi = dyn_cast<PHINode>(V)) {
290 if (!PhiNodes.count(OpPhi) && !Visited.count(OpPhi))
291 Worklist.push_back(OpPhi);
292 continue;
293 }
294
295 Instruction *UseInst = cast<Instruction>(V);
296 // Collect all uses of PHINodes and any use that crosses BB boundaries.
297 if (UseInst->getParent() != II->getParent() || isa<PHINode>(II)) {
298 Uses.insert(UseInst);
299 if (!Defs.count(II) && !isa<PHINode>(II)) {
300 Defs.insert(II);
301 }
302 }
303 }
304 }
305
306 // Coerce and track the defs.
307 for (Instruction *D : Defs) {
308 if (!ValMap.contains(D)) {
309 BasicBlock::iterator InsertPt = std::next(D->getIterator());
310 Value *ConvertVal = convertToOptType(D, InsertPt);
311 assert(ConvertVal);
312 ValMap[D] = ConvertVal;
313 }
314 }
315
316 // Construct new-typed PHI nodes.
317 for (PHINode *Phi : PhiNodes) {
318 ValMap[Phi] = PHINode::Create(calculateConvertType(Phi->getType()),
319 Phi->getNumIncomingValues(),
320 Phi->getName() + ".tc", Phi->getIterator());
321 }
322
323 // Connect all the PHI nodes with their new incoming values.
324 for (PHINode *Phi : PhiNodes) {
325 PHINode *NewPhi = cast<PHINode>(ValMap[Phi]);
326 bool MissingIncVal = false;
327 for (int I = 0, E = Phi->getNumIncomingValues(); I < E; I++) {
328 Value *IncVal = Phi->getIncomingValue(I);
329 if (isa<ConstantAggregateZero>(IncVal)) {
330 Type *NewType = calculateConvertType(Phi->getType());
331 NewPhi->addIncoming(ConstantInt::get(NewType, 0, false),
332 Phi->getIncomingBlock(I));
333 } else if (ValMap.contains(IncVal) && ValMap[IncVal])
334 NewPhi->addIncoming(ValMap[IncVal], Phi->getIncomingBlock(I));
335 else
336 MissingIncVal = true;
337 }
338 if (MissingIncVal) {
339 Value *DeadVal = ValMap[Phi];
340 // The coercion chain of the PHI is broken. Delete the Phi
341 // from the ValMap and any connected / user Phis.
342 SmallVector<Value *, 4> PHIWorklist;
343 SmallPtrSet<Value *, 4> VisitedPhis;
344 PHIWorklist.push_back(DeadVal);
345 while (!PHIWorklist.empty()) {
346 Value *NextDeadValue = PHIWorklist.pop_back_val();
347 VisitedPhis.insert(NextDeadValue);
348 auto OriginalPhi =
349 std::find_if(PhiNodes.begin(), PhiNodes.end(),
350 [this, &NextDeadValue](PHINode *CandPhi) {
351 return ValMap[CandPhi] == NextDeadValue;
352 });
353 // This PHI may have already been removed from maps when
354 // unwinding a previous Phi
355 if (OriginalPhi != PhiNodes.end())
356 ValMap.erase(*OriginalPhi);
357
358 DeadInsts.emplace_back(cast<Instruction>(NextDeadValue));
359
360 for (User *U : NextDeadValue->users()) {
361 if (!VisitedPhis.contains(cast<PHINode>(U)))
362 PHIWorklist.push_back(U);
363 }
364 }
365 } else {
366 DeadInsts.emplace_back(cast<Instruction>(Phi));
367 }
368 }
369 // Coerce back to the original type and replace the uses.
370 for (Instruction *U : Uses) {
371 // Replace all converted operands for a use.
372 for (auto [OpIdx, Op] : enumerate(U->operands())) {
373 if (ValMap.contains(Op) && ValMap[Op]) {
374 Value *NewVal = nullptr;
375 if (BBUseValMap.contains(U->getParent()) &&
376 BBUseValMap[U->getParent()].contains(ValMap[Op]))
377 NewVal = BBUseValMap[U->getParent()][ValMap[Op]];
378 else {
379 BasicBlock::iterator InsertPt = U->getParent()->getFirstNonPHIIt();
380 // We may pick up ops that were previously converted for users in
381 // other blocks. If there is an originally typed definition of the Op
382 // already in this block, simply reuse it.
383 if (isa<Instruction>(Op) && !isa<PHINode>(Op) &&
384 U->getParent() == cast<Instruction>(Op)->getParent()) {
385 NewVal = Op;
386 } else {
387 NewVal =
388 convertFromOptType(Op->getType(), cast<Instruction>(ValMap[Op]),
389 InsertPt, U->getParent());
390 BBUseValMap[U->getParent()][ValMap[Op]] = NewVal;
391 }
392 }
393 assert(NewVal);
394 U->setOperand(OpIdx, NewVal);
395 }
396 }
397 }
398
399 return true;
400}
401
402bool AMDGPULateCodeGenPrepare::canWidenScalarExtLoad(LoadInst &LI) const {
403 unsigned AS = LI.getPointerAddressSpace();
404 // Skip non-constant address space.
405 if (AS != AMDGPUAS::CONSTANT_ADDRESS &&
407 return false;
408 // Skip non-simple loads.
409 if (!LI.isSimple())
410 return false;
411 Type *Ty = LI.getType();
412 // Skip aggregate types.
413 if (Ty->isAggregateType())
414 return false;
415 unsigned TySize = DL->getTypeStoreSize(Ty);
416 // Only handle sub-DWORD loads.
417 if (TySize >= 4)
418 return false;
419 // That load must be at least naturally aligned.
420 if (LI.getAlign() < DL->getABITypeAlign(Ty))
421 return false;
422 // It should be uniform, i.e. a scalar load.
423 return UA->isUniform(&LI);
424}
425
426bool AMDGPULateCodeGenPrepare::visitLoadInst(LoadInst &LI) {
427 if (!WidenLoads)
428 return false;
429
430 // Skip loads that are already at least DWORD aligned, as those are handled in
431 // SDAG.
432 if (LI.getAlign() >= 4)
433 return false;
434
435 if (!canWidenScalarExtLoad(LI))
436 return false;
437
438 int64_t Offset = 0;
439 auto *Base =
441 // If that base is not DWORD aligned, it's not safe to perform the following
442 // transforms.
443 if (!isDWORDAligned(Base))
444 return false;
445
446 int64_t Adjust = Offset & 0x3;
447 if (Adjust == 0) {
448 // With a zero adjust, the original alignment can be promoted to a better
449 // one.
450 LI.setAlignment(Align(4));
451 return true;
452 }
453
454 IRBuilder<> IRB(&LI);
455 IRB.SetCurrentDebugLocation(LI.getDebugLoc());
456
457 unsigned LdBits = DL->getTypeStoreSizeInBits(LI.getType());
458 auto IntNTy = Type::getIntNTy(LI.getContext(), LdBits);
459
460 auto *NewPtr = IRB.CreateConstGEP1_64(
461 IRB.getInt8Ty(),
462 IRB.CreateAddrSpaceCast(Base, LI.getPointerOperand()->getType()),
463 Offset - Adjust);
464
465 LoadInst *NewLd = IRB.CreateAlignedLoad(IRB.getInt32Ty(), NewPtr, Align(4));
466 NewLd->copyMetadata(LI);
467 NewLd->setMetadata(LLVMContext::MD_range, nullptr);
468
469 unsigned ShAmt = Adjust * 8;
470 auto *NewVal = IRB.CreateBitCast(
471 IRB.CreateTrunc(IRB.CreateLShr(NewLd, ShAmt), IntNTy), LI.getType());
472 LI.replaceAllUsesWith(NewVal);
473 DeadInsts.emplace_back(&LI);
474
475 return true;
476}
477
480 const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
481
484
485 AMDGPULateCodeGenPrepare Impl(*F.getParent(), ST, &AC, &UI);
486
487 bool Changed = Impl.run(F);
488
490 if (!Changed)
491 return PA;
493 return PA;
494}
495
497public:
498 static char ID;
499
501
502 StringRef getPassName() const override {
503 return "AMDGPU IR late optimizations";
504 }
505
506 void getAnalysisUsage(AnalysisUsage &AU) const override {
510 AU.setPreservesAll();
511 }
512
513 bool runOnFunction(Function &F) override;
514};
515
517 if (skipFunction(F))
518 return false;
519
520 const TargetPassConfig &TPC = getAnalysis<TargetPassConfig>();
521 const TargetMachine &TM = TPC.getTM<TargetMachine>();
522 const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
523
524 AssumptionCache &AC =
525 getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
526 UniformityInfo &UI =
527 getAnalysis<UniformityInfoWrapperPass>().getUniformityInfo();
528
529 AMDGPULateCodeGenPrepare Impl(*F.getParent(), ST, &AC, &UI);
530
531 return Impl.run(F);
532}
533
535 "AMDGPU IR late optimizations", false, false)
541
543
546}
aarch64 falkor hwpf fix late
static cl::opt< bool > WidenLoads("amdgpu-late-codegenprepare-widen-constant-loads", cl::desc("Widen sub-dword constant address space loads in " "AMDGPULateCodeGenPrepare"), cl::ReallyHidden, cl::init(true))
The AMDGPU TargetMachine interface definition for hw codegen targets.
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static bool runOnFunction(Function &F, bool PostInlining)
Rewrite Partial Register Uses
#define DEBUG_TYPE
Legalize the Machine IR a function s Machine IR
Definition: Legalizer.cpp:81
Generic memory optimizations
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
uint64_t IntrinsicInst * II
Module * Mod
FunctionAnalysisManager FAM
const char LLVMTargetMachineRef TM
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:55
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:57
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
Target-Independent Code Generator Pass Configuration Options pass.
LLVM IR instance of the generic uniformity analysis.
bool runOnFunction(Function &F) override
runOnFunction - Virtual method overridden by subclasses to do the per-function processing of the pass.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - This function should be overridden by passes that need analysis information to do t...
StringRef getPassName() const override
getPassName - Return a nice clean name for a pass.
PreservedAnalyses run(Function &, FunctionAnalysisManager &)
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:253
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:405
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
void setPreservesAll()
Set by analyses that do not transform their input at all.
A function analysis which provides an AssumptionCache.
An immutable pass that tracks lazily created AssumptionCache objects.
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:177
Represents analyses that only rely on functions' control flow.
Definition: Analysis.h:72
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:103
bool erase(const KeyT &Val)
Definition: DenseMap.h:336
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
Definition: DenseMap.h:146
Class to represent fixed width SIMD vectors.
Definition: DerivedTypes.h:539
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:310
bool isUniform(ConstValueRefT V) const
Whether V is uniform/non-divergent.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2674
Base class for instruction visitors.
Definition: InstVisitor.h:78
void visitInstruction(Instruction &I)
Definition: InstVisitor.h:280
RetTy visitLoadInst(LoadInst &I)
Definition: InstVisitor.h:169
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:466
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Definition: Metadata.cpp:1642
void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition: Type.cpp:266
An instruction for reading from memory.
Definition: Instructions.h:174
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
Definition: Instructions.h:259
void setAlignment(Align Align)
Definition: Instructions.h:213
Value * getPointerOperand()
Definition: Instructions.h:253
bool isSimple() const
Definition: Instructions.h:245
Align getAlign() const
Return the alignment of the access that is being performed.
Definition: Instructions.h:209
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
LLVMContext & getContext() const
Get the global data context.
Definition: Module.h:299
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.h:291
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:111
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition: Analysis.h:114
void preserveSet()
Mark an analysis set as preserved.
Definition: Analysis.h:146
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:436
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:368
bool contains(ConstPtrType Ptr) const
Definition: SmallPtrSet.h:442
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:503
bool empty() const
Definition: SmallVector.h:95
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:587
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:951
void push_back(const T &Elt)
Definition: SmallVector.h:427
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1210
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
std::pair< LegalizeTypeAction, EVT > LegalizeKind
LegalizeKind holds the legalization kind that needs to happen to EVT in order to type-legalize it.
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:77
const STC & getSubtarget(const Function &F) const
This method returns a pointer to the specified type of TargetSubtargetInfo.
Target-Independent Code Generator Pass Configuration Options.
TMC & getTM() const
Get the right type of TargetMachine for this target.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isAggregateType() const
Return true if the type is an aggregate type.
Definition: Type.h:291
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition: Type.h:128
static IntegerType * getInt32Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:224
Analysis pass which computes UniformityInfo.
Legacy analysis pass which computes a CycleInfo.
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:534
iterator_range< user_iterator > users()
Definition: Value.h:421
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:1075
ElementCount getElementCount() const
Return an ElementCount instance to represent the (possibly scalable) number of elements in the vector...
Definition: DerivedTypes.h:641
Type * getElementType() const
Definition: DerivedTypes.h:436
constexpr ScalarTy getFixedValue() const
Definition: TypeSize.h:202
const ParentTy * getParent() const
Definition: ilist_node.h:32
@ CONSTANT_ADDRESS_32BIT
Address space for 32-bit constant memory.
@ CONSTANT_ADDRESS
Address space for constant memory (VTX2).
@ ReallyHidden
Definition: CommandLine.h:138
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
NodeAddr< PhiNode * > Phi
Definition: RDFGraph.h:390
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:480
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition: STLExtras.h:2406
Value * GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset, const DataLayout &DL, bool AllowNonInbounds=true)
Analyze the specified pointer to see if it can be expressed as a base pointer plus a constant offset.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:656
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:419
void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, unsigned Depth=0, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
DWARFExpression::Operation Op
bool RecursivelyDeleteTriviallyDeadInstructionsPermissive(SmallVectorImpl< WeakTrackingVH > &DeadInsts, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
Same functionality as RecursivelyDeleteTriviallyDeadInstructions, but allow instructions that are not...
Definition: Local.cpp:555
FunctionPass * createAMDGPULateCodeGenPrepareLegacyPass()
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
static EVT getEVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
Definition: ValueTypes.cpp:275
unsigned countMinTrailingZeros() const
Returns the minimum number of trailing zero bits.
Definition: KnownBits.h:231