LLVM 19.0.0git
AMDGPUAttributor.cpp
Go to the documentation of this file.
1//===- AMDGPUAttributor.cpp -----------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file This pass uses Attributor framework to deduce AMDGPU attributes.
10//
11//===----------------------------------------------------------------------===//
12
13#include "AMDGPU.h"
14#include "GCNSubtarget.h"
18#include "llvm/IR/IntrinsicsAMDGPU.h"
19#include "llvm/IR/IntrinsicsR600.h"
22
23#define DEBUG_TYPE "amdgpu-attributor"
24
25namespace llvm {
27}
28
29using namespace llvm;
30
32 "amdgpu-kernarg-preload-count",
33 cl::desc("How many kernel arguments to preload onto SGPRs"), cl::init(0));
34
35#define AMDGPU_ATTRIBUTE(Name, Str) Name##_POS,
36
38 #include "AMDGPUAttributes.def"
40};
41
42#define AMDGPU_ATTRIBUTE(Name, Str) Name = 1 << Name##_POS,
43
46 #include "AMDGPUAttributes.def"
48};
49
50#define AMDGPU_ATTRIBUTE(Name, Str) {Name, Str},
51static constexpr std::pair<ImplicitArgumentMask,
53 #include "AMDGPUAttributes.def"
54};
55
56// We do not need to note the x workitem or workgroup id because they are always
57// initialized.
58//
59// TODO: We should not add the attributes if the known compile time workgroup
60// size is 1 for y/z.
62intrinsicToAttrMask(Intrinsic::ID ID, bool &NonKernelOnly, bool &NeedsImplicit,
63 bool HasApertureRegs, bool SupportsGetDoorBellID,
64 unsigned CodeObjectVersion) {
65 switch (ID) {
66 case Intrinsic::amdgcn_workitem_id_x:
67 NonKernelOnly = true;
68 return WORKITEM_ID_X;
69 case Intrinsic::amdgcn_workgroup_id_x:
70 NonKernelOnly = true;
71 return WORKGROUP_ID_X;
72 case Intrinsic::amdgcn_workitem_id_y:
73 case Intrinsic::r600_read_tidig_y:
74 return WORKITEM_ID_Y;
75 case Intrinsic::amdgcn_workitem_id_z:
76 case Intrinsic::r600_read_tidig_z:
77 return WORKITEM_ID_Z;
78 case Intrinsic::amdgcn_workgroup_id_y:
79 case Intrinsic::r600_read_tgid_y:
80 return WORKGROUP_ID_Y;
81 case Intrinsic::amdgcn_workgroup_id_z:
82 case Intrinsic::r600_read_tgid_z:
83 return WORKGROUP_ID_Z;
84 case Intrinsic::amdgcn_lds_kernel_id:
85 return LDS_KERNEL_ID;
86 case Intrinsic::amdgcn_dispatch_ptr:
87 return DISPATCH_PTR;
88 case Intrinsic::amdgcn_dispatch_id:
89 return DISPATCH_ID;
90 case Intrinsic::amdgcn_implicitarg_ptr:
91 return IMPLICIT_ARG_PTR;
92 // Need queue_ptr anyway. But under V5, we also need implicitarg_ptr to access
93 // queue_ptr.
94 case Intrinsic::amdgcn_queue_ptr:
95 NeedsImplicit = (CodeObjectVersion >= AMDGPU::AMDHSA_COV5);
96 return QUEUE_PTR;
97 case Intrinsic::amdgcn_is_shared:
98 case Intrinsic::amdgcn_is_private:
99 if (HasApertureRegs)
100 return NOT_IMPLICIT_INPUT;
101 // Under V5, we need implicitarg_ptr + offsets to access private_base or
102 // shared_base. For pre-V5, however, need to access them through queue_ptr +
103 // offsets.
104 return CodeObjectVersion >= AMDGPU::AMDHSA_COV5 ? IMPLICIT_ARG_PTR :
105 QUEUE_PTR;
106 case Intrinsic::trap:
107 if (SupportsGetDoorBellID) // GetDoorbellID support implemented since V4.
108 return CodeObjectVersion >= AMDGPU::AMDHSA_COV4 ? NOT_IMPLICIT_INPUT :
109 QUEUE_PTR;
110 NeedsImplicit = (CodeObjectVersion >= AMDGPU::AMDHSA_COV5);
111 return QUEUE_PTR;
112 default:
113 return NOT_IMPLICIT_INPUT;
114 }
115}
116
117static bool castRequiresQueuePtr(unsigned SrcAS) {
118 return SrcAS == AMDGPUAS::LOCAL_ADDRESS || SrcAS == AMDGPUAS::PRIVATE_ADDRESS;
119}
120
121static bool isDSAddress(const Constant *C) {
122 const GlobalValue *GV = dyn_cast<GlobalValue>(C);
123 if (!GV)
124 return false;
125 unsigned AS = GV->getAddressSpace();
127}
128
129/// Returns true if the function requires the implicit argument be passed
130/// regardless of the function contents.
131static bool funcRequiresHostcallPtr(const Function &F) {
132 // Sanitizers require the hostcall buffer passed in the implicit arguments.
133 return F.hasFnAttribute(Attribute::SanitizeAddress) ||
134 F.hasFnAttribute(Attribute::SanitizeThread) ||
135 F.hasFnAttribute(Attribute::SanitizeMemory) ||
136 F.hasFnAttribute(Attribute::SanitizeHWAddress) ||
137 F.hasFnAttribute(Attribute::SanitizeMemTag);
138}
139
140namespace {
141class AMDGPUInformationCache : public InformationCache {
142public:
143 AMDGPUInformationCache(const Module &M, AnalysisGetter &AG,
144 BumpPtrAllocator &Allocator,
147 CodeObjectVersion(AMDGPU::getAMDHSACodeObjectVersion(M)) {}
148
150
151 enum ConstantStatus { DS_GLOBAL = 1 << 0, ADDR_SPACE_CAST = 1 << 1 };
152
153 /// Check if the subtarget has aperture regs.
154 bool hasApertureRegs(Function &F) {
155 const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
156 return ST.hasApertureRegs();
157 }
158
159 /// Check if the subtarget supports GetDoorbellID.
160 bool supportsGetDoorbellID(Function &F) {
161 const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
162 return ST.supportsGetDoorbellID();
163 }
164
165 std::pair<unsigned, unsigned> getFlatWorkGroupSizes(const Function &F) {
166 const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
167 return ST.getFlatWorkGroupSizes(F);
168 }
169
170 std::pair<unsigned, unsigned>
171 getMaximumFlatWorkGroupRange(const Function &F) {
172 const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
173 return {ST.getMinFlatWorkGroupSize(), ST.getMaxFlatWorkGroupSize()};
174 }
175
176 /// Get code object version.
177 unsigned getCodeObjectVersion() const {
178 return CodeObjectVersion;
179 }
180
181 /// Get the effective value of "amdgpu-waves-per-eu" for the function,
182 /// accounting for the interaction with the passed value to use for
183 /// "amdgpu-flat-work-group-size".
184 std::pair<unsigned, unsigned>
185 getWavesPerEU(const Function &F,
186 std::pair<unsigned, unsigned> FlatWorkGroupSize) {
187 const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
188 return ST.getWavesPerEU(F, FlatWorkGroupSize);
189 }
190
191 std::pair<unsigned, unsigned>
192 getEffectiveWavesPerEU(const Function &F,
193 std::pair<unsigned, unsigned> WavesPerEU,
194 std::pair<unsigned, unsigned> FlatWorkGroupSize) {
195 const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
196 return ST.getEffectiveWavesPerEU(WavesPerEU, FlatWorkGroupSize);
197 }
198
199 unsigned getMaxWavesPerEU(const Function &F) {
200 const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
201 return ST.getMaxWavesPerEU();
202 }
203
204private:
205 /// Check if the ConstantExpr \p CE requires the queue pointer.
206 static bool visitConstExpr(const ConstantExpr *CE) {
207 if (CE->getOpcode() == Instruction::AddrSpaceCast) {
208 unsigned SrcAS = CE->getOperand(0)->getType()->getPointerAddressSpace();
209 return castRequiresQueuePtr(SrcAS);
210 }
211 return false;
212 }
213
214 /// Get the constant access bitmap for \p C.
215 uint8_t getConstantAccess(const Constant *C,
217 auto It = ConstantStatus.find(C);
218 if (It != ConstantStatus.end())
219 return It->second;
220
221 uint8_t Result = 0;
222 if (isDSAddress(C))
223 Result = DS_GLOBAL;
224
225 if (const auto *CE = dyn_cast<ConstantExpr>(C))
226 if (visitConstExpr(CE))
227 Result |= ADDR_SPACE_CAST;
228
229 for (const Use &U : C->operands()) {
230 const auto *OpC = dyn_cast<Constant>(U);
231 if (!OpC || !Visited.insert(OpC).second)
232 continue;
233
234 Result |= getConstantAccess(OpC, Visited);
235 }
236 return Result;
237 }
238
239public:
240 /// Returns true if \p Fn needs the queue pointer because of \p C.
241 bool needsQueuePtr(const Constant *C, Function &Fn) {
242 bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(Fn.getCallingConv());
243 bool HasAperture = hasApertureRegs(Fn);
244
245 // No need to explore the constants.
246 if (!IsNonEntryFunc && HasAperture)
247 return false;
248
250 uint8_t Access = getConstantAccess(C, Visited);
251
252 // We need to trap on DS globals in non-entry functions.
253 if (IsNonEntryFunc && (Access & DS_GLOBAL))
254 return true;
255
256 return !HasAperture && (Access & ADDR_SPACE_CAST);
257 }
258
259private:
260 /// Used to determine if the Constant needs the queue pointer.
262 const unsigned CodeObjectVersion;
263};
264
265struct AAAMDAttributes
266 : public StateWrapper<BitIntegerState<uint32_t, ALL_ARGUMENT_MASK, 0>,
267 AbstractAttribute> {
270
271 AAAMDAttributes(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
272
273 /// Create an abstract attribute view for the position \p IRP.
274 static AAAMDAttributes &createForPosition(const IRPosition &IRP,
275 Attributor &A);
276
277 /// See AbstractAttribute::getName().
278 const std::string getName() const override { return "AAAMDAttributes"; }
279
280 /// See AbstractAttribute::getIdAddr().
281 const char *getIdAddr() const override { return &ID; }
282
283 /// This function should return true if the type of the \p AA is
284 /// AAAMDAttributes.
285 static bool classof(const AbstractAttribute *AA) {
286 return (AA->getIdAddr() == &ID);
287 }
288
289 /// Unique ID (due to the unique address)
290 static const char ID;
291};
292const char AAAMDAttributes::ID = 0;
293
294struct AAUniformWorkGroupSize
295 : public StateWrapper<BooleanState, AbstractAttribute> {
297 AAUniformWorkGroupSize(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
298
299 /// Create an abstract attribute view for the position \p IRP.
300 static AAUniformWorkGroupSize &createForPosition(const IRPosition &IRP,
301 Attributor &A);
302
303 /// See AbstractAttribute::getName().
304 const std::string getName() const override {
305 return "AAUniformWorkGroupSize";
306 }
307
308 /// See AbstractAttribute::getIdAddr().
309 const char *getIdAddr() const override { return &ID; }
310
311 /// This function should return true if the type of the \p AA is
312 /// AAAMDAttributes.
313 static bool classof(const AbstractAttribute *AA) {
314 return (AA->getIdAddr() == &ID);
315 }
316
317 /// Unique ID (due to the unique address)
318 static const char ID;
319};
320const char AAUniformWorkGroupSize::ID = 0;
321
322struct AAUniformWorkGroupSizeFunction : public AAUniformWorkGroupSize {
323 AAUniformWorkGroupSizeFunction(const IRPosition &IRP, Attributor &A)
324 : AAUniformWorkGroupSize(IRP, A) {}
325
326 void initialize(Attributor &A) override {
327 Function *F = getAssociatedFunction();
328 CallingConv::ID CC = F->getCallingConv();
329
331 return;
332
333 bool InitialValue = false;
334 if (F->hasFnAttribute("uniform-work-group-size"))
335 InitialValue =
336 F->getFnAttribute("uniform-work-group-size").getValueAsString() ==
337 "true";
338
339 if (InitialValue)
340 indicateOptimisticFixpoint();
341 else
342 indicatePessimisticFixpoint();
343 }
344
345 ChangeStatus updateImpl(Attributor &A) override {
346 ChangeStatus Change = ChangeStatus::UNCHANGED;
347
348 auto CheckCallSite = [&](AbstractCallSite CS) {
349 Function *Caller = CS.getInstruction()->getFunction();
350 LLVM_DEBUG(dbgs() << "[AAUniformWorkGroupSize] Call " << Caller->getName()
351 << "->" << getAssociatedFunction()->getName() << "\n");
352
353 const auto *CallerInfo = A.getAAFor<AAUniformWorkGroupSize>(
354 *this, IRPosition::function(*Caller), DepClassTy::REQUIRED);
355 if (!CallerInfo)
356 return false;
357
358 Change = Change | clampStateAndIndicateChange(this->getState(),
359 CallerInfo->getState());
360
361 return true;
362 };
363
364 bool AllCallSitesKnown = true;
365 if (!A.checkForAllCallSites(CheckCallSite, *this, true, AllCallSitesKnown))
366 return indicatePessimisticFixpoint();
367
368 return Change;
369 }
370
371 ChangeStatus manifest(Attributor &A) override {
373 LLVMContext &Ctx = getAssociatedFunction()->getContext();
374
375 AttrList.push_back(Attribute::get(Ctx, "uniform-work-group-size",
376 getAssumed() ? "true" : "false"));
377 return A.manifestAttrs(getIRPosition(), AttrList,
378 /* ForceReplace */ true);
379 }
380
381 bool isValidState() const override {
382 // This state is always valid, even when the state is false.
383 return true;
384 }
385
386 const std::string getAsStr(Attributor *) const override {
387 return "AMDWorkGroupSize[" + std::to_string(getAssumed()) + "]";
388 }
389
390 /// See AbstractAttribute::trackStatistics()
391 void trackStatistics() const override {}
392};
393
394AAUniformWorkGroupSize &
395AAUniformWorkGroupSize::createForPosition(const IRPosition &IRP,
396 Attributor &A) {
398 return *new (A.Allocator) AAUniformWorkGroupSizeFunction(IRP, A);
400 "AAUniformWorkGroupSize is only valid for function position");
401}
402
403struct AAAMDAttributesFunction : public AAAMDAttributes {
404 AAAMDAttributesFunction(const IRPosition &IRP, Attributor &A)
405 : AAAMDAttributes(IRP, A) {}
406
407 void initialize(Attributor &A) override {
408 Function *F = getAssociatedFunction();
409
410 // If the function requires the implicit arg pointer due to sanitizers,
411 // assume it's needed even if explicitly marked as not requiring it.
412 const bool NeedsHostcall = funcRequiresHostcallPtr(*F);
413 if (NeedsHostcall) {
414 removeAssumedBits(IMPLICIT_ARG_PTR);
415 removeAssumedBits(HOSTCALL_PTR);
416 }
417
418 for (auto Attr : ImplicitAttrs) {
419 if (NeedsHostcall &&
420 (Attr.first == IMPLICIT_ARG_PTR || Attr.first == HOSTCALL_PTR))
421 continue;
422
423 if (F->hasFnAttribute(Attr.second))
424 addKnownBits(Attr.first);
425 }
426
427 if (F->isDeclaration())
428 return;
429
430 // Ignore functions with graphics calling conventions, these are currently
431 // not allowed to have kernel arguments.
432 if (AMDGPU::isGraphics(F->getCallingConv())) {
433 indicatePessimisticFixpoint();
434 return;
435 }
436 }
437
438 ChangeStatus updateImpl(Attributor &A) override {
439 Function *F = getAssociatedFunction();
440 // The current assumed state used to determine a change.
441 auto OrigAssumed = getAssumed();
442
443 // Check for Intrinsics and propagate attributes.
444 const AACallEdges *AAEdges = A.getAAFor<AACallEdges>(
445 *this, this->getIRPosition(), DepClassTy::REQUIRED);
446 if (!AAEdges || AAEdges->hasNonAsmUnknownCallee())
447 return indicatePessimisticFixpoint();
448
449 bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(F->getCallingConv());
450
451 bool NeedsImplicit = false;
452 auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
453 bool HasApertureRegs = InfoCache.hasApertureRegs(*F);
454 bool SupportsGetDoorbellID = InfoCache.supportsGetDoorbellID(*F);
455 unsigned COV = InfoCache.getCodeObjectVersion();
456
457 for (Function *Callee : AAEdges->getOptimisticEdges()) {
458 Intrinsic::ID IID = Callee->getIntrinsicID();
459 if (IID == Intrinsic::not_intrinsic) {
460 const AAAMDAttributes *AAAMD = A.getAAFor<AAAMDAttributes>(
461 *this, IRPosition::function(*Callee), DepClassTy::REQUIRED);
462 if (!AAAMD)
463 return indicatePessimisticFixpoint();
464 *this &= *AAAMD;
465 continue;
466 }
467
468 bool NonKernelOnly = false;
469 ImplicitArgumentMask AttrMask =
470 intrinsicToAttrMask(IID, NonKernelOnly, NeedsImplicit,
471 HasApertureRegs, SupportsGetDoorbellID, COV);
472 if (AttrMask != NOT_IMPLICIT_INPUT) {
473 if ((IsNonEntryFunc || !NonKernelOnly))
474 removeAssumedBits(AttrMask);
475 }
476 }
477
478 // Need implicitarg_ptr to acess queue_ptr, private_base, and shared_base.
479 if (NeedsImplicit)
480 removeAssumedBits(IMPLICIT_ARG_PTR);
481
482 if (isAssumed(QUEUE_PTR) && checkForQueuePtr(A)) {
483 // Under V5, we need implicitarg_ptr + offsets to access private_base or
484 // shared_base. We do not actually need queue_ptr.
485 if (COV >= 5)
486 removeAssumedBits(IMPLICIT_ARG_PTR);
487 else
488 removeAssumedBits(QUEUE_PTR);
489 }
490
491 if (funcRetrievesMultigridSyncArg(A, COV)) {
492 assert(!isAssumed(IMPLICIT_ARG_PTR) &&
493 "multigrid_sync_arg needs implicitarg_ptr");
494 removeAssumedBits(MULTIGRID_SYNC_ARG);
495 }
496
497 if (funcRetrievesHostcallPtr(A, COV)) {
498 assert(!isAssumed(IMPLICIT_ARG_PTR) && "hostcall needs implicitarg_ptr");
499 removeAssumedBits(HOSTCALL_PTR);
500 }
501
502 if (funcRetrievesHeapPtr(A, COV)) {
503 assert(!isAssumed(IMPLICIT_ARG_PTR) && "heap_ptr needs implicitarg_ptr");
504 removeAssumedBits(HEAP_PTR);
505 }
506
507 if (isAssumed(QUEUE_PTR) && funcRetrievesQueuePtr(A, COV)) {
508 assert(!isAssumed(IMPLICIT_ARG_PTR) && "queue_ptr needs implicitarg_ptr");
509 removeAssumedBits(QUEUE_PTR);
510 }
511
512 if (isAssumed(LDS_KERNEL_ID) && funcRetrievesLDSKernelId(A)) {
513 removeAssumedBits(LDS_KERNEL_ID);
514 }
515
516 if (isAssumed(DEFAULT_QUEUE) && funcRetrievesDefaultQueue(A, COV))
517 removeAssumedBits(DEFAULT_QUEUE);
518
519 if (isAssumed(COMPLETION_ACTION) && funcRetrievesCompletionAction(A, COV))
520 removeAssumedBits(COMPLETION_ACTION);
521
522 return getAssumed() != OrigAssumed ? ChangeStatus::CHANGED
523 : ChangeStatus::UNCHANGED;
524 }
525
526 ChangeStatus manifest(Attributor &A) override {
528 LLVMContext &Ctx = getAssociatedFunction()->getContext();
529
530 for (auto Attr : ImplicitAttrs) {
531 if (isKnown(Attr.first))
532 AttrList.push_back(Attribute::get(Ctx, Attr.second));
533 }
534
535 return A.manifestAttrs(getIRPosition(), AttrList,
536 /* ForceReplace */ true);
537 }
538
539 const std::string getAsStr(Attributor *) const override {
540 std::string Str;
542 OS << "AMDInfo[";
543 for (auto Attr : ImplicitAttrs)
544 if (isAssumed(Attr.first))
545 OS << ' ' << Attr.second;
546 OS << " ]";
547 return OS.str();
548 }
549
550 /// See AbstractAttribute::trackStatistics()
551 void trackStatistics() const override {}
552
553private:
554 bool checkForQueuePtr(Attributor &A) {
555 Function *F = getAssociatedFunction();
556 bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(F->getCallingConv());
557
558 auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
559
560 bool NeedsQueuePtr = false;
561
562 auto CheckAddrSpaceCasts = [&](Instruction &I) {
563 unsigned SrcAS = static_cast<AddrSpaceCastInst &>(I).getSrcAddressSpace();
564 if (castRequiresQueuePtr(SrcAS)) {
565 NeedsQueuePtr = true;
566 return false;
567 }
568 return true;
569 };
570
571 bool HasApertureRegs = InfoCache.hasApertureRegs(*F);
572
573 // `checkForAllInstructions` is much more cheaper than going through all
574 // instructions, try it first.
575
576 // The queue pointer is not needed if aperture regs is present.
577 if (!HasApertureRegs) {
578 bool UsedAssumedInformation = false;
579 A.checkForAllInstructions(CheckAddrSpaceCasts, *this,
580 {Instruction::AddrSpaceCast},
581 UsedAssumedInformation);
582 }
583
584 // If we found that we need the queue pointer, nothing else to do.
585 if (NeedsQueuePtr)
586 return true;
587
588 if (!IsNonEntryFunc && HasApertureRegs)
589 return false;
590
591 for (BasicBlock &BB : *F) {
592 for (Instruction &I : BB) {
593 for (const Use &U : I.operands()) {
594 if (const auto *C = dyn_cast<Constant>(U)) {
595 if (InfoCache.needsQueuePtr(C, *F))
596 return true;
597 }
598 }
599 }
600 }
601
602 return false;
603 }
604
605 bool funcRetrievesMultigridSyncArg(Attributor &A, unsigned COV) {
607 AA::RangeTy Range(Pos, 8);
608 return funcRetrievesImplicitKernelArg(A, Range);
609 }
610
611 bool funcRetrievesHostcallPtr(Attributor &A, unsigned COV) {
613 AA::RangeTy Range(Pos, 8);
614 return funcRetrievesImplicitKernelArg(A, Range);
615 }
616
617 bool funcRetrievesDefaultQueue(Attributor &A, unsigned COV) {
619 AA::RangeTy Range(Pos, 8);
620 return funcRetrievesImplicitKernelArg(A, Range);
621 }
622
623 bool funcRetrievesCompletionAction(Attributor &A, unsigned COV) {
625 AA::RangeTy Range(Pos, 8);
626 return funcRetrievesImplicitKernelArg(A, Range);
627 }
628
629 bool funcRetrievesHeapPtr(Attributor &A, unsigned COV) {
630 if (COV < 5)
631 return false;
633 return funcRetrievesImplicitKernelArg(A, Range);
634 }
635
636 bool funcRetrievesQueuePtr(Attributor &A, unsigned COV) {
637 if (COV < 5)
638 return false;
640 return funcRetrievesImplicitKernelArg(A, Range);
641 }
642
643 bool funcRetrievesImplicitKernelArg(Attributor &A, AA::RangeTy Range) {
644 // Check if this is a call to the implicitarg_ptr builtin and it
645 // is used to retrieve the hostcall pointer. The implicit arg for
646 // hostcall is not used only if every use of the implicitarg_ptr
647 // is a load that clearly does not retrieve any byte of the
648 // hostcall pointer. We check this by tracing all the uses of the
649 // initial call to the implicitarg_ptr intrinsic.
650 auto DoesNotLeadToKernelArgLoc = [&](Instruction &I) {
651 auto &Call = cast<CallBase>(I);
652 if (Call.getIntrinsicID() != Intrinsic::amdgcn_implicitarg_ptr)
653 return true;
654
655 const auto *PointerInfoAA = A.getAAFor<AAPointerInfo>(
656 *this, IRPosition::callsite_returned(Call), DepClassTy::REQUIRED);
657 if (!PointerInfoAA)
658 return false;
659
660 return PointerInfoAA->forallInterferingAccesses(
661 Range, [](const AAPointerInfo::Access &Acc, bool IsExact) {
662 return Acc.getRemoteInst()->isDroppable();
663 });
664 };
665
666 bool UsedAssumedInformation = false;
667 return !A.checkForAllCallLikeInstructions(DoesNotLeadToKernelArgLoc, *this,
668 UsedAssumedInformation);
669 }
670
671 bool funcRetrievesLDSKernelId(Attributor &A) {
672 auto DoesNotRetrieve = [&](Instruction &I) {
673 auto &Call = cast<CallBase>(I);
674 return Call.getIntrinsicID() != Intrinsic::amdgcn_lds_kernel_id;
675 };
676 bool UsedAssumedInformation = false;
677 return !A.checkForAllCallLikeInstructions(DoesNotRetrieve, *this,
678 UsedAssumedInformation);
679 }
680};
681
682AAAMDAttributes &AAAMDAttributes::createForPosition(const IRPosition &IRP,
683 Attributor &A) {
685 return *new (A.Allocator) AAAMDAttributesFunction(IRP, A);
686 llvm_unreachable("AAAMDAttributes is only valid for function position");
687}
688
689/// Base class to derive different size ranges.
690struct AAAMDSizeRangeAttribute
691 : public StateWrapper<IntegerRangeState, AbstractAttribute, uint32_t> {
693
694 StringRef AttrName;
695
696 AAAMDSizeRangeAttribute(const IRPosition &IRP, Attributor &A,
697 StringRef AttrName)
698 : Base(IRP, 32), AttrName(AttrName) {}
699
700 /// See AbstractAttribute::trackStatistics()
701 void trackStatistics() const override {}
702
703 template <class AttributeImpl>
704 ChangeStatus updateImplImpl(Attributor &A) {
706
707 auto CheckCallSite = [&](AbstractCallSite CS) {
708 Function *Caller = CS.getInstruction()->getFunction();
709 LLVM_DEBUG(dbgs() << '[' << getName() << "] Call " << Caller->getName()
710 << "->" << getAssociatedFunction()->getName() << '\n');
711
712 const auto *CallerInfo = A.getAAFor<AttributeImpl>(
714 if (!CallerInfo)
715 return false;
716
717 Change |=
718 clampStateAndIndicateChange(this->getState(), CallerInfo->getState());
719
720 return true;
721 };
722
723 bool AllCallSitesKnown = true;
724 if (!A.checkForAllCallSites(CheckCallSite, *this, true, AllCallSitesKnown))
725 return indicatePessimisticFixpoint();
726
727 return Change;
728 }
729
730 ChangeStatus emitAttributeIfNotDefault(Attributor &A, unsigned Min,
731 unsigned Max) {
732 // Don't add the attribute if it's the implied default.
733 if (getAssumed().getLower() == Min && getAssumed().getUpper() - 1 == Max)
735
736 Function *F = getAssociatedFunction();
737 LLVMContext &Ctx = F->getContext();
738 SmallString<10> Buffer;
739 raw_svector_ostream OS(Buffer);
740 OS << getAssumed().getLower() << ',' << getAssumed().getUpper() - 1;
741 return A.manifestAttrs(getIRPosition(),
742 {Attribute::get(Ctx, AttrName, OS.str())},
743 /* ForceReplace */ true);
744 }
745
746 const std::string getAsStr(Attributor *) const override {
747 std::string Str;
749 OS << getName() << '[';
750 OS << getAssumed().getLower() << ',' << getAssumed().getUpper() - 1;
751 OS << ']';
752 return OS.str();
753 }
754};
755
/// Propagate amdgpu-flat-work-group-size attribute.
struct AAAMDFlatWorkGroupSize : public AAAMDSizeRangeAttribute {
  AAAMDFlatWorkGroupSize(const IRPosition &IRP, Attributor &A)
      : AAAMDSizeRangeAttribute(IRP, A, "amdgpu-flat-work-group-size") {}

  void initialize(Attributor &A) override {
    Function *F = getAssociatedFunction();
    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
    unsigned MinGroupSize, MaxGroupSize;
    std::tie(MinGroupSize, MaxGroupSize) = InfoCache.getFlatWorkGroupSizes(*F);
    // The state's upper bound is exclusive, hence MaxGroupSize + 1.
    intersectKnown(
        ConstantRange(APInt(32, MinGroupSize), APInt(32, MaxGroupSize + 1)));

    // Entry points take their range from the subtarget/attributes directly;
    // there are no callers to propagate from, so fix the state now.
    if (AMDGPU::isEntryFunctionCC(F->getCallingConv()))
      indicatePessimisticFixpoint();
  }

  ChangeStatus updateImpl(Attributor &A) override {
    // Shared caller-clamping update from AAAMDSizeRangeAttribute.
    return updateImplImpl<AAAMDFlatWorkGroupSize>(A);
  }

  /// Create an abstract attribute view for the position \p IRP.
  static AAAMDFlatWorkGroupSize &createForPosition(const IRPosition &IRP,
                                                   Attributor &A);

  ChangeStatus manifest(Attributor &A) override {
    Function *F = getAssociatedFunction();
    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
    unsigned Min, Max;
    // Compare against the subtarget-wide default range so the attribute is
    // only emitted when it actually narrows the default.
    std::tie(Min, Max) = InfoCache.getMaximumFlatWorkGroupRange(*F);
    return emitAttributeIfNotDefault(A, Min, Max);
  }

  /// See AbstractAttribute::getName()
  const std::string getName() const override {
    return "AAAMDFlatWorkGroupSize";
  }

  /// See AbstractAttribute::getIdAddr()
  const char *getIdAddr() const override { return &ID; }

  /// This function should return true if the type of the \p AA is
  /// AAAMDFlatWorkGroupSize
  static bool classof(const AbstractAttribute *AA) {
    return (AA->getIdAddr() == &ID);
  }

  /// Unique ID (due to the unique address)
  static const char ID;
};

const char AAAMDFlatWorkGroupSize::ID = 0;
808
809AAAMDFlatWorkGroupSize &
810AAAMDFlatWorkGroupSize::createForPosition(const IRPosition &IRP,
811 Attributor &A) {
813 return *new (A.Allocator) AAAMDFlatWorkGroupSize(IRP, A);
815 "AAAMDFlatWorkGroupSize is only valid for function position");
816}
817
/// Propagate amdgpu-waves-per-eu attribute.
struct AAAMDWavesPerEU : public AAAMDSizeRangeAttribute {
  AAAMDWavesPerEU(const IRPosition &IRP, Attributor &A)
      : AAAMDSizeRangeAttribute(IRP, A, "amdgpu-waves-per-eu") {}

  bool isValidState() const override {
    // An empty assumed range means the clamping produced a contradiction.
    return !Assumed.isEmptySet() && IntegerRangeState::isValidState();
  }

  void initialize(Attributor &A) override {
    Function *F = getAssociatedFunction();
    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());

    // Seed from the waves-per-EU implied by the assumed flat workgroup size
    // (the two attributes interact; see AMDGPUInformationCache::getWavesPerEU).
    if (const auto *AssumedGroupSize = A.getAAFor<AAAMDFlatWorkGroupSize>(
            *this, IRPosition::function(*F), DepClassTy::REQUIRED)) {

      unsigned Min, Max;
      std::tie(Min, Max) = InfoCache.getWavesPerEU(
          *F, {AssumedGroupSize->getAssumed().getLower().getZExtValue(),
               AssumedGroupSize->getAssumed().getUpper().getZExtValue() - 1});

      // The state's upper bound is exclusive, hence Max + 1.
      ConstantRange Range(APInt(32, Min), APInt(32, Max + 1));
      intersectKnown(Range);
    }

    // Entry points have no callers to propagate from; fix the state now.
    if (AMDGPU::isEntryFunctionCC(F->getCallingConv()))
      indicatePessimisticFixpoint();
  }

  ChangeStatus updateImpl(Attributor &A) override {
    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
    ChangeStatus Change = ChangeStatus::UNCHANGED;

    // Clamp against the effective waves-per-EU of every caller, computed
    // from the caller's range combined with this function's assumed flat
    // workgroup size.
    auto CheckCallSite = [&](AbstractCallSite CS) {
      Function *Caller = CS.getInstruction()->getFunction();
      Function *Func = getAssociatedFunction();
      LLVM_DEBUG(dbgs() << '[' << getName() << "] Call " << Caller->getName()
                        << "->" << Func->getName() << '\n');

      const auto *CallerInfo = A.getAAFor<AAAMDWavesPerEU>(
          *this, IRPosition::function(*Caller), DepClassTy::REQUIRED);
      const auto *AssumedGroupSize = A.getAAFor<AAAMDFlatWorkGroupSize>(
          *this, IRPosition::function(*Func), DepClassTy::REQUIRED);
      if (!CallerInfo || !AssumedGroupSize)
        return false;

      unsigned Min, Max;
      std::tie(Min, Max) = InfoCache.getEffectiveWavesPerEU(
          *Caller,
          {CallerInfo->getAssumed().getLower().getZExtValue(),
           CallerInfo->getAssumed().getUpper().getZExtValue() - 1},
          {AssumedGroupSize->getAssumed().getLower().getZExtValue(),
           AssumedGroupSize->getAssumed().getUpper().getZExtValue() - 1});
      ConstantRange CallerRange(APInt(32, Min), APInt(32, Max + 1));
      IntegerRangeState CallerRangeState(CallerRange);
      Change |= clampStateAndIndicateChange(this->getState(), CallerRangeState);

      return true;
    };

    bool AllCallSitesKnown = true;
    if (!A.checkForAllCallSites(CheckCallSite, *this, true, AllCallSitesKnown))
      return indicatePessimisticFixpoint();

    return Change;
  }

  /// Create an abstract attribute view for the position \p IRP.
  static AAAMDWavesPerEU &createForPosition(const IRPosition &IRP,
                                            Attributor &A);

  ChangeStatus manifest(Attributor &A) override {
    Function *F = getAssociatedFunction();
    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
    // Default is [1, subtarget max]; only emit when narrower.
    unsigned Max = InfoCache.getMaxWavesPerEU(*F);
    return emitAttributeIfNotDefault(A, 1, Max);
  }

  /// See AbstractAttribute::getName()
  const std::string getName() const override { return "AAAMDWavesPerEU"; }

  /// See AbstractAttribute::getIdAddr()
  const char *getIdAddr() const override { return &ID; }

  /// This function should return true if the type of the \p AA is
  /// AAAMDWavesPerEU
  static bool classof(const AbstractAttribute *AA) {
    return (AA->getIdAddr() == &ID);
  }

  /// Unique ID (due to the unique address)
  static const char ID;
};

const char AAAMDWavesPerEU::ID = 0;
913
914AAAMDWavesPerEU &AAAMDWavesPerEU::createForPosition(const IRPosition &IRP,
915 Attributor &A) {
917 return *new (A.Allocator) AAAMDWavesPerEU(IRP, A);
918 llvm_unreachable("AAAMDWavesPerEU is only valid for function position");
919}
920
921static bool inlineAsmUsesAGPRs(const InlineAsm *IA) {
922 for (const auto &CI : IA->ParseConstraints()) {
923 for (StringRef Code : CI.Codes) {
924 Code.consume_front("{");
925 if (Code.starts_with("a"))
926 return true;
927 }
928 }
929
930 return false;
931}
932
933struct AAAMDGPUNoAGPR
934 : public IRAttribute<Attribute::NoUnwind,
935 StateWrapper<BooleanState, AbstractAttribute>,
936 AAAMDGPUNoAGPR> {
937 AAAMDGPUNoAGPR(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {}
938
939 static AAAMDGPUNoAGPR &createForPosition(const IRPosition &IRP,
940 Attributor &A) {
942 return *new (A.Allocator) AAAMDGPUNoAGPR(IRP, A);
943 llvm_unreachable("AAAMDGPUNoAGPR is only valid for function position");
944 }
945
946 void initialize(Attributor &A) override {
947 Function *F = getAssociatedFunction();
948 if (F->hasFnAttribute("amdgpu-no-agpr"))
949 indicateOptimisticFixpoint();
950 }
951
952 const std::string getAsStr(Attributor *A) const override {
953 return getAssumed() ? "amdgpu-no-agpr" : "amdgpu-maybe-agpr";
954 }
955
956 void trackStatistics() const override {}
957
958 ChangeStatus updateImpl(Attributor &A) override {
959 // TODO: Use AACallEdges, but then we need a way to inspect asm edges.
960
961 auto CheckForNoAGPRs = [&](Instruction &I) {
962 const auto &CB = cast<CallBase>(I);
963 const Value *CalleeOp = CB.getCalledOperand();
964 const Function *Callee = dyn_cast<Function>(CalleeOp);
965 if (!Callee) {
966 if (const InlineAsm *IA = dyn_cast<InlineAsm>(CalleeOp))
967 return !inlineAsmUsesAGPRs(IA);
968 return false;
969 }
970
971 // Some intrinsics may use AGPRs, but if we have a choice, we are not
972 // required to use AGPRs.
973 if (Callee->isIntrinsic())
974 return true;
975
976 // TODO: Handle callsite attributes
977 const auto *CalleeInfo = A.getAAFor<AAAMDGPUNoAGPR>(
979 return CalleeInfo && CalleeInfo->getAssumed();
980 };
981
982 bool UsedAssumedInformation = false;
983 if (!A.checkForAllCallLikeInstructions(CheckForNoAGPRs, *this,
984 UsedAssumedInformation))
985 return indicatePessimisticFixpoint();
987 }
988
989 ChangeStatus manifest(Attributor &A) override {
990 if (!getAssumed())
992 LLVMContext &Ctx = getAssociatedFunction()->getContext();
993 return A.manifestAttrs(getIRPosition(),
994 {Attribute::get(Ctx, "amdgpu-no-agpr")});
995 }
996
997 const std::string getName() const override { return "AAAMDGPUNoAGPR"; }
998 const char *getIdAddr() const override { return &ID; }
999
1000 /// This function should return true if the type of the \p AA is
1001 /// AAAMDGPUNoAGPRs
1002 static bool classof(const AbstractAttribute *AA) {
1003 return (AA->getIdAddr() == &ID);
1004 }
1005
1006 static const char ID;
1007};
1008
1009const char AAAMDGPUNoAGPR::ID = 0;
1010
1011static void addPreloadKernArgHint(Function &F, TargetMachine &TM) {
1012 const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
1013 for (unsigned I = 0;
1014 I < F.arg_size() &&
1015 I < std::min(KernargPreloadCount.getValue(), ST.getMaxNumUserSGPRs());
1016 ++I) {
1017 Argument &Arg = *F.getArg(I);
1018 // Check for incompatible attributes.
1019 if (Arg.hasByRefAttr() || Arg.hasNestAttr())
1020 break;
1021
1022 Arg.addAttr(Attribute::InReg);
1023 }
1024}
1025
1026static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM) {
1027 SetVector<Function *> Functions;
1028 for (Function &F : M) {
1029 if (!F.isIntrinsic())
1030 Functions.insert(&F);
1031 }
1032
1033 CallGraphUpdater CGUpdater;
1035 AMDGPUInformationCache InfoCache(M, AG, Allocator, nullptr, TM);
1037 {&AAAMDAttributes::ID, &AAUniformWorkGroupSize::ID,
1038 &AAPotentialValues::ID, &AAAMDFlatWorkGroupSize::ID,
1039 &AAAMDWavesPerEU::ID, &AAAMDGPUNoAGPR::ID, &AACallEdges::ID,
1042
1043 AttributorConfig AC(CGUpdater);
1044 AC.Allowed = &Allowed;
1045 AC.IsModulePass = true;
1046 AC.DefaultInitializeLiveInternals = false;
1047 AC.IPOAmendableCB = [](const Function &F) {
1048 return F.getCallingConv() == CallingConv::AMDGPU_KERNEL;
1049 };
1050
1051 Attributor A(Functions, InfoCache, AC);
1052
1053 for (Function &F : M) {
1054 if (!F.isIntrinsic()) {
1055 A.getOrCreateAAFor<AAAMDAttributes>(IRPosition::function(F));
1056 A.getOrCreateAAFor<AAUniformWorkGroupSize>(IRPosition::function(F));
1057 A.getOrCreateAAFor<AAAMDGPUNoAGPR>(IRPosition::function(F));
1058 CallingConv::ID CC = F.getCallingConv();
1060 A.getOrCreateAAFor<AAAMDFlatWorkGroupSize>(IRPosition::function(F));
1061 A.getOrCreateAAFor<AAAMDWavesPerEU>(IRPosition::function(F));
1062 } else if (CC == CallingConv::AMDGPU_KERNEL) {
1063 addPreloadKernArgHint(F, TM);
1064 }
1065 }
1066 }
1067
1068 ChangeStatus Change = A.run();
1069 return Change == ChangeStatus::CHANGED;
1070}
1071
1072class AMDGPUAttributorLegacy : public ModulePass {
1073public:
1074 AMDGPUAttributorLegacy() : ModulePass(ID) {}
1075
1076 /// doInitialization - Virtual method overridden by subclasses to do
1077 /// any necessary initialization before any pass is run.
1078 bool doInitialization(Module &) override {
1079 auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
1080 if (!TPC)
1081 report_fatal_error("TargetMachine is required");
1082
1083 TM = &TPC->getTM<TargetMachine>();
1084 return false;
1085 }
1086
1087 bool runOnModule(Module &M) override {
1088 AnalysisGetter AG(this);
1089 return runImpl(M, AG, *TM);
1090 }
1091
1092 void getAnalysisUsage(AnalysisUsage &AU) const override {
1094 }
1095
1096 StringRef getPassName() const override { return "AMDGPU Attributor"; }
1098 static char ID;
1099};
1100} // namespace
1101
1104
1107 AnalysisGetter AG(FAM);
1108
1109 // TODO: Probably preserves CFG
1110 return runImpl(M, AG, TM) ? PreservedAnalyses::none()
1112}
1113
1114char AMDGPUAttributorLegacy::ID = 0;
1115
1117 return new AMDGPUAttributorLegacy();
1118}
1119INITIALIZE_PASS_BEGIN(AMDGPUAttributorLegacy, DEBUG_TYPE, "AMDGPU Attributor",
1120 false, false)
1122INITIALIZE_PASS_END(AMDGPUAttributorLegacy, DEBUG_TYPE, "AMDGPU Attributor",
1123 false, false)
static cl::opt< unsigned > KernargPreloadCount("amdgpu-kernarg-preload-count", cl::desc("How many kernel arguments to preload onto SGPRs"), cl::init(0))
static bool isDSAddress(const Constant *C)
static constexpr std::pair< ImplicitArgumentMask, StringLiteral > ImplicitAttrs[]
static ImplicitArgumentMask intrinsicToAttrMask(Intrinsic::ID ID, bool &NonKernelOnly, bool &NeedsImplicit, bool HasApertureRegs, bool SupportsGetDoorBellID, unsigned CodeObjectVersion)
ImplicitArgumentMask
@ NOT_IMPLICIT_INPUT
@ ALL_ARGUMENT_MASK
static bool funcRequiresHostcallPtr(const Function &F)
Returns true if the function requires the implicit argument be passed regardless of the function cont...
ImplicitArgumentPositions
@ LAST_ARG_POS
static bool castRequiresQueuePtr(unsigned SrcAS)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
This file declares an analysis pass that computes CycleInfo for LLVM IR, specialized from GenericCycl...
#define LLVM_DEBUG(X)
Definition: Debug.h:101
static bool runImpl(Function &F, const TargetLowering &TLI)
AMD GCN specific subclass of TargetSubtarget.
#define DEBUG_TYPE
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
FunctionAnalysisManager FAM
const char LLVMTargetMachineRef TM
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:55
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:59
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52
static StringRef getName(Value *V)
Basic Register Allocator
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
raw_pwrite_stream & OS
static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, ArrayRef< StringLiteral > StandardNames)
Initialize the set of available library functions based on the specified target triple.
Target-Independent Code Generator Pass Configuration Options pass.
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
Class for arbitrary precision integers.
Definition: APInt.h:77
AbstractCallSite.
This class represents a conversion between pointers from one address space to another.
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:321
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:473
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
This class represents an incoming formal argument to a Function.
Definition: Argument.h:31
bool hasByRefAttr() const
Return true if this argument has the byref attribute.
Definition: Function.cpp:146
void addAttr(Attribute::AttrKind Kind)
Definition: Function.cpp:328
bool hasNestAttr() const
Return true if this argument has the nest attribute.
Definition: Function.cpp:275
static Attribute get(LLVMContext &Context, AttrKind Kind, uint64_t Val=0)
Return a uniquified Attribute object.
Definition: Attributes.cpp:93
LLVM Basic Block Representation.
Definition: BasicBlock.h:60
Allocate memory in an ever growing pool, as if by bump-pointer.
Definition: Allocator.h:66
Wrapper to unify "old style" CallGraph and "new style" LazyCallGraph.
A constant value that is initialized with an expression using other constant values.
Definition: Constants.h:1084
This class represents a range of values.
Definition: ConstantRange.h:47
This is an important base class in LLVM.
Definition: Constant.h:41
Legacy analysis pass which computes a CycleInfo.
Definition: CycleAnalysis.h:26
Implements a dense probed hash-table based set.
Definition: DenseSet.h:271
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:264
unsigned getAddressSpace() const
Definition: GlobalValue.h:204
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
Definition: PassManager.h:631
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
ModulePass class - This class is used to implement unstructured interprocedural optimizations and ana...
Definition: Pass.h:251
virtual bool runOnModule(Module &M)=0
runOnModule - Virtual method overridden by subclasses to process the module being operated on.
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
PassRegistry - This class manages the registration and initialization of the pass subsystem as appli...
Definition: PassRegistry.h:37
Pass interface - Implemented by all 'passes'.
Definition: Pass.h:94
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overriden by passes that need analysis information to do t...
Definition: Pass.cpp:98
virtual bool doInitialization(Module &)
doInitialization - Virtual method overridden by subclasses to do any necessary initialization before ...
Definition: Pass.h:119
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
Definition: Pass.cpp:81
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:109
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition: Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:115
A vector that has set insertion semantics.
Definition: SetVector.h:57
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:162
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:321
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:342
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:427
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition: SmallString.h:26
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
A wrapper around a string literal that serves as a proxy for constructing global tables of StringRefs...
Definition: StringRef.h:846
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:76
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
bool isDroppable() const
A droppable user is a user for which uses can be dropped without affecting correctness and should be ...
Definition: User.cpp:115
LLVM Value Representation.
Definition: Value.h:74
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:661
A raw_ostream that writes to an SmallVector or SmallString.
Definition: raw_ostream.h:691
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ REGION_ADDRESS
Address space for region memory. (GDS)
@ LOCAL_ADDRESS
Address space for local memory.
@ PRIVATE_ADDRESS
Address space for private memory.
unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI)
bool isEntryFunctionCC(CallingConv::ID CC)
unsigned getAMDHSACodeObjectVersion(const Module &M)
unsigned getDefaultQueueImplicitArgPosition(unsigned CodeObjectVersion)
unsigned getHostcallImplicitArgPosition(unsigned CodeObjectVersion)
unsigned getCompletionActionImplicitArgPosition(unsigned CodeObjectVersion)
unsigned getMultigridSyncArgImplicitArgPosition(unsigned CodeObjectVersion)
bool isGraphics(CallingConv::ID cc)
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
Definition: CallingConv.h:200
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
@ CE
Windows NT (Windows on ARM)
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:450
NodeAddr< FuncNode * > Func
Definition: RDFGraph.h:393
NodeAddr< CodeNode * > Code
Definition: RDFGraph.h:388
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
void initializeCycleInfoWrapperPassPass(PassRegistry &)
@ CGSCC
Definition: Attributor.h:6426
Pass * createAMDGPUAttributorLegacyPass()
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:159
ChangeStatus clampStateAndIndicateChange(StateType &S, const StateType &R)
Helper function to clamp a state S of type StateType with the information in R and indicate/return if...
Definition: Attributor.h:3462
ChangeStatus
{
Definition: Attributor.h:483
@ REQUIRED
The target cannot be valid if the source is not.
An abstract state for querying live call edges.
Definition: Attributor.h:5485
virtual const SetVector< Function * > & getOptimisticEdges() const =0
Get the optimistic edges.
static const char ID
Unique ID (due to the unique address)
Definition: Attributor.h:5528
virtual bool hasNonAsmUnknownCallee() const =0
Is there any call with a unknown callee, excluding any inline asm.
An access description.
Definition: Attributor.h:5939
Instruction * getRemoteInst() const
Return the actual instruction that causes the access.
Definition: Attributor.h:6039
An abstract interface for struct information.
Definition: Attributor.h:5753
virtual bool forallInterferingAccesses(AA::RangeTy Range, function_ref< bool(const Access &, bool)> CB) const =0
Call CB on all accesses that might interfere with Range and return true if all such accesses were kno...
static const char ID
Unique ID (due to the unique address)
Definition: Attributor.h:6153
static const char ID
Unique ID (due to the unique address)
Definition: Attributor.h:5302
static const char ID
Unique ID (due to the unique address)
Definition: Attributor.h:5339
static const char ID
Unique ID (due to the unique address)
Definition: Attributor.h:6223
Helper to represent an access offset and size, with logic to deal with uncertainty and check for over...
Definition: Attributor.h:236
Base struct for all "concrete attribute" deductions.
Definition: Attributor.h:3282
virtual const char * getIdAddr() const =0
This function should return the address of the ID of the AbstractAttribute.
Wrapper for FunctionAnalysisManager.
Definition: Attributor.h:1121
Configuration for the Attributor.
Definition: Attributor.h:1413
The fixpoint analysis framework that orchestrates the attribute deduction.
Definition: Attributor.h:1507
Class to accumulate and hold information about a callee.
Helper class that provides common functionality to manifest IR attributes.
Definition: Attributor.h:3187
ChangeStatus manifest(Attributor &A) override
See AbstractAttribute::manifest(...).
Definition: Attributor.h:3219
Helper to describe and deal with positions in the LLVM-IR.
Definition: Attributor.h:580
static const IRPosition callsite_returned(const CallBase &CB)
Create a position describing the returned value of CB.
Definition: Attributor.h:648
@ IRP_FUNCTION
An attribute for a function (scope).
Definition: Attributor.h:592
static const IRPosition function(const Function &F, const CallBaseContext *CBContext=nullptr)
Create a position describing the function scope of F.
Definition: Attributor.h:623
Kind getPositionKind() const
Return the associated position kind.
Definition: Attributor.h:876
Data structure to hold cached (LLVM-IR) information.
Definition: Attributor.h:1197
State for an integer range.
Definition: Attributor.h:2928
bool isValidState() const override
See AbstractState::isValidState()
Definition: Attributor.h:2964
Helper to tie a abstract state implementation to an abstract attribute.
Definition: Attributor.h:3171
StateType & getState() override
See AbstractAttribute::getState(...).
Definition: Attributor.h:3179