AMDGPUAttributor.cpp
1//===- AMDGPUAttributor.cpp -----------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file This pass uses the Attributor framework to deduce AMDGPU attributes.
10//
11//===----------------------------------------------------------------------===//
12
13#include "AMDGPU.h"
14#include "GCNSubtarget.h"
19#include "llvm/IR/IntrinsicsAMDGPU.h"
20#include "llvm/IR/IntrinsicsR600.h"
23
24#define DEBUG_TYPE "amdgpu-attributor"
25
26namespace llvm {
27void initializeCycleInfoWrapperPassPass(PassRegistry &);
28} // namespace llvm
29
30using namespace llvm;
31
33 "amdgpu-kernarg-preload-count",
34 cl::desc("How many kernel arguments to preload onto SGPRs"), cl::init(0));
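// For illustration (the invocation below is only a sketch; the triple and CPU
// are placeholders), the option above is a plain cl::opt and can be passed to
// the backend tools directly:
//
//   llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 \
//       -amdgpu-kernarg-preload-count=4 kernel.ll -o kernel.s
//
// With a nonzero count, addPreloadKernArgHint() near the end of this file
// marks up to that many leading kernel arguments 'inreg' (subject to the
// subtarget's user-SGPR limit) so they can be preloaded into SGPRs.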
35
36#define AMDGPU_ATTRIBUTE(Name, Str) Name##_POS,
37
38enum ImplicitArgumentPositions {
39 #include "AMDGPUAttributes.def"
40 LAST_ARG_POS
41};
42
43#define AMDGPU_ATTRIBUTE(Name, Str) Name = 1 << Name##_POS,
44
45enum ImplicitArgumentMask {
46 NOT_IMPLICIT_INPUT = 0,
47 #include "AMDGPUAttributes.def"
48 ALL_ARGUMENT_MASK = (1 << LAST_ARG_POS) - 1
49};
50
51#define AMDGPU_ATTRIBUTE(Name, Str) {Name, Str},
52static constexpr std::pair<ImplicitArgumentMask,
53 StringLiteral> ImplicitAttrs[] = {
54 #include "AMDGPUAttributes.def"
55};
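// For illustration, assuming AMDGPUAttributes.def carries an entry of the form
//   AMDGPU_ATTRIBUTE(DISPATCH_PTR, "amdgpu-no-dispatch-ptr")
// the three #include passes above expand it, in order, to:
//   DISPATCH_PTR_POS,                          // ImplicitArgumentPositions
//   DISPATCH_PTR = 1 << DISPATCH_PTR_POS,      // ImplicitArgumentMask
//   {DISPATCH_PTR, "amdgpu-no-dispatch-ptr"},  // ImplicitAttrs[] entry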
56
57// We do not need to note the x workitem or workgroup id because they are always
58// initialized.
59//
60// TODO: We should not add the attributes if the known compile time workgroup
61// size is 1 for y/z.
62static ImplicitArgumentMask
63intrinsicToAttrMask(Intrinsic::ID ID, bool &NonKernelOnly, bool &NeedsImplicit,
64 bool HasApertureRegs, bool SupportsGetDoorBellID,
65 unsigned CodeObjectVersion) {
66 switch (ID) {
67 case Intrinsic::amdgcn_workitem_id_x:
68 NonKernelOnly = true;
69 return WORKITEM_ID_X;
70 case Intrinsic::amdgcn_workgroup_id_x:
71 NonKernelOnly = true;
72 return WORKGROUP_ID_X;
73 case Intrinsic::amdgcn_workitem_id_y:
74 case Intrinsic::r600_read_tidig_y:
75 return WORKITEM_ID_Y;
76 case Intrinsic::amdgcn_workitem_id_z:
77 case Intrinsic::r600_read_tidig_z:
78 return WORKITEM_ID_Z;
79 case Intrinsic::amdgcn_workgroup_id_y:
80 case Intrinsic::r600_read_tgid_y:
81 return WORKGROUP_ID_Y;
82 case Intrinsic::amdgcn_workgroup_id_z:
83 case Intrinsic::r600_read_tgid_z:
84 return WORKGROUP_ID_Z;
85 case Intrinsic::amdgcn_lds_kernel_id:
86 return LDS_KERNEL_ID;
87 case Intrinsic::amdgcn_dispatch_ptr:
88 return DISPATCH_PTR;
89 case Intrinsic::amdgcn_dispatch_id:
90 return DISPATCH_ID;
91 case Intrinsic::amdgcn_implicitarg_ptr:
92 return IMPLICIT_ARG_PTR;
93 // Need queue_ptr anyway. But under V5, we also need implicitarg_ptr to access
94 // queue_ptr.
95 case Intrinsic::amdgcn_queue_ptr:
96 NeedsImplicit = (CodeObjectVersion >= AMDGPU::AMDHSA_COV5);
97 return QUEUE_PTR;
98 case Intrinsic::amdgcn_is_shared:
99 case Intrinsic::amdgcn_is_private:
100 if (HasApertureRegs)
101 return NOT_IMPLICIT_INPUT;
102 // Under V5, we need implicitarg_ptr + offsets to access private_base or
103 // shared_base. For pre-V5, however, we need to access them through queue_ptr +
104 // offsets.
105 return CodeObjectVersion >= AMDGPU::AMDHSA_COV5 ? IMPLICIT_ARG_PTR :
106 QUEUE_PTR;
107 case Intrinsic::trap:
108 if (SupportsGetDoorBellID) // GetDoorbellID support implemented since V4.
109 return CodeObjectVersion >= AMDGPU::AMDHSA_COV4 ? NOT_IMPLICIT_INPUT :
110 QUEUE_PTR;
111 NeedsImplicit = (CodeObjectVersion >= AMDGPU::AMDHSA_COV5);
112 return QUEUE_PTR;
113 default:
114 return NOT_IMPLICIT_INPUT;
115 }
116}
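// Worked example (illustrative only): a device function that calls
// llvm.amdgcn.workitem.id.y maps to WORKITEM_ID_Y here, so AAAMDAttributes
// below keeps that implicit input live and the function (and its callers)
// will not receive the matching "amdgpu-no-*" attribute from ImplicitAttrs.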
117
118static bool castRequiresQueuePtr(unsigned SrcAS) {
119 return SrcAS == AMDGPUAS::LOCAL_ADDRESS || SrcAS == AMDGPUAS::PRIVATE_ADDRESS;
120}
121
122static bool isDSAddress(const Constant *C) {
123 const GlobalValue *GV = dyn_cast<GlobalValue>(C);
124 if (!GV)
125 return false;
126 unsigned AS = GV->getAddressSpace();
127 return AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS;
128}
129
130/// Returns true if the function requires the implicit argument be passed
131/// regardless of the function contents.
132static bool funcRequiresHostcallPtr(const Function &F) {
133 // Sanitizers require the hostcall buffer passed in the implicit arguments.
134 return F.hasFnAttribute(Attribute::SanitizeAddress) ||
135 F.hasFnAttribute(Attribute::SanitizeThread) ||
136 F.hasFnAttribute(Attribute::SanitizeMemory) ||
137 F.hasFnAttribute(Attribute::SanitizeHWAddress) ||
138 F.hasFnAttribute(Attribute::SanitizeMemTag);
139}
140
141namespace {
142class AMDGPUInformationCache : public InformationCache {
143public:
144 AMDGPUInformationCache(const Module &M, AnalysisGetter &AG,
145 BumpPtrAllocator &Allocator,
146 SetVector<Function *> *CGSCC, TargetMachine &TM)
147 : InformationCache(M, AG, Allocator, CGSCC), TM(TM),
148 CodeObjectVersion(AMDGPU::getAMDHSACodeObjectVersion(M)) {}
149
150 TargetMachine &TM;
151
152 enum ConstantStatus { DS_GLOBAL = 1 << 0, ADDR_SPACE_CAST = 1 << 1 };
153
154 /// Check if the subtarget has aperture regs.
155 bool hasApertureRegs(Function &F) {
156 const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
157 return ST.hasApertureRegs();
158 }
159
160 /// Check if the subtarget supports GetDoorbellID.
161 bool supportsGetDoorbellID(Function &F) {
162 const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
163 return ST.supportsGetDoorbellID();
164 }
165
166 std::pair<unsigned, unsigned> getFlatWorkGroupSizes(const Function &F) {
167 const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
168 return ST.getFlatWorkGroupSizes(F);
169 }
170
171 std::pair<unsigned, unsigned>
172 getMaximumFlatWorkGroupRange(const Function &F) {
173 const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
174 return {ST.getMinFlatWorkGroupSize(), ST.getMaxFlatWorkGroupSize()};
175 }
176
177 /// Get code object version.
178 unsigned getCodeObjectVersion() const {
179 return CodeObjectVersion;
180 }
181
182 /// Get the effective value of "amdgpu-waves-per-eu" for the function,
183 /// accounting for the interaction with the passed value to use for
184 /// "amdgpu-flat-work-group-size".
185 std::pair<unsigned, unsigned>
186 getWavesPerEU(const Function &F,
187 std::pair<unsigned, unsigned> FlatWorkGroupSize) {
188 const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
189 return ST.getWavesPerEU(F, FlatWorkGroupSize);
190 }
191
192 std::pair<unsigned, unsigned>
193 getEffectiveWavesPerEU(const Function &F,
194 std::pair<unsigned, unsigned> WavesPerEU,
195 std::pair<unsigned, unsigned> FlatWorkGroupSize) {
196 const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
197 return ST.getEffectiveWavesPerEU(WavesPerEU, FlatWorkGroupSize);
198 }
199
200 unsigned getMaxWavesPerEU(const Function &F) {
201 const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
202 return ST.getMaxWavesPerEU();
203 }
204
205private:
206 /// Check if the ConstantExpr \p CE requires the queue pointer.
207 static bool visitConstExpr(const ConstantExpr *CE) {
208 if (CE->getOpcode() == Instruction::AddrSpaceCast) {
209 unsigned SrcAS = CE->getOperand(0)->getType()->getPointerAddressSpace();
210 return castRequiresQueuePtr(SrcAS);
211 }
212 return false;
213 }
214
215 /// Get the constant access bitmap for \p C.
216 uint8_t getConstantAccess(const Constant *C,
217 SmallPtrSetImpl<const Constant *> &Visited) {
218 auto It = ConstantStatus.find(C);
219 if (It != ConstantStatus.end())
220 return It->second;
221
222 uint8_t Result = 0;
223 if (isDSAddress(C))
224 Result = DS_GLOBAL;
225
226 if (const auto *CE = dyn_cast<ConstantExpr>(C))
227 if (visitConstExpr(CE))
228 Result |= ADDR_SPACE_CAST;
229
230 for (const Use &U : C->operands()) {
231 const auto *OpC = dyn_cast<Constant>(U);
232 if (!OpC || !Visited.insert(OpC).second)
233 continue;
234
235 Result |= getConstantAccess(OpC, Visited);
236 }
237 return Result;
238 }
239
240public:
241 /// Returns true if \p Fn needs the queue pointer because of \p C.
242 bool needsQueuePtr(const Constant *C, Function &Fn) {
243 bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(Fn.getCallingConv());
244 bool HasAperture = hasApertureRegs(Fn);
245
246 // No need to explore the constants.
247 if (!IsNonEntryFunc && HasAperture)
248 return false;
249
250 SmallPtrSet<const Constant *, 8> Visited;
251 uint8_t Access = getConstantAccess(C, Visited);
252
253 // We need to trap on DS globals in non-entry functions.
254 if (IsNonEntryFunc && (Access & DS_GLOBAL))
255 return true;
256
257 return !HasAperture && (Access & ADDR_SPACE_CAST);
258 }
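// Example (illustrative only): a constant expression that addrspacecasts a
// private or local pointer to the flat address space reports ADDR_SPACE_CAST
// above, so on subtargets without aperture registers the function is deemed
// to need the queue pointer in order to read the aperture bases.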
259
260private:
261 /// Used to determine if the Constant needs the queue pointer.
262 DenseMap<const Constant *, uint8_t> ConstantStatus;
263 const unsigned CodeObjectVersion;
264};
265
266struct AAAMDAttributes
267 : public StateWrapper<BitIntegerState<uint32_t, ALL_ARGUMENT_MASK, 0>,
268 AbstractAttribute> {
269 using Base = StateWrapper<BitIntegerState<uint32_t, ALL_ARGUMENT_MASK, 0>,
270 AbstractAttribute>;
271
272 AAAMDAttributes(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
273
274 /// Create an abstract attribute view for the position \p IRP.
275 static AAAMDAttributes &createForPosition(const IRPosition &IRP,
276 Attributor &A);
277
278 /// See AbstractAttribute::getName().
279 const std::string getName() const override { return "AAAMDAttributes"; }
280
281 /// See AbstractAttribute::getIdAddr().
282 const char *getIdAddr() const override { return &ID; }
283
284 /// This function should return true if the type of the \p AA is
285 /// AAAMDAttributes.
286 static bool classof(const AbstractAttribute *AA) {
287 return (AA->getIdAddr() == &ID);
288 }
289
290 /// Unique ID (due to the unique address)
291 static const char ID;
292};
293const char AAAMDAttributes::ID = 0;
294
295struct AAUniformWorkGroupSize
296 : public StateWrapper<BooleanState, AbstractAttribute> {
297 using Base = StateWrapper<BooleanState, AbstractAttribute>;
298 AAUniformWorkGroupSize(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
299
300 /// Create an abstract attribute view for the position \p IRP.
301 static AAUniformWorkGroupSize &createForPosition(const IRPosition &IRP,
302 Attributor &A);
303
304 /// See AbstractAttribute::getName().
305 const std::string getName() const override {
306 return "AAUniformWorkGroupSize";
307 }
308
309 /// See AbstractAttribute::getIdAddr().
310 const char *getIdAddr() const override { return &ID; }
311
312 /// This function should return true if the type of the \p AA is
313 /// AAUniformWorkGroupSize.
314 static bool classof(const AbstractAttribute *AA) {
315 return (AA->getIdAddr() == &ID);
316 }
317
318 /// Unique ID (due to the unique address)
319 static const char ID;
320};
321const char AAUniformWorkGroupSize::ID = 0;
322
323struct AAUniformWorkGroupSizeFunction : public AAUniformWorkGroupSize {
324 AAUniformWorkGroupSizeFunction(const IRPosition &IRP, Attributor &A)
325 : AAUniformWorkGroupSize(IRP, A) {}
326
327 void initialize(Attributor &A) override {
328 Function *F = getAssociatedFunction();
329 CallingConv::ID CC = F->getCallingConv();
330
331 if (CC != CallingConv::AMDGPU_KERNEL)
332 return;
333
334 bool InitialValue = false;
335 if (F->hasFnAttribute("uniform-work-group-size"))
336 InitialValue =
337 F->getFnAttribute("uniform-work-group-size").getValueAsString() ==
338 "true";
339
340 if (InitialValue)
341 indicateOptimisticFixpoint();
342 else
343 indicatePessimisticFixpoint();
344 }
345
346 ChangeStatus updateImpl(Attributor &A) override {
347 ChangeStatus Change = ChangeStatus::UNCHANGED;
348
349 auto CheckCallSite = [&](AbstractCallSite CS) {
350 Function *Caller = CS.getInstruction()->getFunction();
351 LLVM_DEBUG(dbgs() << "[AAUniformWorkGroupSize] Call " << Caller->getName()
352 << "->" << getAssociatedFunction()->getName() << "\n");
353
354 const auto *CallerInfo = A.getAAFor<AAUniformWorkGroupSize>(
355 *this, IRPosition::function(*Caller), DepClassTy::REQUIRED);
356 if (!CallerInfo)
357 return false;
358
359 Change = Change | clampStateAndIndicateChange(this->getState(),
360 CallerInfo->getState());
361
362 return true;
363 };
364
365 bool AllCallSitesKnown = true;
366 if (!A.checkForAllCallSites(CheckCallSite, *this, true, AllCallSitesKnown))
367 return indicatePessimisticFixpoint();
368
369 return Change;
370 }
371
372 ChangeStatus manifest(Attributor &A) override {
373 SmallVector<Attribute, 8> AttrList;
374 LLVMContext &Ctx = getAssociatedFunction()->getContext();
375
376 AttrList.push_back(Attribute::get(Ctx, "uniform-work-group-size",
377 getAssumed() ? "true" : "false"));
378 return A.manifestAttrs(getIRPosition(), AttrList,
379 /* ForceReplace */ true);
380 }
381
382 bool isValidState() const override {
383 // This state is always valid, even when the state is false.
384 return true;
385 }
386
387 const std::string getAsStr(Attributor *) const override {
388 return "AMDWorkGroupSize[" + std::to_string(getAssumed()) + "]";
389 }
390
391 /// See AbstractAttribute::trackStatistics()
392 void trackStatistics() const override {}
393};
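// Illustrative IR sketch (function names are hypothetical): given
//
//   define amdgpu_kernel void @kern() #0 { call void @helper() ... }
//   attributes #0 = { "uniform-work-group-size"="true" }
//
// the update rule above clamps @helper's state to its callers', so after
// manifest() @helper also carries "uniform-work-group-size"="true"; if any
// caller lacks the attribute or is unknown, the state falls back to "false".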
394
395AAUniformWorkGroupSize &
396AAUniformWorkGroupSize::createForPosition(const IRPosition &IRP,
397 Attributor &A) {
398 if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
399 return *new (A.Allocator) AAUniformWorkGroupSizeFunction(IRP, A);
400 llvm_unreachable(
401 "AAUniformWorkGroupSize is only valid for function position");
402}
403
404struct AAAMDAttributesFunction : public AAAMDAttributes {
405 AAAMDAttributesFunction(const IRPosition &IRP, Attributor &A)
406 : AAAMDAttributes(IRP, A) {}
407
408 void initialize(Attributor &A) override {
409 Function *F = getAssociatedFunction();
410
411 // If the function requires the implicit arg pointer due to sanitizers,
412 // assume it's needed even if explicitly marked as not requiring it.
413 const bool NeedsHostcall = funcRequiresHostcallPtr(*F);
414 if (NeedsHostcall) {
415 removeAssumedBits(IMPLICIT_ARG_PTR);
416 removeAssumedBits(HOSTCALL_PTR);
417 }
418
419 for (auto Attr : ImplicitAttrs) {
420 if (NeedsHostcall &&
421 (Attr.first == IMPLICIT_ARG_PTR || Attr.first == HOSTCALL_PTR))
422 continue;
423
424 if (F->hasFnAttribute(Attr.second))
425 addKnownBits(Attr.first);
426 }
427
428 if (F->isDeclaration())
429 return;
430
431 // Ignore functions with graphics calling conventions; these are currently
432 // not allowed to have kernel arguments.
433 if (AMDGPU::isGraphics(F->getCallingConv())) {
434 indicatePessimisticFixpoint();
435 return;
436 }
437 }
438
439 ChangeStatus updateImpl(Attributor &A) override {
440 Function *F = getAssociatedFunction();
441 // The current assumed state used to determine a change.
442 auto OrigAssumed = getAssumed();
443
444 // Check for Intrinsics and propagate attributes.
445 const AACallEdges *AAEdges = A.getAAFor<AACallEdges>(
446 *this, this->getIRPosition(), DepClassTy::REQUIRED);
447 if (!AAEdges || AAEdges->hasNonAsmUnknownCallee())
448 return indicatePessimisticFixpoint();
449
450 bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(F->getCallingConv());
451
452 bool NeedsImplicit = false;
453 auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
454 bool HasApertureRegs = InfoCache.hasApertureRegs(*F);
455 bool SupportsGetDoorbellID = InfoCache.supportsGetDoorbellID(*F);
456 unsigned COV = InfoCache.getCodeObjectVersion();
457
458 for (Function *Callee : AAEdges->getOptimisticEdges()) {
459 Intrinsic::ID IID = Callee->getIntrinsicID();
460 if (IID == Intrinsic::not_intrinsic) {
461 const AAAMDAttributes *AAAMD = A.getAAFor<AAAMDAttributes>(
462 *this, IRPosition::function(*Callee), DepClassTy::REQUIRED);
463 if (!AAAMD)
464 return indicatePessimisticFixpoint();
465 *this &= *AAAMD;
466 continue;
467 }
468
469 bool NonKernelOnly = false;
470 ImplicitArgumentMask AttrMask =
471 intrinsicToAttrMask(IID, NonKernelOnly, NeedsImplicit,
472 HasApertureRegs, SupportsGetDoorbellID, COV);
473 if (AttrMask != NOT_IMPLICIT_INPUT) {
474 if ((IsNonEntryFunc || !NonKernelOnly))
475 removeAssumedBits(AttrMask);
476 }
477 }
478
479 // Need implicitarg_ptr to access queue_ptr, private_base, and shared_base.
480 if (NeedsImplicit)
481 removeAssumedBits(IMPLICIT_ARG_PTR);
482
483 if (isAssumed(QUEUE_PTR) && checkForQueuePtr(A)) {
484 // Under V5, we need implicitarg_ptr + offsets to access private_base or
485 // shared_base. We do not actually need queue_ptr.
486 if (COV >= 5)
487 removeAssumedBits(IMPLICIT_ARG_PTR);
488 else
489 removeAssumedBits(QUEUE_PTR);
490 }
491
492 if (funcRetrievesMultigridSyncArg(A, COV)) {
493 assert(!isAssumed(IMPLICIT_ARG_PTR) &&
494 "multigrid_sync_arg needs implicitarg_ptr");
495 removeAssumedBits(MULTIGRID_SYNC_ARG);
496 }
497
498 if (funcRetrievesHostcallPtr(A, COV)) {
499 assert(!isAssumed(IMPLICIT_ARG_PTR) && "hostcall needs implicitarg_ptr");
500 removeAssumedBits(HOSTCALL_PTR);
501 }
502
503 if (funcRetrievesHeapPtr(A, COV)) {
504 assert(!isAssumed(IMPLICIT_ARG_PTR) && "heap_ptr needs implicitarg_ptr");
505 removeAssumedBits(HEAP_PTR);
506 }
507
508 if (isAssumed(QUEUE_PTR) && funcRetrievesQueuePtr(A, COV)) {
509 assert(!isAssumed(IMPLICIT_ARG_PTR) && "queue_ptr needs implicitarg_ptr");
510 removeAssumedBits(QUEUE_PTR);
511 }
512
513 if (isAssumed(LDS_KERNEL_ID) && funcRetrievesLDSKernelId(A)) {
514 removeAssumedBits(LDS_KERNEL_ID);
515 }
516
517 if (isAssumed(DEFAULT_QUEUE) && funcRetrievesDefaultQueue(A, COV))
518 removeAssumedBits(DEFAULT_QUEUE);
519
520 if (isAssumed(COMPLETION_ACTION) && funcRetrievesCompletionAction(A, COV))
521 removeAssumedBits(COMPLETION_ACTION);
522
523 return getAssumed() != OrigAssumed ? ChangeStatus::CHANGED
524 : ChangeStatus::UNCHANGED;
525 }
526
527 ChangeStatus manifest(Attributor &A) override {
528 SmallVector<Attribute, 8> AttrList;
529 LLVMContext &Ctx = getAssociatedFunction()->getContext();
530
531 for (auto Attr : ImplicitAttrs) {
532 if (isKnown(Attr.first))
533 AttrList.push_back(Attribute::get(Ctx, Attr.second));
534 }
535
536 return A.manifestAttrs(getIRPosition(), AttrList,
537 /* ForceReplace */ true);
538 }
539
540 const std::string getAsStr(Attributor *) const override {
541 std::string Str;
542 raw_string_ostream OS(Str);
543 OS << "AMDInfo[";
544 for (auto Attr : ImplicitAttrs)
545 if (isAssumed(Attr.first))
546 OS << ' ' << Attr.second;
547 OS << " ]";
548 return OS.str();
549 }
550
551 /// See AbstractAttribute::trackStatistics()
552 void trackStatistics() const override {}
553
554private:
555 bool checkForQueuePtr(Attributor &A) {
556 Function *F = getAssociatedFunction();
557 bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(F->getCallingConv());
558
559 auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
560
561 bool NeedsQueuePtr = false;
562
563 auto CheckAddrSpaceCasts = [&](Instruction &I) {
564 unsigned SrcAS = static_cast<AddrSpaceCastInst &>(I).getSrcAddressSpace();
565 if (castRequiresQueuePtr(SrcAS)) {
566 NeedsQueuePtr = true;
567 return false;
568 }
569 return true;
570 };
571
572 bool HasApertureRegs = InfoCache.hasApertureRegs(*F);
573
574 // `checkForAllInstructions` is much cheaper than going through all
575 // instructions ourselves, so try it first.
576
577 // The queue pointer is not needed if aperture registers are present.
578 if (!HasApertureRegs) {
579 bool UsedAssumedInformation = false;
580 A.checkForAllInstructions(CheckAddrSpaceCasts, *this,
581 {Instruction::AddrSpaceCast},
582 UsedAssumedInformation);
583 }
584
585 // If we found that we need the queue pointer, nothing else to do.
586 if (NeedsQueuePtr)
587 return true;
588
589 if (!IsNonEntryFunc && HasApertureRegs)
590 return false;
591
592 for (BasicBlock &BB : *F) {
593 for (Instruction &I : BB) {
594 for (const Use &U : I.operands()) {
595 if (const auto *C = dyn_cast<Constant>(U)) {
596 if (InfoCache.needsQueuePtr(C, *F))
597 return true;
598 }
599 }
600 }
601 }
602
603 return false;
604 }
605
606 bool funcRetrievesMultigridSyncArg(Attributor &A, unsigned COV) {
607 auto Pos = llvm::AMDGPU::getMultigridSyncArgImplicitArgPosition(COV);
608 AA::RangeTy Range(Pos, 8);
609 return funcRetrievesImplicitKernelArg(A, Range);
610 }
611
612 bool funcRetrievesHostcallPtr(Attributor &A, unsigned COV) {
613 auto Pos = llvm::AMDGPU::getHostcallImplicitArgPosition(COV);
614 AA::RangeTy Range(Pos, 8);
615 return funcRetrievesImplicitKernelArg(A, Range);
616 }
617
618 bool funcRetrievesDefaultQueue(Attributor &A, unsigned COV) {
619 auto Pos = llvm::AMDGPU::getDefaultQueueImplicitArgPosition(COV);
620 AA::RangeTy Range(Pos, 8);
621 return funcRetrievesImplicitKernelArg(A, Range);
622 }
623
624 bool funcRetrievesCompletionAction(Attributor &A, unsigned COV) {
625 auto Pos = llvm::AMDGPU::getCompletionActionImplicitArgPosition(COV);
626 AA::RangeTy Range(Pos, 8);
627 return funcRetrievesImplicitKernelArg(A, Range);
628 }
629
630 bool funcRetrievesHeapPtr(Attributor &A, unsigned COV) {
631 if (COV < 5)
632 return false;
633 AA::RangeTy Range(AMDGPU::ImplicitArg::HEAP_PTR_OFFSET, 8);
634 return funcRetrievesImplicitKernelArg(A, Range);
635 }
636
637 bool funcRetrievesQueuePtr(Attributor &A, unsigned COV) {
638 if (COV < 5)
639 return false;
640 AA::RangeTy Range(AMDGPU::ImplicitArg::QUEUE_PTR_OFFSET, 8);
641 return funcRetrievesImplicitKernelArg(A, Range);
642 }
643
644 bool funcRetrievesImplicitKernelArg(Attributor &A, AA::RangeTy Range) {
645 // Check if this is a call to the implicitarg_ptr builtin and it
646 // is used to retrieve the hostcall pointer. The implicit arg for
647 // hostcall is not used only if every use of the implicitarg_ptr
648 // is a load that clearly does not retrieve any byte of the
649 // hostcall pointer. We check this by tracing all the uses of the
650 // initial call to the implicitarg_ptr intrinsic.
651 auto DoesNotLeadToKernelArgLoc = [&](Instruction &I) {
652 auto &Call = cast<CallBase>(I);
653 if (Call.getIntrinsicID() != Intrinsic::amdgcn_implicitarg_ptr)
654 return true;
655
656 const auto *PointerInfoAA = A.getAAFor<AAPointerInfo>(
657 *this, IRPosition::callsite_returned(Call), DepClassTy::REQUIRED);
658 if (!PointerInfoAA)
659 return false;
660
661 return PointerInfoAA->forallInterferingAccesses(
662 Range, [](const AAPointerInfo::Access &Acc, bool IsExact) {
663 return Acc.getRemoteInst()->isDroppable();
664 });
665 };
666
667 bool UsedAssumedInformation = false;
668 return !A.checkForAllCallLikeInstructions(DoesNotLeadToKernelArgLoc, *this,
669 UsedAssumedInformation);
670 }
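// Illustrative use (a sketch): the funcRetrieves*() helpers above pass in the
// 8-byte range of one implicit kernel argument; if any load reachable from a
// call to llvm.amdgcn.implicitarg.ptr may overlap that range, this returns
// true and updateImpl() clears the corresponding assumed "no-*" bit.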
671
672 bool funcRetrievesLDSKernelId(Attributor &A) {
673 auto DoesNotRetrieve = [&](Instruction &I) {
674 auto &Call = cast<CallBase>(I);
675 return Call.getIntrinsicID() != Intrinsic::amdgcn_lds_kernel_id;
676 };
677 bool UsedAssumedInformation = false;
678 return !A.checkForAllCallLikeInstructions(DoesNotRetrieve, *this,
679 UsedAssumedInformation);
680 }
681};
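// Illustrative outcome (a sketch; the exact strings come from
// AMDGPUAttributes.def and depend on the code object version): for a kernel
// whose callees only use llvm.amdgcn.workgroup.id.y, updateImpl() clears the
// WORKGROUP_ID_Y bit and manifest() emits the remaining known bits, e.g.
//   attributes #0 = { "amdgpu-no-dispatch-id" "amdgpu-no-workitem-id-y" ... }
// while deliberately omitting "amdgpu-no-workgroup-id-y".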
682
683AAAMDAttributes &AAAMDAttributes::createForPosition(const IRPosition &IRP,
684 Attributor &A) {
685 if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
686 return *new (A.Allocator) AAAMDAttributesFunction(IRP, A);
687 llvm_unreachable("AAAMDAttributes is only valid for function position");
688}
689
690/// Base class to derive different size ranges.
691struct AAAMDSizeRangeAttribute
692 : public StateWrapper<IntegerRangeState, AbstractAttribute, uint32_t> {
693 using Base = StateWrapper<IntegerRangeState, AbstractAttribute, uint32_t>;
694
695 StringRef AttrName;
696
697 AAAMDSizeRangeAttribute(const IRPosition &IRP, Attributor &A,
698 StringRef AttrName)
699 : Base(IRP, 32), AttrName(AttrName) {}
700
701 /// See AbstractAttribute::trackStatistics()
702 void trackStatistics() const override {}
703
704 template <class AttributeImpl>
705 ChangeStatus updateImplImpl(Attributor &A) {
706 ChangeStatus Change = ChangeStatus::UNCHANGED;
707
708 auto CheckCallSite = [&](AbstractCallSite CS) {
709 Function *Caller = CS.getInstruction()->getFunction();
710 LLVM_DEBUG(dbgs() << '[' << getName() << "] Call " << Caller->getName()
711 << "->" << getAssociatedFunction()->getName() << '\n');
712
713 const auto *CallerInfo = A.getAAFor<AttributeImpl>(
714 *this, IRPosition::function(*Caller), DepClassTy::REQUIRED);
715 if (!CallerInfo)
716 return false;
717
718 Change |=
719 clampStateAndIndicateChange(this->getState(), CallerInfo->getState());
720
721 return true;
722 };
723
724 bool AllCallSitesKnown = true;
725 if (!A.checkForAllCallSites(CheckCallSite, *this, true, AllCallSitesKnown))
726 return indicatePessimisticFixpoint();
727
728 return Change;
729 }
730
731 ChangeStatus emitAttributeIfNotDefault(Attributor &A, unsigned Min,
732 unsigned Max) {
733 // Don't add the attribute if it's the implied default.
734 if (getAssumed().getLower() == Min && getAssumed().getUpper() - 1 == Max)
735 return ChangeStatus::UNCHANGED;
736
737 Function *F = getAssociatedFunction();
738 LLVMContext &Ctx = F->getContext();
739 SmallString<10> Buffer;
740 raw_svector_ostream OS(Buffer);
741 OS << getAssumed().getLower() << ',' << getAssumed().getUpper() - 1;
742 return A.manifestAttrs(getIRPosition(),
743 {Attribute::get(Ctx, AttrName, OS.str())},
744 /* ForceReplace */ true);
745 }
746
747 const std::string getAsStr(Attributor *) const override {
748 std::string Str;
749 raw_string_ostream OS(Str);
750 OS << getName() << '[';
751 OS << getAssumed().getLower() << ',' << getAssumed().getUpper() - 1;
752 OS << ']';
753 return OS.str();
754 }
755};
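// Illustrative manifest output (a sketch): an assumed range of [64, 257) is
// printed as the inclusive string "64,256", so a function would end up with,
// e.g., "amdgpu-flat-work-group-size"="64,256"; emitAttributeIfNotDefault()
// stays silent when the deduced range matches the subtarget default Min/Max.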
756
757/// Propagate amdgpu-flat-work-group-size attribute.
758struct AAAMDFlatWorkGroupSize : public AAAMDSizeRangeAttribute {
759 AAAMDFlatWorkGroupSize(const IRPosition &IRP, Attributor &A)
760 : AAAMDSizeRangeAttribute(IRP, A, "amdgpu-flat-work-group-size") {}
761
762 void initialize(Attributor &A) override {
763 Function *F = getAssociatedFunction();
764 auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
765 unsigned MinGroupSize, MaxGroupSize;
766 std::tie(MinGroupSize, MaxGroupSize) = InfoCache.getFlatWorkGroupSizes(*F);
767 intersectKnown(
768 ConstantRange(APInt(32, MinGroupSize), APInt(32, MaxGroupSize + 1)));
769
770 if (AMDGPU::isEntryFunctionCC(F->getCallingConv()))
771 indicatePessimisticFixpoint();
772 }
773
774 ChangeStatus updateImpl(Attributor &A) override {
775 return updateImplImpl<AAAMDFlatWorkGroupSize>(A);
776 }
777
778 /// Create an abstract attribute view for the position \p IRP.
779 static AAAMDFlatWorkGroupSize &createForPosition(const IRPosition &IRP,
780 Attributor &A);
781
782 ChangeStatus manifest(Attributor &A) override {
783 Function *F = getAssociatedFunction();
784 auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
785 unsigned Min, Max;
786 std::tie(Min, Max) = InfoCache.getMaximumFlatWorkGroupRange(*F);
787 return emitAttributeIfNotDefault(A, Min, Max);
788 }
789
790 /// See AbstractAttribute::getName()
791 const std::string getName() const override {
792 return "AAAMDFlatWorkGroupSize";
793 }
794
795 /// See AbstractAttribute::getIdAddr()
796 const char *getIdAddr() const override { return &ID; }
797
798 /// This function should return true if the type of the \p AA is
799 /// AAAMDFlatWorkGroupSize
800 static bool classof(const AbstractAttribute *AA) {
801 return (AA->getIdAddr() == &ID);
802 }
803
804 /// Unique ID (due to the unique address)
805 static const char ID;
806};
807
808const char AAAMDFlatWorkGroupSize::ID = 0;
809
810AAAMDFlatWorkGroupSize &
811AAAMDFlatWorkGroupSize::createForPosition(const IRPosition &IRP,
812 Attributor &A) {
813 if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
814 return *new (A.Allocator) AAAMDFlatWorkGroupSize(IRP, A);
815 llvm_unreachable(
816 "AAAMDFlatWorkGroupSize is only valid for function position");
817}
818
819/// Propagate amdgpu-waves-per-eu attribute.
820struct AAAMDWavesPerEU : public AAAMDSizeRangeAttribute {
821 AAAMDWavesPerEU(const IRPosition &IRP, Attributor &A)
822 : AAAMDSizeRangeAttribute(IRP, A, "amdgpu-waves-per-eu") {}
823
824 bool isValidState() const override {
825 return !Assumed.isEmptySet() && IntegerRangeState::isValidState();
826 }
827
828 void initialize(Attributor &A) override {
829 Function *F = getAssociatedFunction();
830 auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
831
832 if (const auto *AssumedGroupSize = A.getAAFor<AAAMDFlatWorkGroupSize>(
833 *this, IRPosition::function(*F), DepClassTy::REQUIRED)) {
834
835 unsigned Min, Max;
836 std::tie(Min, Max) = InfoCache.getWavesPerEU(
837 *F, {AssumedGroupSize->getAssumed().getLower().getZExtValue(),
838 AssumedGroupSize->getAssumed().getUpper().getZExtValue() - 1});
839
840 ConstantRange Range(APInt(32, Min), APInt(32, Max + 1));
841 intersectKnown(Range);
842 }
843
844 if (AMDGPU::isEntryFunctionCC(F->getCallingConv()))
845 indicatePessimisticFixpoint();
846 }
847
848 ChangeStatus updateImpl(Attributor &A) override {
849 auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
850 ChangeStatus Change = ChangeStatus::UNCHANGED;
851
852 auto CheckCallSite = [&](AbstractCallSite CS) {
853 Function *Caller = CS.getInstruction()->getFunction();
854 Function *Func = getAssociatedFunction();
855 LLVM_DEBUG(dbgs() << '[' << getName() << "] Call " << Caller->getName()
856 << "->" << Func->getName() << '\n');
857
858 const auto *CallerInfo = A.getAAFor<AAAMDWavesPerEU>(
859 *this, IRPosition::function(*Caller), DepClassTy::REQUIRED);
860 const auto *AssumedGroupSize = A.getAAFor<AAAMDFlatWorkGroupSize>(
861 *this, IRPosition::function(*Func), DepClassTy::REQUIRED);
862 if (!CallerInfo || !AssumedGroupSize)
863 return false;
864
865 unsigned Min, Max;
866 std::tie(Min, Max) = InfoCache.getEffectiveWavesPerEU(
867 *Caller,
868 {CallerInfo->getAssumed().getLower().getZExtValue(),
869 CallerInfo->getAssumed().getUpper().getZExtValue() - 1},
870 {AssumedGroupSize->getAssumed().getLower().getZExtValue(),
871 AssumedGroupSize->getAssumed().getUpper().getZExtValue() - 1});
872 ConstantRange CallerRange(APInt(32, Min), APInt(32, Max + 1));
873 IntegerRangeState CallerRangeState(CallerRange);
874 Change |= clampStateAndIndicateChange(this->getState(), CallerRangeState);
875
876 return true;
877 };
878
879 bool AllCallSitesKnown = true;
880 if (!A.checkForAllCallSites(CheckCallSite, *this, true, AllCallSitesKnown))
881 return indicatePessimisticFixpoint();
882
883 return Change;
884 }
885
886 /// Create an abstract attribute view for the position \p IRP.
887 static AAAMDWavesPerEU &createForPosition(const IRPosition &IRP,
888 Attributor &A);
889
890 ChangeStatus manifest(Attributor &A) override {
891 Function *F = getAssociatedFunction();
892 auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
893 unsigned Max = InfoCache.getMaxWavesPerEU(*F);
894 return emitAttributeIfNotDefault(A, 1, Max);
895 }
896
897 /// See AbstractAttribute::getName()
898 const std::string getName() const override { return "AAAMDWavesPerEU"; }
899
900 /// See AbstractAttribute::getIdAddr()
901 const char *getIdAddr() const override { return &ID; }
902
903 /// This function should return true if the type of the \p AA is
904 /// AAAMDWavesPerEU
905 static bool classof(const AbstractAttribute *AA) {
906 return (AA->getIdAddr() == &ID);
907 }
908
909 /// Unique ID (due to the unique address)
910 static const char ID;
911};
912
913const char AAAMDWavesPerEU::ID = 0;
914
915AAAMDWavesPerEU &AAAMDWavesPerEU::createForPosition(const IRPosition &IRP,
916 Attributor &A) {
917 if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
918 return *new (A.Allocator) AAAMDWavesPerEU(IRP, A);
919 llvm_unreachable("AAAMDWavesPerEU is only valid for function position");
920}
921
922static bool inlineAsmUsesAGPRs(const InlineAsm *IA) {
923 for (const auto &CI : IA->ParseConstraints()) {
924 for (StringRef Code : CI.Codes) {
925 Code.consume_front("{");
926 if (Code.starts_with("a"))
927 return true;
928 }
929 }
930
931 return false;
932}
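// Illustrative constraints (a sketch): "a" and "{a5}" are treated as AGPR
// uses and make this return true, while "v", "{v3}" or "s" do not. The check
// is deliberately conservative: any constraint code starting with 'a' after
// an optional '{' counts as an AGPR use.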
933
934struct AAAMDGPUNoAGPR
935 : public IRAttribute<Attribute::NoUnwind,
936 StateWrapper<BooleanState, AbstractAttribute>,
937 AAAMDGPUNoAGPR> {
938 AAAMDGPUNoAGPR(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {}
939
940 static AAAMDGPUNoAGPR &createForPosition(const IRPosition &IRP,
941 Attributor &A) {
942 if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
943 return *new (A.Allocator) AAAMDGPUNoAGPR(IRP, A);
944 llvm_unreachable("AAAMDGPUNoAGPR is only valid for function position");
945 }
946
947 void initialize(Attributor &A) override {
948 Function *F = getAssociatedFunction();
949 if (F->hasFnAttribute("amdgpu-no-agpr"))
950 indicateOptimisticFixpoint();
951 }
952
953 const std::string getAsStr(Attributor *A) const override {
954 return getAssumed() ? "amdgpu-no-agpr" : "amdgpu-maybe-agpr";
955 }
956
957 void trackStatistics() const override {}
958
959 ChangeStatus updateImpl(Attributor &A) override {
960 // TODO: Use AACallEdges, but then we need a way to inspect asm edges.
961
962 auto CheckForNoAGPRs = [&](Instruction &I) {
963 const auto &CB = cast<CallBase>(I);
964 const Value *CalleeOp = CB.getCalledOperand();
965 const Function *Callee = dyn_cast<Function>(CalleeOp);
966 if (!Callee) {
967 if (const InlineAsm *IA = dyn_cast<InlineAsm>(CalleeOp))
968 return !inlineAsmUsesAGPRs(IA);
969 return false;
970 }
971
972 // Some intrinsics may use AGPRs, but if we have a choice, we are not
973 // required to use AGPRs.
974 if (Callee->isIntrinsic())
975 return true;
976
977 // TODO: Handle callsite attributes
978 const auto *CalleeInfo = A.getAAFor<AAAMDGPUNoAGPR>(
979 *this, IRPosition::function(*Callee), DepClassTy::REQUIRED);
980 return CalleeInfo && CalleeInfo->getAssumed();
981 };
982
983 bool UsedAssumedInformation = false;
984 if (!A.checkForAllCallLikeInstructions(CheckForNoAGPRs, *this,
985 UsedAssumedInformation))
986 return indicatePessimisticFixpoint();
987 return ChangeStatus::UNCHANGED;
988 }
989
990 ChangeStatus manifest(Attributor &A) override {
991 if (!getAssumed())
992 return ChangeStatus::UNCHANGED;
993 LLVMContext &Ctx = getAssociatedFunction()->getContext();
994 return A.manifestAttrs(getIRPosition(),
995 {Attribute::get(Ctx, "amdgpu-no-agpr")});
996 }
997
998 const std::string getName() const override { return "AAAMDGPUNoAGPR"; }
999 const char *getIdAddr() const override { return &ID; }
1000
1001 /// This function should return true if the type of the \p AA is
1002 /// AAAMDGPUNoAGPRs
1003 static bool classof(const AbstractAttribute *AA) {
1004 return (AA->getIdAddr() == &ID);
1005 }
1006
1007 static const char ID;
1008};
1009
1010const char AAAMDGPUNoAGPR::ID = 0;
1011
1012static void addPreloadKernArgHint(Function &F, TargetMachine &TM) {
1013 const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
1014 for (unsigned I = 0;
1015 I < F.arg_size() &&
1016 I < std::min(KernargPreloadCount.getValue(), ST.getMaxNumUserSGPRs());
1017 ++I) {
1018 Argument &Arg = *F.getArg(I);
1019 // Check for incompatible attributes.
1020 if (Arg.hasByRefAttr() || Arg.hasNestAttr())
1021 break;
1022
1023 Arg.addAttr(Attribute::InReg);
1024 }
1025}
1026
1027static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
1028 AMDGPUAttributorOptions Options) {
1029 SetVector<Function *> Functions;
1030 for (Function &F : M) {
1031 if (!F.isIntrinsic())
1032 Functions.insert(&F);
1033 }
1034
1035 CallGraphUpdater CGUpdater;
1036 BumpPtrAllocator Allocator;
1037 AMDGPUInformationCache InfoCache(M, AG, Allocator, nullptr, TM);
1038 DenseSet<const char *> Allowed(
1039 {&AAAMDAttributes::ID, &AAUniformWorkGroupSize::ID,
1040 &AAPotentialValues::ID, &AAAMDFlatWorkGroupSize::ID,
1041 &AAAMDWavesPerEU::ID, &AAAMDGPUNoAGPR::ID, &AACallEdges::ID,
1045
1046 AttributorConfig AC(CGUpdater);
1047 AC.IsClosedWorldModule = Options.IsClosedWorld;
1048 AC.Allowed = &Allowed;
1049 AC.IsModulePass = true;
1050 AC.DefaultInitializeLiveInternals = false;
1051 AC.IndirectCalleeSpecializationCallback =
1052 [&TM](Attributor &A, const AbstractAttribute &AA, CallBase &CB,
1053 Function &Callee, unsigned NumAssumedCallees) {
1054 if (AMDGPU::isEntryFunctionCC(Callee.getCallingConv()))
1055 return false;
1056 // Singleton functions can be specialized.
1057 if (NumAssumedCallees == 1)
1058 return true;
1059 // Otherwise specialize uniform values.
1060 const auto &TTI = TM.getTargetTransformInfo(*CB.getCaller());
1061 return TTI.isAlwaysUniform(CB.getCalledOperand());
1062 };
1063 AC.IPOAmendableCB = [](const Function &F) {
1064 return F.getCallingConv() == CallingConv::AMDGPU_KERNEL;
1065 };
1066
1067 Attributor A(Functions, InfoCache, AC);
1068
1069 for (Function &F : M) {
1070 if (F.isIntrinsic())
1071 continue;
1072
1073 A.getOrCreateAAFor<AAAMDAttributes>(IRPosition::function(F));
1074 A.getOrCreateAAFor<AAUniformWorkGroupSize>(IRPosition::function(F));
1075 A.getOrCreateAAFor<AAAMDGPUNoAGPR>(IRPosition::function(F));
1076 CallingConv::ID CC = F.getCallingConv();
1077 if (!AMDGPU::isEntryFunctionCC(CC)) {
1078 A.getOrCreateAAFor<AAAMDFlatWorkGroupSize>(IRPosition::function(F));
1079 A.getOrCreateAAFor<AAAMDWavesPerEU>(IRPosition::function(F));
1080 } else if (CC == CallingConv::AMDGPU_KERNEL) {
1081 addPreloadKernArgHint(F, TM);
1082 }
1083
1084 for (auto &I : instructions(F)) {
1085 if (auto *LI = dyn_cast<LoadInst>(&I)) {
1086 A.getOrCreateAAFor<AAAddressSpace>(
1087 IRPosition::value(*LI->getPointerOperand()));
1088 }
1089 if (auto *SI = dyn_cast<StoreInst>(&I)) {
1090 A.getOrCreateAAFor<AAAddressSpace>(
1091 IRPosition::value(*SI->getPointerOperand()));
1092 }
1093 }
1094 }
1095
1096 ChangeStatus Change = A.run();
1097 return Change == ChangeStatus::CHANGED;
1098}
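// For reference (a sketch; the pass name is the one registered for the new
// pass manager, and the input file is hypothetical), the deduction in
// runImpl() can be exercised on its own with:
//
//   opt -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-attributor -S kernel.ll
//
// The legacy ModulePass wrapper below drives the same runImpl() entry point.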
1099
1100class AMDGPUAttributorLegacy : public ModulePass {
1101public:
1102 AMDGPUAttributorLegacy() : ModulePass(ID) {}
1103
1104 /// doInitialization - Virtual method overridden by subclasses to do
1105 /// any necessary initialization before any pass is run.
1106 bool doInitialization(Module &) override {
1107 auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
1108 if (!TPC)
1109 report_fatal_error("TargetMachine is required");
1110
1111 TM = &TPC->getTM<TargetMachine>();
1112 return false;
1113 }
1114
1115 bool runOnModule(Module &M) override {
1116 AnalysisGetter AG(this);
1117 return runImpl(M, AG, *TM, /*Options=*/{});
1118 }
1119
1120 void getAnalysisUsage(AnalysisUsage &AU) const override {
1121 AU.addRequired<CycleInfoWrapperPass>();
1122 }
1123
1124 StringRef getPassName() const override { return "AMDGPU Attributor"; }
1125 TargetMachine *TM;
1126 static char ID;
1127};
1128} // namespace
1129
1130PreservedAnalyses llvm::AMDGPUAttributorPass::run(Module &M,
1131 ModuleAnalysisManager &AM) {
1132
1133 FunctionAnalysisManager &FAM =
1134 AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
1135 AnalysisGetter AG(FAM);
1136
1137 // TODO: Probably preserves CFG
1138 return runImpl(M, AG, TM, Options) ? PreservedAnalyses::none()
1139 : PreservedAnalyses::all();
1140}
1141
1142char AMDGPUAttributorLegacy::ID = 0;
1143
1144Pass *llvm::createAMDGPUAttributorLegacyPass() {
1145 return new AMDGPUAttributorLegacy();
1146}
1147INITIALIZE_PASS_BEGIN(AMDGPUAttributorLegacy, DEBUG_TYPE, "AMDGPU Attributor",
1148 false, false)
1149INITIALIZE_PASS_DEPENDENCY(CycleInfoWrapperPass)
1150INITIALIZE_PASS_END(AMDGPUAttributorLegacy, DEBUG_TYPE, "AMDGPU Attributor",
1151 false, false)