#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsR600.h"

#define DEBUG_TYPE "amdgpu-attributor"

#define AMDGPU_ATTRIBUTE(Name, Str) Name##_POS,
enum ImplicitArgumentPositions {
  #include "AMDGPUAttributes.def"
  LAST_ARG_POS
};

#define AMDGPU_ATTRIBUTE(Name, Str) Name = 1 << Name##_POS,
enum ImplicitArgumentMask {
  NOT_IMPLICIT_INPUT,
  #include "AMDGPUAttributes.def"
  ALL_ARGUMENT_MASK = (1 << LAST_ARG_POS) - 1
};

#define AMDGPU_ATTRIBUTE(Name, Str) {Name, Str},
static constexpr std::pair<ImplicitArgumentMask, StringLiteral>
    ImplicitAttrs[] = {
  #include "AMDGPUAttributes.def"
};
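// Illustration only (not from this file): AMDGPUAttributes.def is an X-macro
// list whose entries have roughly this shape (example names):
//
//   AMDGPU_ATTRIBUTE(DISPATCH_PTR, "amdgpu-no-dispatch-ptr")
//   AMDGPU_ATTRIBUTE(QUEUE_PTR, "amdgpu-no-queue-ptr")
//
// so the three expansions above build, in order, an enum of bit positions
// (Name##_POS), an enum of single-bit masks (Name = 1 << Name##_POS) suitable
// for a BitIntegerState, and a {mask, string} table pairing each bit with its
// "amdgpu-no-*" function attribute.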
static ImplicitArgumentMask
intrinsicToAttrMask(Intrinsic::ID ID, bool &NonKernelOnly, bool &NeedsImplicit,
                    bool HasApertureRegs, bool SupportsGetDoorBellID) {
  unsigned CodeObjectVersion = AMDGPU::getAmdhsaCodeObjectVersion();
  switch (ID) {
  case Intrinsic::amdgcn_workitem_id_x:
    NonKernelOnly = true;
    return WORKITEM_ID_X;
  case Intrinsic::amdgcn_workgroup_id_x:
    NonKernelOnly = true;
    return WORKGROUP_ID_X;
  case Intrinsic::amdgcn_workitem_id_y:
  case Intrinsic::r600_read_tidig_y:
    return WORKITEM_ID_Y;
  case Intrinsic::amdgcn_workitem_id_z:
  case Intrinsic::r600_read_tidig_z:
    return WORKITEM_ID_Z;
  case Intrinsic::amdgcn_workgroup_id_y:
  case Intrinsic::r600_read_tgid_y:
    return WORKGROUP_ID_Y;
  case Intrinsic::amdgcn_workgroup_id_z:
  case Intrinsic::r600_read_tgid_z:
    return WORKGROUP_ID_Z;
  case Intrinsic::amdgcn_lds_kernel_id:
    return LDS_KERNEL_ID;
  case Intrinsic::amdgcn_dispatch_ptr:
    return DISPATCH_PTR;
  case Intrinsic::amdgcn_dispatch_id:
    return DISPATCH_ID;
  case Intrinsic::amdgcn_implicitarg_ptr:
    return IMPLICIT_ARG_PTR;
  // Need queue_ptr anyway, and under code object V5 the queue_ptr is reached
  // through implicitarg_ptr, so that is needed as well.
  case Intrinsic::amdgcn_queue_ptr:
    NeedsImplicit = (CodeObjectVersion == 5);
    return QUEUE_PTR;
  case Intrinsic::amdgcn_is_shared:
  case Intrinsic::amdgcn_is_private:
    if (HasApertureRegs)
      return NOT_IMPLICIT_INPUT;
    // Under V5, private_base and shared_base are read via implicitarg_ptr +
    // offsets; before V5 they are read via queue_ptr + offsets.
    return CodeObjectVersion == 5 ? IMPLICIT_ARG_PTR : QUEUE_PTR;
  case Intrinsic::trap:
    if (SupportsGetDoorBellID)
      return NOT_IMPLICIT_INPUT;
    NeedsImplicit = (CodeObjectVersion == 5);
    return QUEUE_PTR;
  default:
    return NOT_IMPLICIT_INPUT;
  }
}
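// Usage sketch (explanatory, not code from this file): a device function whose
// only interesting callee is llvm.amdgcn.workgroup.id.x maps to WORKGROUP_ID_X
// here, so the attributor drops the corresponding "amdgpu-no-workgroup-id-x"
// assumption for that function and its callers, while every other implicit
// input stays assumed unused and is later manifested as an "amdgpu-no-*"
// attribute.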
// Returns true if the function requires the implicit hostcall buffer.
static bool funcRequiresHostcallPtr(const Function &F) {
  // Sanitizers require the hostcall buffer passed in the implicit arguments.
  return F.hasFnAttribute(Attribute::SanitizeAddress) ||
         F.hasFnAttribute(Attribute::SanitizeThread) ||
         F.hasFnAttribute(Attribute::SanitizeMemory) ||
         F.hasFnAttribute(Attribute::SanitizeHWAddress) ||
         F.hasFnAttribute(Attribute::SanitizeMemTag);
}
enum ConstantStatus { DS_GLOBAL = 1 << 0, ADDR_SPACE_CAST = 1 << 1 };
  /// Check if the subtarget has aperture regs.
  bool hasApertureRegs(Function &F) {
    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
    return ST.hasApertureRegs();
  }

  /// Check if the subtarget supports GetDoorbellID.
  bool supportsGetDoorbellID(Function &F) {
    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
    return ST.supportsGetDoorbellID();
  }

  std::pair<unsigned, unsigned> getFlatWorkGroupSizes(const Function &F) {
    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
    return ST.getFlatWorkGroupSizes(F);
  }

  std::pair<unsigned, unsigned>
  getMaximumFlatWorkGroupRange(const Function &F) {
    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
    return {ST.getMinFlatWorkGroupSize(), ST.getMaxFlatWorkGroupSize()};
  }
  static bool visitConstExpr(const ConstantExpr *CE) {
    if (CE->getOpcode() == Instruction::AddrSpaceCast) {
      unsigned SrcAS = CE->getOperand(0)->getType()->getPointerAddressSpace();
      return castRequiresQueuePtr(SrcAS);
    }
    return false;
  }
  /// Get the constant access bitmap for \p C.
  uint8_t getConstantAccess(const Constant *C) {
    auto It = ConstantStatus.find(C);
    if (It != ConstantStatus.end())
      return It->second;

    uint8_t Result = 0;
    if (isDSAddress(C))
      Result = DS_GLOBAL;

    if (const auto *CE = dyn_cast<ConstantExpr>(C))
      if (visitConstExpr(CE))
        Result |= ADDR_SPACE_CAST;

    for (const Use &U : C->operands()) {
      const auto *OpC = dyn_cast<Constant>(U);
      if (!OpC)
        continue;

      Result |= getConstantAccess(OpC);
    }

    ConstantStatus[C] = Result;
    return Result;
  }
  /// Returns true if \p Fn needs the queue pointer because of \p C.
  bool needsQueuePtr(const Constant *C, Function &Fn) {
    bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(Fn.getCallingConv());
    bool HasAperture = hasApertureRegs(Fn);

    // No need to explore the constants.
    if (!IsNonEntryFunc && HasAperture)
      return false;

    uint8_t Access = getConstantAccess(C);

    // We need to trap on DS globals in non-entry functions.
    if (IsNonEntryFunc && (Access & DS_GLOBAL))
      return true;

    return !HasAperture && (Access & ADDR_SPACE_CAST);
  }
struct AAAMDAttributes
    : public StateWrapper<BitIntegerState<uint32_t, ALL_ARGUMENT_MASK, 0>,
                          AbstractAttribute> {
  using Base = StateWrapper<BitIntegerState<uint32_t, ALL_ARGUMENT_MASK, 0>,
                            AbstractAttribute>;
  AAAMDAttributes(const IRPosition &IRP, Attributor &A) : Base(IRP) {}

  /// Create an abstract attribute view for the position \p IRP.
  static AAAMDAttributes &createForPosition(const IRPosition &IRP,
                                            Attributor &A);

  /// See AbstractAttribute::getName().
  const std::string getName() const override { return "AAAMDAttributes"; }

  /// See AbstractAttribute::getIdAddr().
  const char *getIdAddr() const override { return &ID; }

  /// Unique ID (due to the unique address).
  static const char ID;
};
struct AAUniformWorkGroupSize
    : public StateWrapper<BooleanState, AbstractAttribute> {
  using Base = StateWrapper<BooleanState, AbstractAttribute>;
  AAUniformWorkGroupSize(const IRPosition &IRP, Attributor &A) : Base(IRP) {}

  /// Create an abstract attribute view for the position \p IRP.
  static AAUniformWorkGroupSize &createForPosition(const IRPosition &IRP,
                                                   Attributor &A);

  /// See AbstractAttribute::getName().
  const std::string getName() const override {
    return "AAUniformWorkGroupSize";
  }

  /// See AbstractAttribute::getIdAddr().
  const char *getIdAddr() const override { return &ID; }

  /// Unique ID (due to the unique address).
  static const char ID;
};
struct AAUniformWorkGroupSizeFunction : public AAUniformWorkGroupSize {
  AAUniformWorkGroupSizeFunction(const IRPosition &IRP, Attributor &A)
      : AAUniformWorkGroupSize(IRP, A) {}

  void initialize(Attributor &A) override {
    Function *F = getAssociatedFunction();
    CallingConv::ID CC = F->getCallingConv();

    if (CC != CallingConv::AMDGPU_KERNEL)
      return;

    bool InitialValue = false;
    if (F->hasFnAttribute("uniform-work-group-size"))
      InitialValue = F->getFnAttribute("uniform-work-group-size")
                         .getValueAsString()
                         .equals("true");

    if (InitialValue)
      indicateOptimisticFixpoint();
    else
      indicatePessimisticFixpoint();
  }
  ChangeStatus updateImpl(Attributor &A) override {
    ChangeStatus Change = ChangeStatus::UNCHANGED;

    auto CheckCallSite = [&](AbstractCallSite CS) {
      Function *Caller = CS.getInstruction()->getFunction();
      LLVM_DEBUG(dbgs() << "[AAUniformWorkGroupSize] Call " << Caller->getName()
                        << "->" << getAssociatedFunction()->getName() << "\n");

      const auto &CallerInfo = A.getAAFor<AAUniformWorkGroupSize>(
          *this, IRPosition::function(*Caller), DepClassTy::REQUIRED);

      Change = Change | clampStateAndIndicateChange(this->getState(),
                                                    CallerInfo.getState());

      return true;
    };

    bool AllCallSitesKnown = true;
    if (!A.checkForAllCallSites(CheckCallSite, *this, true, AllCallSitesKnown))
      return indicatePessimisticFixpoint();

    return Change;
  }

  ChangeStatus manifest(Attributor &A) override {
    SmallVector<Attribute, 8> AttrList;
    LLVMContext &Ctx = getAssociatedFunction()->getContext();

    AttrList.push_back(Attribute::get(Ctx, "uniform-work-group-size",
                                      getAssumed() ? "true" : "false"));
    return IRAttributeManifest::manifestAttrs(A, getIRPosition(), AttrList,
                                              /* ForceReplace */ true);
  }
  /// This state is always valid, even when the assumed value is false.
  bool isValidState() const override { return true; }

  const std::string getAsStr() const override {
    return "AMDWorkGroupSize[" + std::to_string(getAssumed()) + "]";
  }

  /// See AbstractAttribute::trackStatistics().
  void trackStatistics() const override {}
};

AAUniformWorkGroupSize &
AAUniformWorkGroupSize::createForPosition(const IRPosition &IRP,
                                          Attributor &A) {
  if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
    return *new (A.Allocator) AAUniformWorkGroupSizeFunction(IRP, A);
  llvm_unreachable(
      "AAUniformWorkGroupSize is only valid for function position");
}
struct AAAMDAttributesFunction : public AAAMDAttributes {
  AAAMDAttributesFunction(const IRPosition &IRP, Attributor &A)
      : AAAMDAttributes(IRP, A) {}

  void initialize(Attributor &A) override {
    Function *F = getAssociatedFunction();

    // If the function requires the hostcall buffer (e.g. for sanitizers),
    // assume the hostcall pointer and implicitarg_ptr are needed even if the
    // existing attributes claim otherwise.
    const bool NeedsHostcall = funcRequiresHostcallPtr(*F);
    if (NeedsHostcall) {
      removeAssumedBits(IMPLICIT_ARG_PTR);
      removeAssumedBits(HOSTCALL_PTR);
    }

    for (auto Attr : ImplicitAttrs) {
      if (NeedsHostcall &&
          (Attr.first == IMPLICIT_ARG_PTR || Attr.first == HOSTCALL_PTR))
        continue;

      if (F->hasFnAttribute(Attr.second))
        addKnownBits(Attr.first);
    }

    if (F->isDeclaration())
      return;

    // Ignore functions with graphics calling conventions, these are currently
    // not allowed to have kernel arguments.
    if (AMDGPU::isGraphics(F->getCallingConv())) {
      indicatePessimisticFixpoint();
      return;
    }
  }
  ChangeStatus updateImpl(Attributor &A) override {
    Function *F = getAssociatedFunction();
    // The current assumed state used to determine a change.
    auto OrigAssumed = getAssumed();

    // Check for Intrinsics and propagate attributes.
    const AACallEdges &AAEdges = A.getAAFor<AACallEdges>(
        *this, this->getIRPosition(), DepClassTy::REQUIRED);
    if (AAEdges.hasNonAsmUnknownCallee())
      return indicatePessimisticFixpoint();

    bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(F->getCallingConv());

    bool NeedsImplicit = false;
    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
    bool HasApertureRegs = InfoCache.hasApertureRegs(*F);
    bool SupportsGetDoorbellID = InfoCache.supportsGetDoorbellID(*F);

    for (Function *Callee : AAEdges.getOptimisticEdges()) {
      Intrinsic::ID IID = Callee->getIntrinsicID();
      if (IID == Intrinsic::not_intrinsic) {
        const AAAMDAttributes &AAAMD = A.getAAFor<AAAMDAttributes>(
            *this, IRPosition::function(*Callee), DepClassTy::REQUIRED);
        *this &= AAAMD;
        continue;
      }

      bool NonKernelOnly = false;
      ImplicitArgumentMask AttrMask =
          intrinsicToAttrMask(IID, NonKernelOnly, NeedsImplicit,
                              HasApertureRegs, SupportsGetDoorbellID);
      if (AttrMask != NOT_IMPLICIT_INPUT) {
        if ((IsNonEntryFunc || !NonKernelOnly))
          removeAssumedBits(AttrMask);
      }
    }
    // Need implicitarg_ptr to access queue_ptr, private_base, and shared_base.
    if (NeedsImplicit)
      removeAssumedBits(IMPLICIT_ARG_PTR);

    if (isAssumed(QUEUE_PTR) && checkForQueuePtr(A)) {
      // Under V5, private_base and shared_base are reached through
      // implicitarg_ptr + offsets, so queue_ptr itself is not needed.
      if (AMDGPU::getAmdhsaCodeObjectVersion() == 5)
        removeAssumedBits(IMPLICIT_ARG_PTR);
      else
        removeAssumedBits(QUEUE_PTR);
    }
    if (funcRetrievesMultigridSyncArg(A)) {
      assert(!isAssumed(IMPLICIT_ARG_PTR) &&
             "multigrid_sync_arg needs implicitarg_ptr");
      removeAssumedBits(MULTIGRID_SYNC_ARG);
    }

    if (funcRetrievesHostcallPtr(A)) {
      assert(!isAssumed(IMPLICIT_ARG_PTR) && "hostcall needs implicitarg_ptr");
      removeAssumedBits(HOSTCALL_PTR);
    }

    if (funcRetrievesHeapPtr(A)) {
      assert(!isAssumed(IMPLICIT_ARG_PTR) && "heap_ptr needs implicitarg_ptr");
      removeAssumedBits(HEAP_PTR);
    }

    if (isAssumed(QUEUE_PTR) && funcRetrievesQueuePtr(A)) {
      assert(!isAssumed(IMPLICIT_ARG_PTR) && "queue_ptr needs implicitarg_ptr");
      removeAssumedBits(QUEUE_PTR);
    }

    if (isAssumed(LDS_KERNEL_ID) && funcRetrievesLDSKernelId(A)) {
      removeAssumedBits(LDS_KERNEL_ID);
    }

    if (isAssumed(DEFAULT_QUEUE) && funcRetrievesDefaultQueue(A))
      removeAssumedBits(DEFAULT_QUEUE);

    if (isAssumed(COMPLETION_ACTION) && funcRetrievesCompletionAction(A))
      removeAssumedBits(COMPLETION_ACTION);

    return getAssumed() != OrigAssumed ? ChangeStatus::CHANGED
                                       : ChangeStatus::UNCHANGED;
  }
  ChangeStatus manifest(Attributor &A) override {
    SmallVector<Attribute, 8> AttrList;
    LLVMContext &Ctx = getAssociatedFunction()->getContext();

    for (auto Attr : ImplicitAttrs) {
      if (isKnown(Attr.first))
        AttrList.push_back(Attribute::get(Ctx, Attr.second));
    }

    return IRAttributeManifest::manifestAttrs(A, getIRPosition(), AttrList,
                                              /* ForceReplace */ true);
  }

  const std::string getAsStr() const override {
    std::string Str;
    raw_string_ostream OS(Str);
    OS << "AMDInfo[";
    for (auto Attr : ImplicitAttrs)
      OS << ' ' << Attr.second;
    OS << " ]";
    return OS.str();
  }

  /// See AbstractAttribute::trackStatistics().
  void trackStatistics() const override {}
  bool checkForQueuePtr(Attributor &A) {
    Function *F = getAssociatedFunction();
    bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(F->getCallingConv());

    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());

    bool NeedsQueuePtr = false;

    auto CheckAddrSpaceCasts = [&](Instruction &I) {
      unsigned SrcAS = static_cast<AddrSpaceCastInst &>(I).getSrcAddressSpace();
      if (castRequiresQueuePtr(SrcAS)) {
        NeedsQueuePtr = true;
        return false;
      }
      return true;
    };

    bool HasApertureRegs = InfoCache.hasApertureRegs(*F);

    // `checkForAllInstructions` is much cheaper than walking every
    // instruction, so try it first. Address space casts do not need the queue
    // pointer if aperture registers are present.
    if (!HasApertureRegs) {
      bool UsedAssumedInformation = false;
      A.checkForAllInstructions(CheckAddrSpaceCasts, *this,
                                {Instruction::AddrSpaceCast},
                                UsedAssumedInformation);
    }

    if (NeedsQueuePtr)
      return true;

    if (!IsNonEntryFunc && HasApertureRegs)
      return false;

    // Otherwise scan the constants used by the function.
    for (BasicBlock &BB : *F) {
      for (Instruction &I : BB) {
        for (const Use &U : I.operands()) {
          if (const auto *C = dyn_cast<Constant>(U)) {
            if (InfoCache.needsQueuePtr(C, *F))
              return true;
          }
        }
      }
    }

    return false;
  }
  bool funcRetrievesMultigridSyncArg(Attributor &A) {
    auto Pos = llvm::AMDGPU::getMultigridSyncArgImplicitArgPosition();
    AA::RangeTy Range(Pos, 8);
    return funcRetrievesImplicitKernelArg(A, Range);
  }

  bool funcRetrievesHostcallPtr(Attributor &A) {
    auto Pos = llvm::AMDGPU::getHostcallImplicitArgPosition();
    AA::RangeTy Range(Pos, 8);
    return funcRetrievesImplicitKernelArg(A, Range);
  }

  bool funcRetrievesDefaultQueue(Attributor &A) {
    auto Pos = llvm::AMDGPU::getDefaultQueueImplicitArgPosition();
    AA::RangeTy Range(Pos, 8);
    return funcRetrievesImplicitKernelArg(A, Range);
  }

  bool funcRetrievesCompletionAction(Attributor &A) {
    auto Pos = llvm::AMDGPU::getCompletionActionImplicitArgPosition();
    AA::RangeTy Range(Pos, 8);
    return funcRetrievesImplicitKernelArg(A, Range);
  }
  bool funcRetrievesHeapPtr(Attributor &A) {
    if (AMDGPU::getAmdhsaCodeObjectVersion() != 5)
      return false;
    AA::RangeTy Range(AMDGPU::ImplicitArg::HEAP_PTR_OFFSET, 8);
    return funcRetrievesImplicitKernelArg(A, Range);
  }

  bool funcRetrievesQueuePtr(Attributor &A) {
    if (AMDGPU::getAmdhsaCodeObjectVersion() != 5)
      return false;
    AA::RangeTy Range(AMDGPU::ImplicitArg::QUEUE_PTR_OFFSET, 8);
    return funcRetrievesImplicitKernelArg(A, Range);
  }
  bool funcRetrievesImplicitKernelArg(Attributor &A, AA::RangeTy Range) {
    // The implicit kernel argument is needed if some call to
    // amdgcn.implicitarg.ptr can lead to a load from the given Range of the
    // implicit argument area.
    auto DoesNotLeadToKernelArgLoc = [&](Instruction &I) {
      auto &Call = cast<CallBase>(I);
      if (Call.getIntrinsicID() != Intrinsic::amdgcn_implicitarg_ptr)
        return true;

      const auto &PointerInfoAA = A.getAAFor<AAPointerInfo>(
          *this, IRPosition::callsite_returned(Call), DepClassTy::REQUIRED);

      return PointerInfoAA.forallInterferingAccesses(
          Range, [](const AAPointerInfo::Access &Acc, bool IsExact) {
            return Acc.getRemoteInst()->isDroppable();
          });
    };

    bool UsedAssumedInformation = false;
    return !A.checkForAllCallLikeInstructions(DoesNotLeadToKernelArgLoc, *this,
                                              UsedAssumedInformation);
  }
  bool funcRetrievesLDSKernelId(Attributor &A) {
    auto DoesNotRetrieve = [&](Instruction &I) {
      auto &Call = cast<CallBase>(I);
      return Call.getIntrinsicID() != Intrinsic::amdgcn_lds_kernel_id;
    };
    bool UsedAssumedInformation = false;
    return !A.checkForAllCallLikeInstructions(DoesNotRetrieve, *this,
                                              UsedAssumedInformation);
  }
};
AAAMDAttributes &AAAMDAttributes::createForPosition(const IRPosition &IRP,
                                                    Attributor &A) {
  if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
    return *new (A.Allocator) AAAMDAttributesFunction(IRP, A);
  llvm_unreachable("AAAMDAttributes is only valid for function position");
}
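// Rough sketch of the propagation implemented below (hypothetical example, not
// taken from a real test): if a kernel annotated with
// "amdgpu-flat-work-group-size"="64,128" calls a device function, the callee's
// IntegerRangeState is clamped against the caller's [64,128] range in
// updateImpl(), and manifest() writes the resulting
// "amdgpu-flat-work-group-size" attribute onto the callee unless the range
// already equals the subtarget's full default range.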
/// Propagate amdgpu-flat-work-group-size attribute.
struct AAAMDFlatWorkGroupSize
    : public StateWrapper<IntegerRangeState, AbstractAttribute, uint32_t> {
  using Base = StateWrapper<IntegerRangeState, AbstractAttribute, uint32_t>;
  AAAMDFlatWorkGroupSize(const IRPosition &IRP, Attributor &A)
      : Base(IRP, 32) {}

  void initialize(Attributor &A) override {
    Function *F = getAssociatedFunction();
    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
    unsigned MinGroupSize, MaxGroupSize;
    std::tie(MinGroupSize, MaxGroupSize) = InfoCache.getFlatWorkGroupSizes(*F);
    intersectKnown(
        ConstantRange(APInt(32, MinGroupSize), APInt(32, MaxGroupSize + 1)));

    if (AMDGPU::isEntryFunctionCC(F->getCallingConv()))
      indicatePessimisticFixpoint();
  }
  ChangeStatus updateImpl(Attributor &A) override {
    ChangeStatus Change = ChangeStatus::UNCHANGED;

    auto CheckCallSite = [&](AbstractCallSite CS) {
      Function *Caller = CS.getInstruction()->getFunction();
      LLVM_DEBUG(dbgs() << "[AAAMDFlatWorkGroupSize] Call " << Caller->getName()
                        << "->" << getAssociatedFunction()->getName() << '\n');

      const auto &CallerInfo = A.getAAFor<AAAMDFlatWorkGroupSize>(
          *this, IRPosition::function(*Caller), DepClassTy::REQUIRED);

      Change |=
          clampStateAndIndicateChange(this->getState(), CallerInfo.getState());

      return true;
    };

    bool AllCallSitesKnown = true;
    if (!A.checkForAllCallSites(CheckCallSite, *this, true, AllCallSitesKnown))
      return indicatePessimisticFixpoint();

    return Change;
  }
  ChangeStatus manifest(Attributor &A) override {
    SmallVector<Attribute, 8> AttrList;
    Function *F = getAssociatedFunction();
    LLVMContext &Ctx = F->getContext();

    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
    unsigned Min, Max;
    std::tie(Min, Max) = InfoCache.getMaximumFlatWorkGroupRange(*F);

    // Don't add the attribute if it's the implied default.
    if (getAssumed().getLower() == Min && getAssumed().getUpper() - 1 == Max)
      return ChangeStatus::UNCHANGED;

    SmallString<10> Buffer;
    raw_svector_ostream OS(Buffer);
    OS << getAssumed().getLower() << ',' << getAssumed().getUpper() - 1;

    AttrList.push_back(
        Attribute::get(Ctx, "amdgpu-flat-work-group-size", OS.str()));

    return IRAttributeManifest::manifestAttrs(A, getIRPosition(), AttrList,
                                              /* ForceReplace */ true);
  }
  const std::string getAsStr() const override {
    std::string Str;
    raw_string_ostream OS(Str);
    OS << "AMDFlatWorkGroupSize[";
    OS << getAssumed().getLower() << ',' << getAssumed().getUpper() - 1;
    OS << ']';
    return OS.str();
  }

  /// See AbstractAttribute::trackStatistics().
  void trackStatistics() const override {}
  /// Create an abstract attribute view for the position \p IRP.
  static AAAMDFlatWorkGroupSize &createForPosition(const IRPosition &IRP,
                                                   Attributor &A);

  /// See AbstractAttribute::getName().
  const std::string getName() const override {
    return "AAAMDFlatWorkGroupSize";
  }

  /// See AbstractAttribute::getIdAddr().
  const char *getIdAddr() const override { return &ID; }

  /// Unique ID (due to the unique address).
  static const char ID;
};
AAAMDFlatWorkGroupSize &
AAAMDFlatWorkGroupSize::createForPosition(const IRPosition &IRP,
                                          Attributor &A) {
  if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
    return *new (A.Allocator) AAAMDFlatWorkGroupSize(IRP, A);
  llvm_unreachable(
      "AAAMDFlatWorkGroupSize is only valid for function position");
}
  bool doInitialization(Module &) override {
    auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
    if (!TPC)
      report_fatal_error("TargetMachine is required");

    TM = &TPC->getTM<TargetMachine>();
    return false;
  }

  bool runOnModule(Module &M) override {
    SetVector<Function *> Functions;
    AnalysisGetter AG;
    for (Function &F : M.functions()) {
      if (!F.isIntrinsic())
        Functions.insert(&F);
    }

    CallGraphUpdater CGUpdater;
    BumpPtrAllocator Allocator;
    AMDGPUInformationCache InfoCache(M, AG, Allocator, nullptr, *TM);

    AttributorConfig AC(CGUpdater);
    AC.IsModulePass = true;
    AC.DefaultInitializeLiveInternals = false;

    Attributor A(Functions, InfoCache, AC);

    for (Function &F : M.functions()) {
      if (!F.isIntrinsic()) {
        A.getOrCreateAAFor<AAAMDAttributes>(IRPosition::function(F));
        A.getOrCreateAAFor<AAUniformWorkGroupSize>(IRPosition::function(F));
        if (!AMDGPU::isEntryFunctionCC(F.getCallingConv()))
          A.getOrCreateAAFor<AAAMDFlatWorkGroupSize>(IRPosition::function(F));
      }
    }

    ChangeStatus Change = A.run();
    return Change == ChangeStatus::CHANGED;
  }

  StringRef getPassName() const override { return "AMDGPU Attributor"; }