#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsR600.h"

#define DEBUG_TYPE "amdgpu-attributor"

static cl::opt<unsigned> KernargPreloadCount(
    "amdgpu-kernarg-preload-count",
    cl::desc("How many kernel arguments to preload onto SGPRs"), cl::init(0));

static cl::opt<unsigned> IndirectCallSpecializationThreshold(
    "amdgpu-indirect-call-specialization-threshold",
    cl::desc("A threshold controls whether an indirect call will be specialized"),
    cl::init(3));

#define AMDGPU_ATTRIBUTE(Name, Str) Name##_POS,
enum ImplicitArgumentPositions {
#include "AMDGPUAttributes.def"
  LAST_ARG_POS
};

#define AMDGPU_ATTRIBUTE(Name, Str) Name = 1 << Name##_POS,
enum ImplicitArgumentMask {
  NOT_IMPLICIT_INPUT = 0,
#include "AMDGPUAttributes.def"
  ALL_ARGUMENT_MASK = (1 << LAST_ARG_POS) - 1
};

#define AMDGPU_ATTRIBUTE(Name, Str) {Name, Str},
static constexpr std::pair<ImplicitArgumentMask, StringLiteral>
    ImplicitAttrs[] = {
#include "AMDGPUAttributes.def"
};
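// Illustrative sketch (not from the original file): AMDGPUAttributes.def is an
// X-macro list, so each AMDGPU_ATTRIBUTE definition above re-expands the same
// entries into a different construct. Assuming hypothetical entries such as
//   AMDGPU_ATTRIBUTE(QUEUE_PTR, "amdgpu-no-queue-ptr")
//   AMDGPU_ATTRIBUTE(HOSTCALL_PTR, "amdgpu-no-hostcall-ptr")
// the three expansions produce enumerator positions (QUEUE_PTR_POS, ...),
// one-hot mask values (QUEUE_PTR = 1 << QUEUE_PTR_POS, ...), and the
// {mask, attribute-string} pairs stored in ImplicitAttrs[].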
static ImplicitArgumentMask
intrinsicToAttrMask(Intrinsic::ID ID, bool &NonKernelOnly, bool &NeedsImplicit,
                    bool HasApertureRegs, bool SupportsGetDoorBellID,
                    unsigned CodeObjectVersion) {
  switch (ID) {
  case Intrinsic::amdgcn_workitem_id_x:
    NonKernelOnly = true;
    return WORKITEM_ID_X;
  case Intrinsic::amdgcn_workgroup_id_x:
    NonKernelOnly = true;
    return WORKGROUP_ID_X;
  case Intrinsic::amdgcn_workitem_id_y:
  case Intrinsic::r600_read_tidig_y:
    return WORKITEM_ID_Y;
  case Intrinsic::amdgcn_workitem_id_z:
  case Intrinsic::r600_read_tidig_z:
    return WORKITEM_ID_Z;
  case Intrinsic::amdgcn_workgroup_id_y:
  case Intrinsic::r600_read_tgid_y:
    return WORKGROUP_ID_Y;
  case Intrinsic::amdgcn_workgroup_id_z:
  case Intrinsic::r600_read_tgid_z:
    return WORKGROUP_ID_Z;
  case Intrinsic::amdgcn_lds_kernel_id:
    return LDS_KERNEL_ID;
  case Intrinsic::amdgcn_dispatch_ptr:
    return DISPATCH_PTR;
  case Intrinsic::amdgcn_dispatch_id:
    return DISPATCH_ID;
  case Intrinsic::amdgcn_implicitarg_ptr:
    return IMPLICIT_ARG_PTR;
  case Intrinsic::amdgcn_queue_ptr:
    NeedsImplicit = (CodeObjectVersion >= AMDGPU::AMDHSA_COV5);
    return QUEUE_PTR;
  case Intrinsic::amdgcn_is_shared:
  case Intrinsic::amdgcn_is_private:
    if (HasApertureRegs)
      return NOT_IMPLICIT_INPUT;
    NeedsImplicit = (CodeObjectVersion >= AMDGPU::AMDHSA_COV5);
    return QUEUE_PTR;
  case Intrinsic::trap:
    if (SupportsGetDoorBellID) // GetDoorbellID is only used for trap.
      return NOT_IMPLICIT_INPUT;
    NeedsImplicit = (CodeObjectVersion >= AMDGPU::AMDHSA_COV5);
    return QUEUE_PTR;
  default:
    return NOT_IMPLICIT_INPUT;
  }
}
/// Returns true if the function requires the implicit argument be passed
/// regardless of the function contents.
static bool funcRequiresHostcallPtr(const Function &F) {
  // Sanitizers require the hostcall buffer passed in the implicit arguments.
  return F.hasFnAttribute(Attribute::SanitizeAddress) ||
         F.hasFnAttribute(Attribute::SanitizeThread) ||
         F.hasFnAttribute(Attribute::SanitizeMemory) ||
         F.hasFnAttribute(Attribute::SanitizeHWAddress) ||
         F.hasFnAttribute(Attribute::SanitizeMemTag);
}
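// Note (added context, not in the original excerpt): device-side sanitizer
// runtimes report diagnostics through the hostcall mechanism, so any enabled
// sanitizer conservatively keeps the hostcall pointer (and with it the
// implicit argument pointer) alive for the function.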
class AMDGPUInformationCache : public InformationCache {
public:
  TargetMachine &TM;

  enum ConstantStatus : uint8_t {
    NONE = 0,
    DS_GLOBAL = 1 << 0,
    ADDR_SPACE_CAST_PRIVATE_TO_FLAT = 1 << 1,
    ADDR_SPACE_CAST_LOCAL_TO_FLAT = 1 << 2,
    ADDR_SPACE_CAST_BOTH_TO_FLAT =
        ADDR_SPACE_CAST_PRIVATE_TO_FLAT | ADDR_SPACE_CAST_LOCAL_TO_FLAT
  };
  bool hasApertureRegs(Function &F) {
    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
    return ST.hasApertureRegs();
  }

  bool supportsGetDoorbellID(Function &F) {
    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
    return ST.supportsGetDoorbellID();
  }

  std::optional<std::pair<unsigned, unsigned>>
  getFlatWorkGroupSizeAttr(const Function &F) const {
    auto R = AMDGPU::getIntegerPairAttribute(F, "amdgpu-flat-work-group-size");
    if (!R)
      return std::nullopt;
    return std::make_pair(R->first, *(R->second));
  }

  std::pair<unsigned, unsigned>
  getDefaultFlatWorkGroupSize(const Function &F) const {
    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
    return ST.getDefaultFlatWorkGroupSize(F.getCallingConv());
  }

  std::pair<unsigned, unsigned>
  getMaximumFlatWorkGroupRange(const Function &F) {
    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
    return {ST.getMinFlatWorkGroupSize(), ST.getMaxFlatWorkGroupSize()};
  }

  SmallVector<unsigned> getMaxNumWorkGroups(const Function &F) {
    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
    return ST.getMaxNumWorkGroups(F);
  }

  /// Get code object version.
  unsigned getCodeObjectVersion() const { return CodeObjectVersion; }

  std::pair<unsigned, unsigned>
  getWavesPerEU(const Function &F,
                std::pair<unsigned, unsigned> FlatWorkGroupSize) {
    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
    return ST.getWavesPerEU(F, FlatWorkGroupSize);
  }

  std::optional<std::pair<unsigned, unsigned>>
  getWavesPerEUAttr(const Function &F) {
    auto Val = AMDGPU::getIntegerPairAttribute(F, "amdgpu-waves-per-eu",
                                               /*OnlyFirstRequired=*/true);
    if (!Val)
      return std::nullopt;
    if (!Val->second) {
      const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
      Val->second = ST.getMaxWavesPerEU();
    }
    return std::make_pair(Val->first, *(Val->second));
  }

  std::pair<unsigned, unsigned>
  getEffectiveWavesPerEU(const Function &F,
                         std::pair<unsigned, unsigned> WavesPerEU,
                         std::pair<unsigned, unsigned> FlatWorkGroupSize) {
    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
    return ST.getEffectiveWavesPerEU(WavesPerEU, FlatWorkGroupSize);
  }

  unsigned getMaxWavesPerEU(const Function &F) {
    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
    return ST.getMaxWavesPerEU();
  }
  /// Used to determine if a Constant implies flat-address-space casts or DS
  /// (group/region) accesses.
  uint8_t visitConstExpr(const ConstantExpr *CE) {
    uint8_t Status = NONE;
    if (CE->getOpcode() == Instruction::AddrSpaceCast) {
      unsigned SrcAS = CE->getOperand(0)->getType()->getPointerAddressSpace();
      if (SrcAS == AMDGPUAS::PRIVATE_ADDRESS)
        Status |= ADDR_SPACE_CAST_PRIVATE_TO_FLAT;
      else if (SrcAS == AMDGPUAS::LOCAL_ADDRESS)
        Status |= ADDR_SPACE_CAST_LOCAL_TO_FLAT;
    }
    return Status;
  }

  /// Returns the cached (or recursively computed) access bitmap for \p C.
  uint8_t getConstantAccess(const Constant *C,
                            SmallPtrSetImpl<const Constant *> &Visited) {
    auto It = ConstantStatus.find(C);
    if (It != ConstantStatus.end())
      return It->second;

    uint8_t Result = 0;
    if (isDSAddress(C))
      Result = DS_GLOBAL;

    if (const auto *CE = dyn_cast<ConstantExpr>(C))
      Result |= visitConstExpr(CE);

    for (const Use &U : C->operands()) {
      const auto *OpC = dyn_cast<Constant>(U);
      if (!OpC || !Visited.insert(OpC).second)
        continue;

      Result |= getConstantAccess(OpC, Visited);
    }
    return Result;
  }

  /// Returns true if \p Fn needs the queue pointer because of \p C.
  bool needsQueuePtr(const Constant *C, Function &Fn) {
    bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(Fn.getCallingConv());
    bool HasAperture = hasApertureRegs(Fn);

    // No need to explore the constants.
    if (!IsNonEntryFunc && HasAperture)
      return false;

    SmallPtrSet<const Constant *, 8> Visited;
    uint8_t Access = getConstantAccess(C, Visited);

    if (IsNonEntryFunc && (Access & DS_GLOBAL))
      return true;

    return !HasAperture && (Access & ADDR_SPACE_CAST_BOTH_TO_FLAT);
  }

  bool checkConstForAddrSpaceCastFromPrivate(const Constant *C) {
    SmallPtrSet<const Constant *, 8> Visited;
    uint8_t Access = getConstantAccess(C, Visited);
    return Access & ADDR_SPACE_CAST_PRIVATE_TO_FLAT;
  }

private:
  /// Cached access bitmaps for already-visited constants.
  DenseMap<const Constant *, uint8_t> ConstantStatus;
  const unsigned CodeObjectVersion;
};
struct AAAMDAttributes
    : public StateWrapper<BitIntegerState<uint32_t, ALL_ARGUMENT_MASK, 0>,
                          AbstractAttribute> {
  /// Create an abstract attribute view for the position \p IRP.
  static AAAMDAttributes &createForPosition(const IRPosition &IRP,
                                            Attributor &A);

  const std::string getName() const override { return "AAAMDAttributes"; }

  const char *getIdAddr() const override { return &ID; }

  /// Unique ID (due to the unique address)
  static const char ID;
};
const char AAAMDAttributes::ID = 0;
struct AAUniformWorkGroupSize
    : public StateWrapper<BooleanState, AbstractAttribute> {
  static AAUniformWorkGroupSize &createForPosition(const IRPosition &IRP,
                                                   Attributor &A);

  const std::string getName() const override {
    return "AAUniformWorkGroupSize";
  }

  const char *getIdAddr() const override { return &ID; }

  /// Unique ID (due to the unique address)
  static const char ID;
};
const char AAUniformWorkGroupSize::ID = 0;
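// Background note (added): "uniform-work-group-size" is a frontend-emitted
// attribute stating that the launch grid is an exact multiple of the
// work-group size, i.e. there are no partial work-groups. The function
// specialization below seeds the value from the kernel's attribute and
// propagates it down the call graph, dropping it whenever a caller cannot
// guarantee it.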
struct AAUniformWorkGroupSizeFunction : public AAUniformWorkGroupSize {
  AAUniformWorkGroupSizeFunction(const IRPosition &IRP, Attributor &A)
      : AAUniformWorkGroupSize(IRP, A) {}

  void initialize(Attributor &A) override {
    Function *F = getAssociatedFunction();

    bool InitialValue = false;
    if (F->hasFnAttribute("uniform-work-group-size"))
      InitialValue =
          F->getFnAttribute("uniform-work-group-size").getValueAsString() ==
          "true";

    if (InitialValue)
      indicateOptimisticFixpoint();
    else
      indicatePessimisticFixpoint();
  }

  ChangeStatus updateImpl(Attributor &A) override {
    ChangeStatus Change = ChangeStatus::UNCHANGED;

    auto CheckCallSite = [&](AbstractCallSite CS) {
      Function *Caller = CS.getInstruction()->getFunction();
      LLVM_DEBUG(dbgs() << "[AAUniformWorkGroupSize] Call " << Caller->getName()
                        << "->" << getAssociatedFunction()->getName() << "\n");

      const auto *CallerInfo = A.getAAFor<AAUniformWorkGroupSize>(
          *this, IRPosition::function(*Caller), DepClassTy::REQUIRED);
      if (!CallerInfo || !CallerInfo->isValidState())
        return false;

      Change = Change | clampStateAndIndicateChange(this->getState(),
                                                    CallerInfo->getState());
      return true;
    };

    bool AllCallSitesKnown = true;
    if (!A.checkForAllCallSites(CheckCallSite, *this, true, AllCallSitesKnown))
      return indicatePessimisticFixpoint();

    return Change;
  }

  ChangeStatus manifest(Attributor &A) override {
    SmallVector<Attribute, 8> AttrList;
    LLVMContext &Ctx = getAssociatedFunction()->getContext();

    AttrList.push_back(Attribute::get(Ctx, "uniform-work-group-size",
                                      getAssumed() ? "true" : "false"));
    return A.manifestAttrs(getIRPosition(), AttrList,
                           /* ForceReplace */ true);
  }

  bool isValidState() const override {
    // This state is always valid, even when the assumed value is false.
    return true;
  }

  const std::string getAsStr(Attributor *) const override {
    return "AMDWorkGroupSize[" + std::to_string(getAssumed()) + "]";
  }

  void trackStatistics() const override {}
};
AAUniformWorkGroupSize &
AAUniformWorkGroupSize::createForPosition(const IRPosition &IRP,
                                          Attributor &A) {
  if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
    return *new (A.Allocator) AAUniformWorkGroupSizeFunction(IRP, A);
  llvm_unreachable(
      "AAUniformWorkGroupSize is only valid for function position");
}
struct AAAMDAttributesFunction : public AAAMDAttributes {
  AAAMDAttributesFunction(const IRPosition &IRP, Attributor &A)
      : AAAMDAttributes(IRP, A) {}

  void initialize(Attributor &A) override {
    Function *F = getAssociatedFunction();

    // If the function requires the implicit arg pointer due to sanitizers,
    // assume it's needed even if explicitly marked as not requiring it.
    const bool NeedsHostcall = funcRequiresHostcallPtr(*F);
    if (NeedsHostcall) {
      removeAssumedBits(IMPLICIT_ARG_PTR);
      removeAssumedBits(HOSTCALL_PTR);
    }

    for (auto Attr : ImplicitAttrs) {
      if (NeedsHostcall &&
          (Attr.first == IMPLICIT_ARG_PTR || Attr.first == HOSTCALL_PTR))
        continue;

      if (F->hasFnAttribute(Attr.second))
        addKnownBits(Attr.first);
    }

    if (F->isDeclaration())
      return;

    // Ignore functions with graphics calling conventions; these are currently
    // not allowed to have kernel arguments.
    if (AMDGPU::isGraphics(F->getCallingConv())) {
      indicatePessimisticFixpoint();
      return;
    }
  }

  ChangeStatus updateImpl(Attributor &A) override {
    Function *F = getAssociatedFunction();
    // The current assumed state used to determine a change.
    auto OrigAssumed = getAssumed();

    // Check for Intrinsics and propagate attributes.
    const AACallEdges *AAEdges = A.getAAFor<AACallEdges>(
        *this, this->getIRPosition(), DepClassTy::REQUIRED);
    if (!AAEdges || !AAEdges->isValidState() ||
        AAEdges->hasNonAsmUnknownCallee())
      return indicatePessimisticFixpoint();

    bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(F->getCallingConv());

    bool NeedsImplicit = false;
    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
    bool HasApertureRegs = InfoCache.hasApertureRegs(*F);
    bool SupportsGetDoorbellID = InfoCache.supportsGetDoorbellID(*F);
    unsigned COV = InfoCache.getCodeObjectVersion();

    for (Function *Callee : AAEdges->getOptimisticEdges()) {
      Intrinsic::ID IID = Callee->getIntrinsicID();
      if (IID == Intrinsic::not_intrinsic) {
        const AAAMDAttributes *AAAMD = A.getAAFor<AAAMDAttributes>(
            *this, IRPosition::function(*Callee), DepClassTy::REQUIRED);
        if (!AAAMD || !AAAMD->isValidState())
          return indicatePessimisticFixpoint();
        *this &= *AAAMD;
        continue;
      }

      bool NonKernelOnly = false;
      ImplicitArgumentMask AttrMask =
          intrinsicToAttrMask(IID, NonKernelOnly, NeedsImplicit,
                              HasApertureRegs, SupportsGetDoorbellID, COV);
      if (AttrMask != NOT_IMPLICIT_INPUT) {
        if ((IsNonEntryFunc || !NonKernelOnly))
          removeAssumedBits(AttrMask);
      }
    }

    // Need implicitarg_ptr to acquire queue_ptr, hostcall, ...
    if (NeedsImplicit)
      removeAssumedBits(IMPLICIT_ARG_PTR);

    if (isAssumed(QUEUE_PTR) && checkForQueuePtr(A)) {
      // Under V5, aperture queries go through implicitarg_ptr + offsets
      // instead of the queue pointer.
      if (COV >= 5)
        removeAssumedBits(IMPLICIT_ARG_PTR);
      else
        removeAssumedBits(QUEUE_PTR);
    }

    if (funcRetrievesMultigridSyncArg(A, COV)) {
      assert(!isAssumed(IMPLICIT_ARG_PTR) &&
             "multigrid_sync_arg needs implicitarg_ptr");
      removeAssumedBits(MULTIGRID_SYNC_ARG);
    }

    if (funcRetrievesHostcallPtr(A, COV)) {
      assert(!isAssumed(IMPLICIT_ARG_PTR) && "hostcall needs implicitarg_ptr");
      removeAssumedBits(HOSTCALL_PTR);
    }

    if (funcRetrievesHeapPtr(A, COV)) {
      assert(!isAssumed(IMPLICIT_ARG_PTR) && "heap_ptr needs implicitarg_ptr");
      removeAssumedBits(HEAP_PTR);
    }

    if (isAssumed(QUEUE_PTR) && funcRetrievesQueuePtr(A, COV)) {
      assert(!isAssumed(IMPLICIT_ARG_PTR) && "queue_ptr needs implicitarg_ptr");
      removeAssumedBits(QUEUE_PTR);
    }

    if (isAssumed(LDS_KERNEL_ID) && funcRetrievesLDSKernelId(A))
      removeAssumedBits(LDS_KERNEL_ID);

    if (isAssumed(DEFAULT_QUEUE) && funcRetrievesDefaultQueue(A, COV))
      removeAssumedBits(DEFAULT_QUEUE);

    if (isAssumed(COMPLETION_ACTION) && funcRetrievesCompletionAction(A, COV))
      removeAssumedBits(COMPLETION_ACTION);

    if (isAssumed(FLAT_SCRATCH_INIT) && needFlatScratchInit(A))
      removeAssumedBits(FLAT_SCRATCH_INIT);

    return getAssumed() != OrigAssumed ? ChangeStatus::CHANGED
                                       : ChangeStatus::UNCHANGED;
  }
  ChangeStatus manifest(Attributor &A) override {
    SmallVector<Attribute, 8> AttrList;
    LLVMContext &Ctx = getAssociatedFunction()->getContext();
    for (auto Attr : ImplicitAttrs) {
      if (isKnown(Attr.first))
        AttrList.push_back(Attribute::get(Ctx, Attr.second));
    }
    return A.manifestAttrs(getIRPosition(), AttrList,
                           /* ForceReplace */ true);
  }

  const std::string getAsStr(Attributor *) const override {
    std::string Str;
    raw_string_ostream OS(Str);
    for (auto Attr : ImplicitAttrs)
      if (isAssumed(Attr.first))
        OS << ' ' << Attr.second;
    return OS.str();
  }

  void trackStatistics() const override {}
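  // Illustrative IR-level effect (hypothetical example, not from the original
  // file): once the fixpoint is reached, manifest() adds one "amdgpu-no-*"
  // string attribute per implicit argument proven unused; a kernel that only
  // reads workitem_id_x might end up with, e.g.,
  //   attributes #0 = { "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z"
  //                     "amdgpu-no-dispatch-id" "amdgpu-no-queue-ptr" }
  // The exact attribute strings come from AMDGPUAttributes.def.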
  bool checkForQueuePtr(Attributor &A) {
    Function *F = getAssociatedFunction();
    bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(F->getCallingConv());

    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());

    bool NeedsQueuePtr = false;

    auto CheckAddrSpaceCasts = [&](Instruction &I) {
      unsigned SrcAS = static_cast<AddrSpaceCastInst &>(I).getSrcAddressSpace();
      if (castRequiresQueuePtr(SrcAS)) {
        NeedsQueuePtr = true;
        return false;
      }
      return true;
    };

    bool HasApertureRegs = InfoCache.hasApertureRegs(*F);

    // The queue pointer is not needed if aperture regs are present.
    if (!HasApertureRegs) {
      bool UsedAssumedInformation = false;
      A.checkForAllInstructions(CheckAddrSpaceCasts, *this,
                                {Instruction::AddrSpaceCast},
                                UsedAssumedInformation);
    }

    // If we found that we need the queue pointer, nothing else to check.
    if (NeedsQueuePtr)
      return true;

    if (!IsNonEntryFunc && HasApertureRegs)
      return false;

    // Also check constant operands for address-space casts and DS globals.
    for (BasicBlock &BB : *F) {
      for (Instruction &I : BB) {
        for (const Use &U : I.operands()) {
          if (const auto *C = dyn_cast<Constant>(U)) {
            if (InfoCache.needsQueuePtr(C, *F))
              return true;
          }
        }
      }
    }

    return false;
  }
  bool funcRetrievesMultigridSyncArg(Attributor &A, unsigned COV) {
    auto Pos = llvm::AMDGPU::getMultigridSyncArgImplicitArgPosition(COV);
    AA::RangeTy Range(Pos, 8);
    return funcRetrievesImplicitKernelArg(A, Range);
  }

  bool funcRetrievesHostcallPtr(Attributor &A, unsigned COV) {
    auto Pos = llvm::AMDGPU::getHostcallImplicitArgPosition(COV);
    AA::RangeTy Range(Pos, 8);
    return funcRetrievesImplicitKernelArg(A, Range);
  }

  bool funcRetrievesDefaultQueue(Attributor &A, unsigned COV) {
    auto Pos = llvm::AMDGPU::getDefaultQueueImplicitArgPosition(COV);
    AA::RangeTy Range(Pos, 8);
    return funcRetrievesImplicitKernelArg(A, Range);
  }

  bool funcRetrievesCompletionAction(Attributor &A, unsigned COV) {
    auto Pos = llvm::AMDGPU::getCompletionActionImplicitArgPosition(COV);
    AA::RangeTy Range(Pos, 8);
    return funcRetrievesImplicitKernelArg(A, Range);
  }

  bool funcRetrievesHeapPtr(Attributor &A, unsigned COV) {
    if (COV < 5)
      return false;
    AA::RangeTy Range(AMDGPU::ImplicitArg::HEAP_PTR_OFFSET, 8);
    return funcRetrievesImplicitKernelArg(A, Range);
  }

  bool funcRetrievesQueuePtr(Attributor &A, unsigned COV) {
    if (COV < 5)
      return false;
    AA::RangeTy Range(AMDGPU::ImplicitArg::QUEUE_PTR_OFFSET, 8);
    return funcRetrievesImplicitKernelArg(A, Range);
  }
  bool funcRetrievesImplicitKernelArg(Attributor &A, AA::RangeTy Range) {
    // The implicit kernel argument in the given byte range is only considered
    // used if some access can reach those bytes starting from a call to the
    // implicitarg_ptr intrinsic.
    auto DoesNotLeadToKernelArgLoc = [&](Instruction &I) {
      auto &Call = cast<CallBase>(I);
      if (Call.getIntrinsicID() != Intrinsic::amdgcn_implicitarg_ptr)
        return true;

      const auto *PointerInfoAA = A.getAAFor<AAPointerInfo>(
          *this, IRPosition::callsite_returned(Call), DepClassTy::REQUIRED);
      if (!PointerInfoAA || !PointerInfoAA->getState().isValidState())
        return false;

      return PointerInfoAA->forallInterferingAccesses(
          Range, [](const AAPointerInfo::Access &Acc, bool IsExact) {
            return Acc.getRemoteInst()->isDroppable();
          });
    };

    bool UsedAssumedInformation = false;
    return !A.checkForAllCallLikeInstructions(DoesNotLeadToKernelArgLoc, *this,
                                              UsedAssumedInformation);
  }

  bool funcRetrievesLDSKernelId(Attributor &A) {
    auto DoesNotRetrieve = [&](Instruction &I) {
      auto &Call = cast<CallBase>(I);
      return Call.getIntrinsicID() != Intrinsic::amdgcn_lds_kernel_id;
    };
    bool UsedAssumedInformation = false;
    return !A.checkForAllCallLikeInstructions(DoesNotRetrieve, *this,
                                              UsedAssumedInformation);
  }
  // Returns true if FlatScratchInit is needed, i.e. "amdgpu-no-flat-scratch-init"
  // must not be set.
  bool needFlatScratchInit(Attributor &A) {
    assert(isAssumed(FLAT_SCRATCH_INIT)); // only called if the bit is still set

    // Check all AddrSpaceCast instructions; flat scratch init is needed if
    // there is a cast from the private address space.
    auto AddrSpaceCastNotFromPrivate = [](Instruction &I) {
      return cast<AddrSpaceCastInst>(I).getSrcAddressSpace() !=
             AMDGPUAS::PRIVATE_ADDRESS;
    };

    bool UsedAssumedInformation = false;
    if (!A.checkForAllInstructions(AddrSpaceCastNotFromPrivate, *this,
                                   {Instruction::AddrSpaceCast},
                                   UsedAssumedInformation))
      return true;

    // Check for casts from the private address space in constant expressions.
    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());

    Function *F = getAssociatedFunction();
    for (BasicBlock &BB : *F) {
      for (Instruction &I : BB) {
        for (const Use &U : I.operands()) {
          if (const auto *C = dyn_cast<Constant>(U)) {
            if (InfoCache.checkConstForAddrSpaceCastFromPrivate(C))
              return true;
          }
        }
      }
    }

    // Finally, check call-like instructions. A call to the
    // amdgcn.addrspacecast.nonnull intrinsic may cast from private and so
    // forces the initialization; other callees are handled through their own
    // FLAT_SCRATCH_INIT bit in updateImpl().
    auto CheckForNoFlatScratchInit = [&](Instruction &I) {
      const auto &CB = cast<CallBase>(I);
      const Function *Callee = CB.getCalledFunction();
      if (!Callee)
        return true;
      return Callee->getIntrinsicID() !=
             Intrinsic::amdgcn_addrspacecast_nonnull;
    };

    UsedAssumedInformation = false;
    return !A.checkForAllCallLikeInstructions(CheckForNoFlatScratchInit, *this,
                                              UsedAssumedInformation);
  }
};
AAAMDAttributes &AAAMDAttributes::createForPosition(const IRPosition &IRP,
                                                    Attributor &A) {
  if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
    return *new (A.Allocator) AAAMDAttributesFunction(IRP, A);
  llvm_unreachable("AAAMDAttributes is only valid for function position");
}
/// Base class shared by the size-range attributes.
struct AAAMDSizeRangeAttribute
    : public StateWrapper<IntegerRangeState, AbstractAttribute, uint32_t> {
  using Base = StateWrapper<IntegerRangeState, AbstractAttribute, uint32_t>;

  StringRef AttrName;

  AAAMDSizeRangeAttribute(const IRPosition &IRP, Attributor &A,
                          StringRef AttrName)
      : Base(IRP, 32), AttrName(AttrName) {}

  void trackStatistics() const override {}

  template <class AttributeImpl> ChangeStatus updateImplImpl(Attributor &A) {
    ChangeStatus Change = ChangeStatus::UNCHANGED;

    auto CheckCallSite = [&](AbstractCallSite CS) {
      Function *Caller = CS.getInstruction()->getFunction();
      LLVM_DEBUG(dbgs() << '[' << getName() << "] Call " << Caller->getName()
                        << "->" << getAssociatedFunction()->getName() << '\n');
      const auto *CallerInfo = A.getAAFor<AttributeImpl>(
          *this, IRPosition::function(*Caller), DepClassTy::REQUIRED);
      if (!CallerInfo || !CallerInfo->isValidState())
        return false;
      Change |= clampStateAndIndicateChange(this->getState(),
                                            CallerInfo->getState());
      return true;
    };

    bool AllCallSitesKnown = true;
    if (!A.checkForAllCallSites(CheckCallSite, *this,
                                /*RequireAllCallSites=*/true,
                                AllCallSitesKnown))
      return indicatePessimisticFixpoint();
    return Change;
  }

  /// Clamp the assumed range against \p Default and emit the attribute string
  /// only if the result differs from the default.
  ChangeStatus
  emitAttributeIfNotDefaultAfterClamp(Attributor &A,
                                      std::pair<unsigned, unsigned> Default) {
    auto [Min, Max] = Default;
    unsigned Lower = getAssumed().getLower().getZExtValue();
    unsigned Upper = getAssumed().getUpper().getZExtValue();
    Lower = std::max(Lower, Min);
    Upper = std::min(Upper, Max + 1);
    if (Lower == Min && Upper == Max + 1)
      return ChangeStatus::UNCHANGED;
    LLVMContext &Ctx = getAssociatedFunction()->getContext();
    SmallString<10> Buffer;
    raw_svector_ostream OS(Buffer);
    OS << Lower << ',' << Upper - 1;
    return A.manifestAttrs(getIRPosition(),
                           {Attribute::get(Ctx, AttrName, OS.str())},
                           /*ForceReplace=*/true);
  }

  const std::string getAsStr(Attributor *) const override {
    std::string Str;
    raw_string_ostream OS(Str);
    OS << getName() << '[';
    OS << getAssumed().getLower() << ',' << getAssumed().getUpper() - 1;
    OS << ']';
    return OS.str();
  }
};
/// Propagate amdgpu-flat-work-group-size attribute.
struct AAAMDFlatWorkGroupSize : public AAAMDSizeRangeAttribute {
  AAAMDFlatWorkGroupSize(const IRPosition &IRP, Attributor &A)
      : AAAMDSizeRangeAttribute(IRP, A, "amdgpu-flat-work-group-size") {}

  void initialize(Attributor &A) override {
    Function *F = getAssociatedFunction();
    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());

    bool HasAttr = false;
    auto Range = InfoCache.getDefaultFlatWorkGroupSize(*F);
    auto MaxRange = InfoCache.getMaximumFlatWorkGroupRange(*F);

    if (auto Attr = InfoCache.getFlatWorkGroupSizeAttr(*F)) {
      // Only honor the attribute when it is not the maximum range, because
      // the front end always emits it, sometimes with the maximum range.
      if (*Attr != MaxRange) {
        Range = *Attr;
        HasAttr = true;
      }
    }

    // Don't clamp the state to the maximum range; that is the worst state.
    if (Range == MaxRange)
      return;

    auto [Min, Max] = Range;
    ConstantRange CR(APInt(32, Min), APInt(32, Max + 1));
    IntegerRangeState IRS(CR);
    clampStateAndIndicateChange(this->getState(), IRS);

    if (AMDGPU::isEntryFunctionCC(F->getCallingConv()) || HasAttr)
      indicateOptimisticFixpoint();
  }

  ChangeStatus updateImpl(Attributor &A) override {
    return updateImplImpl<AAAMDFlatWorkGroupSize>(A);
  }

  /// Create an abstract attribute view for the position \p IRP.
  static AAAMDFlatWorkGroupSize &createForPosition(const IRPosition &IRP,
                                                   Attributor &A);

  ChangeStatus manifest(Attributor &A) override {
    Function *F = getAssociatedFunction();
    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
    return emitAttributeIfNotDefaultAfterClamp(
        A, InfoCache.getMaximumFlatWorkGroupRange(*F));
  }

  const std::string getName() const override {
    return "AAAMDFlatWorkGroupSize";
  }

  const char *getIdAddr() const override { return &ID; }

  /// Unique ID (due to the unique address)
  static const char ID;
};
const char AAAMDFlatWorkGroupSize::ID = 0;
AAAMDFlatWorkGroupSize &
AAAMDFlatWorkGroupSize::createForPosition(const IRPosition &IRP,
                                          Attributor &A) {
  if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
    return *new (A.Allocator) AAAMDFlatWorkGroupSize(IRP, A);
  llvm_unreachable(
      "AAAMDFlatWorkGroupSize is only valid for function position");
}
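// For reference (hypothetical values): the manifested attribute uses the
// "min,max" string form, e.g. a non-entry function proven to be called only
// from kernels limited to 128 lanes could end up with
// "amdgpu-flat-work-group-size"="1,128".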
/// Three decreasing integer states, one per grid dimension.
struct TupleDecIntegerRangeState : public AbstractState {
  DecIntegerState<uint32_t> X, Y, Z;

  bool isValidState() const override {
    return X.isValidState() && Y.isValidState() && Z.isValidState();
  }

  bool isAtFixpoint() const override {
    return X.isAtFixpoint() && Y.isAtFixpoint() && Z.isAtFixpoint();
  }

  ChangeStatus indicateOptimisticFixpoint() override {
    return X.indicateOptimisticFixpoint() | Y.indicateOptimisticFixpoint() |
           Z.indicateOptimisticFixpoint();
  }

  ChangeStatus indicatePessimisticFixpoint() override {
    return X.indicatePessimisticFixpoint() | Y.indicatePessimisticFixpoint() |
           Z.indicatePessimisticFixpoint();
  }

  TupleDecIntegerRangeState operator^=(const TupleDecIntegerRangeState &Other) {
    X ^= Other.X;
    Y ^= Other.Y;
    Z ^= Other.Z;
    return *this;
  }

  bool operator==(const TupleDecIntegerRangeState &Other) const {
    return X == Other.X && Y == Other.Y && Z == Other.Z;
  }

  TupleDecIntegerRangeState &getAssumed() { return *this; }
  const TupleDecIntegerRangeState &getAssumed() const { return *this; }
};
using AAAMDMaxNumWorkgroupsState =
    StateWrapper<TupleDecIntegerRangeState, AbstractAttribute>;

/// Propagate amdgpu-max-num-workgroups attribute.
struct AAAMDMaxNumWorkgroups
    : public StateWrapper<TupleDecIntegerRangeState, AbstractAttribute> {
  using Base = StateWrapper<TupleDecIntegerRangeState, AbstractAttribute>;

  AAAMDMaxNumWorkgroups(const IRPosition &IRP, Attributor &A) : Base(IRP) {}

  void initialize(Attributor &A) override {
    Function *F = getAssociatedFunction();
    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());

    SmallVector<unsigned> MaxNumWorkgroups = InfoCache.getMaxNumWorkGroups(*F);

    X.takeKnownMinimum(MaxNumWorkgroups[0]);
    Y.takeKnownMinimum(MaxNumWorkgroups[1]);
    Z.takeKnownMinimum(MaxNumWorkgroups[2]);

    if (AMDGPU::isEntryFunctionCC(F->getCallingConv()))
      indicatePessimisticFixpoint();
  }
  ChangeStatus updateImpl(Attributor &A) override {
    ChangeStatus Change = ChangeStatus::UNCHANGED;

    auto CheckCallSite = [&](AbstractCallSite CS) {
      Function *Caller = CS.getInstruction()->getFunction();
      LLVM_DEBUG(dbgs() << "[AAAMDMaxNumWorkgroups] Call " << Caller->getName()
                        << "->" << getAssociatedFunction()->getName() << '\n');
      const auto *CallerInfo = A.getAAFor<AAAMDMaxNumWorkgroups>(
          *this, IRPosition::function(*Caller), DepClassTy::REQUIRED);
      if (!CallerInfo || !CallerInfo->isValidState())
        return false;
      Change |= clampStateAndIndicateChange(this->getState(),
                                            CallerInfo->getState());
      return true;
    };

    bool AllCallSitesKnown = true;
    if (!A.checkForAllCallSites(CheckCallSite, *this,
                                /*RequireAllCallSites=*/true,
                                AllCallSitesKnown))
      return indicatePessimisticFixpoint();

    return Change;
  }
  /// Create an abstract attribute view for the position \p IRP.
  static AAAMDMaxNumWorkgroups &createForPosition(const IRPosition &IRP,
                                                  Attributor &A);

  ChangeStatus manifest(Attributor &A) override {
    Function *F = getAssociatedFunction();
    LLVMContext &Ctx = F->getContext();
    SmallString<32> Buffer;
    raw_svector_ostream OS(Buffer);
    OS << X.getAssumed() << ',' << Y.getAssumed() << ',' << Z.getAssumed();

    return A.manifestAttrs(
        getIRPosition(),
        {Attribute::get(Ctx, "amdgpu-max-num-workgroups", OS.str())},
        /*ForceReplace=*/true);
  }

  const std::string getName() const override { return "AAAMDMaxNumWorkgroups"; }

  const std::string getAsStr(Attributor *) const override {
    std::string Buffer = "AAAMDMaxNumWorkgroupsState[";
    raw_string_ostream OS(Buffer);
    OS << X.getAssumed() << ',' << Y.getAssumed() << ',' << Z.getAssumed()
       << ']';
    return OS.str();
  }

  const char *getIdAddr() const override { return &ID; }

  void trackStatistics() const override {}

  /// Unique ID (due to the unique address)
  static const char ID;
};

const char AAAMDMaxNumWorkgroups::ID = 0;
AAAMDMaxNumWorkgroups &
AAAMDMaxNumWorkgroups::createForPosition(const IRPosition &IRP, Attributor &A) {
  if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
    return *new (A.Allocator) AAAMDMaxNumWorkgroups(IRP, A);
  llvm_unreachable(
      "AAAMDMaxNumWorkgroups is only valid for function position");
}
/// Propagate amdgpu-waves-per-eu attribute.
struct AAAMDWavesPerEU : public AAAMDSizeRangeAttribute {
  AAAMDWavesPerEU(const IRPosition &IRP, Attributor &A)
      : AAAMDSizeRangeAttribute(IRP, A, "amdgpu-waves-per-eu") {}

  void initialize(Attributor &A) override {
    Function *F = getAssociatedFunction();
    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());

    auto TakeRange = [&](std::pair<unsigned, unsigned> R) {
      auto [Min, Max] = R;
      ConstantRange CR(APInt(32, Min), APInt(32, Max + 1));
      IntegerRangeState RangeState(CR);
      clampStateAndIndicateChange(this->getState(), RangeState);
      indicateOptimisticFixpoint();
    };

    std::pair<unsigned, unsigned> MaxWavesPerEURange{
        1U, InfoCache.getMaxWavesPerEU(*F)};

    // If the attribute exists, honor it unless it is the default range.
    if (auto Attr = InfoCache.getWavesPerEUAttr(*F)) {
      if (*Attr != MaxWavesPerEURange) {
        TakeRange(*Attr);
        return;
      }
    }

    // For an entry function the flat work-group size (from the attribute or
    // the default) is final, so the effective waves-per-EU derived from it
    // can serve as the starting fixed point.
    if (AMDGPU::isEntryFunctionCC(F->getCallingConv())) {
      std::pair<unsigned, unsigned> FlatWorkGroupSize;
      if (auto Attr = InfoCache.getFlatWorkGroupSizeAttr(*F))
        FlatWorkGroupSize = *Attr;
      else
        FlatWorkGroupSize = InfoCache.getDefaultFlatWorkGroupSize(*F);
      TakeRange(InfoCache.getEffectiveWavesPerEU(*F, MaxWavesPerEURange,
                                                 FlatWorkGroupSize));
    }
  }
  ChangeStatus updateImpl(Attributor &A) override {
    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
    ChangeStatus Change = ChangeStatus::UNCHANGED;

    auto CheckCallSite = [&](AbstractCallSite CS) {
      Function *Caller = CS.getInstruction()->getFunction();
      Function *Func = getAssociatedFunction();
      LLVM_DEBUG(dbgs() << '[' << getName() << "] Call " << Caller->getName()
                        << "->" << Func->getName() << '\n');

      const auto *CallerInfo = A.getAAFor<AAAMDWavesPerEU>(
          *this, IRPosition::function(*Caller), DepClassTy::REQUIRED);
      const auto *AssumedGroupSize = A.getAAFor<AAAMDFlatWorkGroupSize>(
          *this, IRPosition::function(*Func), DepClassTy::REQUIRED);
      if (!CallerInfo || !AssumedGroupSize || !CallerInfo->isValidState() ||
          !AssumedGroupSize->isValidState())
        return false;

      unsigned Min, Max;
      std::tie(Min, Max) = InfoCache.getEffectiveWavesPerEU(
          *Caller,
          {CallerInfo->getAssumed().getLower().getZExtValue(),
           CallerInfo->getAssumed().getUpper().getZExtValue() - 1},
          {AssumedGroupSize->getAssumed().getLower().getZExtValue(),
           AssumedGroupSize->getAssumed().getUpper().getZExtValue() - 1});
      ConstantRange CallerRange(APInt(32, Min), APInt(32, Max + 1));
      IntegerRangeState CallerRangeState(CallerRange);
      Change |= clampStateAndIndicateChange(this->getState(), CallerRangeState);
      return true;
    };

    bool AllCallSitesKnown = true;
    if (!A.checkForAllCallSites(CheckCallSite, *this, true, AllCallSitesKnown))
      return indicatePessimisticFixpoint();

    return Change;
  }
  /// Create an abstract attribute view for the position \p IRP.
  static AAAMDWavesPerEU &createForPosition(const IRPosition &IRP,
                                            Attributor &A);

  ChangeStatus manifest(Attributor &A) override {
    Function *F = getAssociatedFunction();
    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
    return emitAttributeIfNotDefaultAfterClamp(
        A, {1U, InfoCache.getMaxWavesPerEU(*F)});
  }

  const std::string getName() const override { return "AAAMDWavesPerEU"; }

  const char *getIdAddr() const override { return &ID; }

  /// Unique ID (due to the unique address)
  static const char ID;
};
const char AAAMDWavesPerEU::ID = 0;

AAAMDWavesPerEU &AAAMDWavesPerEU::createForPosition(const IRPosition &IRP,
                                                    Attributor &A) {
  if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
    return *new (A.Allocator) AAAMDWavesPerEU(IRP, A);
  llvm_unreachable("AAAMDWavesPerEU is only valid for function position");
}
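// For reference (hypothetical values): as with the flat work-group size, the
// deduced range is emitted in "min,max" form, e.g. "amdgpu-waves-per-eu"="2,8",
// and only when it is tighter than the default [1, getMaxWavesPerEU()].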
static bool inlineAsmUsesAGPRs(const InlineAsm *IA) {
  for (const auto &CI : IA->ParseConstraints()) {
    for (StringRef Code : CI.Codes) {
      Code.consume_front("{");
      if (Code.starts_with("a"))
        return true;
    }
  }
  return false;
}
struct AAAMDGPUNoAGPR
    : public IRAttribute<Attribute::NoUnwind,
                         StateWrapper<BooleanState, AbstractAttribute>,
                         AAAMDGPUNoAGPR> {
  AAAMDGPUNoAGPR(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {}

  static AAAMDGPUNoAGPR &createForPosition(const IRPosition &IRP,
                                           Attributor &A) {
    if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
      return *new (A.Allocator) AAAMDGPUNoAGPR(IRP, A);
    llvm_unreachable("AAAMDGPUNoAGPR is only valid for function position");
  }

  void initialize(Attributor &A) override {
    Function *F = getAssociatedFunction();
    if (F->hasFnAttribute("amdgpu-no-agpr"))
      indicateOptimisticFixpoint();
  }

  const std::string getAsStr(Attributor *A) const override {
    return getAssumed() ? "amdgpu-no-agpr" : "amdgpu-maybe-agpr";
  }

  void trackStatistics() const override {}

  ChangeStatus updateImpl(Attributor &A) override {
    // TODO: Use AACallEdges, but then we need a way to inspect asm edges.
    auto CheckForNoAGPRs = [&](Instruction &I) {
      const auto &CB = cast<CallBase>(I);
      const Value *CalleeOp = CB.getCalledOperand();
      const Function *Callee = dyn_cast<Function>(CalleeOp);
      if (!Callee) {
        if (const InlineAsm *IA = dyn_cast<InlineAsm>(CalleeOp))
          return !inlineAsmUsesAGPRs(IA);
        return false;
      }

      // Some intrinsics may use AGPRs, but if we have a choice, we are not
      // required to use AGPRs.
      if (Callee->isIntrinsic())
        return true;

      // TODO: Handle callsite attributes
      const auto *CalleeInfo = A.getAAFor<AAAMDGPUNoAGPR>(
          *this, IRPosition::function(*Callee), DepClassTy::REQUIRED);
      return CalleeInfo && CalleeInfo->isValidState() &&
             CalleeInfo->getAssumed();
    };

    bool UsedAssumedInformation = false;
    if (!A.checkForAllCallLikeInstructions(CheckForNoAGPRs, *this,
                                           UsedAssumedInformation))
      return indicatePessimisticFixpoint();

    return ChangeStatus::UNCHANGED;
  }

  ChangeStatus manifest(Attributor &A) override {
    if (!getAssumed())
      return ChangeStatus::UNCHANGED;
    LLVMContext &Ctx = getAssociatedFunction()->getContext();
    return A.manifestAttrs(getIRPosition(),
                           {Attribute::get(Ctx, "amdgpu-no-agpr")});
  }

  const std::string getName() const override { return "AAAMDGPUNoAGPR"; }

  const char *getIdAddr() const override { return &ID; }

  /// Unique ID (due to the unique address)
  static const char ID;
};

const char AAAMDGPUNoAGPR::ID = 0;
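// Added context (assumptions noted): the "a" inline-asm constraint requests an
// accumulator register, so something like
//   asm volatile("v_accvgpr_read_b32 %0, %1" : "=v"(Dst) : "a"(Src));
// would make inlineAsmUsesAGPRs() return true (hypothetical example). Proving
// "amdgpu-no-agpr" is mainly interesting on subtargets where AGPRs and VGPRs
// share one register file (e.g. gfx90a), since the register allocator can then
// give the whole budget to VGPRs.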
static void addPreloadKernArgHint(Function &F, TargetMachine &TM) {
  // Mark the leading kernel arguments "inreg" so they are preloaded into
  // SGPRs (the exact bound checks are condensed in this excerpt).
  for (unsigned I = 0; I < KernargPreloadCount && I < F.arg_size(); ++I) {
    Argument &Arg = *F.getArg(I);
    // Check for incompatible attributes.
    if (Arg.hasByRefAttr() || Arg.hasNestAttr())
      break;
    Arg.addAttr(Attribute::InReg);
  }
}

static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
                    AMDGPUAttributorOptions Options) {
  SetVector<Function *> Functions;
  for (Function &F : M) {
    if (!F.isIntrinsic())
      Functions.insert(&F);
  }

  BumpPtrAllocator Allocator;
  AMDGPUInformationCache InfoCache(M, AG, Allocator, nullptr, TM);
  DenseSet<const char *> Allowed(
      {&AAAMDAttributes::ID, &AAUniformWorkGroupSize::ID,
       &AAPotentialValues::ID, &AAAMDFlatWorkGroupSize::ID,
       &AAAMDMaxNumWorkgroups::ID, &AAAMDWavesPerEU::ID, &AAAMDGPUNoAGPR::ID,
       &AACallEdges::ID, &AAPointerInfo::ID, &AAPotentialConstantValues::ID,
       &AAAddressSpace::ID, &AAIndirectCallInfo::ID, &AAInstanceInfo::ID});

  CallGraphUpdater CGUpdater;
  AttributorConfig AC(CGUpdater);
  AC.IsClosedWorldModule = Options.IsClosedWorld;
  AC.Allowed = &Allowed;
  AC.IsModulePass = true;
  AC.DefaultInitializeLiveInternals = false;
  AC.IndirectCalleeSpecializationCallback =
      [](Attributor &A, const AbstractAttribute &AA, CallBase &CB,
         Function &Callee, unsigned NumAssumedCallees) {
        return !AMDGPU::isEntryFunctionCC(Callee.getCallingConv()) &&
               (NumAssumedCallees <= IndirectCallSpecializationThreshold);
      };
  AC.IPOAmendableCB = [](const Function &F) {
    return F.getCallingConv() == CallingConv::AMDGPU_KERNEL;
  };

  Attributor A(Functions, InfoCache, AC);

  LLVM_DEBUG(dbgs() << "[AMDGPUAttributor] Module " << M.getName() << " is "
                    << (AC.IsClosedWorldModule ? "" : "not ")
                    << "assumed to be a closed world.\n");

  for (auto *F : Functions) {
    A.getOrCreateAAFor<AAAMDAttributes>(IRPosition::function(*F));
    A.getOrCreateAAFor<AAUniformWorkGroupSize>(IRPosition::function(*F));
    A.getOrCreateAAFor<AAAMDMaxNumWorkgroups>(IRPosition::function(*F));
    A.getOrCreateAAFor<AAAMDGPUNoAGPR>(IRPosition::function(*F));
    CallingConv::ID CC = F->getCallingConv();
    if (!AMDGPU::isEntryFunctionCC(CC)) {
      A.getOrCreateAAFor<AAAMDFlatWorkGroupSize>(IRPosition::function(*F));
      A.getOrCreateAAFor<AAAMDWavesPerEU>(IRPosition::function(*F));
    } else if (CC == CallingConv::AMDGPU_KERNEL) {
      addPreloadKernArgHint(*F, TM);
    }

    for (BasicBlock &BB : *F) {
      for (Instruction &I : BB) {
        if (auto *LI = dyn_cast<LoadInst>(&I)) {
          A.getOrCreateAAFor<AAAddressSpace>(
              IRPosition::value(*LI->getPointerOperand()));
        } else if (auto *SI = dyn_cast<StoreInst>(&I)) {
          A.getOrCreateAAFor<AAAddressSpace>(
              IRPosition::value(*SI->getPointerOperand()));
        } else if (auto *RMW = dyn_cast<AtomicRMWInst>(&I)) {
          A.getOrCreateAAFor<AAAddressSpace>(
              IRPosition::value(*RMW->getPointerOperand()));
        } else if (auto *CmpX = dyn_cast<AtomicCmpXchgInst>(&I)) {
          A.getOrCreateAAFor<AAAddressSpace>(
              IRPosition::value(*CmpX->getPointerOperand()));
        }
      }
    }
  }

  ChangeStatus Change = A.run();
  return Change == ChangeStatus::CHANGED;
}
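// Usage note (assumed typical invocation, not from the original file): the
// deduction above can be exercised in isolation with the new pass manager,
// e.g.
//   opt -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-attributor in.ll -S
// where "amdgpu-attributor" is the registered module pass name.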
class AMDGPUAttributorLegacy : public ModulePass {
public:
  bool doInitialization(Module &) override {
    auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
    if (!TPC)
      report_fatal_error("TargetMachine is required");
    TM = &TPC->getTM<TargetMachine>();
    return false;
  }

  bool runOnModule(Module &M) override {
    AnalysisGetter AG(this);
    return runImpl(M, AG, *TM, {});
  }

  TargetMachine *TM = nullptr;
  static char ID;
};

char AMDGPUAttributorLegacy::ID = 0;

Pass *llvm::createAMDGPUAttributorLegacyPass() {
  return new AMDGPUAttributorLegacy();
}