LLVM 22.0.0git
AMDGPUAttributor.cpp
1//===- AMDGPUAttributor.cpp -----------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file This pass uses Attributor framework to deduce AMDGPU attributes.
10//
11//===----------------------------------------------------------------------===//
12
13#include "AMDGPU.h"
14#include "GCNSubtarget.h"
16#include "llvm/IR/IntrinsicsAMDGPU.h"
17#include "llvm/IR/IntrinsicsR600.h"
20
21#define DEBUG_TYPE "amdgpu-attributor"
22
23using namespace llvm;
24
26 "amdgpu-indirect-call-specialization-threshold",
28 "A threshold controls whether an indirect call will be specialized"),
29 cl::init(3));
30
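// AMDGPUAttributes.def is an X-macro list: each #include below re-expands
// AMDGPU_ATTRIBUTE(Name, Str) under a different definition to build the bit
// positions, the bit masks, and the (mask, attribute-string) table in turn.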
31#define AMDGPU_ATTRIBUTE(Name, Str) Name##_POS,
32
33enum ImplicitArgumentPositions {
34#include "AMDGPUAttributes.def"
35 LAST_ARG_POS
36};
37
38#define AMDGPU_ATTRIBUTE(Name, Str) Name = 1 << Name##_POS,
39
40enum ImplicitArgumentMask {
41 NOT_IMPLICIT_INPUT = 0,
42#include "AMDGPUAttributes.def"
43 UNKNOWN_INTRINSIC = 1 << LAST_ARG_POS,
44 ALL_ARGUMENT_MASK = (1 << LAST_ARG_POS) - 1
45};
46
47#define AMDGPU_ATTRIBUTE(Name, Str) {Name, Str},
48static constexpr std::pair<ImplicitArgumentMask, StringLiteral>
49 ImplicitAttrs[] = {
50#include "AMDGPUAttributes.def"
51};
52
53// We do not need to note the x workitem or workgroup id because they are always
54// initialized.
55//
56// TODO: We should not add the attributes if the known compile time workgroup
57// size is 1 for y/z.
58static ImplicitArgumentMask
59intrinsicToAttrMask(Intrinsic::ID ID, bool &NonKernelOnly, bool &NeedsImplicit,
60 bool HasApertureRegs, bool SupportsGetDoorBellID,
61 unsigned CodeObjectVersion) {
62 switch (ID) {
63 case Intrinsic::amdgcn_workitem_id_x:
64 NonKernelOnly = true;
65 return WORKITEM_ID_X;
66 case Intrinsic::amdgcn_workgroup_id_x:
67 NonKernelOnly = true;
68 return WORKGROUP_ID_X;
69 case Intrinsic::amdgcn_workitem_id_y:
70 case Intrinsic::r600_read_tidig_y:
71 return WORKITEM_ID_Y;
72 case Intrinsic::amdgcn_workitem_id_z:
73 case Intrinsic::r600_read_tidig_z:
74 return WORKITEM_ID_Z;
75 case Intrinsic::amdgcn_workgroup_id_y:
76 case Intrinsic::r600_read_tgid_y:
77 return WORKGROUP_ID_Y;
78 case Intrinsic::amdgcn_workgroup_id_z:
79 case Intrinsic::r600_read_tgid_z:
80 return WORKGROUP_ID_Z;
81 case Intrinsic::amdgcn_cluster_id_x:
82 NonKernelOnly = true;
83 return CLUSTER_ID_X;
84 case Intrinsic::amdgcn_cluster_id_y:
85 return CLUSTER_ID_Y;
86 case Intrinsic::amdgcn_cluster_id_z:
87 return CLUSTER_ID_Z;
88 case Intrinsic::amdgcn_lds_kernel_id:
89 return LDS_KERNEL_ID;
90 case Intrinsic::amdgcn_dispatch_ptr:
91 return DISPATCH_PTR;
92 case Intrinsic::amdgcn_dispatch_id:
93 return DISPATCH_ID;
94 case Intrinsic::amdgcn_implicitarg_ptr:
95 return IMPLICIT_ARG_PTR;
96 // Need queue_ptr anyway. But under V5, we also need implicitarg_ptr to access
97 // queue_ptr.
98 case Intrinsic::amdgcn_queue_ptr:
99 NeedsImplicit = (CodeObjectVersion >= AMDGPU::AMDHSA_COV5);
100 return QUEUE_PTR;
101 case Intrinsic::amdgcn_is_shared:
102 case Intrinsic::amdgcn_is_private:
103 if (HasApertureRegs)
104 return NOT_IMPLICIT_INPUT;
105 // Under V5, we need implicitarg_ptr + offsets to access private_base or
106 // shared_base. For pre-V5, however, they are accessed through queue_ptr +
107 // offsets.
108 return CodeObjectVersion >= AMDGPU::AMDHSA_COV5 ? IMPLICIT_ARG_PTR
109 : QUEUE_PTR;
110 case Intrinsic::trap:
111 case Intrinsic::debugtrap:
112 case Intrinsic::ubsantrap:
113 if (SupportsGetDoorBellID) // GetDoorbellID support implemented since V4.
114 return CodeObjectVersion >= AMDGPU::AMDHSA_COV4 ? NOT_IMPLICIT_INPUT
115 : QUEUE_PTR;
116 NeedsImplicit = (CodeObjectVersion >= AMDGPU::AMDHSA_COV5);
117 return QUEUE_PTR;
118 default:
119 return UNKNOWN_INTRINSIC;
120 }
121}
122
123static bool castRequiresQueuePtr(unsigned SrcAS) {
124 return SrcAS == AMDGPUAS::LOCAL_ADDRESS || SrcAS == AMDGPUAS::PRIVATE_ADDRESS;
125}
126
127static bool isDSAddress(const Constant *C) {
128 const auto *GV = dyn_cast<GlobalValue>(C);
129 if (!GV)
130 return false;
131 unsigned AS = GV->getAddressSpace();
132 return AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS;
133}
134
135/// Returns true if sanitizer attributes are present on a function.
136static bool hasSanitizerAttributes(const Function &F) {
137 return F.hasFnAttribute(Attribute::SanitizeAddress) ||
138 F.hasFnAttribute(Attribute::SanitizeThread) ||
139 F.hasFnAttribute(Attribute::SanitizeMemory) ||
140 F.hasFnAttribute(Attribute::SanitizeHWAddress) ||
141 F.hasFnAttribute(Attribute::SanitizeMemTag);
142}
143
144namespace {
145class AMDGPUInformationCache : public InformationCache {
146public:
147 AMDGPUInformationCache(const Module &M, AnalysisGetter &AG,
148 BumpPtrAllocator &Allocator,
149 SetVector<Function *> *CGSCC, TargetMachine &TM)
150 : InformationCache(M, AG, Allocator, CGSCC), TM(TM),
151 CodeObjectVersion(AMDGPU::getAMDHSACodeObjectVersion(M)) {}
152
153 TargetMachine &TM;
154
155 enum ConstantStatus : uint8_t {
156 NONE = 0,
157 DS_GLOBAL = 1 << 0,
158 ADDR_SPACE_CAST_PRIVATE_TO_FLAT = 1 << 1,
159 ADDR_SPACE_CAST_LOCAL_TO_FLAT = 1 << 2,
160 ADDR_SPACE_CAST_BOTH_TO_FLAT =
161 ADDR_SPACE_CAST_PRIVATE_TO_FLAT | ADDR_SPACE_CAST_LOCAL_TO_FLAT
162 };
163
164 /// Check if the subtarget has aperture regs.
165 bool hasApertureRegs(Function &F) {
166 const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
167 return ST.hasApertureRegs();
168 }
169
170 /// Check if the subtarget supports GetDoorbellID.
171 bool supportsGetDoorbellID(Function &F) {
172 const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
173 return ST.supportsGetDoorbellID();
174 }
175
176 std::optional<std::pair<unsigned, unsigned>>
177 getFlatWorkGroupSizeAttr(const Function &F) const {
178 auto R = AMDGPU::getIntegerPairAttribute(F, "amdgpu-flat-work-group-size");
179 if (!R)
180 return std::nullopt;
181 return std::make_pair(R->first, *(R->second));
182 }
183
184 std::pair<unsigned, unsigned>
185 getDefaultFlatWorkGroupSize(const Function &F) const {
186 const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
187 return ST.getDefaultFlatWorkGroupSize(F.getCallingConv());
188 }
189
190 std::pair<unsigned, unsigned>
191 getMaximumFlatWorkGroupRange(const Function &F) {
192 const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
193 return {ST.getMinFlatWorkGroupSize(), ST.getMaxFlatWorkGroupSize()};
194 }
195
196 SmallVector<unsigned> getMaxNumWorkGroups(const Function &F) {
197 const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
198 return ST.getMaxNumWorkGroups(F);
199 }
200
201 /// Get code object version.
202 unsigned getCodeObjectVersion() const { return CodeObjectVersion; }
203
204 /// Get the effective value of "amdgpu-waves-per-eu" for the function,
205 /// accounting for the interaction with the passed value to use for
206 /// "amdgpu-flat-work-group-size".
207 std::pair<unsigned, unsigned>
208 getWavesPerEU(const Function &F,
209 std::pair<unsigned, unsigned> FlatWorkGroupSize) {
210 const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
211 return ST.getWavesPerEU(FlatWorkGroupSize, getLDSSize(F), F);
212 }
213
214 std::optional<std::pair<unsigned, unsigned>>
215 getWavesPerEUAttr(const Function &F) {
216 auto Val = AMDGPU::getIntegerPairAttribute(F, "amdgpu-waves-per-eu",
217 /*OnlyFirstRequired=*/true);
218 if (!Val)
219 return std::nullopt;
220 if (!Val->second) {
221 const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
222 Val->second = ST.getMaxWavesPerEU();
223 }
224 return std::make_pair(Val->first, *(Val->second));
225 }
226
227 unsigned getMaxWavesPerEU(const Function &F) {
228 const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
229 return ST.getMaxWavesPerEU();
230 }
231
232 unsigned getMaxAddrSpace() const override {
233 return AMDGPUAS::MAX_AMDGPU_ADDRESS;
234 }
235
236private:
237 /// Check if the ConstantExpr \p CE uses an addrspacecast from private or
238 /// local to flat. These casts may require the queue pointer.
239 static uint8_t visitConstExpr(const ConstantExpr *CE) {
240 uint8_t Status = NONE;
241
242 if (CE->getOpcode() == Instruction::AddrSpaceCast) {
243 unsigned SrcAS = CE->getOperand(0)->getType()->getPointerAddressSpace();
244 if (SrcAS == AMDGPUAS::PRIVATE_ADDRESS)
245 Status |= ADDR_SPACE_CAST_PRIVATE_TO_FLAT;
246 else if (SrcAS == AMDGPUAS::LOCAL_ADDRESS)
247 Status |= ADDR_SPACE_CAST_LOCAL_TO_FLAT;
248 }
249
250 return Status;
251 }
252
253 /// Returns the minimum amount of LDS space used by a workgroup running
254 /// function \p F.
255 static unsigned getLDSSize(const Function &F) {
256 return AMDGPU::getIntegerPairAttribute(F, "amdgpu-lds-size",
257 {0, UINT32_MAX}, true)
258 .first;
259 }
260
261 /// Get the constant access bitmap for \p C.
262 uint8_t getConstantAccess(const Constant *C,
263 SmallPtrSetImpl<const Constant *> &Visited) {
264 auto It = ConstantStatus.find(C);
265 if (It != ConstantStatus.end())
266 return It->second;
267
268 uint8_t Result = 0;
269 if (isDSAddress(C))
270 Result = DS_GLOBAL;
271
272 if (const auto *CE = dyn_cast<ConstantExpr>(C))
273 Result |= visitConstExpr(CE);
274
275 for (const Use &U : C->operands()) {
276 const auto *OpC = dyn_cast<Constant>(U);
277 if (!OpC || !Visited.insert(OpC).second)
278 continue;
279
280 Result |= getConstantAccess(OpC, Visited);
281 }
282 return Result;
283 }
284
285public:
286 /// Returns true if \p Fn needs the queue pointer because of \p C.
287 bool needsQueuePtr(const Constant *C, Function &Fn) {
288 bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(Fn.getCallingConv());
289 bool HasAperture = hasApertureRegs(Fn);
290
291 // No need to explore the constants.
292 if (!IsNonEntryFunc && HasAperture)
293 return false;
294
295 SmallPtrSet<const Constant *, 8> Visited;
296 uint8_t Access = getConstantAccess(C, Visited);
297
298 // We need to trap on DS globals in non-entry functions.
299 if (IsNonEntryFunc && (Access & DS_GLOBAL))
300 return true;
301
302 return !HasAperture && (Access & ADDR_SPACE_CAST_BOTH_TO_FLAT);
303 }
304
305 bool checkConstForAddrSpaceCastFromPrivate(const Constant *C) {
306 SmallPtrSet<const Constant *, 8> Visited;
307 uint8_t Access = getConstantAccess(C, Visited);
308 return Access & ADDR_SPACE_CAST_PRIVATE_TO_FLAT;
309 }
310
311private:
312 /// Used to determine if the Constant needs the queue pointer.
313 DenseMap<const Constant *, uint8_t> ConstantStatus;
314 const unsigned CodeObjectVersion;
315};
316
317struct AAAMDAttributes
318 : public StateWrapper<BitIntegerState<uint32_t, ALL_ARGUMENT_MASK, 0>,
319 AbstractAttribute> {
320 using Base = StateWrapper<BitIntegerState<uint32_t, ALL_ARGUMENT_MASK, 0>,
321 AbstractAttribute>;
322
323 AAAMDAttributes(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
324
325 /// Create an abstract attribute view for the position \p IRP.
326 static AAAMDAttributes &createForPosition(const IRPosition &IRP,
327 Attributor &A);
328
329 /// See AbstractAttribute::getName().
330 StringRef getName() const override { return "AAAMDAttributes"; }
331
332 /// See AbstractAttribute::getIdAddr().
333 const char *getIdAddr() const override { return &ID; }
334
335 /// This function should return true if the type of the \p AA is
336 /// AAAMDAttributes.
337 static bool classof(const AbstractAttribute *AA) {
338 return (AA->getIdAddr() == &ID);
339 }
340
341 /// Unique ID (due to the unique address)
342 static const char ID;
343};
344const char AAAMDAttributes::ID = 0;
345
346struct AAUniformWorkGroupSize
347 : public StateWrapper<BooleanState, AbstractAttribute> {
348 using Base = StateWrapper<BooleanState, AbstractAttribute>;
349 AAUniformWorkGroupSize(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
350
351 /// Create an abstract attribute view for the position \p IRP.
352 static AAUniformWorkGroupSize &createForPosition(const IRPosition &IRP,
353 Attributor &A);
354
355 /// See AbstractAttribute::getName().
356 StringRef getName() const override { return "AAUniformWorkGroupSize"; }
357
358 /// See AbstractAttribute::getIdAddr().
359 const char *getIdAddr() const override { return &ID; }
360
361 /// This function should return true if the type of the \p AA is
362 /// AAUniformWorkGroupSize.
363 static bool classof(const AbstractAttribute *AA) {
364 return (AA->getIdAddr() == &ID);
365 }
366
367 /// Unique ID (due to the unique address)
368 static const char ID;
369};
370const char AAUniformWorkGroupSize::ID = 0;
371
372struct AAUniformWorkGroupSizeFunction : public AAUniformWorkGroupSize {
373 AAUniformWorkGroupSizeFunction(const IRPosition &IRP, Attributor &A)
374 : AAUniformWorkGroupSize(IRP, A) {}
375
376 void initialize(Attributor &A) override {
377 Function *F = getAssociatedFunction();
378 CallingConv::ID CC = F->getCallingConv();
379
380 if (CC != CallingConv::AMDGPU_KERNEL)
381 return;
382
383 bool InitialValue = false;
384 if (F->hasFnAttribute("uniform-work-group-size"))
385 InitialValue =
386 F->getFnAttribute("uniform-work-group-size").getValueAsString() ==
387 "true";
388
389 if (InitialValue)
390 indicateOptimisticFixpoint();
391 else
392 indicatePessimisticFixpoint();
393 }
394
395 ChangeStatus updateImpl(Attributor &A) override {
396 ChangeStatus Change = ChangeStatus::UNCHANGED;
397
398 auto CheckCallSite = [&](AbstractCallSite CS) {
399 Function *Caller = CS.getInstruction()->getFunction();
400 LLVM_DEBUG(dbgs() << "[AAUniformWorkGroupSize] Call " << Caller->getName()
401 << "->" << getAssociatedFunction()->getName() << "\n");
402
403 const auto *CallerInfo = A.getAAFor<AAUniformWorkGroupSize>(
404 *this, IRPosition::function(*Caller), DepClassTy::REQUIRED);
405 if (!CallerInfo || !CallerInfo->isValidState())
406 return false;
407
408 Change = Change | clampStateAndIndicateChange(this->getState(),
409 CallerInfo->getState());
410
411 return true;
412 };
413
414 bool AllCallSitesKnown = true;
415 if (!A.checkForAllCallSites(CheckCallSite, *this, true, AllCallSitesKnown))
416 return indicatePessimisticFixpoint();
417
418 return Change;
419 }
420
421 ChangeStatus manifest(Attributor &A) override {
422 SmallVector<Attribute, 8> AttrList;
423 LLVMContext &Ctx = getAssociatedFunction()->getContext();
424
425 AttrList.push_back(Attribute::get(Ctx, "uniform-work-group-size",
426 getAssumed() ? "true" : "false"));
427 return A.manifestAttrs(getIRPosition(), AttrList,
428 /* ForceReplace */ true);
429 }
430
431 bool isValidState() const override {
432 // This state is always valid, even when the state is false.
433 return true;
434 }
435
436 const std::string getAsStr(Attributor *) const override {
437 return "AMDWorkGroupSize[" + std::to_string(getAssumed()) + "]";
438 }
439
440 /// See AbstractAttribute::trackStatistics()
441 void trackStatistics() const override {}
442};
443
444AAUniformWorkGroupSize &
445AAUniformWorkGroupSize::createForPosition(const IRPosition &IRP,
446 Attributor &A) {
447 if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
448 return *new (A.Allocator) AAUniformWorkGroupSizeFunction(IRP, A);
449 llvm_unreachable(
450 "AAUniformWorkGroupSize is only valid for function position");
451}
452
453struct AAAMDAttributesFunction : public AAAMDAttributes {
454 AAAMDAttributesFunction(const IRPosition &IRP, Attributor &A)
455 : AAAMDAttributes(IRP, A) {}
456
457 void initialize(Attributor &A) override {
458 Function *F = getAssociatedFunction();
459
460 // If the function requires the implicit arg pointer due to sanitizers,
461 // assume it's needed even if explicitly marked as not requiring it.
462 // Flat scratch initialization is needed because `asan_malloc_impl`
463 // calls introduced later in the pipeline will have flat scratch accesses.
464 // FIXME: FLAT_SCRATCH_INIT will not be required here if device-libs
465 // implementation for `asan_malloc_impl` is updated.
466 const bool HasSanitizerAttrs = hasSanitizerAttributes(*F);
467 if (HasSanitizerAttrs) {
468 removeAssumedBits(IMPLICIT_ARG_PTR);
469 removeAssumedBits(HOSTCALL_PTR);
470 removeAssumedBits(FLAT_SCRATCH_INIT);
471 }
472
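// Seed the known bits from the attributes already present on the function,
// skipping the bits force-removed above when sanitizers are enabled.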
473 for (auto Attr : ImplicitAttrs) {
474 if (HasSanitizerAttrs &&
475 (Attr.first == IMPLICIT_ARG_PTR || Attr.first == HOSTCALL_PTR ||
476 Attr.first == FLAT_SCRATCH_INIT))
477 continue;
478
479 if (F->hasFnAttribute(Attr.second))
480 addKnownBits(Attr.first);
481 }
482
483 if (F->isDeclaration())
484 return;
485
486 // Ignore functions with graphics calling conventions; these are currently
487 // not allowed to have kernel arguments.
488 if (AMDGPU::isGraphics(F->getCallingConv())) {
489 indicatePessimisticFixpoint();
490 return;
491 }
492 }
493
494 ChangeStatus updateImpl(Attributor &A) override {
495 Function *F = getAssociatedFunction();
496 // The current assumed state used to determine a change.
497 auto OrigAssumed = getAssumed();
498
499 // Check for Intrinsics and propagate attributes.
500 const AACallEdges *AAEdges = A.getAAFor<AACallEdges>(
501 *this, this->getIRPosition(), DepClassTy::REQUIRED);
502 if (!AAEdges || !AAEdges->isValidState() ||
503 AAEdges->hasNonAsmUnknownCallee())
504 return indicatePessimisticFixpoint();
505
506 bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(F->getCallingConv());
507
508 bool NeedsImplicit = false;
509 auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
510 bool HasApertureRegs = InfoCache.hasApertureRegs(*F);
511 bool SupportsGetDoorbellID = InfoCache.supportsGetDoorbellID(*F);
512 unsigned COV = InfoCache.getCodeObjectVersion();
513
514 for (Function *Callee : AAEdges->getOptimisticEdges()) {
515 Intrinsic::ID IID = Callee->getIntrinsicID();
516 if (IID == Intrinsic::not_intrinsic) {
517 const AAAMDAttributes *AAAMD = A.getAAFor<AAAMDAttributes>(
518 *this, IRPosition::function(*Callee), DepClassTy::REQUIRED);
519 if (!AAAMD || !AAAMD->isValidState())
520 return indicatePessimisticFixpoint();
521 *this &= *AAAMD;
522 continue;
523 }
524
525 bool NonKernelOnly = false;
526 ImplicitArgumentMask AttrMask =
527 intrinsicToAttrMask(IID, NonKernelOnly, NeedsImplicit,
528 HasApertureRegs, SupportsGetDoorbellID, COV);
529
530 if (AttrMask == UNKNOWN_INTRINSIC) {
531 // Assume not-nocallback intrinsics may invoke a function which accesses
532 // implicit arguments.
533 //
534 // FIXME: This isn't really the correct check. We want to ensure it
535 // isn't calling any function that may use implicit arguments regardless
536 // of whether it's internal to the module or not.
537 //
538 // TODO: Ignoring callsite attributes.
539 if (!Callee->hasFnAttribute(Attribute::NoCallback))
540 return indicatePessimisticFixpoint();
541 continue;
542 }
543
544 if (AttrMask != NOT_IMPLICIT_INPUT) {
545 if ((IsNonEntryFunc || !NonKernelOnly))
546 removeAssumedBits(AttrMask);
547 }
548 }
549
550 // Need implicitarg_ptr to access queue_ptr, private_base, and shared_base.
551 if (NeedsImplicit)
552 removeAssumedBits(IMPLICIT_ARG_PTR);
553
554 if (isAssumed(QUEUE_PTR) && checkForQueuePtr(A)) {
555 // Under V5, we need implicitarg_ptr + offsets to access private_base or
556 // shared_base. We do not actually need queue_ptr.
557 if (COV >= 5)
558 removeAssumedBits(IMPLICIT_ARG_PTR);
559 else
560 removeAssumedBits(QUEUE_PTR);
561 }
562
563 if (funcRetrievesMultigridSyncArg(A, COV)) {
564 assert(!isAssumed(IMPLICIT_ARG_PTR) &&
565 "multigrid_sync_arg needs implicitarg_ptr");
566 removeAssumedBits(MULTIGRID_SYNC_ARG);
567 }
568
569 if (funcRetrievesHostcallPtr(A, COV)) {
570 assert(!isAssumed(IMPLICIT_ARG_PTR) && "hostcall needs implicitarg_ptr");
571 removeAssumedBits(HOSTCALL_PTR);
572 }
573
574 if (funcRetrievesHeapPtr(A, COV)) {
575 assert(!isAssumed(IMPLICIT_ARG_PTR) && "heap_ptr needs implicitarg_ptr");
576 removeAssumedBits(HEAP_PTR);
577 }
578
579 if (isAssumed(QUEUE_PTR) && funcRetrievesQueuePtr(A, COV)) {
580 assert(!isAssumed(IMPLICIT_ARG_PTR) && "queue_ptr needs implicitarg_ptr");
581 removeAssumedBits(QUEUE_PTR);
582 }
583
584 if (isAssumed(LDS_KERNEL_ID) && funcRetrievesLDSKernelId(A)) {
585 removeAssumedBits(LDS_KERNEL_ID);
586 }
587
588 if (isAssumed(DEFAULT_QUEUE) && funcRetrievesDefaultQueue(A, COV))
589 removeAssumedBits(DEFAULT_QUEUE);
590
591 if (isAssumed(COMPLETION_ACTION) && funcRetrievesCompletionAction(A, COV))
592 removeAssumedBits(COMPLETION_ACTION);
593
594 if (isAssumed(FLAT_SCRATCH_INIT) && needFlatScratchInit(A))
595 removeAssumedBits(FLAT_SCRATCH_INIT);
596
597 return getAssumed() != OrigAssumed ? ChangeStatus::CHANGED
598 : ChangeStatus::UNCHANGED;
599 }
600
601 ChangeStatus manifest(Attributor &A) override {
602 SmallVector<Attribute, 8> AttrList;
603 LLVMContext &Ctx = getAssociatedFunction()->getContext();
604
605 for (auto Attr : ImplicitAttrs) {
606 if (isKnown(Attr.first))
607 AttrList.push_back(Attribute::get(Ctx, Attr.second));
608 }
609
610 return A.manifestAttrs(getIRPosition(), AttrList,
611 /* ForceReplace */ true);
612 }
613
614 const std::string getAsStr(Attributor *) const override {
615 std::string Str;
616 raw_string_ostream OS(Str);
617 OS << "AMDInfo[";
618 for (auto Attr : ImplicitAttrs)
619 if (isAssumed(Attr.first))
620 OS << ' ' << Attr.second;
621 OS << " ]";
622 return OS.str();
623 }
624
625 /// See AbstractAttribute::trackStatistics()
626 void trackStatistics() const override {}
627
628private:
629 bool checkForQueuePtr(Attributor &A) {
630 Function *F = getAssociatedFunction();
631 bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(F->getCallingConv());
632
633 auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
634
635 bool NeedsQueuePtr = false;
636
637 auto CheckAddrSpaceCasts = [&](Instruction &I) {
638 unsigned SrcAS = static_cast<AddrSpaceCastInst &>(I).getSrcAddressSpace();
639 if (castRequiresQueuePtr(SrcAS)) {
640 NeedsQueuePtr = true;
641 return false;
642 }
643 return true;
644 };
645
646 bool HasApertureRegs = InfoCache.hasApertureRegs(*F);
647
648 // `checkForAllInstructions` is much cheaper than going through all
649 // instructions, so try it first.
650
651 // The queue pointer is not needed if aperture regs are present.
652 if (!HasApertureRegs) {
653 bool UsedAssumedInformation = false;
654 A.checkForAllInstructions(CheckAddrSpaceCasts, *this,
655 {Instruction::AddrSpaceCast},
656 UsedAssumedInformation);
657 }
658
659 // If we found that we need the queue pointer, nothing else to do.
660 if (NeedsQueuePtr)
661 return true;
662
663 if (!IsNonEntryFunc && HasApertureRegs)
664 return false;
665
666 for (BasicBlock &BB : *F) {
667 for (Instruction &I : BB) {
668 for (const Use &U : I.operands()) {
669 if (const auto *C = dyn_cast<Constant>(U)) {
670 if (InfoCache.needsQueuePtr(C, *F))
671 return true;
672 }
673 }
674 }
675 }
676
677 return false;
678 }
679
680 bool funcRetrievesMultigridSyncArg(Attributor &A, unsigned COV) {
681 auto Pos = llvm::AMDGPU::getMultigridSyncArgImplicitArgPosition(COV);
682 AA::RangeTy Range(Pos, 8);
683 return funcRetrievesImplicitKernelArg(A, Range);
684 }
685
686 bool funcRetrievesHostcallPtr(Attributor &A, unsigned COV) {
687 auto Pos = llvm::AMDGPU::getHostcallImplicitArgPosition(COV);
688 AA::RangeTy Range(Pos, 8);
689 return funcRetrievesImplicitKernelArg(A, Range);
690 }
691
692 bool funcRetrievesDefaultQueue(Attributor &A, unsigned COV) {
693 auto Pos = llvm::AMDGPU::getDefaultQueueImplicitArgPosition(COV);
694 AA::RangeTy Range(Pos, 8);
695 return funcRetrievesImplicitKernelArg(A, Range);
696 }
697
698 bool funcRetrievesCompletionAction(Attributor &A, unsigned COV) {
699 auto Pos = llvm::AMDGPU::getCompletionActionImplicitArgPosition(COV);
700 AA::RangeTy Range(Pos, 8);
701 return funcRetrievesImplicitKernelArg(A, Range);
702 }
703
704 bool funcRetrievesHeapPtr(Attributor &A, unsigned COV) {
705 if (COV < 5)
706 return false;
707 AA::RangeTy Range(AMDGPU::ImplicitArg::HEAP_PTR_OFFSET, 8);
708 return funcRetrievesImplicitKernelArg(A, Range);
709 }
710
711 bool funcRetrievesQueuePtr(Attributor &A, unsigned COV) {
712 if (COV < 5)
713 return false;
714 AA::RangeTy Range(AMDGPU::ImplicitArg::QUEUE_PTR_OFFSET, 8);
715 return funcRetrievesImplicitKernelArg(A, Range);
716 }
717
718 bool funcRetrievesImplicitKernelArg(Attributor &A, AA::RangeTy Range) {
719 // Check if this is a call to the implicitarg_ptr builtin and it
720 // is used to retrieve the hostcall pointer. The implicit arg for
721 // hostcall is not used only if every use of the implicitarg_ptr
722 // is a load that clearly does not retrieve any byte of the
723 // hostcall pointer. We check this by tracing all the uses of the
724 // initial call to the implicitarg_ptr intrinsic.
725 auto DoesNotLeadToKernelArgLoc = [&](Instruction &I) {
726 auto &Call = cast<CallBase>(I);
727 if (Call.getIntrinsicID() != Intrinsic::amdgcn_implicitarg_ptr)
728 return true;
729
730 const auto *PointerInfoAA = A.getAAFor<AAPointerInfo>(
731 *this, IRPosition::callsite_returned(Call), DepClassTy::REQUIRED);
732 if (!PointerInfoAA || !PointerInfoAA->getState().isValidState())
733 return false;
734
735 return PointerInfoAA->forallInterferingAccesses(
736 Range, [](const AAPointerInfo::Access &Acc, bool IsExact) {
737 return Acc.getRemoteInst()->isDroppable();
738 });
739 };
740
741 bool UsedAssumedInformation = false;
742 return !A.checkForAllCallLikeInstructions(DoesNotLeadToKernelArgLoc, *this,
743 UsedAssumedInformation);
744 }
745
746 bool funcRetrievesLDSKernelId(Attributor &A) {
747 auto DoesNotRetrieve = [&](Instruction &I) {
748 auto &Call = cast<CallBase>(I);
749 return Call.getIntrinsicID() != Intrinsic::amdgcn_lds_kernel_id;
750 };
751 bool UsedAssumedInformation = false;
752 return !A.checkForAllCallLikeInstructions(DoesNotRetrieve, *this,
753 UsedAssumedInformation);
754 }
755
756 // Returns true if FlatScratchInit is needed, i.e., the
757 // "amdgpu-no-flat-scratch-init" attribute should not be set.
758 bool needFlatScratchInit(Attributor &A) {
759 assert(isAssumed(FLAT_SCRATCH_INIT)); // only called if the bit is still set
760
761 // Check all AddrSpaceCast instructions. FlatScratchInit is needed if
762 // there is a cast from PRIVATE_ADDRESS.
763 auto AddrSpaceCastNotFromPrivate = [](Instruction &I) {
764 return cast<AddrSpaceCastInst>(I).getSrcAddressSpace() !=
765 AMDGPUAS::PRIVATE_ADDRESS;
766 };
767
768 bool UsedAssumedInformation = false;
769 if (!A.checkForAllInstructions(AddrSpaceCastNotFromPrivate, *this,
770 {Instruction::AddrSpaceCast},
771 UsedAssumedInformation))
772 return true;
773
774 // Check for addrSpaceCast from PRIVATE_ADDRESS in constant expressions
775 auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
776
777 Function *F = getAssociatedFunction();
778 for (Instruction &I : instructions(F)) {
779 for (const Use &U : I.operands()) {
780 if (const auto *C = dyn_cast<Constant>(U)) {
781 if (InfoCache.checkConstForAddrSpaceCastFromPrivate(C))
782 return true;
783 }
784 }
785 }
786
787 // Finally check callees.
788
789 // This is called on each callee; false means callee shouldn't have
790 // no-flat-scratch-init.
791 auto CheckForNoFlatScratchInit = [&](Instruction &I) {
792 const auto &CB = cast<CallBase>(I);
793 const Function *Callee = CB.getCalledFunction();
794
795 // Callee == 0 for inline asm or indirect call with known callees.
796 // In the latter case, updateImpl() already checked the callees and we
797 // know their FLAT_SCRATCH_INIT bit is set.
798 // If function has indirect call with unknown callees, the bit is
799 // already removed in updateImpl() and execution won't reach here.
800 if (!Callee)
801 return true;
802
803 return Callee->getIntrinsicID() !=
804 Intrinsic::amdgcn_addrspacecast_nonnull;
805 };
806
807 UsedAssumedInformation = false;
808 // If any callee is false (i.e. need FlatScratchInit),
809 // checkForAllCallLikeInstructions returns false, in which case this
810 // function returns true.
811 return !A.checkForAllCallLikeInstructions(CheckForNoFlatScratchInit, *this,
812 UsedAssumedInformation);
813 }
814};
815
816AAAMDAttributes &AAAMDAttributes::createForPosition(const IRPosition &IRP,
817 Attributor &A) {
818 if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
819 return *new (A.Allocator) AAAMDAttributesFunction(IRP, A);
820 llvm_unreachable("AAAMDAttributes is only valid for function position");
821}
822
823/// Base class to derive different size ranges.
824struct AAAMDSizeRangeAttribute
825 : public StateWrapper<IntegerRangeState, AbstractAttribute, uint32_t> {
826 using Base = StateWrapper<IntegerRangeState, AbstractAttribute, uint32_t>;
827
828 StringRef AttrName;
829
830 AAAMDSizeRangeAttribute(const IRPosition &IRP, Attributor &A,
831 StringRef AttrName)
832 : Base(IRP, 32), AttrName(AttrName) {}
833
834 /// See AbstractAttribute::trackStatistics()
835 void trackStatistics() const override {}
836
837 template <class AttributeImpl> ChangeStatus updateImplImpl(Attributor &A) {
838 ChangeStatus Change = ChangeStatus::UNCHANGED;
839
840 auto CheckCallSite = [&](AbstractCallSite CS) {
841 Function *Caller = CS.getInstruction()->getFunction();
842 LLVM_DEBUG(dbgs() << '[' << getName() << "] Call " << Caller->getName()
843 << "->" << getAssociatedFunction()->getName() << '\n');
844
845 const auto *CallerInfo = A.getAAFor<AttributeImpl>(
846 *this, IRPosition::function(*Caller), DepClassTy::REQUIRED);
847 if (!CallerInfo || !CallerInfo->isValidState())
848 return false;
849
850 Change |=
851 clampStateAndIndicateChange(this->getState(), CallerInfo->getState());
852
853 return true;
854 };
855
856 bool AllCallSitesKnown = true;
857 if (!A.checkForAllCallSites(CheckCallSite, *this,
858 /*RequireAllCallSites=*/true,
859 AllCallSitesKnown))
860 return indicatePessimisticFixpoint();
861
862 return Change;
863 }
864
865 /// Clamp the assumed range to the default value ([Min, Max]) and emit the
866 /// attribute if it is not the same as the default.
867 ChangeStatus
868 emitAttributeIfNotDefaultAfterClamp(Attributor &A,
869 std::pair<unsigned, unsigned> Default) {
870 auto [Min, Max] = Default;
871 unsigned Lower = getAssumed().getLower().getZExtValue();
872 unsigned Upper = getAssumed().getUpper().getZExtValue();
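// Note: the ConstantRange upper bound held by IntegerRangeState is
// exclusive, so Upper is the assumed maximum plus one; this is why the code
// compares against Max + 1 and prints Upper - 1.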
873
874 // Clamp the range to the default value.
875 if (Lower < Min)
876 Lower = Min;
877 if (Upper > Max + 1)
878 Upper = Max + 1;
879
880 // No manifest if the value is invalid or same as default after clamp.
881 if ((Lower == Min && Upper == Max + 1) || (Upper < Lower))
882 return ChangeStatus::UNCHANGED;
883
884 Function *F = getAssociatedFunction();
885 LLVMContext &Ctx = F->getContext();
886 SmallString<10> Buffer;
887 raw_svector_ostream OS(Buffer);
888 OS << Lower << ',' << Upper - 1;
889 return A.manifestAttrs(getIRPosition(),
890 {Attribute::get(Ctx, AttrName, OS.str())},
891 /*ForceReplace=*/true);
892 }
893
894 const std::string getAsStr(Attributor *) const override {
895 std::string Str;
896 raw_string_ostream OS(Str);
897 OS << getName() << '[';
898 OS << getAssumed().getLower() << ',' << getAssumed().getUpper() - 1;
899 OS << ']';
900 return OS.str();
901 }
902};
903
904/// Propagate amdgpu-flat-work-group-size attribute.
905struct AAAMDFlatWorkGroupSize : public AAAMDSizeRangeAttribute {
906 AAAMDFlatWorkGroupSize(const IRPosition &IRP, Attributor &A)
907 : AAAMDSizeRangeAttribute(IRP, A, "amdgpu-flat-work-group-size") {}
908
909 void initialize(Attributor &A) override {
910 Function *F = getAssociatedFunction();
911 auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
912
913 bool HasAttr = false;
914 auto Range = InfoCache.getDefaultFlatWorkGroupSize(*F);
915 auto MaxRange = InfoCache.getMaximumFlatWorkGroupRange(*F);
916
917 if (auto Attr = InfoCache.getFlatWorkGroupSizeAttr(*F)) {
918 // We only consider an attribute that is not max range because the front
919 // end always emits the attribute, unfortunately, and sometimes it emits
920 // the max range.
921 if (*Attr != MaxRange) {
922 Range = *Attr;
923 HasAttr = true;
924 }
925 }
926
927 // We don't want to directly clamp the state if it's the max range because
928 // that is basically the worst state.
929 if (Range == MaxRange)
930 return;
931
932 auto [Min, Max] = Range;
933 ConstantRange CR(APInt(32, Min), APInt(32, Max + 1));
934 IntegerRangeState IRS(CR);
935 clampStateAndIndicateChange(this->getState(), IRS);
936
937 if (HasAttr || AMDGPU::isEntryFunctionCC(F->getCallingConv()))
938 indicateOptimisticFixpoint();
939 }
940
941 ChangeStatus updateImpl(Attributor &A) override {
942 return updateImplImpl<AAAMDFlatWorkGroupSize>(A);
943 }
944
945 /// Create an abstract attribute view for the position \p IRP.
946 static AAAMDFlatWorkGroupSize &createForPosition(const IRPosition &IRP,
947 Attributor &A);
948
949 ChangeStatus manifest(Attributor &A) override {
950 Function *F = getAssociatedFunction();
951 auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
952 return emitAttributeIfNotDefaultAfterClamp(
953 A, InfoCache.getMaximumFlatWorkGroupRange(*F));
954 }
955
956 /// See AbstractAttribute::getName()
957 StringRef getName() const override { return "AAAMDFlatWorkGroupSize"; }
958
959 /// See AbstractAttribute::getIdAddr()
960 const char *getIdAddr() const override { return &ID; }
961
962 /// This function should return true if the type of the \p AA is
963 /// AAAMDFlatWorkGroupSize
964 static bool classof(const AbstractAttribute *AA) {
965 return (AA->getIdAddr() == &ID);
966 }
967
968 /// Unique ID (due to the unique address)
969 static const char ID;
970};
971
972const char AAAMDFlatWorkGroupSize::ID = 0;
973
974AAAMDFlatWorkGroupSize &
975AAAMDFlatWorkGroupSize::createForPosition(const IRPosition &IRP,
976 Attributor &A) {
977 if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
978 return *new (A.Allocator) AAAMDFlatWorkGroupSize(IRP, A);
979 llvm_unreachable(
980 "AAAMDFlatWorkGroupSize is only valid for function position");
981}
982
983struct TupleDecIntegerRangeState : public AbstractState {
984 DecIntegerState<uint32_t> X, Y, Z;
985
986 bool isValidState() const override {
987 return X.isValidState() && Y.isValidState() && Z.isValidState();
988 }
989
990 bool isAtFixpoint() const override {
991 return X.isAtFixpoint() && Y.isAtFixpoint() && Z.isAtFixpoint();
992 }
993
994 ChangeStatus indicateOptimisticFixpoint() override {
995 return X.indicateOptimisticFixpoint() | Y.indicateOptimisticFixpoint() |
996 Z.indicateOptimisticFixpoint();
997 }
998
999 ChangeStatus indicatePessimisticFixpoint() override {
1000 return X.indicatePessimisticFixpoint() | Y.indicatePessimisticFixpoint() |
1001 Z.indicatePessimisticFixpoint();
1002 }
1003
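// clampStateAndIndicateChange combines states with operator^=, so forward
// the combination to each dimension independently.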
1004 TupleDecIntegerRangeState operator^=(const TupleDecIntegerRangeState &Other) {
1005 X ^= Other.X;
1006 Y ^= Other.Y;
1007 Z ^= Other.Z;
1008 return *this;
1009 }
1010
1011 bool operator==(const TupleDecIntegerRangeState &Other) const {
1012 return X == Other.X && Y == Other.Y && Z == Other.Z;
1013 }
1014
1015 TupleDecIntegerRangeState &getAssumed() { return *this; }
1016 const TupleDecIntegerRangeState &getAssumed() const { return *this; }
1017};
1018
1019using AAAMDMaxNumWorkgroupsState =
1020 StateWrapper<TupleDecIntegerRangeState, AbstractAttribute, uint32_t>;
1021
1022/// Propagate amdgpu-max-num-workgroups attribute.
1023struct AAAMDMaxNumWorkgroups
1024 : public StateWrapper<TupleDecIntegerRangeState, AbstractAttribute> {
1025 using Base = StateWrapper<TupleDecIntegerRangeState, AbstractAttribute>;
1026
1027 AAAMDMaxNumWorkgroups(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
1028
1029 void initialize(Attributor &A) override {
1030 Function *F = getAssociatedFunction();
1031 auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
1032
1033 SmallVector<unsigned> MaxNumWorkgroups = InfoCache.getMaxNumWorkGroups(*F);
1034
1035 X.takeKnownMinimum(MaxNumWorkgroups[0]);
1036 Y.takeKnownMinimum(MaxNumWorkgroups[1]);
1037 Z.takeKnownMinimum(MaxNumWorkgroups[2]);
1038
1039 if (AMDGPU::isEntryFunctionCC(F->getCallingConv()))
1040 indicatePessimisticFixpoint();
1041 }
1042
1043 ChangeStatus updateImpl(Attributor &A) override {
1044 ChangeStatus Change = ChangeStatus::UNCHANGED;
1045
1046 auto CheckCallSite = [&](AbstractCallSite CS) {
1047 Function *Caller = CS.getInstruction()->getFunction();
1048 LLVM_DEBUG(dbgs() << "[AAAMDMaxNumWorkgroups] Call " << Caller->getName()
1049 << "->" << getAssociatedFunction()->getName() << '\n');
1050
1051 const auto *CallerInfo = A.getAAFor<AAAMDMaxNumWorkgroups>(
1052 *this, IRPosition::function(*Caller), DepClassTy::REQUIRED);
1053 if (!CallerInfo || !CallerInfo->isValidState())
1054 return false;
1055
1056 Change |=
1057 clampStateAndIndicateChange(this->getState(), CallerInfo->getState());
1058 return true;
1059 };
1060
1061 bool AllCallSitesKnown = true;
1062 if (!A.checkForAllCallSites(CheckCallSite, *this,
1063 /*RequireAllCallSites=*/true,
1064 AllCallSitesKnown))
1065 return indicatePessimisticFixpoint();
1066
1067 return Change;
1068 }
1069
1070 /// Create an abstract attribute view for the position \p IRP.
1071 static AAAMDMaxNumWorkgroups &createForPosition(const IRPosition &IRP,
1072 Attributor &A);
1073
1074 ChangeStatus manifest(Attributor &A) override {
1075 Function *F = getAssociatedFunction();
1076 LLVMContext &Ctx = F->getContext();
1077 SmallString<32> Buffer;
1078 raw_svector_ostream OS(Buffer);
1079 OS << X.getAssumed() << ',' << Y.getAssumed() << ',' << Z.getAssumed();
1080
1081 // TODO: Should annotate loads of the group size for this to do anything
1082 // useful.
1083 return A.manifestAttrs(
1084 getIRPosition(),
1085 {Attribute::get(Ctx, "amdgpu-max-num-workgroups", OS.str())},
1086 /* ForceReplace= */ true);
1087 }
1088
1089 StringRef getName() const override { return "AAAMDMaxNumWorkgroups"; }
1090
1091 const std::string getAsStr(Attributor *) const override {
1092 std::string Buffer = "AAAMDMaxNumWorkgroupsState[";
1093 raw_string_ostream OS(Buffer);
1094 OS << X.getAssumed() << ',' << Y.getAssumed() << ',' << Z.getAssumed()
1095 << ']';
1096 return OS.str();
1097 }
1098
1099 const char *getIdAddr() const override { return &ID; }
1100
1101 /// This function should return true if the type of the \p AA is
1102 /// AAAMDMaxNumWorkgroups
1103 static bool classof(const AbstractAttribute *AA) {
1104 return (AA->getIdAddr() == &ID);
1105 }
1106
1107 void trackStatistics() const override {}
1108
1109 /// Unique ID (due to the unique address)
1110 static const char ID;
1111};
1112
1113const char AAAMDMaxNumWorkgroups::ID = 0;
1114
1115AAAMDMaxNumWorkgroups &
1116AAAMDMaxNumWorkgroups::createForPosition(const IRPosition &IRP, Attributor &A) {
1117 if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
1118 return *new (A.Allocator) AAAMDMaxNumWorkgroups(IRP, A);
1119 llvm_unreachable("AAAMDMaxNumWorkgroups is only valid for function position");
1120}
1121
1122/// Propagate amdgpu-waves-per-eu attribute.
1123struct AAAMDWavesPerEU : public AAAMDSizeRangeAttribute {
1124 AAAMDWavesPerEU(const IRPosition &IRP, Attributor &A)
1125 : AAAMDSizeRangeAttribute(IRP, A, "amdgpu-waves-per-eu") {}
1126
1127 void initialize(Attributor &A) override {
1128 Function *F = getAssociatedFunction();
1129 auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
1130
1131 // If the attribute exists, we will honor it if it is not the default.
1132 if (auto Attr = InfoCache.getWavesPerEUAttr(*F)) {
1133 std::pair<unsigned, unsigned> MaxWavesPerEURange{
1134 1U, InfoCache.getMaxWavesPerEU(*F)};
1135 if (*Attr != MaxWavesPerEURange) {
1136 auto [Min, Max] = *Attr;
1137 ConstantRange Range(APInt(32, Min), APInt(32, Max + 1));
1138 IntegerRangeState RangeState(Range);
1139 this->getState() = RangeState;
1140 indicateOptimisticFixpoint();
1141 return;
1142 }
1143 }
1144
1145 if (AMDGPU::isEntryFunctionCC(F->getCallingConv()))
1146 indicatePessimisticFixpoint();
1147 }
1148
1149 ChangeStatus updateImpl(Attributor &A) override {
1150 ChangeStatus Change = ChangeStatus::UNCHANGED;
1151
1152 auto CheckCallSite = [&](AbstractCallSite CS) {
1153 Function *Caller = CS.getInstruction()->getFunction();
1154 Function *Func = getAssociatedFunction();
1155 LLVM_DEBUG(dbgs() << '[' << getName() << "] Call " << Caller->getName()
1156 << "->" << Func->getName() << '\n');
1157 (void)Func;
1158
1159 const auto *CallerAA = A.getAAFor<AAAMDWavesPerEU>(
1160 *this, IRPosition::function(*Caller), DepClassTy::REQUIRED);
1161 if (!CallerAA || !CallerAA->isValidState())
1162 return false;
1163
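// Merge with the caller by taking the maximum of both bounds: a callee's
// waves-per-eu request is raised to at least what its callers assume.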
1164 ConstantRange Assumed = getAssumed();
1165 unsigned Min = std::max(Assumed.getLower().getZExtValue(),
1166 CallerAA->getAssumed().getLower().getZExtValue());
1167 unsigned Max = std::max(Assumed.getUpper().getZExtValue(),
1168 CallerAA->getAssumed().getUpper().getZExtValue());
1169 ConstantRange Range(APInt(32, Min), APInt(32, Max));
1170 IntegerRangeState RangeState(Range);
1171 getState() = RangeState;
1172 Change |= getState() == Assumed ? ChangeStatus::UNCHANGED
1173 : ChangeStatus::CHANGED;
1174
1175 return true;
1176 };
1177
1178 bool AllCallSitesKnown = true;
1179 if (!A.checkForAllCallSites(CheckCallSite, *this, true, AllCallSitesKnown))
1180 return indicatePessimisticFixpoint();
1181
1182 return Change;
1183 }
1184
1185 /// Create an abstract attribute view for the position \p IRP.
1186 static AAAMDWavesPerEU &createForPosition(const IRPosition &IRP,
1187 Attributor &A);
1188
1189 ChangeStatus manifest(Attributor &A) override {
1190 Function *F = getAssociatedFunction();
1191 auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
1192 return emitAttributeIfNotDefaultAfterClamp(
1193 A, {1U, InfoCache.getMaxWavesPerEU(*F)});
1194 }
1195
1196 /// See AbstractAttribute::getName()
1197 StringRef getName() const override { return "AAAMDWavesPerEU"; }
1198
1199 /// See AbstractAttribute::getIdAddr()
1200 const char *getIdAddr() const override { return &ID; }
1201
1202 /// This function should return true if the type of the \p AA is
1203 /// AAAMDWavesPerEU
1204 static bool classof(const AbstractAttribute *AA) {
1205 return (AA->getIdAddr() == &ID);
1206 }
1207
1208 /// Unique ID (due to the unique address)
1209 static const char ID;
1210};
1211
1212const char AAAMDWavesPerEU::ID = 0;
1213
1214AAAMDWavesPerEU &AAAMDWavesPerEU::createForPosition(const IRPosition &IRP,
1215 Attributor &A) {
1216 if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
1217 return *new (A.Allocator) AAAMDWavesPerEU(IRP, A);
1218 llvm_unreachable("AAAMDWavesPerEU is only valid for function position");
1219}
1220
1221/// Compute the minimum number of AGPRs required to allocate the inline asm.
1222static unsigned inlineAsmGetNumRequiredAGPRs(const InlineAsm *IA,
1223 const CallBase &Call) {
1224 unsigned ArgNo = 0;
1225 unsigned ResNo = 0;
1226 unsigned AGPRDefCount = 0;
1227 unsigned AGPRUseCount = 0;
1228 unsigned MaxPhysReg = 0;
1229 const DataLayout &DL = Call.getFunction()->getParent()->getDataLayout();
1230
1231 // TODO: Overestimates due to not accounting for tied operands
1232 for (const InlineAsm::ConstraintInfo &CI : IA->ParseConstraints()) {
1233 Type *Ty = nullptr;
1234 switch (CI.Type) {
1235 case InlineAsm::isOutput: {
1236 Ty = Call.getType();
1237 if (auto *STy = dyn_cast<StructType>(Ty))
1238 Ty = STy->getElementType(ResNo);
1239 ++ResNo;
1240 break;
1241 }
1242 case InlineAsm::isInput: {
1243 Ty = Call.getArgOperand(ArgNo++)->getType();
1244 break;
1245 }
1246 case InlineAsm::isLabel:
1247 continue;
1248 case InlineAsm::isClobber:
1249 // Parse the physical register reference.
1250 break;
1251 }
1252
1253 for (StringRef Code : CI.Codes) {
1254 unsigned RegCount = 0;
1255 if (Code.starts_with("a")) {
1256 // Virtual register, compute number of registers based on the type.
1257 //
1258 // We ought to be going through TargetLowering to get the number of
1259 // registers, but we should avoid the dependence on CodeGen here.
1260 RegCount = divideCeil(DL.getTypeSizeInBits(Ty), 32);
1261 } else {
1262 // Physical register reference
1263 auto [Kind, RegIdx, NumRegs] = AMDGPU::parseAsmConstraintPhysReg(Code);
1264 if (Kind == 'a') {
1265 RegCount = NumRegs;
1266 MaxPhysReg = std::max(MaxPhysReg, std::min(RegIdx + NumRegs, 256u));
1267 }
1268
1269 continue;
1270 }
1271
1272 if (CI.Type == InlineAsm::isOutput) {
1273 // Apply tuple alignment requirement
1274 //
1275 // TODO: This is more conservative than necessary.
1276 AGPRDefCount = alignTo(AGPRDefCount, RegCount);
1277
1278 AGPRDefCount += RegCount;
1279 if (CI.isEarlyClobber) {
1280 AGPRUseCount = alignTo(AGPRUseCount, RegCount);
1281 AGPRUseCount += RegCount;
1282 }
1283 } else {
1284 AGPRUseCount = alignTo(AGPRUseCount, RegCount);
1285 AGPRUseCount += RegCount;
1286 }
1287 }
1288 }
1289
1290 unsigned MaxVirtReg = std::max(AGPRUseCount, AGPRDefCount);
1291
1292 // TODO: This is overly conservative. If there are any physical registers,
1293 // allocate any virtual registers after them so we don't have to solve optimal
1294 // packing.
1295 return std::min(MaxVirtReg + MaxPhysReg, 256u);
1296}
1297
1298struct AAAMDGPUMinAGPRAlloc
1299 : public StateWrapper<DecIntegerState<>, AbstractAttribute> {
1300 using Base = StateWrapper<DecIntegerState<>, AbstractAttribute>;
1301 AAAMDGPUMinAGPRAlloc(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
1302
1303 static AAAMDGPUMinAGPRAlloc &createForPosition(const IRPosition &IRP,
1304 Attributor &A) {
1305 if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
1306 return *new (A.Allocator) AAAMDGPUMinAGPRAlloc(IRP, A);
1307 llvm_unreachable(
1308 "AAAMDGPUMinAGPRAlloc is only valid for function position");
1309 }
1310
1311 void initialize(Attributor &A) override {
1312 Function *F = getAssociatedFunction();
1313 auto [MinNumAGPR, MaxNumAGPR] =
1314 AMDGPU::getIntegerPairAttribute(*F, "amdgpu-agpr-alloc", {~0u, ~0u},
1315 /*OnlyFirstRequired=*/true);
1316 if (MinNumAGPR == 0)
1317 indicateOptimisticFixpoint();
1318 }
1319
1320 const std::string getAsStr(Attributor *A) const override {
1321 std::string Str = "amdgpu-agpr-alloc=";
1322 raw_string_ostream OS(Str);
1323 OS << getAssumed();
1324 return OS.str();
1325 }
1326
1327 void trackStatistics() const override {}
1328
1329 ChangeStatus updateImpl(Attributor &A) override {
1330 DecIntegerState<> Maximum;
1331
1332 // Check for cases which require allocation of AGPRs. The only cases where
1333 // AGPRs are required are direct references to AGPRs, i.e., inline
1334 // assembly and special intrinsics.
1335 auto CheckForMinAGPRAllocs = [&](Instruction &I) {
1336 const auto &CB = cast<CallBase>(I);
1337 const Value *CalleeOp = CB.getCalledOperand();
1338
1339 if (const InlineAsm *IA = dyn_cast<InlineAsm>(CalleeOp)) {
1340 // Technically, the inline asm could be invoking a call to an unknown
1341 // external function that requires AGPRs, but ignore that.
1342 unsigned NumRegs = inlineAsmGetNumRequiredAGPRs(IA, CB);
1343 Maximum.takeAssumedMaximum(NumRegs);
1344 return true;
1345 }
1346
1347 switch (CB.getIntrinsicID()) {
1348 case Intrinsic::not_intrinsic:
1349 break;
1350 case Intrinsic::write_register:
1351 case Intrinsic::read_register:
1352 case Intrinsic::read_volatile_register: {
1353 const MDString *RegName = cast<MDString>(
1354 cast<MDNode>(
1355 cast<MetadataAsValue>(CB.getArgOperand(0))->getMetadata())
1356 ->getOperand(0));
1357 auto [Kind, RegIdx, NumRegs] =
1359 if (Kind == 'a')
1360 Maximum.takeAssumedMaximum(std::min(RegIdx + NumRegs, 256u));
1361
1362 return true;
1363 }
1364 default:
1365 // Some intrinsics may use AGPRs, but if we have a choice, we are not
1366 // required to use AGPRs.
1367
1368 // Assume !nocallback intrinsics may call a function which requires
1369 // AGPRs.
1370 return CB.hasFnAttr(Attribute::NoCallback);
1371 }
1372
1373 // TODO: Handle callsite attributes
1374 auto *CBEdges = A.getAAFor<AACallEdges>(
1375 *this, IRPosition::callsite_function(CB), DepClassTy::REQUIRED);
1376 if (!CBEdges || CBEdges->hasUnknownCallee()) {
1378 return false;
1379 }
1380
1381 for (const Function *PossibleCallee : CBEdges->getOptimisticEdges()) {
1382 const auto *CalleeInfo = A.getAAFor<AAAMDGPUMinAGPRAlloc>(
1383 *this, IRPosition::function(*PossibleCallee), DepClassTy::REQUIRED);
1384 if (!CalleeInfo || !CalleeInfo->isValidState()) {
1386 return false;
1387 }
1388
1389 Maximum.takeAssumedMaximum(CalleeInfo->getAssumed());
1390 }
1391
1392 return true;
1393 };
1394
1395 bool UsedAssumedInformation = false;
1396 if (!A.checkForAllCallLikeInstructions(CheckForMinAGPRAllocs, *this,
1397 UsedAssumedInformation))
1398 return indicatePessimisticFixpoint();
1399
1400 return clampStateAndIndicateChange(getState(), Maximum);
1401 }
1402
1403 ChangeStatus manifest(Attributor &A) override {
1404 LLVMContext &Ctx = getAssociatedFunction()->getContext();
1405 SmallString<4> Buffer;
1406 raw_svector_ostream OS(Buffer);
1407 OS << getAssumed();
1408
1409 return A.manifestAttrs(
1410 getIRPosition(), {Attribute::get(Ctx, "amdgpu-agpr-alloc", OS.str())});
1411 }
1412
1413 StringRef getName() const override { return "AAAMDGPUMinAGPRAlloc"; }
1414 const char *getIdAddr() const override { return &ID; }
1415
1416 /// This function should return true if the type of the \p AA is
1417 /// AAAMDGPUMinAGPRAlloc
1418 static bool classof(const AbstractAttribute *AA) {
1419 return (AA->getIdAddr() == &ID);
1420 }
1421
1422 static const char ID;
1423};
1424
1425const char AAAMDGPUMinAGPRAlloc::ID = 0;
1426
1427/// An abstract attribute to propagate the function attribute
1428/// "amdgpu-cluster-dims" from kernel entry functions to device functions.
1429struct AAAMDGPUClusterDims
1430 : public StateWrapper<BooleanState, AbstractAttribute> {
1431 using Base = StateWrapper<BooleanState, AbstractAttribute>;
1432 AAAMDGPUClusterDims(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
1433
1434 /// Create an abstract attribute view for the position \p IRP.
1435 static AAAMDGPUClusterDims &createForPosition(const IRPosition &IRP,
1436 Attributor &A);
1437
1438 /// See AbstractAttribute::getName().
1439 StringRef getName() const override { return "AAAMDGPUClusterDims"; }
1440
1441 /// See AbstractAttribute::getIdAddr().
1442 const char *getIdAddr() const override { return &ID; }
1443
1444 /// This function should return true if the type of the \p AA is
1445 /// AAAMDGPUClusterDims.
1446 static bool classof(const AbstractAttribute *AA) {
1447 return AA->getIdAddr() == &ID;
1448 }
1449
1450 virtual const AMDGPU::ClusterDimsAttr &getClusterDims() const = 0;
1451
1452 /// Unique ID (due to the unique address)
1453 static const char ID;
1454};
1455
1456const char AAAMDGPUClusterDims::ID = 0;
1457
1458struct AAAMDGPUClusterDimsFunction : public AAAMDGPUClusterDims {
1459 AAAMDGPUClusterDimsFunction(const IRPosition &IRP, Attributor &A)
1460 : AAAMDGPUClusterDims(IRP, A) {}
1461
1462 void initialize(Attributor &A) override {
1463 Function *F = getAssociatedFunction();
1464 assert(F && "empty associated function");
1465
1466 Attr = AMDGPU::ClusterDimsAttr::get(*F);
1467
1468 // Whatever cluster-dims value a kernel function has is final.
1469 if (AMDGPU::isEntryFunctionCC(F->getCallingConv())) {
1470 if (Attr.isUnknown())
1471 indicatePessimisticFixpoint();
1472 else
1473 indicateOptimisticFixpoint();
1474 }
1475 }
1476
1477 const std::string getAsStr(Attributor *A) const override {
1478 if (!getAssumed() || Attr.isUnknown())
1479 return "unknown";
1480 if (Attr.isNoCluster())
1481 return "no";
1482 if (Attr.isVariableDims())
1483 return "variable";
1484 return Attr.to_string();
1485 }
1486
1487 void trackStatistics() const override {}
1488
1489 ChangeStatus updateImpl(Attributor &A) override {
1490 auto OldState = Attr;
1491
1492 auto CheckCallSite = [&](AbstractCallSite CS) {
1493 const auto *CallerAA = A.getAAFor<AAAMDGPUClusterDims>(
1494 *this, IRPosition::function(*CS.getInstruction()->getFunction()),
1495 DepClassTy::REQUIRED);
1496 if (!CallerAA || !CallerAA->isValidState())
1497 return false;
1498
1499 return merge(CallerAA->getClusterDims());
1500 };
1501
1502 bool UsedAssumedInformation = false;
1503 if (!A.checkForAllCallSites(CheckCallSite, *this,
1504 /*RequireAllCallSites=*/true,
1505 UsedAssumedInformation))
1506 return indicatePessimisticFixpoint();
1507
1508 return OldState == Attr ? ChangeStatus::UNCHANGED : ChangeStatus::CHANGED;
1509 }
1510
1511 ChangeStatus manifest(Attributor &A) override {
1512 if (Attr.isUnknown())
1513 return ChangeStatus::UNCHANGED;
1514 return A.manifestAttrs(
1515 getIRPosition(),
1516 {Attribute::get(getAssociatedFunction()->getContext(), AttrName,
1517 Attr.to_string())},
1518 /*ForceReplace=*/true);
1519 }
1520
1521 const AMDGPU::ClusterDimsAttr &getClusterDims() const override {
1522 return Attr;
1523 }
1524
1525private:
1526 bool merge(const AMDGPU::ClusterDimsAttr &Other) {
1527 // Case 1: Both are still unknown; do nothing and keep waiting for
1528 // propagation.
1529 if (Attr.isUnknown() && Other.isUnknown())
1530 return true;
1531
1532 // Case 2: The other is determined but we are still unknown; simply take
1533 // the other's value.
1534 if (Attr.isUnknown()) {
1535 Attr = Other;
1536 return true;
1537 }
1538
1539 // Case 3: We are determined but the other is still unknown; keep
1540 // everything unchanged.
1541 if (Other.isUnknown())
1542 return true;
1543
1544 // After this point, both are determined.
1545
1546 // Case 4: If they are the same, we do nothing.
1547 if (Attr == Other)
1548 return true;
1549
1550 // Now they are not the same.
1551
1552 // Case 5: Exactly one of us does not use clusters while the other does
1553 // (both being no-cluster would fall under case 4). Whether clusters will
1554 // be used is therefore unknown, and the state is final, unlike case 1.
1555 if (Attr.isNoCluster() || Other.isNoCluster()) {
1556 Attr.setUnknown();
1557 return false;
1558 }
1559
1560 // Case 6: Both of us use clusters, but the dims differ, so clusters are
1561 // used but we do not have fixed dims.
1562 Attr.setVariableDims();
1563 return true;
1564 }
1565
1566 AMDGPU::ClusterDimsAttr Attr;
1567
1568 static constexpr char AttrName[] = "amdgpu-cluster-dims";
1569};
1570
1571AAAMDGPUClusterDims &
1572AAAMDGPUClusterDims::createForPosition(const IRPosition &IRP, Attributor &A) {
1573 if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
1574 return *new (A.Allocator) AAAMDGPUClusterDimsFunction(IRP, A);
1575 llvm_unreachable("AAAMDGPUClusterDims is only valid for function position");
1576}
1577
1578static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
1579 AMDGPUAttributorOptions Options,
1580 ThinOrFullLTOPhase LTOPhase) {
1581 SetVector<Function *> Functions;
1582 for (Function &F : M) {
1583 if (!F.isIntrinsic())
1584 Functions.insert(&F);
1585 }
1586
1587 CallGraphUpdater CGUpdater;
1588 BumpPtrAllocator Allocator;
1589 AMDGPUInformationCache InfoCache(M, AG, Allocator, nullptr, TM);
1590 DenseSet<const char *> Allowed(
1591 {&AAAMDAttributes::ID, &AAUniformWorkGroupSize::ID,
1592 &AAPotentialValues::ID, &AAAMDFlatWorkGroupSize::ID,
1593 &AAAMDMaxNumWorkgroups::ID, &AAAMDWavesPerEU::ID,
1594 &AAAMDGPUMinAGPRAlloc::ID, &AACallEdges::ID, &AAPointerInfo::ID,
1595 &AAPotentialConstantValues::ID, &AAUnderlyingObjects::ID,
1596 &AAAddressSpace::ID, &AAIndirectCallInfo::ID, &AANoAliasAddrSpace::ID,
1597 &AAAMDGPUClusterDims::ID, &AAAlign::ID});
1598
1599 AttributorConfig AC(CGUpdater);
1600 AC.IsClosedWorldModule = Options.IsClosedWorld;
1601 AC.Allowed = &Allowed;
1602 AC.IsModulePass = true;
1603 AC.DefaultInitializeLiveInternals = false;
1604 AC.IndirectCalleeSpecializationCallback =
1605 [](Attributor &A, const AbstractAttribute &AA, CallBase &CB,
1606 Function &Callee, unsigned NumAssumedCallees) {
1607 return !AMDGPU::isEntryFunctionCC(Callee.getCallingConv()) &&
1608 (NumAssumedCallees <= IndirectCallSpecializationThreshold);
1609 };
1610 AC.IPOAmendableCB = [](const Function &F) {
1611 return F.getCallingConv() == CallingConv::AMDGPU_KERNEL;
1612 };
1613
1614 Attributor A(Functions, InfoCache, AC);
1615
1616 LLVM_DEBUG({
1617 StringRef LTOPhaseStr = to_string(LTOPhase);
1618 dbgs() << "[AMDGPUAttributor] Running at phase " << LTOPhaseStr << '\n'
1619 << "[AMDGPUAttributor] Module " << M.getName() << " is "
1620 << (AC.IsClosedWorldModule ? "" : "not ")
1621 << "assumed to be a closed world.\n";
1622 });
1623
1624 for (auto *F : Functions) {
1625 A.getOrCreateAAFor<AAAMDAttributes>(IRPosition::function(*F));
1626 A.getOrCreateAAFor<AAUniformWorkGroupSize>(IRPosition::function(*F));
1627 A.getOrCreateAAFor<AAAMDMaxNumWorkgroups>(IRPosition::function(*F));
1628 CallingConv::ID CC = F->getCallingConv();
1629 if (!AMDGPU::isEntryFunctionCC(CC)) {
1630 A.getOrCreateAAFor<AAAMDFlatWorkGroupSize>(IRPosition::function(*F));
1631 A.getOrCreateAAFor<AAAMDWavesPerEU>(IRPosition::function(*F));
1632 }
1633
1634 const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(*F);
1635 if (!F->isDeclaration() && ST.hasClusters())
1636 A.getOrCreateAAFor<AAAMDGPUClusterDims>(IRPosition::function(*F));
1637
1638 if (ST.hasGFX90AInsts())
1639 A.getOrCreateAAFor<AAAMDGPUMinAGPRAlloc>(IRPosition::function(*F));
1640
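// Seed address-space deduction on the pointer operands of memory
// instructions, plus alignment deduction for amdgcn.make.buffer.rsrc
// results.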
1641 for (auto &I : instructions(F)) {
1642 Value *Ptr = nullptr;
1643 if (auto *LI = dyn_cast<LoadInst>(&I))
1644 Ptr = LI->getPointerOperand();
1645 else if (auto *SI = dyn_cast<StoreInst>(&I))
1646 Ptr = SI->getPointerOperand();
1647 else if (auto *RMW = dyn_cast<AtomicRMWInst>(&I))
1648 Ptr = RMW->getPointerOperand();
1649 else if (auto *CmpX = dyn_cast<AtomicCmpXchgInst>(&I))
1650 Ptr = CmpX->getPointerOperand();
1651
1652 if (Ptr) {
1653 A.getOrCreateAAFor<AAAddressSpace>(IRPosition::value(*Ptr));
1654 A.getOrCreateAAFor<AANoAliasAddrSpace>(IRPosition::value(*Ptr));
1655 if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Ptr)) {
1656 if (II->getIntrinsicID() == Intrinsic::amdgcn_make_buffer_rsrc)
1657 A.getOrCreateAAFor<AAAlign>(IRPosition::value(*Ptr));
1658 }
1659 }
1660 }
1661 }
1662
1663 return A.run() == ChangeStatus::CHANGED;
1664}
1665} // namespace
1666
1667PreservedAnalyses AMDGPUAttributorPass::run(Module &M,
1668 ModuleAnalysisManager &AM) {
1669
1670 FunctionAnalysisManager &FAM =
1671 AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
1672 AnalysisGetter AG(FAM);
1673
1674 // TODO: Probably preserves CFG
1675 return runImpl(M, AG, TM, Options, LTOPhase) ? PreservedAnalyses::none()
1676 : PreservedAnalyses::all();
1677}
const STC & getSubtarget(const Function &F) const
This method returns a pointer to the specified type of TargetSubtargetInfo.
LLVM_ABI bool isDroppable() const
A droppable user is a user for which uses can be dropped without affecting correctness and should be ...
Definition User.cpp:119
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
CallInst * Call
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ REGION_ADDRESS
Address space for region memory. (GDS)
@ LOCAL_ADDRESS
Address space for local memory.
@ PRIVATE_ADDRESS
Address space for private memory.
unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI)
unsigned getAMDHSACodeObjectVersion(const Module &M)
unsigned getDefaultQueueImplicitArgPosition(unsigned CodeObjectVersion)
std::tuple< char, unsigned, unsigned > parseAsmPhysRegName(StringRef RegName)
Returns a valid charcode or 0 in the first entry if this is a valid physical register name.
LLVM_READNONE constexpr bool isEntryFunctionCC(CallingConv::ID CC)
std::tuple< char, unsigned, unsigned > parseAsmConstraintPhysReg(StringRef Constraint)
Returns a valid charcode or 0 in the first entry if this is a valid physical register constraint.
unsigned getHostcallImplicitArgPosition(unsigned CodeObjectVersion)
unsigned getCompletionActionImplicitArgPosition(unsigned CodeObjectVersion)
std::pair< unsigned, unsigned > getIntegerPairAttribute(const Function &F, StringRef Name, std::pair< unsigned, unsigned > Default, bool OnlyFirstRequired)
LLVM_READNONE constexpr bool isGraphics(CallingConv::ID CC)
unsigned getMultigridSyncArgImplicitArgPosition(unsigned CodeObjectVersion)
E & operator^=(E &LHS, E RHS)
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ CE
Windows NT (Windows on ARM)
Definition MCAsmInfo.h:48
initializer< Ty > init(const Ty &Val)
NodeAddr< CodeNode * > Code
Definition RDFGraph.h:388
NodeAddr< FuncNode * > Func
Definition RDFGraph.h:393
Context & getContext() const
Definition BasicBlock.h:99
friend class Instruction
Iterator for Instructions in a `BasicBlock.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
InnerAnalysisManagerProxy< FunctionAnalysisManager, Module > FunctionAnalysisManagerModuleProxy
Provide the FunctionAnalysisManager to Module proxy.
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
ThinOrFullLTOPhase
This enumerates the LLVM full LTO or ThinLTO optimization phases.
Definition Pass.h:77
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
const char * to_string(ThinOrFullLTOPhase Phase)
Definition Pass.cpp:301
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
Definition MathExtras.h:394
@ Other
Any other memory.
Definition ModRef.h:68
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition Alignment.h:144
ChangeStatus clampStateAndIndicateChange(StateType &S, const StateType &R)
Helper function to clamp a state S of type StateType with the information in R and indicate/return if...
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
ChangeStatus
{
Definition Attributor.h:496
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
BumpPtrAllocatorImpl<> BumpPtrAllocator
The standard BumpPtrAllocator which just uses the default template parameters.
Definition Allocator.h:383
AnalysisManager< Module > ModuleAnalysisManager
Convenience typedef for the Module analysis manager.
Definition MIRParser.h:39
static LLVM_ABI const char ID
Unique ID (due to the unique address)
static LLVM_ABI const char ID
Unique ID (due to the unique address)
virtual const SetVector< Function * > & getOptimisticEdges() const =0
Get the optimistic edges.
static LLVM_ABI const char ID
Unique ID (due to the unique address)
virtual bool hasNonAsmUnknownCallee() const =0
Is there any call with a unknown callee, excluding any inline asm.
static LLVM_ABI const char ID
Unique ID (due to the unique address)
static LLVM_ABI const char ID
Unique ID (due to the unique address)
Instruction * getRemoteInst() const
Return the actual instruction that causes the access.
static LLVM_ABI const char ID
Unique ID (due to the unique address)
static LLVM_ABI const char ID
Unique ID (due to the unique address)
static LLVM_ABI const char ID
Unique ID (due to the unique address)
static LLVM_ABI const char ID
Unique ID (due to the unique address)
virtual const char * getIdAddr() const =0
This function should return the address of the ID of the AbstractAttribute.
Wrapper for FunctionAnalysisManager.
The fixpoint analysis framework that orchestrates the attribute deduction.
DecIntegerState & takeAssumedMaximum(base_t Value)
Take maximum of assumed and Value.
Helper to describe and deal with positions in the LLVM-IR.
Definition Attributor.h:593
static const IRPosition callsite_returned(const CallBase &CB)
Create a position describing the returned value of CB.
Definition Attributor.h:661
static const IRPosition value(const Value &V, const CallBaseContext *CBContext=nullptr)
Create a position describing the value of V.
Definition Attributor.h:617
@ IRP_FUNCTION
An attribute for a function (scope).
Definition Attributor.h:605
static const IRPosition function(const Function &F, const CallBaseContext *CBContext=nullptr)
Create a position describing the function scope of F.
Definition Attributor.h:636
Kind getPositionKind() const
Return the associated position kind.
Definition Attributor.h:889
static const IRPosition callsite_function(const CallBase &CB)
Create a position describing the function scope of CB.
Definition Attributor.h:656
Data structure to hold cached (LLVM-IR) information.
bool isValidState() const override
See AbstractState::isValidState() NOTE: For now we simply pretend that the worst possible state is in...
ChangeStatus indicatePessimisticFixpoint() override
See AbstractState::indicatePessimisticFixpoint(...)
Helper to tie a abstract state implementation to an abstract attribute.