LLVM 23.0.0git
AMDGPUAttributor.cpp
Go to the documentation of this file.
1//===- AMDGPUAttributor.cpp -----------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file This pass uses Attributor framework to deduce AMDGPU attributes.
10//
11//===----------------------------------------------------------------------===//
12
13#include "AMDGPU.h"
14#include "AMDGPUTargetMachine.h"
15#include "GCNSubtarget.h"
17#include "llvm/IR/IntrinsicsAMDGPU.h"
18#include "llvm/IR/IntrinsicsR600.h"
21
22#define DEBUG_TYPE "amdgpu-attributor"
23
24using namespace llvm;
25
27 "amdgpu-indirect-call-specialization-threshold",
29 "A threshold controls whether an indirect call will be specialized"),
30 cl::init(3));
31
32#define AMDGPU_ATTRIBUTE(Name, Str) Name##_POS,
33
35#include "AMDGPUAttributes.def"
37};
38
39#define AMDGPU_ATTRIBUTE(Name, Str) Name = 1 << Name##_POS,
40
43#include "AMDGPUAttributes.def"
46};
47
48#define AMDGPU_ATTRIBUTE(Name, Str) {Name, Str},
49static constexpr std::pair<ImplicitArgumentMask, StringLiteral>
51#include "AMDGPUAttributes.def"
52};
53
54// We do not need to note the x workitem or workgroup id because they are always
55// initialized.
56//
57// TODO: We should not add the attributes if the known compile time workgroup
58// size is 1 for y/z.
60intrinsicToAttrMask(Intrinsic::ID ID, bool &NonKernelOnly, bool &NeedsImplicit,
61 bool HasApertureRegs, bool SupportsGetDoorBellID,
62 unsigned CodeObjectVersion) {
63 switch (ID) {
64 case Intrinsic::amdgcn_workitem_id_x:
65 NonKernelOnly = true;
66 return WORKITEM_ID_X;
67 case Intrinsic::amdgcn_workgroup_id_x:
68 NonKernelOnly = true;
69 return WORKGROUP_ID_X;
70 case Intrinsic::amdgcn_workitem_id_y:
71 case Intrinsic::r600_read_tidig_y:
72 return WORKITEM_ID_Y;
73 case Intrinsic::amdgcn_workitem_id_z:
74 case Intrinsic::r600_read_tidig_z:
75 return WORKITEM_ID_Z;
76 case Intrinsic::amdgcn_workgroup_id_y:
77 case Intrinsic::r600_read_tgid_y:
78 return WORKGROUP_ID_Y;
79 case Intrinsic::amdgcn_workgroup_id_z:
80 case Intrinsic::r600_read_tgid_z:
81 return WORKGROUP_ID_Z;
82 case Intrinsic::amdgcn_cluster_id_x:
83 NonKernelOnly = true;
84 return CLUSTER_ID_X;
85 case Intrinsic::amdgcn_cluster_id_y:
86 return CLUSTER_ID_Y;
87 case Intrinsic::amdgcn_cluster_id_z:
88 return CLUSTER_ID_Z;
89 case Intrinsic::amdgcn_lds_kernel_id:
90 return LDS_KERNEL_ID;
91 case Intrinsic::amdgcn_dispatch_ptr:
92 return DISPATCH_PTR;
93 case Intrinsic::amdgcn_dispatch_id:
94 return DISPATCH_ID;
95 case Intrinsic::amdgcn_implicitarg_ptr:
96 return IMPLICIT_ARG_PTR;
97 // Need queue_ptr anyway. But under V5, we also need implicitarg_ptr to access
98 // queue_ptr.
99 case Intrinsic::amdgcn_queue_ptr:
100 NeedsImplicit = (CodeObjectVersion >= AMDGPU::AMDHSA_COV5);
101 return QUEUE_PTR;
102 case Intrinsic::amdgcn_is_shared:
103 case Intrinsic::amdgcn_is_private:
104 if (HasApertureRegs)
105 return NOT_IMPLICIT_INPUT;
106 // Under V5, we need implicitarg_ptr + offsets to access private_base or
107 // shared_base. For pre-V5, however, need to access them through queue_ptr +
108 // offsets.
109 return CodeObjectVersion >= AMDGPU::AMDHSA_COV5 ? IMPLICIT_ARG_PTR
110 : QUEUE_PTR;
111 case Intrinsic::amdgcn_wwm:
112 case Intrinsic::amdgcn_strict_wwm:
113 return WHOLE_WAVE_MODE;
114 case Intrinsic::trap:
115 case Intrinsic::debugtrap:
116 case Intrinsic::ubsantrap:
117 if (SupportsGetDoorBellID) // GetDoorbellID support implemented since V4.
118 return CodeObjectVersion >= AMDGPU::AMDHSA_COV4 ? NOT_IMPLICIT_INPUT
119 : QUEUE_PTR;
120 NeedsImplicit = (CodeObjectVersion >= AMDGPU::AMDHSA_COV5);
121 return QUEUE_PTR;
122 default:
123 return UNKNOWN_INTRINSIC;
124 }
125}
126
127static bool castRequiresQueuePtr(unsigned SrcAS) {
128 return SrcAS == AMDGPUAS::LOCAL_ADDRESS || SrcAS == AMDGPUAS::PRIVATE_ADDRESS;
129}
130
131static bool isDSAddress(const Constant *C) {
133 if (!GV)
134 return false;
135 unsigned AS = GV->getAddressSpace();
137}
138
139/// Returns true if sanitizer attributes are present on a function.
140static bool hasSanitizerAttributes(const Function &F) {
141 return F.hasFnAttribute(Attribute::SanitizeAddress) ||
142 F.hasFnAttribute(Attribute::SanitizeThread) ||
143 F.hasFnAttribute(Attribute::SanitizeMemory) ||
144 F.hasFnAttribute(Attribute::SanitizeHWAddress) ||
145 F.hasFnAttribute(Attribute::SanitizeMemTag);
146}
147
148namespace {
149class AMDGPUInformationCache : public InformationCache {
150public:
151 AMDGPUInformationCache(const Module &M, AnalysisGetter &AG,
153 SetVector<Function *> *CGSCC, TargetMachine &TM)
154 : InformationCache(M, AG, Allocator, CGSCC), TM(TM),
155 CodeObjectVersion(AMDGPU::getAMDHSACodeObjectVersion(M)) {}
156
157 TargetMachine &TM;
158
159 enum ConstantStatus : uint8_t {
160 NONE = 0,
161 DS_GLOBAL = 1 << 0,
162 ADDR_SPACE_CAST_PRIVATE_TO_FLAT = 1 << 1,
163 ADDR_SPACE_CAST_LOCAL_TO_FLAT = 1 << 2,
164 ADDR_SPACE_CAST_BOTH_TO_FLAT =
165 ADDR_SPACE_CAST_PRIVATE_TO_FLAT | ADDR_SPACE_CAST_LOCAL_TO_FLAT
166 };
167
168 /// Check if the subtarget has aperture regs.
169 bool hasApertureRegs(Function &F) {
170 const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
171 return ST.hasApertureRegs();
172 }
173
174 /// Check if the subtarget supports GetDoorbellID.
175 bool supportsGetDoorbellID(Function &F) {
176 const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
177 return ST.supportsGetDoorbellID();
178 }
179
180 std::optional<std::pair<unsigned, unsigned>>
181 getFlatWorkGroupSizeAttr(const Function &F) const {
182 auto R = AMDGPU::getIntegerPairAttribute(F, "amdgpu-flat-work-group-size");
183 if (!R)
184 return std::nullopt;
185 return std::make_pair(R->first, *(R->second));
186 }
187
188 std::pair<unsigned, unsigned>
189 getDefaultFlatWorkGroupSize(const Function &F) const {
190 const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
191 return ST.getDefaultFlatWorkGroupSize(F.getCallingConv());
192 }
193
194 std::pair<unsigned, unsigned>
195 getMaximumFlatWorkGroupRange(const Function &F) {
196 const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
197 return {ST.getMinFlatWorkGroupSize(), ST.getMaxFlatWorkGroupSize()};
198 }
199
200 SmallVector<unsigned> getMaxNumWorkGroups(const Function &F) {
201 const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
202 return ST.getMaxNumWorkGroups(F);
203 }
204
205 /// Get code object version.
206 unsigned getCodeObjectVersion() const { return CodeObjectVersion; }
207
208 std::optional<std::pair<unsigned, unsigned>>
209 getWavesPerEUAttr(const Function &F) {
210 auto Val = AMDGPU::getIntegerPairAttribute(F, "amdgpu-waves-per-eu",
211 /*OnlyFirstRequired=*/true);
212 if (!Val)
213 return std::nullopt;
214 if (!Val->second) {
215 const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
216 Val->second = ST.getMaxWavesPerEU();
217 }
218 return std::make_pair(Val->first, *(Val->second));
219 }
220
221 unsigned getMaxWavesPerEU(const Function &F) {
222 const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
223 return ST.getMaxWavesPerEU();
224 }
225
226 unsigned getMaxAddrSpace() const override {
228 }
229
230private:
231 /// Check if the ConstantExpr \p CE uses an addrspacecast from private or
232 /// local to flat. These casts may require the queue pointer.
233 static uint8_t visitConstExpr(const ConstantExpr *CE) {
234 uint8_t Status = NONE;
235
236 if (CE->getOpcode() == Instruction::AddrSpaceCast) {
237 unsigned SrcAS = CE->getOperand(0)->getType()->getPointerAddressSpace();
238 if (SrcAS == AMDGPUAS::PRIVATE_ADDRESS)
239 Status |= ADDR_SPACE_CAST_PRIVATE_TO_FLAT;
240 else if (SrcAS == AMDGPUAS::LOCAL_ADDRESS)
241 Status |= ADDR_SPACE_CAST_LOCAL_TO_FLAT;
242 }
243
244 return Status;
245 }
246
247 /// Get the constant access bitmap for \p C.
248 uint8_t getConstantAccess(const Constant *C,
249 SmallPtrSetImpl<const Constant *> &Visited) {
250 auto It = ConstantStatus.find(C);
251 if (It != ConstantStatus.end())
252 return It->second;
253
254 uint8_t Result = 0;
255 if (isDSAddress(C))
256 Result = DS_GLOBAL;
257
258 if (const auto *CE = dyn_cast<ConstantExpr>(C))
259 Result |= visitConstExpr(CE);
260
261 for (const Use &U : C->operands()) {
262 const auto *OpC = dyn_cast<Constant>(U);
263 if (!OpC || !Visited.insert(OpC).second)
264 continue;
265
266 Result |= getConstantAccess(OpC, Visited);
267 }
268 return Result;
269 }
270
271public:
272 /// Returns true if \p Fn needs the queue pointer because of \p C.
273 bool needsQueuePtr(const Constant *C, Function &Fn) {
274 bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(Fn.getCallingConv());
275 bool HasAperture = hasApertureRegs(Fn);
276
277 // No need to explore the constants.
278 if (!IsNonEntryFunc && HasAperture)
279 return false;
280
281 SmallPtrSet<const Constant *, 8> Visited;
282 uint8_t Access = getConstantAccess(C, Visited);
283
284 // We need to trap on DS globals in non-entry functions.
285 if (IsNonEntryFunc && (Access & DS_GLOBAL))
286 return true;
287
288 return !HasAperture && (Access & ADDR_SPACE_CAST_BOTH_TO_FLAT);
289 }
290
291 bool checkConstForAddrSpaceCastFromPrivate(const Constant *C) {
292 SmallPtrSet<const Constant *, 8> Visited;
293 uint8_t Access = getConstantAccess(C, Visited);
294 return Access & ADDR_SPACE_CAST_PRIVATE_TO_FLAT;
295 }
296
297private:
298 /// Used to determine if the Constant needs the queue pointer.
299 DenseMap<const Constant *, uint8_t> ConstantStatus;
300 const unsigned CodeObjectVersion;
301};
302
303struct AAAMDAttributes
304 : public StateWrapper<BitIntegerState<uint32_t, ALL_ARGUMENT_MASK, 0>,
305 AbstractAttribute> {
306 using Base = StateWrapper<BitIntegerState<uint32_t, ALL_ARGUMENT_MASK, 0>,
307 AbstractAttribute>;
308
309 AAAMDAttributes(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
310
311 /// Create an abstract attribute view for the position \p IRP.
312 static AAAMDAttributes &createForPosition(const IRPosition &IRP,
313 Attributor &A);
314
315 /// See AbstractAttribute::getName().
316 StringRef getName() const override { return "AAAMDAttributes"; }
317
318 /// See AbstractAttribute::getIdAddr().
319 const char *getIdAddr() const override { return &ID; }
320
321 /// This function should return true if the type of the \p AA is
322 /// AAAMDAttributes.
323 static bool classof(const AbstractAttribute *AA) {
324 return (AA->getIdAddr() == &ID);
325 }
326
327 /// Unique ID (due to the unique address)
328 static const char ID;
329};
330const char AAAMDAttributes::ID = 0;
331
332struct AAUniformWorkGroupSize
333 : public StateWrapper<BooleanState, AbstractAttribute> {
334 using Base = StateWrapper<BooleanState, AbstractAttribute>;
335 AAUniformWorkGroupSize(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
336
337 /// Create an abstract attribute view for the position \p IRP.
338 static AAUniformWorkGroupSize &createForPosition(const IRPosition &IRP,
339 Attributor &A);
340
341 /// See AbstractAttribute::getName().
342 StringRef getName() const override { return "AAUniformWorkGroupSize"; }
343
344 /// See AbstractAttribute::getIdAddr().
345 const char *getIdAddr() const override { return &ID; }
346
347 /// This function should return true if the type of the \p AA is
348 /// AAAMDAttributes.
349 static bool classof(const AbstractAttribute *AA) {
350 return (AA->getIdAddr() == &ID);
351 }
352
353 /// Unique ID (due to the unique address)
354 static const char ID;
355};
356const char AAUniformWorkGroupSize::ID = 0;
357
358struct AAUniformWorkGroupSizeFunction : public AAUniformWorkGroupSize {
359 AAUniformWorkGroupSizeFunction(const IRPosition &IRP, Attributor &A)
360 : AAUniformWorkGroupSize(IRP, A) {}
361
362 void initialize(Attributor &A) override {
363 Function *F = getAssociatedFunction();
364 CallingConv::ID CC = F->getCallingConv();
365
366 if (CC != CallingConv::AMDGPU_KERNEL)
367 return;
368
369 bool InitialValue = F->hasFnAttribute("uniform-work-group-size");
370
371 if (InitialValue)
372 indicateOptimisticFixpoint();
373 else
374 indicatePessimisticFixpoint();
375 }
376
377 ChangeStatus updateImpl(Attributor &A) override {
378 ChangeStatus Change = ChangeStatus::UNCHANGED;
379
380 auto CheckCallSite = [&](AbstractCallSite CS) {
381 Function *Caller = CS.getInstruction()->getFunction();
382 LLVM_DEBUG(dbgs() << "[AAUniformWorkGroupSize] Call " << Caller->getName()
383 << "->" << getAssociatedFunction()->getName() << "\n");
384
385 const auto *CallerInfo = A.getAAFor<AAUniformWorkGroupSize>(
386 *this, IRPosition::function(*Caller), DepClassTy::REQUIRED);
387 if (!CallerInfo || !CallerInfo->isValidState())
388 return false;
389
390 Change = Change | clampStateAndIndicateChange(this->getState(),
391 CallerInfo->getState());
392
393 return true;
394 };
395
396 bool AllCallSitesKnown = true;
397 if (!A.checkForAllCallSites(CheckCallSite, *this, true, AllCallSitesKnown))
398 return indicatePessimisticFixpoint();
399
400 return Change;
401 }
402
403 ChangeStatus manifest(Attributor &A) override {
404 if (!getAssumed())
405 return ChangeStatus::UNCHANGED;
406
407 LLVMContext &Ctx = getAssociatedFunction()->getContext();
408 return A.manifestAttrs(getIRPosition(),
409 {Attribute::get(Ctx, "uniform-work-group-size")},
410 /*ForceReplace=*/true);
411 }
412
413 bool isValidState() const override {
414 // This state is always valid, even when the state is false.
415 return true;
416 }
417
418 const std::string getAsStr(Attributor *) const override {
419 return "AMDWorkGroupSize[" + std::to_string(getAssumed()) + "]";
420 }
421
422 /// See AbstractAttribute::trackStatistics()
423 void trackStatistics() const override {}
424};
425
426AAUniformWorkGroupSize &
427AAUniformWorkGroupSize::createForPosition(const IRPosition &IRP,
428 Attributor &A) {
430 return *new (A.Allocator) AAUniformWorkGroupSizeFunction(IRP, A);
432 "AAUniformWorkGroupSize is only valid for function position");
433}
434
435struct AAAMDAttributesFunction : public AAAMDAttributes {
436 AAAMDAttributesFunction(const IRPosition &IRP, Attributor &A)
437 : AAAMDAttributes(IRP, A) {}
438
439 void initialize(Attributor &A) override {
440 Function *F = getAssociatedFunction();
441
442 // If the function requires the implicit arg pointer due to sanitizers,
443 // assume it's needed even if explicitly marked as not requiring it.
444 // Flat scratch initialization is needed because `asan_malloc_impl`
445 // calls introduced later in pipeline will have flat scratch accesses.
446 // FIXME: FLAT_SCRATCH_INIT will not be required here if device-libs
447 // implementation for `asan_malloc_impl` is updated.
448 const bool HasSanitizerAttrs = hasSanitizerAttributes(*F);
449 if (HasSanitizerAttrs) {
450 removeAssumedBits(IMPLICIT_ARG_PTR);
451 removeAssumedBits(HOSTCALL_PTR);
452 removeAssumedBits(FLAT_SCRATCH_INIT);
453 }
454
455 for (auto Attr : ImplicitAttrs) {
456 if (HasSanitizerAttrs &&
457 (Attr.first == IMPLICIT_ARG_PTR || Attr.first == HOSTCALL_PTR ||
458 Attr.first == FLAT_SCRATCH_INIT))
459 continue;
460
461 if (F->hasFnAttribute(Attr.second))
462 addKnownBits(Attr.first);
463 }
464
465 if (F->isDeclaration())
466 return;
467
468 // Ignore functions with graphics calling conventions, these are currently
469 // not allowed to have kernel arguments.
470 if (AMDGPU::isGraphics(F->getCallingConv())) {
471 indicatePessimisticFixpoint();
472 return;
473 }
474 }
475
476 ChangeStatus updateImpl(Attributor &A) override {
477 Function *F = getAssociatedFunction();
478 // The current assumed state used to determine a change.
479 auto OrigAssumed = getAssumed();
480
481 // Check for Intrinsics and propagate attributes.
482 const AACallEdges *AAEdges = A.getAAFor<AACallEdges>(
483 *this, this->getIRPosition(), DepClassTy::REQUIRED);
484 if (!AAEdges || !AAEdges->isValidState() ||
485 AAEdges->hasNonAsmUnknownCallee())
486 return indicatePessimisticFixpoint();
487
488 bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(F->getCallingConv());
489
490 bool NeedsImplicit = false;
491 auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
492 bool HasApertureRegs = InfoCache.hasApertureRegs(*F);
493 bool SupportsGetDoorbellID = InfoCache.supportsGetDoorbellID(*F);
494 unsigned COV = InfoCache.getCodeObjectVersion();
495
496 for (Function *Callee : AAEdges->getOptimisticEdges()) {
497 Intrinsic::ID IID = Callee->getIntrinsicID();
498 if (IID == Intrinsic::not_intrinsic) {
499 const AAAMDAttributes *AAAMD = A.getAAFor<AAAMDAttributes>(
500 *this, IRPosition::function(*Callee), DepClassTy::REQUIRED);
501 if (!AAAMD || !AAAMD->isValidState())
502 return indicatePessimisticFixpoint();
503 *this &= *AAAMD;
504 continue;
505 }
506
507 bool NonKernelOnly = false;
508 ImplicitArgumentMask AttrMask =
509 intrinsicToAttrMask(IID, NonKernelOnly, NeedsImplicit,
510 HasApertureRegs, SupportsGetDoorbellID, COV);
511
512 if (AttrMask == UNKNOWN_INTRINSIC) {
513 // Assume not-nocallback intrinsics may invoke a function which accesses
514 // implicit arguments.
515 //
516 // FIXME: This isn't really the correct check. We want to ensure it
517 // isn't calling any function that may use implicit arguments regardless
518 // of whether it's internal to the module or not.
519 //
520 // TODO: Ignoring callsite attributes.
521 if (!Callee->hasFnAttribute(Attribute::NoCallback))
522 return indicatePessimisticFixpoint();
523 continue;
524 }
525
526 if (AttrMask != NOT_IMPLICIT_INPUT) {
527 if ((IsNonEntryFunc || !NonKernelOnly))
528 removeAssumedBits(AttrMask);
529 }
530 }
531
532 // Need implicitarg_ptr to acess queue_ptr, private_base, and shared_base.
533 if (NeedsImplicit)
534 removeAssumedBits(IMPLICIT_ARG_PTR);
535
536 if (isAssumed(QUEUE_PTR) && checkForQueuePtr(A)) {
537 // Under V5, we need implicitarg_ptr + offsets to access private_base or
538 // shared_base. We do not actually need queue_ptr.
539 if (COV >= 5)
540 removeAssumedBits(IMPLICIT_ARG_PTR);
541 else
542 removeAssumedBits(QUEUE_PTR);
543 }
544
545 if (funcRetrievesMultigridSyncArg(A, COV)) {
546 assert(!isAssumed(IMPLICIT_ARG_PTR) &&
547 "multigrid_sync_arg needs implicitarg_ptr");
548 removeAssumedBits(MULTIGRID_SYNC_ARG);
549 }
550
551 if (funcRetrievesHostcallPtr(A, COV)) {
552 assert(!isAssumed(IMPLICIT_ARG_PTR) && "hostcall needs implicitarg_ptr");
553 removeAssumedBits(HOSTCALL_PTR);
554 }
555
556 if (funcRetrievesHeapPtr(A, COV)) {
557 assert(!isAssumed(IMPLICIT_ARG_PTR) && "heap_ptr needs implicitarg_ptr");
558 removeAssumedBits(HEAP_PTR);
559 }
560
561 if (isAssumed(QUEUE_PTR) && funcRetrievesQueuePtr(A, COV)) {
562 assert(!isAssumed(IMPLICIT_ARG_PTR) && "queue_ptr needs implicitarg_ptr");
563 removeAssumedBits(QUEUE_PTR);
564 }
565
566 if (isAssumed(LDS_KERNEL_ID) && funcRetrievesLDSKernelId(A)) {
567 removeAssumedBits(LDS_KERNEL_ID);
568 }
569
570 if (isAssumed(DEFAULT_QUEUE) && funcRetrievesDefaultQueue(A, COV))
571 removeAssumedBits(DEFAULT_QUEUE);
572
573 if (isAssumed(COMPLETION_ACTION) && funcRetrievesCompletionAction(A, COV))
574 removeAssumedBits(COMPLETION_ACTION);
575
576 if (isAssumed(FLAT_SCRATCH_INIT) && needFlatScratchInit(A))
577 removeAssumedBits(FLAT_SCRATCH_INIT);
578
579 return getAssumed() != OrigAssumed ? ChangeStatus::CHANGED
580 : ChangeStatus::UNCHANGED;
581 }
582
583 ChangeStatus manifest(Attributor &A) override {
585 LLVMContext &Ctx = getAssociatedFunction()->getContext();
586
587 for (auto Attr : ImplicitAttrs) {
588 if (isKnown(Attr.first))
589 AttrList.push_back(Attribute::get(Ctx, Attr.second));
590 }
591
592 return A.manifestAttrs(getIRPosition(), AttrList,
593 /* ForceReplace */ true);
594 }
595
596 const std::string getAsStr(Attributor *) const override {
597 std::string Str;
598 raw_string_ostream OS(Str);
599 OS << "AMDInfo[";
600 for (auto Attr : ImplicitAttrs)
601 if (isAssumed(Attr.first))
602 OS << ' ' << Attr.second;
603 OS << " ]";
604 return OS.str();
605 }
606
607 /// See AbstractAttribute::trackStatistics()
608 void trackStatistics() const override {}
609
610private:
611 bool checkForQueuePtr(Attributor &A) {
612 Function *F = getAssociatedFunction();
613 bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(F->getCallingConv());
614
615 auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
616
617 bool NeedsQueuePtr = false;
618
619 auto CheckAddrSpaceCasts = [&](Instruction &I) {
620 unsigned SrcAS = static_cast<AddrSpaceCastInst &>(I).getSrcAddressSpace();
621 if (castRequiresQueuePtr(SrcAS)) {
622 NeedsQueuePtr = true;
623 return false;
624 }
625 return true;
626 };
627
628 bool HasApertureRegs = InfoCache.hasApertureRegs(*F);
629
630 // `checkForAllInstructions` is much more cheaper than going through all
631 // instructions, try it first.
632
633 // The queue pointer is not needed if aperture regs is present.
634 if (!HasApertureRegs) {
635 bool UsedAssumedInformation = false;
636 A.checkForAllInstructions(CheckAddrSpaceCasts, *this,
637 {Instruction::AddrSpaceCast},
638 UsedAssumedInformation);
639 }
640
641 // If we found that we need the queue pointer, nothing else to do.
642 if (NeedsQueuePtr)
643 return true;
644
645 if (!IsNonEntryFunc && HasApertureRegs)
646 return false;
647
648 for (BasicBlock &BB : *F) {
649 for (Instruction &I : BB) {
650 for (const Use &U : I.operands()) {
651 if (const auto *C = dyn_cast<Constant>(U)) {
652 if (InfoCache.needsQueuePtr(C, *F))
653 return true;
654 }
655 }
656 }
657 }
658
659 return false;
660 }
661
662 bool funcRetrievesMultigridSyncArg(Attributor &A, unsigned COV) {
664 AA::RangeTy Range(Pos, 8);
665 return funcRetrievesImplicitKernelArg(A, Range);
666 }
667
668 bool funcRetrievesHostcallPtr(Attributor &A, unsigned COV) {
670 AA::RangeTy Range(Pos, 8);
671 return funcRetrievesImplicitKernelArg(A, Range);
672 }
673
674 bool funcRetrievesDefaultQueue(Attributor &A, unsigned COV) {
676 AA::RangeTy Range(Pos, 8);
677 return funcRetrievesImplicitKernelArg(A, Range);
678 }
679
680 bool funcRetrievesCompletionAction(Attributor &A, unsigned COV) {
682 AA::RangeTy Range(Pos, 8);
683 return funcRetrievesImplicitKernelArg(A, Range);
684 }
685
686 bool funcRetrievesHeapPtr(Attributor &A, unsigned COV) {
687 if (COV < 5)
688 return false;
690 return funcRetrievesImplicitKernelArg(A, Range);
691 }
692
693 bool funcRetrievesQueuePtr(Attributor &A, unsigned COV) {
694 if (COV < 5)
695 return false;
697 return funcRetrievesImplicitKernelArg(A, Range);
698 }
699
700 bool funcRetrievesImplicitKernelArg(Attributor &A, AA::RangeTy Range) {
701 // Check if this is a call to the implicitarg_ptr builtin and it
702 // is used to retrieve the hostcall pointer. The implicit arg for
703 // hostcall is not used only if every use of the implicitarg_ptr
704 // is a load that clearly does not retrieve any byte of the
705 // hostcall pointer. We check this by tracing all the uses of the
706 // initial call to the implicitarg_ptr intrinsic.
707 auto DoesNotLeadToKernelArgLoc = [&](Instruction &I) {
708 auto &Call = cast<CallBase>(I);
709 if (Call.getIntrinsicID() != Intrinsic::amdgcn_implicitarg_ptr)
710 return true;
711
712 const auto *PointerInfoAA = A.getAAFor<AAPointerInfo>(
713 *this, IRPosition::callsite_returned(Call), DepClassTy::REQUIRED);
714 if (!PointerInfoAA || !PointerInfoAA->getState().isValidState())
715 return false;
716
717 return PointerInfoAA->forallInterferingAccesses(
718 Range, [](const AAPointerInfo::Access &Acc, bool IsExact) {
719 return Acc.getRemoteInst()->isDroppable();
720 });
721 };
722
723 bool UsedAssumedInformation = false;
724 return !A.checkForAllCallLikeInstructions(DoesNotLeadToKernelArgLoc, *this,
725 UsedAssumedInformation);
726 }
727
728 bool funcRetrievesLDSKernelId(Attributor &A) {
729 auto DoesNotRetrieve = [&](Instruction &I) {
730 auto &Call = cast<CallBase>(I);
731 return Call.getIntrinsicID() != Intrinsic::amdgcn_lds_kernel_id;
732 };
733 bool UsedAssumedInformation = false;
734 return !A.checkForAllCallLikeInstructions(DoesNotRetrieve, *this,
735 UsedAssumedInformation);
736 }
737
738 // Returns true if FlatScratchInit is needed, i.e., no-flat-scratch-init is
739 // not to be set.
740 bool needFlatScratchInit(Attributor &A) {
741 assert(isAssumed(FLAT_SCRATCH_INIT)); // only called if the bit is still set
742
743 // Check all AddrSpaceCast instructions. FlatScratchInit is needed if
744 // there is a cast from PRIVATE_ADDRESS.
745 auto AddrSpaceCastNotFromPrivate = [](Instruction &I) {
746 return cast<AddrSpaceCastInst>(I).getSrcAddressSpace() !=
748 };
749
750 bool UsedAssumedInformation = false;
751 if (!A.checkForAllInstructions(AddrSpaceCastNotFromPrivate, *this,
752 {Instruction::AddrSpaceCast},
753 UsedAssumedInformation))
754 return true;
755
756 // Check for addrSpaceCast from PRIVATE_ADDRESS in constant expressions
757 auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
758
759 Function *F = getAssociatedFunction();
760 for (Instruction &I : instructions(F)) {
761 for (const Use &U : I.operands()) {
762 if (const auto *C = dyn_cast<Constant>(U)) {
763 if (InfoCache.checkConstForAddrSpaceCastFromPrivate(C))
764 return true;
765 }
766 }
767 }
768
769 // Finally check callees.
770
771 // This is called on each callee; false means callee shouldn't have
772 // no-flat-scratch-init.
773 auto CheckForNoFlatScratchInit = [&](Instruction &I) {
774 const auto &CB = cast<CallBase>(I);
775 const Function *Callee = CB.getCalledFunction();
776
777 // Callee == 0 for inline asm or indirect call with known callees.
778 // In the latter case, updateImpl() already checked the callees and we
779 // know their FLAT_SCRATCH_INIT bit is set.
780 // If function has indirect call with unknown callees, the bit is
781 // already removed in updateImpl() and execution won't reach here.
782 if (!Callee)
783 return true;
784
785 return Callee->getIntrinsicID() !=
786 Intrinsic::amdgcn_addrspacecast_nonnull;
787 };
788
789 UsedAssumedInformation = false;
790 // If any callee is false (i.e. need FlatScratchInit),
791 // checkForAllCallLikeInstructions returns false, in which case this
792 // function returns true.
793 return !A.checkForAllCallLikeInstructions(CheckForNoFlatScratchInit, *this,
794 UsedAssumedInformation);
795 }
796};
797
798AAAMDAttributes &AAAMDAttributes::createForPosition(const IRPosition &IRP,
799 Attributor &A) {
801 return *new (A.Allocator) AAAMDAttributesFunction(IRP, A);
802 llvm_unreachable("AAAMDAttributes is only valid for function position");
803}
804
805/// Base class to derive different size ranges.
806struct AAAMDSizeRangeAttribute
807 : public StateWrapper<IntegerRangeState, AbstractAttribute, uint32_t> {
808 using Base = StateWrapper<IntegerRangeState, AbstractAttribute, uint32_t>;
809
810 StringRef AttrName;
811
812 AAAMDSizeRangeAttribute(const IRPosition &IRP, Attributor &A,
813 StringRef AttrName)
814 : Base(IRP, 32), AttrName(AttrName) {}
815
816 /// See AbstractAttribute::trackStatistics()
817 void trackStatistics() const override {}
818
819 template <class AttributeImpl> ChangeStatus updateImplImpl(Attributor &A) {
820 ChangeStatus Change = ChangeStatus::UNCHANGED;
821
822 auto CheckCallSite = [&](AbstractCallSite CS) {
823 Function *Caller = CS.getInstruction()->getFunction();
824 LLVM_DEBUG(dbgs() << '[' << getName() << "] Call " << Caller->getName()
825 << "->" << getAssociatedFunction()->getName() << '\n');
826
827 const auto *CallerInfo = A.getAAFor<AttributeImpl>(
828 *this, IRPosition::function(*Caller), DepClassTy::REQUIRED);
829 if (!CallerInfo || !CallerInfo->isValidState())
830 return false;
831
832 Change |=
833 clampStateAndIndicateChange(this->getState(), CallerInfo->getState());
834
835 return true;
836 };
837
838 bool AllCallSitesKnown = true;
839 if (!A.checkForAllCallSites(CheckCallSite, *this,
840 /*RequireAllCallSites=*/true,
841 AllCallSitesKnown))
842 return indicatePessimisticFixpoint();
843
844 return Change;
845 }
846
847 /// Clamp the assumed range to the default value ([Min, Max]) and emit the
848 /// attribute if it is not same as default.
850 emitAttributeIfNotDefaultAfterClamp(Attributor &A,
851 std::pair<unsigned, unsigned> Default) {
852 auto [Min, Max] = Default;
853 unsigned Lower = getAssumed().getLower().getZExtValue();
854 unsigned Upper = getAssumed().getUpper().getZExtValue();
855
856 // Clamp the range to the default value.
857 if (Lower < Min)
858 Lower = Min;
859 if (Upper > Max + 1)
860 Upper = Max + 1;
861
862 // No manifest if the value is invalid or same as default after clamp.
863 if ((Lower == Min && Upper == Max + 1) || (Upper < Lower))
864 return ChangeStatus::UNCHANGED;
865
866 Function *F = getAssociatedFunction();
867 LLVMContext &Ctx = F->getContext();
868 SmallString<10> Buffer;
869 raw_svector_ostream OS(Buffer);
870 OS << Lower << ',' << Upper - 1;
871 return A.manifestAttrs(getIRPosition(),
872 {Attribute::get(Ctx, AttrName, OS.str())},
873 /*ForceReplace=*/true);
874 }
875
876 const std::string getAsStr(Attributor *) const override {
877 std::string Str;
878 raw_string_ostream OS(Str);
879 OS << getName() << '[';
880 OS << getAssumed().getLower() << ',' << getAssumed().getUpper() - 1;
881 OS << ']';
882 return OS.str();
883 }
884};
885
886/// Propagate amdgpu-flat-work-group-size attribute.
887struct AAAMDFlatWorkGroupSize : public AAAMDSizeRangeAttribute {
888 AAAMDFlatWorkGroupSize(const IRPosition &IRP, Attributor &A)
889 : AAAMDSizeRangeAttribute(IRP, A, "amdgpu-flat-work-group-size") {}
890
891 void initialize(Attributor &A) override {
892 Function *F = getAssociatedFunction();
893 auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
894
895 bool HasAttr = false;
896 auto Range = InfoCache.getDefaultFlatWorkGroupSize(*F);
897 auto MaxRange = InfoCache.getMaximumFlatWorkGroupRange(*F);
898
899 if (auto Attr = InfoCache.getFlatWorkGroupSizeAttr(*F)) {
900 // We only consider an attribute that is not max range because the front
901 // end always emits the attribute, unfortunately, and sometimes it emits
902 // the max range.
903 if (*Attr != MaxRange) {
904 Range = *Attr;
905 HasAttr = true;
906 }
907 }
908
909 // We don't want to directly clamp the state if it's the max range because
910 // that is basically the worst state.
911 if (Range == MaxRange)
912 return;
913
914 auto [Min, Max] = Range;
915 ConstantRange CR(APInt(32, Min), APInt(32, Max + 1));
916 IntegerRangeState IRS(CR);
917 clampStateAndIndicateChange(this->getState(), IRS);
918
919 if (HasAttr || AMDGPU::isEntryFunctionCC(F->getCallingConv()))
920 indicateOptimisticFixpoint();
921 }
922
923 ChangeStatus updateImpl(Attributor &A) override {
924 return updateImplImpl<AAAMDFlatWorkGroupSize>(A);
925 }
926
927 /// Create an abstract attribute view for the position \p IRP.
928 static AAAMDFlatWorkGroupSize &createForPosition(const IRPosition &IRP,
929 Attributor &A);
930
931 ChangeStatus manifest(Attributor &A) override {
932 Function *F = getAssociatedFunction();
933 auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
934 return emitAttributeIfNotDefaultAfterClamp(
935 A, InfoCache.getMaximumFlatWorkGroupRange(*F));
936 }
937
938 /// See AbstractAttribute::getName()
939 StringRef getName() const override { return "AAAMDFlatWorkGroupSize"; }
940
941 /// See AbstractAttribute::getIdAddr()
942 const char *getIdAddr() const override { return &ID; }
943
944 /// This function should return true if the type of the \p AA is
945 /// AAAMDFlatWorkGroupSize
946 static bool classof(const AbstractAttribute *AA) {
947 return (AA->getIdAddr() == &ID);
948 }
949
950 /// Unique ID (due to the unique address)
951 static const char ID;
952};
953
954const char AAAMDFlatWorkGroupSize::ID = 0;
955
956AAAMDFlatWorkGroupSize &
957AAAMDFlatWorkGroupSize::createForPosition(const IRPosition &IRP,
958 Attributor &A) {
960 return *new (A.Allocator) AAAMDFlatWorkGroupSize(IRP, A);
962 "AAAMDFlatWorkGroupSize is only valid for function position");
963}
964
965struct TupleDecIntegerRangeState : public AbstractState {
966 DecIntegerState<uint32_t> X, Y, Z;
967
968 bool isValidState() const override {
969 return X.isValidState() && Y.isValidState() && Z.isValidState();
970 }
971
972 bool isAtFixpoint() const override {
973 return X.isAtFixpoint() && Y.isAtFixpoint() && Z.isAtFixpoint();
974 }
975
976 ChangeStatus indicateOptimisticFixpoint() override {
977 return X.indicateOptimisticFixpoint() | Y.indicateOptimisticFixpoint() |
978 Z.indicateOptimisticFixpoint();
979 }
980
981 ChangeStatus indicatePessimisticFixpoint() override {
982 return X.indicatePessimisticFixpoint() | Y.indicatePessimisticFixpoint() |
983 Z.indicatePessimisticFixpoint();
984 }
985
986 TupleDecIntegerRangeState operator^=(const TupleDecIntegerRangeState &Other) {
987 X ^= Other.X;
988 Y ^= Other.Y;
989 Z ^= Other.Z;
990 return *this;
991 }
992
993 bool operator==(const TupleDecIntegerRangeState &Other) const {
994 return X == Other.X && Y == Other.Y && Z == Other.Z;
995 }
996
997 TupleDecIntegerRangeState &getAssumed() { return *this; }
998 const TupleDecIntegerRangeState &getAssumed() const { return *this; }
999};
1000
1001using AAAMDMaxNumWorkgroupsState =
1002 StateWrapper<TupleDecIntegerRangeState, AbstractAttribute, uint32_t>;
1003
1004/// Propagate amdgpu-max-num-workgroups attribute.
1005struct AAAMDMaxNumWorkgroups
1006 : public StateWrapper<TupleDecIntegerRangeState, AbstractAttribute> {
1007 using Base = StateWrapper<TupleDecIntegerRangeState, AbstractAttribute>;
1008
1009 AAAMDMaxNumWorkgroups(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
1010
1011 void initialize(Attributor &A) override {
1012 Function *F = getAssociatedFunction();
1013 auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
1014
1015 SmallVector<unsigned> MaxNumWorkgroups = InfoCache.getMaxNumWorkGroups(*F);
1016
1017 X.takeKnownMinimum(MaxNumWorkgroups[0]);
1018 Y.takeKnownMinimum(MaxNumWorkgroups[1]);
1019 Z.takeKnownMinimum(MaxNumWorkgroups[2]);
1020
1021 if (AMDGPU::isEntryFunctionCC(F->getCallingConv()))
1022 indicatePessimisticFixpoint();
1023 }
1024
1025 ChangeStatus updateImpl(Attributor &A) override {
1026 ChangeStatus Change = ChangeStatus::UNCHANGED;
1027
1028 auto CheckCallSite = [&](AbstractCallSite CS) {
1029 Function *Caller = CS.getInstruction()->getFunction();
1030 LLVM_DEBUG(dbgs() << "[AAAMDMaxNumWorkgroups] Call " << Caller->getName()
1031 << "->" << getAssociatedFunction()->getName() << '\n');
1032
1033 const auto *CallerInfo = A.getAAFor<AAAMDMaxNumWorkgroups>(
1034 *this, IRPosition::function(*Caller), DepClassTy::REQUIRED);
1035 if (!CallerInfo || !CallerInfo->isValidState())
1036 return false;
1037
1038 Change |=
1039 clampStateAndIndicateChange(this->getState(), CallerInfo->getState());
1040 return true;
1041 };
1042
1043 bool AllCallSitesKnown = true;
1044 if (!A.checkForAllCallSites(CheckCallSite, *this,
1045 /*RequireAllCallSites=*/true,
1046 AllCallSitesKnown))
1047 return indicatePessimisticFixpoint();
1048
1049 return Change;
1050 }
1051
1052 /// Create an abstract attribute view for the position \p IRP.
1053 static AAAMDMaxNumWorkgroups &createForPosition(const IRPosition &IRP,
1054 Attributor &A);
1055
1056 ChangeStatus manifest(Attributor &A) override {
1057 Function *F = getAssociatedFunction();
1058 LLVMContext &Ctx = F->getContext();
1059 SmallString<32> Buffer;
1060 raw_svector_ostream OS(Buffer);
1061 OS << X.getAssumed() << ',' << Y.getAssumed() << ',' << Z.getAssumed();
1062
1063 // TODO: Should annotate loads of the group size for this to do anything
1064 // useful.
1065 return A.manifestAttrs(
1066 getIRPosition(),
1067 {Attribute::get(Ctx, "amdgpu-max-num-workgroups", OS.str())},
1068 /* ForceReplace= */ true);
1069 }
1070
1071 StringRef getName() const override { return "AAAMDMaxNumWorkgroups"; }
1072
1073 const std::string getAsStr(Attributor *) const override {
1074 std::string Buffer = "AAAMDMaxNumWorkgroupsState[";
1075 raw_string_ostream OS(Buffer);
1076 OS << X.getAssumed() << ',' << Y.getAssumed() << ',' << Z.getAssumed()
1077 << ']';
1078 return OS.str();
1079 }
1080
1081 const char *getIdAddr() const override { return &ID; }
1082
1083 /// This function should return true if the type of the \p AA is
1084 /// AAAMDMaxNumWorkgroups
1085 static bool classof(const AbstractAttribute *AA) {
1086 return (AA->getIdAddr() == &ID);
1087 }
1088
1089 void trackStatistics() const override {}
1090
1091 /// Unique ID (due to the unique address)
1092 static const char ID;
1093};
1094
1095const char AAAMDMaxNumWorkgroups::ID = 0;
1096
1097AAAMDMaxNumWorkgroups &
1098AAAMDMaxNumWorkgroups::createForPosition(const IRPosition &IRP, Attributor &A) {
1100 return *new (A.Allocator) AAAMDMaxNumWorkgroups(IRP, A);
1101 llvm_unreachable("AAAMDMaxNumWorkgroups is only valid for function position");
1102}
1103
1104/// Propagate amdgpu-waves-per-eu attribute.
1105struct AAAMDWavesPerEU : public AAAMDSizeRangeAttribute {
1106 AAAMDWavesPerEU(const IRPosition &IRP, Attributor &A)
1107 : AAAMDSizeRangeAttribute(IRP, A, "amdgpu-waves-per-eu") {}
1108
1109 void initialize(Attributor &A) override {
1110 Function *F = getAssociatedFunction();
1111 auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
1112
1113 // If the attribute exists, we will honor it if it is not the default.
1114 if (auto Attr = InfoCache.getWavesPerEUAttr(*F)) {
1115 std::pair<unsigned, unsigned> MaxWavesPerEURange{
1116 1U, InfoCache.getMaxWavesPerEU(*F)};
1117 if (*Attr != MaxWavesPerEURange) {
1118 auto [Min, Max] = *Attr;
1119 ConstantRange Range(APInt(32, Min), APInt(32, Max + 1));
1120 IntegerRangeState RangeState(Range);
1121 this->getState() = RangeState;
1122 indicateOptimisticFixpoint();
1123 return;
1124 }
1125 }
1126
1127 if (AMDGPU::isEntryFunctionCC(F->getCallingConv()))
1128 indicatePessimisticFixpoint();
1129 }
1130
1131 ChangeStatus updateImpl(Attributor &A) override {
1132 ChangeStatus Change = ChangeStatus::UNCHANGED;
1133
1134 auto CheckCallSite = [&](AbstractCallSite CS) {
1135 Function *Caller = CS.getInstruction()->getFunction();
1136 Function *Func = getAssociatedFunction();
1137 LLVM_DEBUG(dbgs() << '[' << getName() << "] Call " << Caller->getName()
1138 << "->" << Func->getName() << '\n');
1139 (void)Func;
1140
1141 const auto *CallerAA = A.getAAFor<AAAMDWavesPerEU>(
1142 *this, IRPosition::function(*Caller), DepClassTy::REQUIRED);
1143 if (!CallerAA || !CallerAA->isValidState())
1144 return false;
1145
1146 ConstantRange Assumed = getAssumed();
1147 unsigned Min = std::max(Assumed.getLower().getZExtValue(),
1148 CallerAA->getAssumed().getLower().getZExtValue());
1149 unsigned Max = std::max(Assumed.getUpper().getZExtValue(),
1150 CallerAA->getAssumed().getUpper().getZExtValue());
1151 ConstantRange Range(APInt(32, Min), APInt(32, Max));
1152 IntegerRangeState RangeState(Range);
1153 getState() = RangeState;
1154 Change |= getState() == Assumed ? ChangeStatus::UNCHANGED
1155 : ChangeStatus::CHANGED;
1156
1157 return true;
1158 };
1159
1160 bool AllCallSitesKnown = true;
1161 if (!A.checkForAllCallSites(CheckCallSite, *this, true, AllCallSitesKnown))
1162 return indicatePessimisticFixpoint();
1163
1164 return Change;
1165 }
1166
1167 /// Create an abstract attribute view for the position \p IRP.
1168 static AAAMDWavesPerEU &createForPosition(const IRPosition &IRP,
1169 Attributor &A);
1170
1171 ChangeStatus manifest(Attributor &A) override {
1172 Function *F = getAssociatedFunction();
1173 auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
1174 return emitAttributeIfNotDefaultAfterClamp(
1175 A, {1U, InfoCache.getMaxWavesPerEU(*F)});
1176 }
1177
1178 /// See AbstractAttribute::getName()
1179 StringRef getName() const override { return "AAAMDWavesPerEU"; }
1180
1181 /// See AbstractAttribute::getIdAddr()
1182 const char *getIdAddr() const override { return &ID; }
1183
1184 /// This function should return true if the type of the \p AA is
1185 /// AAAMDWavesPerEU
1186 static bool classof(const AbstractAttribute *AA) {
1187 return (AA->getIdAddr() == &ID);
1188 }
1189
1190 /// Unique ID (due to the unique address)
1191 static const char ID;
1192};
1193
1194const char AAAMDWavesPerEU::ID = 0;
1195
1196AAAMDWavesPerEU &AAAMDWavesPerEU::createForPosition(const IRPosition &IRP,
1197 Attributor &A) {
1199 return *new (A.Allocator) AAAMDWavesPerEU(IRP, A);
1200 llvm_unreachable("AAAMDWavesPerEU is only valid for function position");
1201}
1202
1203/// Compute the minimum number of AGPRs required to allocate the inline asm.
1204static unsigned inlineAsmGetNumRequiredAGPRs(const InlineAsm *IA,
1205 const CallBase &Call) {
1206 unsigned ArgNo = 0;
1207 unsigned ResNo = 0;
1208 unsigned AGPRDefCount = 0;
1209 unsigned AGPRUseCount = 0;
1210 unsigned MaxPhysReg = 0;
1211 const DataLayout &DL = Call.getFunction()->getParent()->getDataLayout();
1212
1213 // TODO: Overestimates due to not accounting for tied operands
1214 for (const InlineAsm::ConstraintInfo &CI : IA->ParseConstraints()) {
1215 Type *Ty = nullptr;
1216 switch (CI.Type) {
1217 case InlineAsm::isOutput: {
1218 Ty = Call.getType();
1219 if (auto *STy = dyn_cast<StructType>(Ty))
1220 Ty = STy->getElementType(ResNo);
1221 ++ResNo;
1222 break;
1223 }
1224 case InlineAsm::isInput: {
1225 Ty = Call.getArgOperand(ArgNo++)->getType();
1226 break;
1227 }
1228 case InlineAsm::isLabel:
1229 continue;
1231 // Parse the physical register reference.
1232 break;
1233 }
1234
1235 for (StringRef Code : CI.Codes) {
1236 unsigned RegCount = 0;
1237 if (Code.starts_with("a")) {
1238 // Virtual register, compute number of registers based on the type.
1239 //
1240 // We ought to be going through TargetLowering to get the number of
1241 // registers, but we should avoid the dependence on CodeGen here.
1242 RegCount = divideCeil(DL.getTypeSizeInBits(Ty), 32);
1243 } else {
1244 // Physical register reference
1245 auto [Kind, RegIdx, NumRegs] = AMDGPU::parseAsmConstraintPhysReg(Code);
1246 if (Kind == 'a') {
1247 RegCount = NumRegs;
1248 MaxPhysReg = std::max(MaxPhysReg, std::min(RegIdx + NumRegs, 256u));
1249 }
1250
1251 continue;
1252 }
1253
1254 if (CI.Type == InlineAsm::isOutput) {
1255 // Apply tuple alignment requirement
1256 //
1257 // TODO: This is more conservative than necessary.
1258 AGPRDefCount = alignTo(AGPRDefCount, RegCount);
1259
1260 AGPRDefCount += RegCount;
1261 if (CI.isEarlyClobber) {
1262 AGPRUseCount = alignTo(AGPRUseCount, RegCount);
1263 AGPRUseCount += RegCount;
1264 }
1265 } else {
1266 AGPRUseCount = alignTo(AGPRUseCount, RegCount);
1267 AGPRUseCount += RegCount;
1268 }
1269 }
1270 }
1271
1272 unsigned MaxVirtReg = std::max(AGPRUseCount, AGPRDefCount);
1273
1274 // TODO: This is overly conservative. If there are any physical registers,
1275 // allocate any virtual registers after them so we don't have to solve optimal
1276 // packing.
1277 return std::min(MaxVirtReg + MaxPhysReg, 256u);
1278}
1279
1280struct AAAMDGPUMinAGPRAlloc
1281 : public StateWrapper<DecIntegerState<>, AbstractAttribute> {
1282 using Base = StateWrapper<DecIntegerState<>, AbstractAttribute>;
1283 AAAMDGPUMinAGPRAlloc(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
1284
1285 static AAAMDGPUMinAGPRAlloc &createForPosition(const IRPosition &IRP,
1286 Attributor &A) {
1288 return *new (A.Allocator) AAAMDGPUMinAGPRAlloc(IRP, A);
1290 "AAAMDGPUMinAGPRAlloc is only valid for function position");
1291 }
1292
1293 void initialize(Attributor &A) override {
1294 Function *F = getAssociatedFunction();
1295 auto [MinNumAGPR, MaxNumAGPR] =
1296 AMDGPU::getIntegerPairAttribute(*F, "amdgpu-agpr-alloc", {~0u, ~0u},
1297 /*OnlyFirstRequired=*/true);
1298 if (MinNumAGPR == 0)
1299 indicateOptimisticFixpoint();
1300 }
1301
1302 const std::string getAsStr(Attributor *A) const override {
1303 std::string Str = "amdgpu-agpr-alloc=";
1304 raw_string_ostream OS(Str);
1305 OS << getAssumed();
1306 return OS.str();
1307 }
1308
1309 void trackStatistics() const override {}
1310
1311 ChangeStatus updateImpl(Attributor &A) override {
1312 DecIntegerState<> Maximum;
1313
1314 // Check for cases which require allocation of AGPRs. The only cases where
1315 // AGPRs are required are if there are direct references to AGPRs, so inline
1316 // assembly and special intrinsics.
1317 auto CheckForMinAGPRAllocs = [&](Instruction &I) {
1318 const auto &CB = cast<CallBase>(I);
1319 const Value *CalleeOp = CB.getCalledOperand();
1320
1321 if (const InlineAsm *IA = dyn_cast<InlineAsm>(CalleeOp)) {
1322 // Technically, the inline asm could be invoking a call to an unknown
1323 // external function that requires AGPRs, but ignore that.
1324 unsigned NumRegs = inlineAsmGetNumRequiredAGPRs(IA, CB);
1325 Maximum.takeAssumedMaximum(NumRegs);
1326 return true;
1327 }
1328 switch (CB.getIntrinsicID()) {
1330 break;
1331 case Intrinsic::write_register:
1332 case Intrinsic::read_register:
1333 case Intrinsic::read_volatile_register: {
1334 const MDString *RegName = cast<MDString>(
1336 cast<MetadataAsValue>(CB.getArgOperand(0))->getMetadata())
1337 ->getOperand(0));
1338 auto [Kind, RegIdx, NumRegs] =
1340 if (Kind == 'a')
1341 Maximum.takeAssumedMaximum(std::min(RegIdx + NumRegs, 256u));
1342
1343 return true;
1344 }
1345 // Trap-like intrinsics such as llvm.trap and llvm.debugtrap do not have
1346 // the nocallback attribute, so the AMDGPU attributor can conservatively
1347 // drop all implicitly-known inputs and AGPR allocation information. Make
1348 // sure we still infer that no implicit inputs are required and that the
1349 // AGPR allocation stays at zero. Trap-like intrinsics may invoke a
1350 // function which requires AGPRs, so we need to check if the called
1351 // function has the "trap-func-name" attribute.
1352 case Intrinsic::trap:
1353 case Intrinsic::debugtrap:
1354 case Intrinsic::ubsantrap:
1355 return CB.hasFnAttr(Attribute::NoCallback) ||
1356 !CB.hasFnAttr("trap-func-name");
1357 default:
1358 // Some intrinsics may use AGPRs, but if we have a choice, we are not
1359 // required to use AGPRs.
1360 // Assume !nocallback intrinsics may call a function which requires
1361 // AGPRs.
1362 return CB.hasFnAttr(Attribute::NoCallback);
1363 }
1364
1365 // TODO: Handle callsite attributes
1366 auto *CBEdges = A.getAAFor<AACallEdges>(
1367 *this, IRPosition::callsite_function(CB), DepClassTy::REQUIRED);
1368 if (!CBEdges || CBEdges->hasUnknownCallee()) {
1370 return false;
1371 }
1372
1373 for (const Function *PossibleCallee : CBEdges->getOptimisticEdges()) {
1374 const auto *CalleeInfo = A.getAAFor<AAAMDGPUMinAGPRAlloc>(
1375 *this, IRPosition::function(*PossibleCallee), DepClassTy::REQUIRED);
1376 if (!CalleeInfo || !CalleeInfo->isValidState()) {
1378 return false;
1379 }
1380
1381 Maximum.takeAssumedMaximum(CalleeInfo->getAssumed());
1382 }
1383
1384 return true;
1385 };
1386
1387 bool UsedAssumedInformation = false;
1388 if (!A.checkForAllCallLikeInstructions(CheckForMinAGPRAllocs, *this,
1389 UsedAssumedInformation))
1390 return indicatePessimisticFixpoint();
1391
1392 return clampStateAndIndicateChange(getState(), Maximum);
1393 }
1394
1395 ChangeStatus manifest(Attributor &A) override {
1396 LLVMContext &Ctx = getAssociatedFunction()->getContext();
1397 SmallString<4> Buffer;
1398 raw_svector_ostream OS(Buffer);
1399 OS << getAssumed();
1400
1401 return A.manifestAttrs(
1402 getIRPosition(), {Attribute::get(Ctx, "amdgpu-agpr-alloc", OS.str())});
1403 }
1404
1405 StringRef getName() const override { return "AAAMDGPUMinAGPRAlloc"; }
1406 const char *getIdAddr() const override { return &ID; }
1407
1408 /// This function should return true if the type of the \p AA is
1409 /// AAAMDGPUMinAGPRAllocs
1410 static bool classof(const AbstractAttribute *AA) {
1411 return (AA->getIdAddr() == &ID);
1412 }
1413
1414 static const char ID;
1415};
1416
1417const char AAAMDGPUMinAGPRAlloc::ID = 0;
1418
1419/// An abstract attribute to propagate the function attribute
1420/// "amdgpu-cluster-dims" from kernel entry functions to device functions.
1421struct AAAMDGPUClusterDims
1422 : public StateWrapper<BooleanState, AbstractAttribute> {
1423 using Base = StateWrapper<BooleanState, AbstractAttribute>;
1424 AAAMDGPUClusterDims(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
1425
1426 /// Create an abstract attribute view for the position \p IRP.
1427 static AAAMDGPUClusterDims &createForPosition(const IRPosition &IRP,
1428 Attributor &A);
1429
1430 /// See AbstractAttribute::getName().
1431 StringRef getName() const override { return "AAAMDGPUClusterDims"; }
1432
1433 /// See AbstractAttribute::getIdAddr().
1434 const char *getIdAddr() const override { return &ID; }
1435
1436 /// This function should return true if the type of the \p AA is
1437 /// AAAMDGPUClusterDims.
1438 static bool classof(const AbstractAttribute *AA) {
1439 return AA->getIdAddr() == &ID;
1440 }
1441
1442 virtual const AMDGPU::ClusterDimsAttr &getClusterDims() const = 0;
1443
1444 /// Unique ID (due to the unique address)
1445 static const char ID;
1446};
1447
1448const char AAAMDGPUClusterDims::ID = 0;
1449
1450struct AAAMDGPUClusterDimsFunction : public AAAMDGPUClusterDims {
1451 AAAMDGPUClusterDimsFunction(const IRPosition &IRP, Attributor &A)
1452 : AAAMDGPUClusterDims(IRP, A) {}
1453
1454 void initialize(Attributor &A) override {
1455 Function *F = getAssociatedFunction();
1456 assert(F && "empty associated function");
1457
1459
1460 // No matter what a kernel function has, it is final.
1461 if (AMDGPU::isEntryFunctionCC(F->getCallingConv())) {
1462 if (Attr.isUnknown())
1463 indicatePessimisticFixpoint();
1464 else
1465 indicateOptimisticFixpoint();
1466 }
1467 }
1468
1469 const std::string getAsStr(Attributor *A) const override {
1470 if (!getAssumed() || Attr.isUnknown())
1471 return "unknown";
1472 if (Attr.isNoCluster())
1473 return "no";
1474 if (Attr.isVariableDims())
1475 return "variable";
1476 return Attr.to_string();
1477 }
1478
1479 void trackStatistics() const override {}
1480
1481 ChangeStatus updateImpl(Attributor &A) override {
1482 auto OldState = Attr;
1483
1484 auto CheckCallSite = [&](AbstractCallSite CS) {
1485 const auto *CallerAA = A.getAAFor<AAAMDGPUClusterDims>(
1486 *this, IRPosition::function(*CS.getInstruction()->getFunction()),
1487 DepClassTy::REQUIRED);
1488 if (!CallerAA || !CallerAA->isValidState())
1489 return false;
1490
1491 return merge(CallerAA->getClusterDims());
1492 };
1493
1494 bool UsedAssumedInformation = false;
1495 if (!A.checkForAllCallSites(CheckCallSite, *this,
1496 /*RequireAllCallSites=*/true,
1497 UsedAssumedInformation))
1498 return indicatePessimisticFixpoint();
1499
1500 return OldState == Attr ? ChangeStatus::UNCHANGED : ChangeStatus::CHANGED;
1501 }
1502
1503 ChangeStatus manifest(Attributor &A) override {
1504 if (Attr.isUnknown())
1505 return ChangeStatus::UNCHANGED;
1506 return A.manifestAttrs(
1507 getIRPosition(),
1508 {Attribute::get(getAssociatedFunction()->getContext(), AttrName,
1509 Attr.to_string())},
1510 /*ForceReplace=*/true);
1511 }
1512
1513 const AMDGPU::ClusterDimsAttr &getClusterDims() const override {
1514 return Attr;
1515 }
1516
1517private:
1518 bool merge(const AMDGPU::ClusterDimsAttr &Other) {
1519 // Case 1: Both of them are unknown yet, we do nothing and continue wait for
1520 // propagation.
1521 if (Attr.isUnknown() && Other.isUnknown())
1522 return true;
1523
1524 // Case 2: The other is determined, but we are unknown yet, we simply take
1525 // the other's value.
1526 if (Attr.isUnknown()) {
1527 Attr = Other;
1528 return true;
1529 }
1530
1531 // Case 3: We are determined but the other is unknown yet, we simply keep
1532 // everything unchanged.
1533 if (Other.isUnknown())
1534 return true;
1535
1536 // After this point, both are determined.
1537
1538 // Case 4: If they are same, we do nothing.
1539 if (Attr == Other)
1540 return true;
1541
1542 // Now they are not same.
1543
1544 // Case 5: If either of us uses cluster (but not both; otherwise case 4
1545 // would hold), then it is unknown whether cluster will be used, and the
1546 // state is final, unlike case 1.
1547 if (Attr.isNoCluster() || Other.isNoCluster()) {
1548 Attr.setUnknown();
1549 return false;
1550 }
1551
1552 // Case 6: Both of us use cluster, but the dims are different, so the result
1553 // is, cluster is used, but we just don't have a fixed dims.
1554 Attr.setVariableDims();
1555 return true;
1556 }
1557
1558 AMDGPU::ClusterDimsAttr Attr;
1559
1560 static constexpr char AttrName[] = "amdgpu-cluster-dims";
1561};
1562
1563AAAMDGPUClusterDims &
1564AAAMDGPUClusterDims::createForPosition(const IRPosition &IRP, Attributor &A) {
1566 return *new (A.Allocator) AAAMDGPUClusterDimsFunction(IRP, A);
1567 llvm_unreachable("AAAMDGPUClusterDims is only valid for function position");
1568}
1569
1570static bool runImpl(SetVector<Function *> &Functions, bool IsModulePass,
1571 bool DeleteFns, Module &M, AnalysisGetter &AG,
1572 TargetMachine &TM, AMDGPUAttributorOptions Options,
1573 ThinOrFullLTOPhase LTOPhase) {
1574
1575 CallGraphUpdater CGUpdater;
1577 AMDGPUInformationCache InfoCache(M, AG, Allocator, nullptr, TM);
1578 DenseSet<const char *> Allowed(
1579 {&AAAMDAttributes::ID, &AAUniformWorkGroupSize::ID,
1580 &AAPotentialValues::ID, &AAAMDFlatWorkGroupSize::ID,
1581 &AAAMDMaxNumWorkgroups::ID, &AAAMDWavesPerEU::ID,
1582 &AAAMDGPUMinAGPRAlloc::ID, &AACallEdges::ID, &AAPointerInfo::ID,
1585 &AAAMDGPUClusterDims::ID, &AAAlign::ID});
1586
1587 AttributorConfig AC(CGUpdater);
1588 AC.IsClosedWorldModule = Options.IsClosedWorld;
1589 AC.Allowed = &Allowed;
1590 AC.IsModulePass = IsModulePass;
1591 AC.DeleteFns = DeleteFns;
1592 AC.DefaultInitializeLiveInternals = false;
1593 AC.IndirectCalleeSpecializationCallback =
1594 [](Attributor &A, const AbstractAttribute &AA, CallBase &CB,
1595 Function &Callee, unsigned NumAssumedCallees) {
1596 return !AMDGPU::isEntryFunctionCC(Callee.getCallingConv()) &&
1597 (NumAssumedCallees <= IndirectCallSpecializationThreshold);
1598 };
1599 AC.IPOAmendableCB = [](const Function &F) {
1600 return F.getCallingConv() == CallingConv::AMDGPU_KERNEL;
1601 };
1602
1603 Attributor A(Functions, InfoCache, AC);
1604
1605 LLVM_DEBUG({
1606 StringRef LTOPhaseStr = to_string(LTOPhase);
1607 dbgs() << "[AMDGPUAttributor] Running at phase " << LTOPhaseStr << '\n'
1608 << "[AMDGPUAttributor] Module " << M.getName() << " is "
1609 << (AC.IsClosedWorldModule ? "" : "not ")
1610 << "assumed to be a closed world.\n";
1611 });
1612
1613 for (auto *F : Functions) {
1614 A.getOrCreateAAFor<AAAMDAttributes>(IRPosition::function(*F));
1615 A.getOrCreateAAFor<AAUniformWorkGroupSize>(IRPosition::function(*F));
1616 A.getOrCreateAAFor<AAAMDMaxNumWorkgroups>(IRPosition::function(*F));
1617 CallingConv::ID CC = F->getCallingConv();
1618 if (!AMDGPU::isEntryFunctionCC(CC)) {
1619 A.getOrCreateAAFor<AAAMDFlatWorkGroupSize>(IRPosition::function(*F));
1620 A.getOrCreateAAFor<AAAMDWavesPerEU>(IRPosition::function(*F));
1621 }
1622
1623 const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(*F);
1624 if (!F->isDeclaration() && ST.hasClusters())
1625 A.getOrCreateAAFor<AAAMDGPUClusterDims>(IRPosition::function(*F));
1626
1627 if (ST.hasGFX90AInsts())
1628 A.getOrCreateAAFor<AAAMDGPUMinAGPRAlloc>(IRPosition::function(*F));
1629
1630 for (auto &I : instructions(F)) {
1631 Value *Ptr = nullptr;
1632 if (auto *LI = dyn_cast<LoadInst>(&I))
1633 Ptr = LI->getPointerOperand();
1634 else if (auto *SI = dyn_cast<StoreInst>(&I))
1635 Ptr = SI->getPointerOperand();
1636 else if (auto *RMW = dyn_cast<AtomicRMWInst>(&I))
1637 Ptr = RMW->getPointerOperand();
1638 else if (auto *CmpX = dyn_cast<AtomicCmpXchgInst>(&I))
1639 Ptr = CmpX->getPointerOperand();
1640
1641 if (Ptr) {
1642 A.getOrCreateAAFor<AAAddressSpace>(IRPosition::value(*Ptr));
1643 A.getOrCreateAAFor<AANoAliasAddrSpace>(IRPosition::value(*Ptr));
1644 if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Ptr)) {
1645 if (II->getIntrinsicID() == Intrinsic::amdgcn_make_buffer_rsrc)
1646 A.getOrCreateAAFor<AAAlign>(IRPosition::value(*Ptr));
1647 }
1648 }
1649 }
1650 }
1651
1652 return A.run() == ChangeStatus::CHANGED;
1653}
1654} // namespace
1655
1658
1661 AnalysisGetter AG(FAM);
1662
1663 SetVector<Function *> Functions;
1664 for (Function &F : M) {
1665 if (!F.isIntrinsic())
1666 Functions.insert(&F);
1667 }
1668
1669 // TODO: Probably preserves CFG
1670 return runImpl(Functions, /*IsModulePass=*/true, /*DeleteFns=*/true, M, AG,
1671 TM, Options, LTOPhase)
1674}
1675
1678 LazyCallGraph &CG,
1679 CGSCCUpdateResult &UR) {
1680
1682 AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C, CG).getManager();
1683 AnalysisGetter AG(FAM);
1684
1685 SetVector<Function *> Functions;
1686 for (LazyCallGraph::Node &N : C) {
1687 Function *F = &N.getFunction();
1688 if (!F->isIntrinsic())
1689 Functions.insert(F);
1690 }
1691
1693 Module *M = C.begin()->getFunction().getParent();
1694 // In the CGSCC pipeline, avoid untracked call graph modifications by
1695 // disabling function deletion, mirroring the generic AttributorCGSCCPass.
1696 return runImpl(Functions, /*IsModulePass=*/false, /*DeleteFns=*/false, *M, AG,
1700}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static bool isDSAddress(const Constant *C)
static constexpr std::pair< ImplicitArgumentMask, StringLiteral > ImplicitAttrs[]
static cl::opt< unsigned > IndirectCallSpecializationThreshold("amdgpu-indirect-call-specialization-threshold", cl::desc("A threshold controls whether an indirect call will be specialized"), cl::init(3))
static ImplicitArgumentMask intrinsicToAttrMask(Intrinsic::ID ID, bool &NonKernelOnly, bool &NeedsImplicit, bool HasApertureRegs, bool SupportsGetDoorBellID, unsigned CodeObjectVersion)
static bool hasSanitizerAttributes(const Function &F)
Returns true if sanitizer attributes are present on a function.
ImplicitArgumentMask
@ UNKNOWN_INTRINSIC
@ NOT_IMPLICIT_INPUT
@ ALL_ARGUMENT_MASK
ImplicitArgumentPositions
@ LAST_ARG_POS
static bool castRequiresQueuePtr(unsigned SrcAS)
The AMDGPU TargetMachine interface definition for hw codegen targets.
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Expand Atomic instructions
#define X(NUM, ENUM, NAME)
Definition ELF.h:851
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
DXIL Resource Access
@ Default
static bool runImpl(Function &F, const TargetLowering &TLI, const LibcallLoweringInfo &Libcalls, AssumptionCache *AC)
AMD GCN specific subclass of TargetSubtarget.
#define RegName(no)
static LVOptions Options
Definition LVOptions.cpp:25
static LoopDeletionResult merge(LoopDeletionResult A, LoopDeletionResult B)
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
Machine Check Debug Module
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
uint64_t IntrinsicInst * II
FunctionAnalysisManager FAM
static StringRef getName(Value *V)
Basic Register Allocator
#define LLVM_DEBUG(...)
Definition Debug.h:114
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, const llvm::StringTable &StandardNames, VectorLibrary VecLib)
Initialize the set of available library functions based on the specified target triple.
PreservedAnalyses run(LazyCallGraph::SCC &C, CGSCCAnalysisManager &AM, LazyCallGraph &CG, CGSCCUpdateResult &UR)
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
static ClusterDimsAttr get(const Function &F)
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1555
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Value * getArgOperand(unsigned i) const
LLVM_ABI Intrinsic::ID getIntrinsicID() const
Returns the intrinsic ID of the intrinsic called or Intrinsic::not_intrinsic if the called function i...
const APInt & getLower() const
Return the lower value for this range.
const APInt & getUpper() const
Return the upper value for this range.
This is an important base class in LLVM.
Definition Constant.h:43
A proxy from a FunctionAnalysisManager to an SCC.
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition Function.h:272
unsigned getAddressSpace() const
Module * getParent()
Get the module that this global value is contained inside of...
LLVM_ABI const Function * getFunction() const
Return the function this instruction belongs to.
A node in the call graph.
An SCC of the call graph.
A lazily constructed view of the call graph of a module.
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition Module.h:278
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition Analysis.h:115
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
A vector that has set insertion semantics.
Definition SetVector.h:57
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
void push_back(const T &Elt)
std::string str() const
str - Get the contents as an std::string.
Definition StringRef.h:222
const STC & getSubtarget(const Function &F) const
This method returns a pointer to the specified type of TargetSubtargetInfo.
LLVM_ABI bool isDroppable() const
A droppable user is a user for which uses can be dropped without affecting correctness and should be ...
Definition User.cpp:119
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:255
CallInst * Call
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ REGION_ADDRESS
Address space for region memory. (GDS)
@ LOCAL_ADDRESS
Address space for local memory.
@ PRIVATE_ADDRESS
Address space for private memory.
unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI)
unsigned getAMDHSACodeObjectVersion(const Module &M)
unsigned getDefaultQueueImplicitArgPosition(unsigned CodeObjectVersion)
std::tuple< char, unsigned, unsigned > parseAsmPhysRegName(StringRef RegName)
Returns a valid charcode or 0 in the first entry if this is a valid physical register name.
LLVM_READNONE constexpr bool isEntryFunctionCC(CallingConv::ID CC)
std::tuple< char, unsigned, unsigned > parseAsmConstraintPhysReg(StringRef Constraint)
Returns a valid charcode or 0 in the first entry if this is a valid physical register constraint.
unsigned getHostcallImplicitArgPosition(unsigned CodeObjectVersion)
unsigned getCompletionActionImplicitArgPosition(unsigned CodeObjectVersion)
std::pair< unsigned, unsigned > getIntegerPairAttribute(const Function &F, StringRef Name, std::pair< unsigned, unsigned > Default, bool OnlyFirstRequired)
LLVM_READNONE constexpr bool isGraphics(CallingConv::ID CC)
unsigned getMultigridSyncArgImplicitArgPosition(unsigned CodeObjectVersion)
E & operator^=(E &LHS, E RHS)
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ CE
Windows NT (Windows on ARM)
Definition MCAsmInfo.h:48
initializer< Ty > init(const Ty &Val)
NodeAddr< CodeNode * > Code
Definition RDFGraph.h:388
NodeAddr< FuncNode * > Func
Definition RDFGraph.h:393
Context & getContext() const
Definition BasicBlock.h:99
friend class Instruction
Iterator for Instructions in a `BasicBlock.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
InnerAnalysisManagerProxy< FunctionAnalysisManager, Module > FunctionAnalysisManagerModuleProxy
Provide the FunctionAnalysisManager to Module proxy.
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
AnalysisManager< LazyCallGraph::SCC, LazyCallGraph & > CGSCCAnalysisManager
The CGSCC analysis manager.
ThinOrFullLTOPhase
This enumerates the LLVM full LTO or ThinLTO optimization phases.
Definition Pass.h:77
@ None
No LTO/ThinLTO behavior needed.
Definition Pass.h:79
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
constexpr uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition Alignment.h:144
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
const char * to_string(ThinOrFullLTOPhase Phase)
Definition Pass.cpp:301
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
Definition MathExtras.h:394
@ Other
Any other memory.
Definition ModRef.h:68
ChangeStatus clampStateAndIndicateChange(StateType &S, const StateType &R)
Helper function to clamp a state S of type StateType with the information in R and indicate/return if...
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
ChangeStatus
{
Definition Attributor.h:496
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
BumpPtrAllocatorImpl<> BumpPtrAllocator
The standard BumpPtrAllocator which just uses the default template parameters.
Definition Allocator.h:383
AnalysisManager< Module > ModuleAnalysisManager
Convenience typedef for the Module analysis manager.
Definition MIRParser.h:39
#define N
static LLVM_ABI const char ID
Unique ID (due to the unique address)
static LLVM_ABI const char ID
Unique ID (due to the unique address)
virtual const SetVector< Function * > & getOptimisticEdges() const =0
Get the optimistic edges.
static LLVM_ABI const char ID
Unique ID (due to the unique address)
virtual bool hasNonAsmUnknownCallee() const =0
Is there any call with an unknown callee, excluding any inline asm.
static LLVM_ABI const char ID
Unique ID (due to the unique address)
static LLVM_ABI const char ID
Unique ID (due to the unique address)
Instruction * getRemoteInst() const
Return the actual instruction that causes the access.
static LLVM_ABI const char ID
Unique ID (due to the unique address)
static LLVM_ABI const char ID
Unique ID (due to the unique address)
static LLVM_ABI const char ID
Unique ID (due to the unique address)
static LLVM_ABI const char ID
Unique ID (due to the unique address)
virtual const char * getIdAddr() const =0
This function should return the address of the ID of the AbstractAttribute.
Wrapper for FunctionAnalysisManager.
The fixpoint analysis framework that orchestrates the attribute deduction.
Support structure for SCC passes to communicate updates the call graph back to the CGSCC pass manager...
DecIntegerState & takeAssumedMaximum(base_t Value)
Take maximum of assumed and Value.
Helper to describe and deal with positions in the LLVM-IR.
Definition Attributor.h:593
static const IRPosition callsite_returned(const CallBase &CB)
Create a position describing the returned value of CB.
Definition Attributor.h:661
static const IRPosition value(const Value &V, const CallBaseContext *CBContext=nullptr)
Create a position describing the value of V.
Definition Attributor.h:617
@ IRP_FUNCTION
An attribute for a function (scope).
Definition Attributor.h:605
static const IRPosition function(const Function &F, const CallBaseContext *CBContext=nullptr)
Create a position describing the function scope of F.
Definition Attributor.h:636
Kind getPositionKind() const
Return the associated position kind.
Definition Attributor.h:889
static const IRPosition callsite_function(const CallBase &CB)
Create a position describing the function scope of CB.
Definition Attributor.h:656
Data structure to hold cached (LLVM-IR) information.
bool isValidState() const override
See AbstractState::isValidState() NOTE: For now we simply pretend that the worst possible state is in...
ChangeStatus indicatePessimisticFixpoint() override
See AbstractState::indicatePessimisticFixpoint(...)
Helper to tie a abstract state implementation to an abstract attribute.