LLVM 23.0.0git
AMDGPUAttributor.cpp
Go to the documentation of this file.
1//===- AMDGPUAttributor.cpp -----------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file This pass uses Attributor framework to deduce AMDGPU attributes.
10//
11//===----------------------------------------------------------------------===//
12
13#include "AMDGPU.h"
14#include "GCNSubtarget.h"
16#include "llvm/IR/IntrinsicsAMDGPU.h"
17#include "llvm/IR/IntrinsicsR600.h"
20
// Debug category used by the LLVM_DEBUG output in this file.
21#define DEBUG_TYPE "amdgpu-attributor"
22
23using namespace llvm;
24
// Command-line option "amdgpu-indirect-call-specialization-threshold" with a
// default of 3 (see cl::init below).
// NOTE(review): the line that opens this option's declaration and the line
// that opens its description are not present in this listing - restore them
// from the original file before building.
26 "amdgpu-indirect-call-specialization-threshold",
28 "A threshold controls whether an indirect call will be specialized"),
29 cl::init(3));
30
// First expansion of AMDGPUAttributes.def: every AMDGPU_ATTRIBUTE(Name, Str)
// entry becomes the enumerator `Name_POS`.
// NOTE(review): the line opening the enclosing enum (and any trailing
// enumerator) is not present in this listing.
31#define AMDGPU_ATTRIBUTE(Name, Str) Name##_POS,
32
34#include "AMDGPUAttributes.def"
36};
37
// Second expansion: every entry becomes the single-bit enumerator
// `Name = 1 << Name_POS`.
// NOTE(review): the enum header and its extra enumerators are not present in
// this listing; later code refers to ImplicitArgumentMask, UNKNOWN_INTRINSIC,
// NOT_IMPLICIT_INPUT and ALL_ARGUMENT_MASK, which presumably live here -
// confirm.
38#define AMDGPU_ATTRIBUTE(Name, Str) Name = 1 << Name##_POS,
39
42#include "AMDGPUAttributes.def"
45};
46
// Third expansion: every entry becomes the pair {mask bit, attribute string}.
// NOTE(review): the declarator line naming the table is not present in this
// listing; later code iterates a table called ImplicitAttrs, presumably this
// one - confirm.
47#define AMDGPU_ATTRIBUTE(Name, Str) {Name, Str},
48static constexpr std::pair<ImplicitArgumentMask, StringLiteral>
50#include "AMDGPUAttributes.def"
51};
52
53// We do not need to note the x workitem or workgroup id because they are always
54// initialized.
55//
56// TODO: We should not add the attributes if the known compile time workgroup
57// size is 1 for y/z.
59intrinsicToAttrMask(Intrinsic::ID ID, bool &NonKernelOnly, bool &NeedsImplicit,
60 bool HasApertureRegs, bool SupportsGetDoorBellID,
61 unsigned CodeObjectVersion) {
62 switch (ID) {
63 case Intrinsic::amdgcn_workitem_id_x:
64 NonKernelOnly = true;
65 return WORKITEM_ID_X;
66 case Intrinsic::amdgcn_workgroup_id_x:
67 NonKernelOnly = true;
68 return WORKGROUP_ID_X;
69 case Intrinsic::amdgcn_workitem_id_y:
70 case Intrinsic::r600_read_tidig_y:
71 return WORKITEM_ID_Y;
72 case Intrinsic::amdgcn_workitem_id_z:
73 case Intrinsic::r600_read_tidig_z:
74 return WORKITEM_ID_Z;
75 case Intrinsic::amdgcn_workgroup_id_y:
76 case Intrinsic::r600_read_tgid_y:
77 return WORKGROUP_ID_Y;
78 case Intrinsic::amdgcn_workgroup_id_z:
79 case Intrinsic::r600_read_tgid_z:
80 return WORKGROUP_ID_Z;
81 case Intrinsic::amdgcn_cluster_id_x:
82 NonKernelOnly = true;
83 return CLUSTER_ID_X;
84 case Intrinsic::amdgcn_cluster_id_y:
85 return CLUSTER_ID_Y;
86 case Intrinsic::amdgcn_cluster_id_z:
87 return CLUSTER_ID_Z;
88 case Intrinsic::amdgcn_lds_kernel_id:
89 return LDS_KERNEL_ID;
90 case Intrinsic::amdgcn_dispatch_ptr:
91 return DISPATCH_PTR;
92 case Intrinsic::amdgcn_dispatch_id:
93 return DISPATCH_ID;
94 case Intrinsic::amdgcn_implicitarg_ptr:
95 return IMPLICIT_ARG_PTR;
96 // Need queue_ptr anyway. But under V5, we also need implicitarg_ptr to access
97 // queue_ptr.
98 case Intrinsic::amdgcn_queue_ptr:
99 NeedsImplicit = (CodeObjectVersion >= AMDGPU::AMDHSA_COV5);
100 return QUEUE_PTR;
101 case Intrinsic::amdgcn_is_shared:
102 case Intrinsic::amdgcn_is_private:
103 if (HasApertureRegs)
104 return NOT_IMPLICIT_INPUT;
105 // Under V5, we need implicitarg_ptr + offsets to access private_base or
106 // shared_base. For pre-V5, however, need to access them through queue_ptr +
107 // offsets.
108 return CodeObjectVersion >= AMDGPU::AMDHSA_COV5 ? IMPLICIT_ARG_PTR
109 : QUEUE_PTR;
110 case Intrinsic::trap:
111 case Intrinsic::debugtrap:
112 case Intrinsic::ubsantrap:
113 if (SupportsGetDoorBellID) // GetDoorbellID support implemented since V4.
114 return CodeObjectVersion >= AMDGPU::AMDHSA_COV4 ? NOT_IMPLICIT_INPUT
115 : QUEUE_PTR;
116 NeedsImplicit = (CodeObjectVersion >= AMDGPU::AMDHSA_COV5);
117 return QUEUE_PTR;
118 default:
119 return UNKNOWN_INTRINSIC;
120 }
121}
122
123static bool castRequiresQueuePtr(unsigned SrcAS) {
124 return SrcAS == AMDGPUAS::LOCAL_ADDRESS || SrcAS == AMDGPUAS::PRIVATE_ADDRESS;
125}
126
// Classifies the constant \p C by the address space of the global value it
// denotes; constants that are not global values yield false.
// NOTE(review): two lines are not present in this listing - the declaration
// of `GV` (presumably a dyn_cast of C to a global value) and the final return
// that tests `AS`. Going by the function name the test is presumably against
// the LDS/GDS address spaces - confirm against the original file.
127static bool isDSAddress(const Constant *C) {
129 if (!GV)
130 return false;
131 unsigned AS = GV->getAddressSpace();
133}
134
135/// Returns true if sanitizer attributes are present on a function.
136static bool hasSanitizerAttributes(const Function &F) {
137 return F.hasFnAttribute(Attribute::SanitizeAddress) ||
138 F.hasFnAttribute(Attribute::SanitizeThread) ||
139 F.hasFnAttribute(Attribute::SanitizeMemory) ||
140 F.hasFnAttribute(Attribute::SanitizeHWAddress) ||
141 F.hasFnAttribute(Attribute::SanitizeMemTag);
142}
143
144namespace {
145class AMDGPUInformationCache : public InformationCache {
146public:
147 AMDGPUInformationCache(const Module &M, AnalysisGetter &AG,
149 SetVector<Function *> *CGSCC, TargetMachine &TM)
150 : InformationCache(M, AG, Allocator, CGSCC), TM(TM),
151 CodeObjectVersion(AMDGPU::getAMDHSACodeObjectVersion(M)) {}
152
153 TargetMachine &TM;
154
// Bit flags describing what was found while walking a constant and its
// operands; the flags are OR-ed together into a uint8_t by the walkers below.
155 enum ConstantStatus : uint8_t {
156 NONE = 0,
// The constant is (or contains) a global for which isDSAddress() is true.
157 DS_GLOBAL = 1 << 0,
// A constant-expression addrspacecast whose source is the private space.
158 ADDR_SPACE_CAST_PRIVATE_TO_FLAT = 1 << 1,
// A constant-expression addrspacecast whose source is the local space.
159 ADDR_SPACE_CAST_LOCAL_TO_FLAT = 1 << 2,
// Convenience mask covering both cast flags above.
160 ADDR_SPACE_CAST_BOTH_TO_FLAT =
161 ADDR_SPACE_CAST_PRIVATE_TO_FLAT | ADDR_SPACE_CAST_LOCAL_TO_FLAT
162 };
163
164 /// Check if the subtarget has aperture regs.
165 bool hasApertureRegs(Function &F) {
166 const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
167 return ST.hasApertureRegs();
168 }
169
170 /// Check if the subtarget supports GetDoorbellID.
171 bool supportsGetDoorbellID(Function &F) {
172 const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
173 return ST.supportsGetDoorbellID();
174 }
175
176 std::optional<std::pair<unsigned, unsigned>>
177 getFlatWorkGroupSizeAttr(const Function &F) const {
178 auto R = AMDGPU::getIntegerPairAttribute(F, "amdgpu-flat-work-group-size");
179 if (!R)
180 return std::nullopt;
181 return std::make_pair(R->first, *(R->second));
182 }
183
184 std::pair<unsigned, unsigned>
185 getDefaultFlatWorkGroupSize(const Function &F) const {
186 const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
187 return ST.getDefaultFlatWorkGroupSize(F.getCallingConv());
188 }
189
190 std::pair<unsigned, unsigned>
191 getMaximumFlatWorkGroupRange(const Function &F) {
192 const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
193 return {ST.getMinFlatWorkGroupSize(), ST.getMaxFlatWorkGroupSize()};
194 }
195
196 SmallVector<unsigned> getMaxNumWorkGroups(const Function &F) {
197 const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
198 return ST.getMaxNumWorkGroups(F);
199 }
200
201 /// Get code object version.
202 unsigned getCodeObjectVersion() const { return CodeObjectVersion; }
203
204 std::optional<std::pair<unsigned, unsigned>>
205 getWavesPerEUAttr(const Function &F) {
206 auto Val = AMDGPU::getIntegerPairAttribute(F, "amdgpu-waves-per-eu",
207 /*OnlyFirstRequired=*/true);
208 if (!Val)
209 return std::nullopt;
210 if (!Val->second) {
211 const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
212 Val->second = ST.getMaxWavesPerEU();
213 }
214 return std::make_pair(Val->first, *(Val->second));
215 }
216
217 unsigned getMaxWavesPerEU(const Function &F) {
218 const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
219 return ST.getMaxWavesPerEU();
220 }
221
// Override of the InformationCache hook reporting the largest address space.
// NOTE(review): the return statement is not present in this listing, so the
// body as shown returns nothing - restore it from the original file
// (presumably the highest AMDGPU address-space enumerator; confirm).
222 unsigned getMaxAddrSpace() const override {
224 }
225
226private:
227 /// Check if the ConstantExpr \p CE uses an addrspacecast from private or
228 /// local to flat. These casts may require the queue pointer.
229 static uint8_t visitConstExpr(const ConstantExpr *CE) {
230 uint8_t Status = NONE;
231
232 if (CE->getOpcode() == Instruction::AddrSpaceCast) {
233 unsigned SrcAS = CE->getOperand(0)->getType()->getPointerAddressSpace();
234 if (SrcAS == AMDGPUAS::PRIVATE_ADDRESS)
235 Status |= ADDR_SPACE_CAST_PRIVATE_TO_FLAT;
236 else if (SrcAS == AMDGPUAS::LOCAL_ADDRESS)
237 Status |= ADDR_SPACE_CAST_LOCAL_TO_FLAT;
238 }
239
240 return Status;
241 }
242
243 /// Get the constant access bitmap for \p C.
244 uint8_t getConstantAccess(const Constant *C,
245 SmallPtrSetImpl<const Constant *> &Visited) {
246 auto It = ConstantStatus.find(C);
247 if (It != ConstantStatus.end())
248 return It->second;
249
250 uint8_t Result = 0;
251 if (isDSAddress(C))
252 Result = DS_GLOBAL;
253
254 if (const auto *CE = dyn_cast<ConstantExpr>(C))
255 Result |= visitConstExpr(CE);
256
257 for (const Use &U : C->operands()) {
258 const auto *OpC = dyn_cast<Constant>(U);
259 if (!OpC || !Visited.insert(OpC).second)
260 continue;
261
262 Result |= getConstantAccess(OpC, Visited);
263 }
264 return Result;
265 }
266
267public:
268 /// Returns true if \p Fn needs the queue pointer because of \p C.
269 bool needsQueuePtr(const Constant *C, Function &Fn) {
270 bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(Fn.getCallingConv());
271 bool HasAperture = hasApertureRegs(Fn);
272
273 // No need to explore the constants.
274 if (!IsNonEntryFunc && HasAperture)
275 return false;
276
277 SmallPtrSet<const Constant *, 8> Visited;
278 uint8_t Access = getConstantAccess(C, Visited);
279
280 // We need to trap on DS globals in non-entry functions.
281 if (IsNonEntryFunc && (Access & DS_GLOBAL))
282 return true;
283
284 return !HasAperture && (Access & ADDR_SPACE_CAST_BOTH_TO_FLAT);
285 }
286
287 bool checkConstForAddrSpaceCastFromPrivate(const Constant *C) {
288 SmallPtrSet<const Constant *, 8> Visited;
289 uint8_t Access = getConstantAccess(C, Visited);
290 return Access & ADDR_SPACE_CAST_PRIVATE_TO_FLAT;
291 }
292
293private:
294 /// Used to determine if the Constant needs the queue pointer.
295 DenseMap<const Constant *, uint8_t> ConstantStatus;
296 const unsigned CodeObjectVersion;
297};
298
299struct AAAMDAttributes
300 : public StateWrapper<BitIntegerState<uint32_t, ALL_ARGUMENT_MASK, 0>,
301 AbstractAttribute> {
302 using Base = StateWrapper<BitIntegerState<uint32_t, ALL_ARGUMENT_MASK, 0>,
303 AbstractAttribute>;
304
305 AAAMDAttributes(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
306
307 /// Create an abstract attribute view for the position \p IRP.
308 static AAAMDAttributes &createForPosition(const IRPosition &IRP,
309 Attributor &A);
310
311 /// See AbstractAttribute::getName().
312 StringRef getName() const override { return "AAAMDAttributes"; }
313
314 /// See AbstractAttribute::getIdAddr().
315 const char *getIdAddr() const override { return &ID; }
316
317 /// This function should return true if the type of the \p AA is
318 /// AAAMDAttributes.
319 static bool classof(const AbstractAttribute *AA) {
320 return (AA->getIdAddr() == &ID);
321 }
322
323 /// Unique ID (due to the unique address)
324 static const char ID;
325};
326const char AAAMDAttributes::ID = 0;
327
328struct AAUniformWorkGroupSize
329 : public StateWrapper<BooleanState, AbstractAttribute> {
330 using Base = StateWrapper<BooleanState, AbstractAttribute>;
331 AAUniformWorkGroupSize(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
332
333 /// Create an abstract attribute view for the position \p IRP.
334 static AAUniformWorkGroupSize &createForPosition(const IRPosition &IRP,
335 Attributor &A);
336
337 /// See AbstractAttribute::getName().
338 StringRef getName() const override { return "AAUniformWorkGroupSize"; }
339
340 /// See AbstractAttribute::getIdAddr().
341 const char *getIdAddr() const override { return &ID; }
342
343 /// This function should return true if the type of the \p AA is
344 /// AAAMDAttributes.
345 static bool classof(const AbstractAttribute *AA) {
346 return (AA->getIdAddr() == &ID);
347 }
348
349 /// Unique ID (due to the unique address)
350 static const char ID;
351};
352const char AAUniformWorkGroupSize::ID = 0;
353
354struct AAUniformWorkGroupSizeFunction : public AAUniformWorkGroupSize {
355 AAUniformWorkGroupSizeFunction(const IRPosition &IRP, Attributor &A)
356 : AAUniformWorkGroupSize(IRP, A) {}
357
358 void initialize(Attributor &A) override {
359 Function *F = getAssociatedFunction();
360 CallingConv::ID CC = F->getCallingConv();
361
362 if (CC != CallingConv::AMDGPU_KERNEL)
363 return;
364
365 bool InitialValue = F->hasFnAttribute("uniform-work-group-size");
366
367 if (InitialValue)
368 indicateOptimisticFixpoint();
369 else
370 indicatePessimisticFixpoint();
371 }
372
373 ChangeStatus updateImpl(Attributor &A) override {
374 ChangeStatus Change = ChangeStatus::UNCHANGED;
375
376 auto CheckCallSite = [&](AbstractCallSite CS) {
377 Function *Caller = CS.getInstruction()->getFunction();
378 LLVM_DEBUG(dbgs() << "[AAUniformWorkGroupSize] Call " << Caller->getName()
379 << "->" << getAssociatedFunction()->getName() << "\n");
380
381 const auto *CallerInfo = A.getAAFor<AAUniformWorkGroupSize>(
382 *this, IRPosition::function(*Caller), DepClassTy::REQUIRED);
383 if (!CallerInfo || !CallerInfo->isValidState())
384 return false;
385
386 Change = Change | clampStateAndIndicateChange(this->getState(),
387 CallerInfo->getState());
388
389 return true;
390 };
391
392 bool AllCallSitesKnown = true;
393 if (!A.checkForAllCallSites(CheckCallSite, *this, true, AllCallSitesKnown))
394 return indicatePessimisticFixpoint();
395
396 return Change;
397 }
398
399 ChangeStatus manifest(Attributor &A) override {
400 if (!getAssumed())
401 return ChangeStatus::UNCHANGED;
402
403 LLVMContext &Ctx = getAssociatedFunction()->getContext();
404 return A.manifestAttrs(getIRPosition(),
405 {Attribute::get(Ctx, "uniform-work-group-size")},
406 /*ForceReplace=*/true);
407 }
408
409 bool isValidState() const override {
410 // This state is always valid, even when the state is false.
411 return true;
412 }
413
414 const std::string getAsStr(Attributor *) const override {
415 return "AMDWorkGroupSize[" + std::to_string(getAssumed()) + "]";
416 }
417
418 /// See AbstractAttribute::trackStatistics()
419 void trackStatistics() const override {}
420};
421
422AAUniformWorkGroupSize &
423AAUniformWorkGroupSize::createForPosition(const IRPosition &IRP,
424 Attributor &A) {
426 return *new (A.Allocator) AAUniformWorkGroupSizeFunction(IRP, A);
428 "AAUniformWorkGroupSize is only valid for function position");
429}
430
431struct AAAMDAttributesFunction : public AAAMDAttributes {
// Forwards the position and the Attributor to the AAAMDAttributes base; no
// additional state is set up here.
432 AAAMDAttributesFunction(const IRPosition &IRP, Attributor &A)
433 : AAAMDAttributes(IRP, A) {}
434
435 void initialize(Attributor &A) override {
436 Function *F = getAssociatedFunction();
437
438 // If the function requires the implicit arg pointer due to sanitizers,
439 // assume it's needed even if explicitly marked as not requiring it.
440 // Flat scratch initialization is needed because `asan_malloc_impl`
441 // calls introduced later in pipeline will have flat scratch accesses.
442 // FIXME: FLAT_SCRATCH_INIT will not be required here if device-libs
443 // implementation for `asan_malloc_impl` is updated.
444 const bool HasSanitizerAttrs = hasSanitizerAttributes(*F);
445 if (HasSanitizerAttrs) {
446 removeAssumedBits(IMPLICIT_ARG_PTR);
447 removeAssumedBits(HOSTCALL_PTR);
448 removeAssumedBits(FLAT_SCRATCH_INIT);
449 }
450
451 for (auto Attr : ImplicitAttrs) {
452 if (HasSanitizerAttrs &&
453 (Attr.first == IMPLICIT_ARG_PTR || Attr.first == HOSTCALL_PTR ||
454 Attr.first == FLAT_SCRATCH_INIT))
455 continue;
456
457 if (F->hasFnAttribute(Attr.second))
458 addKnownBits(Attr.first);
459 }
460
461 if (F->isDeclaration())
462 return;
463
464 // Ignore functions with graphics calling conventions, these are currently
465 // not allowed to have kernel arguments.
466 if (AMDGPU::isGraphics(F->getCallingConv())) {
467 indicatePessimisticFixpoint();
468 return;
469 }
470 }
471
472 ChangeStatus updateImpl(Attributor &A) override {
473 Function *F = getAssociatedFunction();
474 // The current assumed state used to determine a change.
475 auto OrigAssumed = getAssumed();
476
477 // Check for Intrinsics and propagate attributes.
478 const AACallEdges *AAEdges = A.getAAFor<AACallEdges>(
479 *this, this->getIRPosition(), DepClassTy::REQUIRED);
480 if (!AAEdges || !AAEdges->isValidState() ||
481 AAEdges->hasNonAsmUnknownCallee())
482 return indicatePessimisticFixpoint();
483
484 bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(F->getCallingConv());
485
486 bool NeedsImplicit = false;
487 auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
488 bool HasApertureRegs = InfoCache.hasApertureRegs(*F);
489 bool SupportsGetDoorbellID = InfoCache.supportsGetDoorbellID(*F);
490 unsigned COV = InfoCache.getCodeObjectVersion();
491
492 for (Function *Callee : AAEdges->getOptimisticEdges()) {
493 Intrinsic::ID IID = Callee->getIntrinsicID();
494 if (IID == Intrinsic::not_intrinsic) {
495 const AAAMDAttributes *AAAMD = A.getAAFor<AAAMDAttributes>(
496 *this, IRPosition::function(*Callee), DepClassTy::REQUIRED);
497 if (!AAAMD || !AAAMD->isValidState())
498 return indicatePessimisticFixpoint();
499 *this &= *AAAMD;
500 continue;
501 }
502
503 bool NonKernelOnly = false;
504 ImplicitArgumentMask AttrMask =
505 intrinsicToAttrMask(IID, NonKernelOnly, NeedsImplicit,
506 HasApertureRegs, SupportsGetDoorbellID, COV);
507
508 if (AttrMask == UNKNOWN_INTRINSIC) {
509 // Assume not-nocallback intrinsics may invoke a function which accesses
510 // implicit arguments.
511 //
512 // FIXME: This isn't really the correct check. We want to ensure it
513 // isn't calling any function that may use implicit arguments regardless
514 // of whether it's internal to the module or not.
515 //
516 // TODO: Ignoring callsite attributes.
517 if (!Callee->hasFnAttribute(Attribute::NoCallback))
518 return indicatePessimisticFixpoint();
519 continue;
520 }
521
522 if (AttrMask != NOT_IMPLICIT_INPUT) {
523 if ((IsNonEntryFunc || !NonKernelOnly))
524 removeAssumedBits(AttrMask);
525 }
526 }
527
528 // Need implicitarg_ptr to acess queue_ptr, private_base, and shared_base.
529 if (NeedsImplicit)
530 removeAssumedBits(IMPLICIT_ARG_PTR);
531
532 if (isAssumed(QUEUE_PTR) && checkForQueuePtr(A)) {
533 // Under V5, we need implicitarg_ptr + offsets to access private_base or
534 // shared_base. We do not actually need queue_ptr.
535 if (COV >= 5)
536 removeAssumedBits(IMPLICIT_ARG_PTR);
537 else
538 removeAssumedBits(QUEUE_PTR);
539 }
540
541 if (funcRetrievesMultigridSyncArg(A, COV)) {
542 assert(!isAssumed(IMPLICIT_ARG_PTR) &&
543 "multigrid_sync_arg needs implicitarg_ptr");
544 removeAssumedBits(MULTIGRID_SYNC_ARG);
545 }
546
547 if (funcRetrievesHostcallPtr(A, COV)) {
548 assert(!isAssumed(IMPLICIT_ARG_PTR) && "hostcall needs implicitarg_ptr");
549 removeAssumedBits(HOSTCALL_PTR);
550 }
551
552 if (funcRetrievesHeapPtr(A, COV)) {
553 assert(!isAssumed(IMPLICIT_ARG_PTR) && "heap_ptr needs implicitarg_ptr");
554 removeAssumedBits(HEAP_PTR);
555 }
556
557 if (isAssumed(QUEUE_PTR) && funcRetrievesQueuePtr(A, COV)) {
558 assert(!isAssumed(IMPLICIT_ARG_PTR) && "queue_ptr needs implicitarg_ptr");
559 removeAssumedBits(QUEUE_PTR);
560 }
561
562 if (isAssumed(LDS_KERNEL_ID) && funcRetrievesLDSKernelId(A)) {
563 removeAssumedBits(LDS_KERNEL_ID);
564 }
565
566 if (isAssumed(DEFAULT_QUEUE) && funcRetrievesDefaultQueue(A, COV))
567 removeAssumedBits(DEFAULT_QUEUE);
568
569 if (isAssumed(COMPLETION_ACTION) && funcRetrievesCompletionAction(A, COV))
570 removeAssumedBits(COMPLETION_ACTION);
571
572 if (isAssumed(FLAT_SCRATCH_INIT) && needFlatScratchInit(A))
573 removeAssumedBits(FLAT_SCRATCH_INIT);
574
575 return getAssumed() != OrigAssumed ? ChangeStatus::CHANGED
576 : ChangeStatus::UNCHANGED;
577 }
578
579 ChangeStatus manifest(Attributor &A) override {
581 LLVMContext &Ctx = getAssociatedFunction()->getContext();
582
583 for (auto Attr : ImplicitAttrs) {
584 if (isKnown(Attr.first))
585 AttrList.push_back(Attribute::get(Ctx, Attr.second));
586 }
587
588 return A.manifestAttrs(getIRPosition(), AttrList,
589 /* ForceReplace */ true);
590 }
591
592 const std::string getAsStr(Attributor *) const override {
593 std::string Str;
594 raw_string_ostream OS(Str);
595 OS << "AMDInfo[";
596 for (auto Attr : ImplicitAttrs)
597 if (isAssumed(Attr.first))
598 OS << ' ' << Attr.second;
599 OS << " ]";
600 return OS.str();
601 }
602
603 /// See AbstractAttribute::trackStatistics()
604 void trackStatistics() const override {}
605
606private:
607 bool checkForQueuePtr(Attributor &A) {
608 Function *F = getAssociatedFunction();
609 bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(F->getCallingConv());
610
611 auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
612
613 bool NeedsQueuePtr = false;
614
615 auto CheckAddrSpaceCasts = [&](Instruction &I) {
616 unsigned SrcAS = static_cast<AddrSpaceCastInst &>(I).getSrcAddressSpace();
617 if (castRequiresQueuePtr(SrcAS)) {
618 NeedsQueuePtr = true;
619 return false;
620 }
621 return true;
622 };
623
624 bool HasApertureRegs = InfoCache.hasApertureRegs(*F);
625
626 // `checkForAllInstructions` is much more cheaper than going through all
627 // instructions, try it first.
628
629 // The queue pointer is not needed if aperture regs is present.
630 if (!HasApertureRegs) {
631 bool UsedAssumedInformation = false;
632 A.checkForAllInstructions(CheckAddrSpaceCasts, *this,
633 {Instruction::AddrSpaceCast},
634 UsedAssumedInformation);
635 }
636
637 // If we found that we need the queue pointer, nothing else to do.
638 if (NeedsQueuePtr)
639 return true;
640
641 if (!IsNonEntryFunc && HasApertureRegs)
642 return false;
643
644 for (BasicBlock &BB : *F) {
645 for (Instruction &I : BB) {
646 for (const Use &U : I.operands()) {
647 if (const auto *C = dyn_cast<Constant>(U)) {
648 if (InfoCache.needsQueuePtr(C, *F))
649 return true;
650 }
651 }
652 }
653 }
654
655 return false;
656 }
657
// Asks funcRetrievesImplicitKernelArg() whether the function may access the
// 8-byte range starting at `Pos`.
// NOTE(review): the declaration of `Pos` is not present in this listing;
// presumably it is the multigrid-sync-arg offset derived from COV - restore
// it from the original file.
658 bool funcRetrievesMultigridSyncArg(Attributor &A, unsigned COV) {
660 AA::RangeTy Range(Pos, 8);
661 return funcRetrievesImplicitKernelArg(A, Range);
662 }
663
// Asks funcRetrievesImplicitKernelArg() whether the function may access the
// 8-byte range starting at `Pos`.
// NOTE(review): the declaration of `Pos` is not present in this listing;
// presumably it is the hostcall-pointer offset derived from COV - restore it
// from the original file.
664 bool funcRetrievesHostcallPtr(Attributor &A, unsigned COV) {
666 AA::RangeTy Range(Pos, 8);
667 return funcRetrievesImplicitKernelArg(A, Range);
668 }
669
// Asks funcRetrievesImplicitKernelArg() whether the function may access the
// 8-byte range starting at `Pos`.
// NOTE(review): the declaration of `Pos` is not present in this listing;
// presumably it is the default-queue offset derived from COV - restore it
// from the original file.
670 bool funcRetrievesDefaultQueue(Attributor &A, unsigned COV) {
672 AA::RangeTy Range(Pos, 8);
673 return funcRetrievesImplicitKernelArg(A, Range);
674 }
675
// Asks funcRetrievesImplicitKernelArg() whether the function may access the
// 8-byte range starting at `Pos`.
// NOTE(review): the declaration of `Pos` is not present in this listing;
// presumably it is the completion-action offset derived from COV - restore
// it from the original file.
676 bool funcRetrievesCompletionAction(Attributor &A, unsigned COV) {
678 AA::RangeTy Range(Pos, 8);
679 return funcRetrievesImplicitKernelArg(A, Range);
680 }
681
// Always false below code object version 5; otherwise asks
// funcRetrievesImplicitKernelArg() about `Range`.
// NOTE(review): the declaration of `Range` is not present in this listing;
// presumably it covers the heap-pointer slot of the implicit arguments -
// restore it from the original file.
682 bool funcRetrievesHeapPtr(Attributor &A, unsigned COV) {
683 if (COV < 5)
684 return false;
686 return funcRetrievesImplicitKernelArg(A, Range);
687 }
688
// Always false below code object version 5; otherwise asks
// funcRetrievesImplicitKernelArg() about `Range`.
// NOTE(review): the declaration of `Range` is not present in this listing;
// presumably it covers the queue-pointer slot of the implicit arguments -
// restore it from the original file.
689 bool funcRetrievesQueuePtr(Attributor &A, unsigned COV) {
690 if (COV < 5)
691 return false;
693 return funcRetrievesImplicitKernelArg(A, Range);
694 }
695
696 bool funcRetrievesImplicitKernelArg(Attributor &A, AA::RangeTy Range) {
697 // Check if this is a call to the implicitarg_ptr builtin and it
698 // is used to retrieve the hostcall pointer. The implicit arg for
699 // hostcall is not used only if every use of the implicitarg_ptr
700 // is a load that clearly does not retrieve any byte of the
701 // hostcall pointer. We check this by tracing all the uses of the
702 // initial call to the implicitarg_ptr intrinsic.
703 auto DoesNotLeadToKernelArgLoc = [&](Instruction &I) {
704 auto &Call = cast<CallBase>(I);
705 if (Call.getIntrinsicID() != Intrinsic::amdgcn_implicitarg_ptr)
706 return true;
707
708 const auto *PointerInfoAA = A.getAAFor<AAPointerInfo>(
709 *this, IRPosition::callsite_returned(Call), DepClassTy::REQUIRED);
710 if (!PointerInfoAA || !PointerInfoAA->getState().isValidState())
711 return false;
712
713 return PointerInfoAA->forallInterferingAccesses(
714 Range, [](const AAPointerInfo::Access &Acc, bool IsExact) {
715 return Acc.getRemoteInst()->isDroppable();
716 });
717 };
718
719 bool UsedAssumedInformation = false;
720 return !A.checkForAllCallLikeInstructions(DoesNotLeadToKernelArgLoc, *this,
721 UsedAssumedInformation);
722 }
723
724 bool funcRetrievesLDSKernelId(Attributor &A) {
725 auto DoesNotRetrieve = [&](Instruction &I) {
726 auto &Call = cast<CallBase>(I);
727 return Call.getIntrinsicID() != Intrinsic::amdgcn_lds_kernel_id;
728 };
729 bool UsedAssumedInformation = false;
730 return !A.checkForAllCallLikeInstructions(DoesNotRetrieve, *this,
731 UsedAssumedInformation);
732 }
733
734 // Returns true if FlatScratchInit is needed, i.e., no-flat-scratch-init is
735 // not to be set.
736 bool needFlatScratchInit(Attributor &A) {
737 assert(isAssumed(FLAT_SCRATCH_INIT)); // only called if the bit is still set
738
739 // Check all AddrSpaceCast instructions. FlatScratchInit is needed if
740 // there is a cast from PRIVATE_ADDRESS.
741 auto AddrSpaceCastNotFromPrivate = [](Instruction &I) {
742 return cast<AddrSpaceCastInst>(I).getSrcAddressSpace() !=
744 };
745
746 bool UsedAssumedInformation = false;
747 if (!A.checkForAllInstructions(AddrSpaceCastNotFromPrivate, *this,
748 {Instruction::AddrSpaceCast},
749 UsedAssumedInformation))
750 return true;
751
752 // Check for addrSpaceCast from PRIVATE_ADDRESS in constant expressions
753 auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
754
755 Function *F = getAssociatedFunction();
756 for (Instruction &I : instructions(F)) {
757 for (const Use &U : I.operands()) {
758 if (const auto *C = dyn_cast<Constant>(U)) {
759 if (InfoCache.checkConstForAddrSpaceCastFromPrivate(C))
760 return true;
761 }
762 }
763 }
764
765 // Finally check callees.
766
767 // This is called on each callee; false means callee shouldn't have
768 // no-flat-scratch-init.
769 auto CheckForNoFlatScratchInit = [&](Instruction &I) {
770 const auto &CB = cast<CallBase>(I);
771 const Function *Callee = CB.getCalledFunction();
772
773 // Callee == 0 for inline asm or indirect call with known callees.
774 // In the latter case, updateImpl() already checked the callees and we
775 // know their FLAT_SCRATCH_INIT bit is set.
776 // If function has indirect call with unknown callees, the bit is
777 // already removed in updateImpl() and execution won't reach here.
778 if (!Callee)
779 return true;
780
781 return Callee->getIntrinsicID() !=
782 Intrinsic::amdgcn_addrspacecast_nonnull;
783 };
784
785 UsedAssumedInformation = false;
786 // If any callee is false (i.e. need FlatScratchInit),
787 // checkForAllCallLikeInstructions returns false, in which case this
788 // function returns true.
789 return !A.checkForAllCallLikeInstructions(CheckForNoFlatScratchInit, *this,
790 UsedAssumedInformation);
791 }
792};
793
794AAAMDAttributes &AAAMDAttributes::createForPosition(const IRPosition &IRP,
795 Attributor &A) {
797 return *new (A.Allocator) AAAMDAttributesFunction(IRP, A);
798 llvm_unreachable("AAAMDAttributes is only valid for function position");
799}
800
801/// Base class to derive different size ranges.
802struct AAAMDSizeRangeAttribute
803 : public StateWrapper<IntegerRangeState, AbstractAttribute, uint32_t> {
804 using Base = StateWrapper<IntegerRangeState, AbstractAttribute, uint32_t>;
805
806 StringRef AttrName;
807
808 AAAMDSizeRangeAttribute(const IRPosition &IRP, Attributor &A,
809 StringRef AttrName)
810 : Base(IRP, 32), AttrName(AttrName) {}
811
812 /// See AbstractAttribute::trackStatistics()
813 void trackStatistics() const override {}
814
815 template <class AttributeImpl> ChangeStatus updateImplImpl(Attributor &A) {
816 ChangeStatus Change = ChangeStatus::UNCHANGED;
817
818 auto CheckCallSite = [&](AbstractCallSite CS) {
819 Function *Caller = CS.getInstruction()->getFunction();
820 LLVM_DEBUG(dbgs() << '[' << getName() << "] Call " << Caller->getName()
821 << "->" << getAssociatedFunction()->getName() << '\n');
822
823 const auto *CallerInfo = A.getAAFor<AttributeImpl>(
824 *this, IRPosition::function(*Caller), DepClassTy::REQUIRED);
825 if (!CallerInfo || !CallerInfo->isValidState())
826 return false;
827
828 Change |=
829 clampStateAndIndicateChange(this->getState(), CallerInfo->getState());
830
831 return true;
832 };
833
834 bool AllCallSitesKnown = true;
835 if (!A.checkForAllCallSites(CheckCallSite, *this,
836 /*RequireAllCallSites=*/true,
837 AllCallSitesKnown))
838 return indicatePessimisticFixpoint();
839
840 return Change;
841 }
842
843 /// Clamp the assumed range to the default value ([Min, Max]) and emit the
844 /// attribute if it is not same as default.
846 emitAttributeIfNotDefaultAfterClamp(Attributor &A,
847 std::pair<unsigned, unsigned> Default) {
848 auto [Min, Max] = Default;
849 unsigned Lower = getAssumed().getLower().getZExtValue();
850 unsigned Upper = getAssumed().getUpper().getZExtValue();
851
852 // Clamp the range to the default value.
853 if (Lower < Min)
854 Lower = Min;
855 if (Upper > Max + 1)
856 Upper = Max + 1;
857
858 // No manifest if the value is invalid or same as default after clamp.
859 if ((Lower == Min && Upper == Max + 1) || (Upper < Lower))
860 return ChangeStatus::UNCHANGED;
861
862 Function *F = getAssociatedFunction();
863 LLVMContext &Ctx = F->getContext();
864 SmallString<10> Buffer;
865 raw_svector_ostream OS(Buffer);
866 OS << Lower << ',' << Upper - 1;
867 return A.manifestAttrs(getIRPosition(),
868 {Attribute::get(Ctx, AttrName, OS.str())},
869 /*ForceReplace=*/true);
870 }
871
872 const std::string getAsStr(Attributor *) const override {
873 std::string Str;
874 raw_string_ostream OS(Str);
875 OS << getName() << '[';
876 OS << getAssumed().getLower() << ',' << getAssumed().getUpper() - 1;
877 OS << ']';
878 return OS.str();
879 }
880};
881
882/// Propagate amdgpu-flat-work-group-size attribute.
883struct AAAMDFlatWorkGroupSize : public AAAMDSizeRangeAttribute {
884 AAAMDFlatWorkGroupSize(const IRPosition &IRP, Attributor &A)
885 : AAAMDSizeRangeAttribute(IRP, A, "amdgpu-flat-work-group-size") {}
886
887 void initialize(Attributor &A) override {
888 Function *F = getAssociatedFunction();
889 auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
890
891 bool HasAttr = false;
892 auto Range = InfoCache.getDefaultFlatWorkGroupSize(*F);
893 auto MaxRange = InfoCache.getMaximumFlatWorkGroupRange(*F);
894
895 if (auto Attr = InfoCache.getFlatWorkGroupSizeAttr(*F)) {
896 // We only consider an attribute that is not max range because the front
897 // end always emits the attribute, unfortunately, and sometimes it emits
898 // the max range.
899 if (*Attr != MaxRange) {
900 Range = *Attr;
901 HasAttr = true;
902 }
903 }
904
905 // We don't want to directly clamp the state if it's the max range because
906 // that is basically the worst state.
907 if (Range == MaxRange)
908 return;
909
910 auto [Min, Max] = Range;
911 ConstantRange CR(APInt(32, Min), APInt(32, Max + 1));
912 IntegerRangeState IRS(CR);
913 clampStateAndIndicateChange(this->getState(), IRS);
914
915 if (HasAttr || AMDGPU::isEntryFunctionCC(F->getCallingConv()))
916 indicateOptimisticFixpoint();
917 }
918
919 ChangeStatus updateImpl(Attributor &A) override {
920 return updateImplImpl<AAAMDFlatWorkGroupSize>(A);
921 }
922
923 /// Create an abstract attribute view for the position \p IRP.
924 static AAAMDFlatWorkGroupSize &createForPosition(const IRPosition &IRP,
925 Attributor &A);
926
927 ChangeStatus manifest(Attributor &A) override {
928 Function *F = getAssociatedFunction();
929 auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
930 return emitAttributeIfNotDefaultAfterClamp(
931 A, InfoCache.getMaximumFlatWorkGroupRange(*F));
932 }
933
934 /// See AbstractAttribute::getName()
935 StringRef getName() const override { return "AAAMDFlatWorkGroupSize"; }
936
937 /// See AbstractAttribute::getIdAddr()
938 const char *getIdAddr() const override { return &ID; }
939
940 /// This function should return true if the type of the \p AA is
941 /// AAAMDFlatWorkGroupSize
942 static bool classof(const AbstractAttribute *AA) {
943 return (AA->getIdAddr() == &ID);
944 }
945
946 /// Unique ID (due to the unique address)
947 static const char ID;
948};
949
950const char AAAMDFlatWorkGroupSize::ID = 0;
951
952AAAMDFlatWorkGroupSize &
953AAAMDFlatWorkGroupSize::createForPosition(const IRPosition &IRP,
954 Attributor &A) {
956 return *new (A.Allocator) AAAMDFlatWorkGroupSize(IRP, A);
958 "AAAMDFlatWorkGroupSize is only valid for function position");
959}
960
961struct TupleDecIntegerRangeState : public AbstractState {
962 DecIntegerState<uint32_t> X, Y, Z;
963
964 bool isValidState() const override {
965 return X.isValidState() && Y.isValidState() && Z.isValidState();
966 }
967
968 bool isAtFixpoint() const override {
969 return X.isAtFixpoint() && Y.isAtFixpoint() && Z.isAtFixpoint();
970 }
971
972 ChangeStatus indicateOptimisticFixpoint() override {
973 return X.indicateOptimisticFixpoint() | Y.indicateOptimisticFixpoint() |
974 Z.indicateOptimisticFixpoint();
975 }
976
977 ChangeStatus indicatePessimisticFixpoint() override {
978 return X.indicatePessimisticFixpoint() | Y.indicatePessimisticFixpoint() |
979 Z.indicatePessimisticFixpoint();
980 }
981
982 TupleDecIntegerRangeState operator^=(const TupleDecIntegerRangeState &Other) {
983 X ^= Other.X;
984 Y ^= Other.Y;
985 Z ^= Other.Z;
986 return *this;
987 }
988
989 bool operator==(const TupleDecIntegerRangeState &Other) const {
990 return X == Other.X && Y == Other.Y && Z == Other.Z;
991 }
992
993 TupleDecIntegerRangeState &getAssumed() { return *this; }
994 const TupleDecIntegerRangeState &getAssumed() const { return *this; }
995};
996
997using AAAMDMaxNumWorkgroupsState =
998 StateWrapper<TupleDecIntegerRangeState, AbstractAttribute, uint32_t>;
999
1000/// Propagate amdgpu-max-num-workgroups attribute.
1001struct AAAMDMaxNumWorkgroups
1002 : public StateWrapper<TupleDecIntegerRangeState, AbstractAttribute> {
1003 using Base = StateWrapper<TupleDecIntegerRangeState, AbstractAttribute>;
1004
1005 AAAMDMaxNumWorkgroups(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
1006
1007 void initialize(Attributor &A) override {
1008 Function *F = getAssociatedFunction();
1009 auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
1010
1011 SmallVector<unsigned> MaxNumWorkgroups = InfoCache.getMaxNumWorkGroups(*F);
1012
1013 X.takeKnownMinimum(MaxNumWorkgroups[0]);
1014 Y.takeKnownMinimum(MaxNumWorkgroups[1]);
1015 Z.takeKnownMinimum(MaxNumWorkgroups[2]);
1016
1017 if (AMDGPU::isEntryFunctionCC(F->getCallingConv()))
1018 indicatePessimisticFixpoint();
1019 }
1020
1021 ChangeStatus updateImpl(Attributor &A) override {
1022 ChangeStatus Change = ChangeStatus::UNCHANGED;
1023
1024 auto CheckCallSite = [&](AbstractCallSite CS) {
1025 Function *Caller = CS.getInstruction()->getFunction();
1026 LLVM_DEBUG(dbgs() << "[AAAMDMaxNumWorkgroups] Call " << Caller->getName()
1027 << "->" << getAssociatedFunction()->getName() << '\n');
1028
1029 const auto *CallerInfo = A.getAAFor<AAAMDMaxNumWorkgroups>(
1030 *this, IRPosition::function(*Caller), DepClassTy::REQUIRED);
1031 if (!CallerInfo || !CallerInfo->isValidState())
1032 return false;
1033
1034 Change |=
1035 clampStateAndIndicateChange(this->getState(), CallerInfo->getState());
1036 return true;
1037 };
1038
1039 bool AllCallSitesKnown = true;
1040 if (!A.checkForAllCallSites(CheckCallSite, *this,
1041 /*RequireAllCallSites=*/true,
1042 AllCallSitesKnown))
1043 return indicatePessimisticFixpoint();
1044
1045 return Change;
1046 }
1047
1048 /// Create an abstract attribute view for the position \p IRP.
1049 static AAAMDMaxNumWorkgroups &createForPosition(const IRPosition &IRP,
1050 Attributor &A);
1051
1052 ChangeStatus manifest(Attributor &A) override {
1053 Function *F = getAssociatedFunction();
1054 LLVMContext &Ctx = F->getContext();
1055 SmallString<32> Buffer;
1056 raw_svector_ostream OS(Buffer);
1057 OS << X.getAssumed() << ',' << Y.getAssumed() << ',' << Z.getAssumed();
1058
1059 // TODO: Should annotate loads of the group size for this to do anything
1060 // useful.
1061 return A.manifestAttrs(
1062 getIRPosition(),
1063 {Attribute::get(Ctx, "amdgpu-max-num-workgroups", OS.str())},
1064 /* ForceReplace= */ true);
1065 }
1066
1067 StringRef getName() const override { return "AAAMDMaxNumWorkgroups"; }
1068
1069 const std::string getAsStr(Attributor *) const override {
1070 std::string Buffer = "AAAMDMaxNumWorkgroupsState[";
1071 raw_string_ostream OS(Buffer);
1072 OS << X.getAssumed() << ',' << Y.getAssumed() << ',' << Z.getAssumed()
1073 << ']';
1074 return OS.str();
1075 }
1076
1077 const char *getIdAddr() const override { return &ID; }
1078
1079 /// This function should return true if the type of the \p AA is
1080 /// AAAMDMaxNumWorkgroups
1081 static bool classof(const AbstractAttribute *AA) {
1082 return (AA->getIdAddr() == &ID);
1083 }
1084
1085 void trackStatistics() const override {}
1086
1087 /// Unique ID (due to the unique address)
1088 static const char ID;
1089};
1090
1091const char AAAMDMaxNumWorkgroups::ID = 0;
1092
1093AAAMDMaxNumWorkgroups &
1094AAAMDMaxNumWorkgroups::createForPosition(const IRPosition &IRP, Attributor &A) {
1096 return *new (A.Allocator) AAAMDMaxNumWorkgroups(IRP, A);
1097 llvm_unreachable("AAAMDMaxNumWorkgroups is only valid for function position");
1098}
1099
1100/// Propagate amdgpu-waves-per-eu attribute.
1101struct AAAMDWavesPerEU : public AAAMDSizeRangeAttribute {
1102 AAAMDWavesPerEU(const IRPosition &IRP, Attributor &A)
1103 : AAAMDSizeRangeAttribute(IRP, A, "amdgpu-waves-per-eu") {}
1104
1105 void initialize(Attributor &A) override {
1106 Function *F = getAssociatedFunction();
1107 auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
1108
1109 // If the attribute exists, we will honor it if it is not the default.
1110 if (auto Attr = InfoCache.getWavesPerEUAttr(*F)) {
1111 std::pair<unsigned, unsigned> MaxWavesPerEURange{
1112 1U, InfoCache.getMaxWavesPerEU(*F)};
1113 if (*Attr != MaxWavesPerEURange) {
1114 auto [Min, Max] = *Attr;
1115 ConstantRange Range(APInt(32, Min), APInt(32, Max + 1));
1116 IntegerRangeState RangeState(Range);
1117 this->getState() = RangeState;
1118 indicateOptimisticFixpoint();
1119 return;
1120 }
1121 }
1122
1123 if (AMDGPU::isEntryFunctionCC(F->getCallingConv()))
1124 indicatePessimisticFixpoint();
1125 }
1126
1127 ChangeStatus updateImpl(Attributor &A) override {
1128 ChangeStatus Change = ChangeStatus::UNCHANGED;
1129
1130 auto CheckCallSite = [&](AbstractCallSite CS) {
1131 Function *Caller = CS.getInstruction()->getFunction();
1132 Function *Func = getAssociatedFunction();
1133 LLVM_DEBUG(dbgs() << '[' << getName() << "] Call " << Caller->getName()
1134 << "->" << Func->getName() << '\n');
1135 (void)Func;
1136
1137 const auto *CallerAA = A.getAAFor<AAAMDWavesPerEU>(
1138 *this, IRPosition::function(*Caller), DepClassTy::REQUIRED);
1139 if (!CallerAA || !CallerAA->isValidState())
1140 return false;
1141
1142 ConstantRange Assumed = getAssumed();
1143 unsigned Min = std::max(Assumed.getLower().getZExtValue(),
1144 CallerAA->getAssumed().getLower().getZExtValue());
1145 unsigned Max = std::max(Assumed.getUpper().getZExtValue(),
1146 CallerAA->getAssumed().getUpper().getZExtValue());
1147 ConstantRange Range(APInt(32, Min), APInt(32, Max));
1148 IntegerRangeState RangeState(Range);
1149 getState() = RangeState;
1150 Change |= getState() == Assumed ? ChangeStatus::UNCHANGED
1151 : ChangeStatus::CHANGED;
1152
1153 return true;
1154 };
1155
1156 bool AllCallSitesKnown = true;
1157 if (!A.checkForAllCallSites(CheckCallSite, *this, true, AllCallSitesKnown))
1158 return indicatePessimisticFixpoint();
1159
1160 return Change;
1161 }
1162
1163 /// Create an abstract attribute view for the position \p IRP.
1164 static AAAMDWavesPerEU &createForPosition(const IRPosition &IRP,
1165 Attributor &A);
1166
1167 ChangeStatus manifest(Attributor &A) override {
1168 Function *F = getAssociatedFunction();
1169 auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
1170 return emitAttributeIfNotDefaultAfterClamp(
1171 A, {1U, InfoCache.getMaxWavesPerEU(*F)});
1172 }
1173
1174 /// See AbstractAttribute::getName()
1175 StringRef getName() const override { return "AAAMDWavesPerEU"; }
1176
1177 /// See AbstractAttribute::getIdAddr()
1178 const char *getIdAddr() const override { return &ID; }
1179
1180 /// This function should return true if the type of the \p AA is
1181 /// AAAMDWavesPerEU
1182 static bool classof(const AbstractAttribute *AA) {
1183 return (AA->getIdAddr() == &ID);
1184 }
1185
1186 /// Unique ID (due to the unique address)
1187 static const char ID;
1188};
1189
1190const char AAAMDWavesPerEU::ID = 0;
1191
1192AAAMDWavesPerEU &AAAMDWavesPerEU::createForPosition(const IRPosition &IRP,
1193 Attributor &A) {
1195 return *new (A.Allocator) AAAMDWavesPerEU(IRP, A);
1196 llvm_unreachable("AAAMDWavesPerEU is only valid for function position");
1197}
1198
1199/// Compute the minimum number of AGPRs required to allocate the inline asm.
1200static unsigned inlineAsmGetNumRequiredAGPRs(const InlineAsm *IA,
1201 const CallBase &Call) {
1202 unsigned ArgNo = 0;
1203 unsigned ResNo = 0;
1204 unsigned AGPRDefCount = 0;
1205 unsigned AGPRUseCount = 0;
1206 unsigned MaxPhysReg = 0;
1207 const DataLayout &DL = Call.getFunction()->getParent()->getDataLayout();
1208
1209 // TODO: Overestimates due to not accounting for tied operands
1210 for (const InlineAsm::ConstraintInfo &CI : IA->ParseConstraints()) {
1211 Type *Ty = nullptr;
1212 switch (CI.Type) {
1213 case InlineAsm::isOutput: {
1214 Ty = Call.getType();
1215 if (auto *STy = dyn_cast<StructType>(Ty))
1216 Ty = STy->getElementType(ResNo);
1217 ++ResNo;
1218 break;
1219 }
1220 case InlineAsm::isInput: {
1221 Ty = Call.getArgOperand(ArgNo++)->getType();
1222 break;
1223 }
1224 case InlineAsm::isLabel:
1225 continue;
1227 // Parse the physical register reference.
1228 break;
1229 }
1230
1231 for (StringRef Code : CI.Codes) {
1232 unsigned RegCount = 0;
1233 if (Code.starts_with("a")) {
1234 // Virtual register, compute number of registers based on the type.
1235 //
1236 // We ought to be going through TargetLowering to get the number of
1237 // registers, but we should avoid the dependence on CodeGen here.
1238 RegCount = divideCeil(DL.getTypeSizeInBits(Ty), 32);
1239 } else {
1240 // Physical register reference
1241 auto [Kind, RegIdx, NumRegs] = AMDGPU::parseAsmConstraintPhysReg(Code);
1242 if (Kind == 'a') {
1243 RegCount = NumRegs;
1244 MaxPhysReg = std::max(MaxPhysReg, std::min(RegIdx + NumRegs, 256u));
1245 }
1246
1247 continue;
1248 }
1249
1250 if (CI.Type == InlineAsm::isOutput) {
1251 // Apply tuple alignment requirement
1252 //
1253 // TODO: This is more conservative than necessary.
1254 AGPRDefCount = alignTo(AGPRDefCount, RegCount);
1255
1256 AGPRDefCount += RegCount;
1257 if (CI.isEarlyClobber) {
1258 AGPRUseCount = alignTo(AGPRUseCount, RegCount);
1259 AGPRUseCount += RegCount;
1260 }
1261 } else {
1262 AGPRUseCount = alignTo(AGPRUseCount, RegCount);
1263 AGPRUseCount += RegCount;
1264 }
1265 }
1266 }
1267
1268 unsigned MaxVirtReg = std::max(AGPRUseCount, AGPRDefCount);
1269
1270 // TODO: This is overly conservative. If there are any physical registers,
1271 // allocate any virtual registers after them so we don't have to solve optimal
1272 // packing.
1273 return std::min(MaxVirtReg + MaxPhysReg, 256u);
1274}
1275
1276struct AAAMDGPUMinAGPRAlloc
1277 : public StateWrapper<DecIntegerState<>, AbstractAttribute> {
1278 using Base = StateWrapper<DecIntegerState<>, AbstractAttribute>;
1279 AAAMDGPUMinAGPRAlloc(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
1280
1281 static AAAMDGPUMinAGPRAlloc &createForPosition(const IRPosition &IRP,
1282 Attributor &A) {
1284 return *new (A.Allocator) AAAMDGPUMinAGPRAlloc(IRP, A);
1286 "AAAMDGPUMinAGPRAlloc is only valid for function position");
1287 }
1288
1289 void initialize(Attributor &A) override {
1290 Function *F = getAssociatedFunction();
1291 auto [MinNumAGPR, MaxNumAGPR] =
1292 AMDGPU::getIntegerPairAttribute(*F, "amdgpu-agpr-alloc", {~0u, ~0u},
1293 /*OnlyFirstRequired=*/true);
1294 if (MinNumAGPR == 0)
1295 indicateOptimisticFixpoint();
1296 }
1297
1298 const std::string getAsStr(Attributor *A) const override {
1299 std::string Str = "amdgpu-agpr-alloc=";
1300 raw_string_ostream OS(Str);
1301 OS << getAssumed();
1302 return OS.str();
1303 }
1304
1305 void trackStatistics() const override {}
1306
1307 ChangeStatus updateImpl(Attributor &A) override {
1308 DecIntegerState<> Maximum;
1309
1310 // Check for cases which require allocation of AGPRs. The only cases where
1311 // AGPRs are required are if there are direct references to AGPRs, so inline
1312 // assembly and special intrinsics.
1313 auto CheckForMinAGPRAllocs = [&](Instruction &I) {
1314 const auto &CB = cast<CallBase>(I);
1315 const Value *CalleeOp = CB.getCalledOperand();
1316
1317 if (const InlineAsm *IA = dyn_cast<InlineAsm>(CalleeOp)) {
1318 // Technically, the inline asm could be invoking a call to an unknown
1319 // external function that requires AGPRs, but ignore that.
1320 unsigned NumRegs = inlineAsmGetNumRequiredAGPRs(IA, CB);
1321 Maximum.takeAssumedMaximum(NumRegs);
1322 return true;
1323 }
1324 switch (CB.getIntrinsicID()) {
1326 break;
1327 case Intrinsic::write_register:
1328 case Intrinsic::read_register:
1329 case Intrinsic::read_volatile_register: {
1330 const MDString *RegName = cast<MDString>(
1332 cast<MetadataAsValue>(CB.getArgOperand(0))->getMetadata())
1333 ->getOperand(0));
1334 auto [Kind, RegIdx, NumRegs] =
1336 if (Kind == 'a')
1337 Maximum.takeAssumedMaximum(std::min(RegIdx + NumRegs, 256u));
1338
1339 return true;
1340 }
1341 // Trap-like intrinsics such as llvm.trap and llvm.debugtrap do not have
1342 // the nocallback attribute, so the AMDGPU attributor can conservatively
1343 // drop all implicitly-known inputs and AGPR allocation information. Make
1344 // sure we still infer that no implicit inputs are required and that the
1345 // AGPR allocation stays at zero. Trap-like intrinsics may invoke a
1346 // function which requires AGPRs, so we need to check if the called
1347 // function has the "trap-func-name" attribute.
1348 case Intrinsic::trap:
1349 case Intrinsic::debugtrap:
1350 case Intrinsic::ubsantrap:
1351 return CB.hasFnAttr(Attribute::NoCallback) ||
1352 !CB.hasFnAttr("trap-func-name");
1353 default:
1354 // Some intrinsics may use AGPRs, but if we have a choice, we are not
1355 // required to use AGPRs.
1356 // Assume !nocallback intrinsics may call a function which requires
1357 // AGPRs.
1358 return CB.hasFnAttr(Attribute::NoCallback);
1359 }
1360
1361 // TODO: Handle callsite attributes
1362 auto *CBEdges = A.getAAFor<AACallEdges>(
1363 *this, IRPosition::callsite_function(CB), DepClassTy::REQUIRED);
1364 if (!CBEdges || CBEdges->hasUnknownCallee()) {
1366 return false;
1367 }
1368
1369 for (const Function *PossibleCallee : CBEdges->getOptimisticEdges()) {
1370 const auto *CalleeInfo = A.getAAFor<AAAMDGPUMinAGPRAlloc>(
1371 *this, IRPosition::function(*PossibleCallee), DepClassTy::REQUIRED);
1372 if (!CalleeInfo || !CalleeInfo->isValidState()) {
1374 return false;
1375 }
1376
1377 Maximum.takeAssumedMaximum(CalleeInfo->getAssumed());
1378 }
1379
1380 return true;
1381 };
1382
1383 bool UsedAssumedInformation = false;
1384 if (!A.checkForAllCallLikeInstructions(CheckForMinAGPRAllocs, *this,
1385 UsedAssumedInformation))
1386 return indicatePessimisticFixpoint();
1387
1388 return clampStateAndIndicateChange(getState(), Maximum);
1389 }
1390
1391 ChangeStatus manifest(Attributor &A) override {
1392 LLVMContext &Ctx = getAssociatedFunction()->getContext();
1393 SmallString<4> Buffer;
1394 raw_svector_ostream OS(Buffer);
1395 OS << getAssumed();
1396
1397 return A.manifestAttrs(
1398 getIRPosition(), {Attribute::get(Ctx, "amdgpu-agpr-alloc", OS.str())});
1399 }
1400
1401 StringRef getName() const override { return "AAAMDGPUMinAGPRAlloc"; }
1402 const char *getIdAddr() const override { return &ID; }
1403
1404 /// This function should return true if the type of the \p AA is
1405 /// AAAMDGPUMinAGPRAllocs
1406 static bool classof(const AbstractAttribute *AA) {
1407 return (AA->getIdAddr() == &ID);
1408 }
1409
1410 static const char ID;
1411};
1412
1413const char AAAMDGPUMinAGPRAlloc::ID = 0;
1414
1415/// An abstract attribute to propagate the function attribute
1416/// "amdgpu-cluster-dims" from kernel entry functions to device functions.
1417struct AAAMDGPUClusterDims
1418 : public StateWrapper<BooleanState, AbstractAttribute> {
1419 using Base = StateWrapper<BooleanState, AbstractAttribute>;
1420 AAAMDGPUClusterDims(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
1421
1422 /// Create an abstract attribute view for the position \p IRP.
1423 static AAAMDGPUClusterDims &createForPosition(const IRPosition &IRP,
1424 Attributor &A);
1425
1426 /// See AbstractAttribute::getName().
1427 StringRef getName() const override { return "AAAMDGPUClusterDims"; }
1428
1429 /// See AbstractAttribute::getIdAddr().
1430 const char *getIdAddr() const override { return &ID; }
1431
1432 /// This function should return true if the type of the \p AA is
1433 /// AAAMDGPUClusterDims.
1434 static bool classof(const AbstractAttribute *AA) {
1435 return AA->getIdAddr() == &ID;
1436 }
1437
1438 virtual const AMDGPU::ClusterDimsAttr &getClusterDims() const = 0;
1439
1440 /// Unique ID (due to the unique address)
1441 static const char ID;
1442};
1443
1444const char AAAMDGPUClusterDims::ID = 0;
1445
1446struct AAAMDGPUClusterDimsFunction : public AAAMDGPUClusterDims {
1447 AAAMDGPUClusterDimsFunction(const IRPosition &IRP, Attributor &A)
1448 : AAAMDGPUClusterDims(IRP, A) {}
1449
1450 void initialize(Attributor &A) override {
1451 Function *F = getAssociatedFunction();
1452 assert(F && "empty associated function");
1453
1455
1456 // No matter what a kernel function has, it is final.
1457 if (AMDGPU::isEntryFunctionCC(F->getCallingConv())) {
1458 if (Attr.isUnknown())
1459 indicatePessimisticFixpoint();
1460 else
1461 indicateOptimisticFixpoint();
1462 }
1463 }
1464
1465 const std::string getAsStr(Attributor *A) const override {
1466 if (!getAssumed() || Attr.isUnknown())
1467 return "unknown";
1468 if (Attr.isNoCluster())
1469 return "no";
1470 if (Attr.isVariableDims())
1471 return "variable";
1472 return Attr.to_string();
1473 }
1474
1475 void trackStatistics() const override {}
1476
1477 ChangeStatus updateImpl(Attributor &A) override {
1478 auto OldState = Attr;
1479
1480 auto CheckCallSite = [&](AbstractCallSite CS) {
1481 const auto *CallerAA = A.getAAFor<AAAMDGPUClusterDims>(
1482 *this, IRPosition::function(*CS.getInstruction()->getFunction()),
1483 DepClassTy::REQUIRED);
1484 if (!CallerAA || !CallerAA->isValidState())
1485 return false;
1486
1487 return merge(CallerAA->getClusterDims());
1488 };
1489
1490 bool UsedAssumedInformation = false;
1491 if (!A.checkForAllCallSites(CheckCallSite, *this,
1492 /*RequireAllCallSites=*/true,
1493 UsedAssumedInformation))
1494 return indicatePessimisticFixpoint();
1495
1496 return OldState == Attr ? ChangeStatus::UNCHANGED : ChangeStatus::CHANGED;
1497 }
1498
1499 ChangeStatus manifest(Attributor &A) override {
1500 if (Attr.isUnknown())
1501 return ChangeStatus::UNCHANGED;
1502 return A.manifestAttrs(
1503 getIRPosition(),
1504 {Attribute::get(getAssociatedFunction()->getContext(), AttrName,
1505 Attr.to_string())},
1506 /*ForceReplace=*/true);
1507 }
1508
1509 const AMDGPU::ClusterDimsAttr &getClusterDims() const override {
1510 return Attr;
1511 }
1512
1513private:
1514 bool merge(const AMDGPU::ClusterDimsAttr &Other) {
1515 // Case 1: Both of them are unknown yet, we do nothing and continue wait for
1516 // propagation.
1517 if (Attr.isUnknown() && Other.isUnknown())
1518 return true;
1519
1520 // Case 2: The other is determined, but we are unknown yet, we simply take
1521 // the other's value.
1522 if (Attr.isUnknown()) {
1523 Attr = Other;
1524 return true;
1525 }
1526
1527 // Case 3: We are determined but the other is unknown yet, we simply keep
1528 // everything unchanged.
1529 if (Other.isUnknown())
1530 return true;
1531
1532 // After this point, both are determined.
1533
1534 // Case 4: If they are same, we do nothing.
1535 if (Attr == Other)
1536 return true;
1537
1538 // Now they are not same.
1539
1540 // Case 5: If either of us uses cluster (but not both; otherwise case 4
1541 // would hold), then it is unknown whether cluster will be used, and the
1542 // state is final, unlike case 1.
1543 if (Attr.isNoCluster() || Other.isNoCluster()) {
1544 Attr.setUnknown();
1545 return false;
1546 }
1547
1548 // Case 6: Both of us use cluster, but the dims are different, so the result
1549 // is, cluster is used, but we just don't have a fixed dims.
1550 Attr.setVariableDims();
1551 return true;
1552 }
1553
1554 AMDGPU::ClusterDimsAttr Attr;
1555
1556 static constexpr char AttrName[] = "amdgpu-cluster-dims";
1557};
1558
1559AAAMDGPUClusterDims &
1560AAAMDGPUClusterDims::createForPosition(const IRPosition &IRP, Attributor &A) {
1562 return *new (A.Allocator) AAAMDGPUClusterDimsFunction(IRP, A);
1563 llvm_unreachable("AAAMDGPUClusterDims is only valid for function position");
1564}
1565
1566static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
1567 AMDGPUAttributorOptions Options,
1568 ThinOrFullLTOPhase LTOPhase) {
1569 SetVector<Function *> Functions;
1570 for (Function &F : M) {
1571 if (!F.isIntrinsic())
1572 Functions.insert(&F);
1573 }
1574
1575 CallGraphUpdater CGUpdater;
1577 AMDGPUInformationCache InfoCache(M, AG, Allocator, nullptr, TM);
1578 DenseSet<const char *> Allowed(
1579 {&AAAMDAttributes::ID, &AAUniformWorkGroupSize::ID,
1580 &AAPotentialValues::ID, &AAAMDFlatWorkGroupSize::ID,
1581 &AAAMDMaxNumWorkgroups::ID, &AAAMDWavesPerEU::ID,
1582 &AAAMDGPUMinAGPRAlloc::ID, &AACallEdges::ID, &AAPointerInfo::ID,
1585 &AAAMDGPUClusterDims::ID, &AAAlign::ID});
1586
1587 AttributorConfig AC(CGUpdater);
1588 AC.IsClosedWorldModule = Options.IsClosedWorld;
1589 AC.Allowed = &Allowed;
1590 AC.IsModulePass = true;
1591 AC.DefaultInitializeLiveInternals = false;
1592 AC.IndirectCalleeSpecializationCallback =
1593 [](Attributor &A, const AbstractAttribute &AA, CallBase &CB,
1594 Function &Callee, unsigned NumAssumedCallees) {
1595 return !AMDGPU::isEntryFunctionCC(Callee.getCallingConv()) &&
1596 (NumAssumedCallees <= IndirectCallSpecializationThreshold);
1597 };
1598 AC.IPOAmendableCB = [](const Function &F) {
1599 return F.getCallingConv() == CallingConv::AMDGPU_KERNEL;
1600 };
1601
1602 Attributor A(Functions, InfoCache, AC);
1603
1604 LLVM_DEBUG({
1605 StringRef LTOPhaseStr = to_string(LTOPhase);
1606 dbgs() << "[AMDGPUAttributor] Running at phase " << LTOPhaseStr << '\n'
1607 << "[AMDGPUAttributor] Module " << M.getName() << " is "
1608 << (AC.IsClosedWorldModule ? "" : "not ")
1609 << "assumed to be a closed world.\n";
1610 });
1611
1612 for (auto *F : Functions) {
1613 A.getOrCreateAAFor<AAAMDAttributes>(IRPosition::function(*F));
1614 A.getOrCreateAAFor<AAUniformWorkGroupSize>(IRPosition::function(*F));
1615 A.getOrCreateAAFor<AAAMDMaxNumWorkgroups>(IRPosition::function(*F));
1616 CallingConv::ID CC = F->getCallingConv();
1617 if (!AMDGPU::isEntryFunctionCC(CC)) {
1618 A.getOrCreateAAFor<AAAMDFlatWorkGroupSize>(IRPosition::function(*F));
1619 A.getOrCreateAAFor<AAAMDWavesPerEU>(IRPosition::function(*F));
1620 }
1621
1622 const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(*F);
1623 if (!F->isDeclaration() && ST.hasClusters())
1624 A.getOrCreateAAFor<AAAMDGPUClusterDims>(IRPosition::function(*F));
1625
1626 if (ST.hasGFX90AInsts())
1627 A.getOrCreateAAFor<AAAMDGPUMinAGPRAlloc>(IRPosition::function(*F));
1628
1629 for (auto &I : instructions(F)) {
1630 Value *Ptr = nullptr;
1631 if (auto *LI = dyn_cast<LoadInst>(&I))
1632 Ptr = LI->getPointerOperand();
1633 else if (auto *SI = dyn_cast<StoreInst>(&I))
1634 Ptr = SI->getPointerOperand();
1635 else if (auto *RMW = dyn_cast<AtomicRMWInst>(&I))
1636 Ptr = RMW->getPointerOperand();
1637 else if (auto *CmpX = dyn_cast<AtomicCmpXchgInst>(&I))
1638 Ptr = CmpX->getPointerOperand();
1639
1640 if (Ptr) {
1641 A.getOrCreateAAFor<AAAddressSpace>(IRPosition::value(*Ptr));
1642 A.getOrCreateAAFor<AANoAliasAddrSpace>(IRPosition::value(*Ptr));
1643 if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Ptr)) {
1644 if (II->getIntrinsicID() == Intrinsic::amdgcn_make_buffer_rsrc)
1645 A.getOrCreateAAFor<AAAlign>(IRPosition::value(*Ptr));
1646 }
1647 }
1648 }
1649 }
1650
1651 return A.run() == ChangeStatus::CHANGED;
1652}
1653} // namespace
1654
1657
1660 AnalysisGetter AG(FAM);
1661
1662 // TODO: Probably preserves CFG
1663 return runImpl(M, AG, TM, Options, LTOPhase) ? PreservedAnalyses::none()
1665}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static bool isDSAddress(const Constant *C)
static constexpr std::pair< ImplicitArgumentMask, StringLiteral > ImplicitAttrs[]
static cl::opt< unsigned > IndirectCallSpecializationThreshold("amdgpu-indirect-call-specialization-threshold", cl::desc("A threshold controls whether an indirect call will be specialized"), cl::init(3))
static ImplicitArgumentMask intrinsicToAttrMask(Intrinsic::ID ID, bool &NonKernelOnly, bool &NeedsImplicit, bool HasApertureRegs, bool SupportsGetDoorBellID, unsigned CodeObjectVersion)
static bool hasSanitizerAttributes(const Function &F)
Returns true if sanitizer attributes are present on a function.
ImplicitArgumentMask
@ UNKNOWN_INTRINSIC
@ NOT_IMPLICIT_INPUT
@ ALL_ARGUMENT_MASK
ImplicitArgumentPositions
@ LAST_ARG_POS
static bool castRequiresQueuePtr(unsigned SrcAS)
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Expand Atomic instructions
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
DXIL Resource Access
@ Default
static bool runImpl(Function &F, const TargetLowering &TLI, const LibcallLoweringInfo &Libcalls, AssumptionCache *AC)
AMD GCN specific subclass of TargetSubtarget.
#define RegName(no)
static LVOptions Options
Definition LVOptions.cpp:25
static LoopDeletionResult merge(LoopDeletionResult A, LoopDeletionResult B)
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
Machine Check Debug Module
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
uint64_t IntrinsicInst * II
FunctionAnalysisManager FAM
static StringRef getName(Value *V)
Basic Register Allocator
#define LLVM_DEBUG(...)
Definition Debug.h:114
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, const llvm::StringTable &StandardNames, VectorLibrary VecLib)
Initialize the set of available library functions based on the specified target triple.
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
static ClusterDimsAttr get(const Function &F)
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1555
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Value * getArgOperand(unsigned i) const
LLVM_ABI Intrinsic::ID getIntrinsicID() const
Returns the intrinsic ID of the intrinsic called or Intrinsic::not_intrinsic if the called function i...
const APInt & getLower() const
Return the lower value for this range.
const APInt & getUpper() const
Return the upper value for this range.
This is an important base class in LLVM.
Definition Constant.h:43
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition Function.h:272
unsigned getAddressSpace() const
Module * getParent()
Get the module that this global value is contained inside of...
LLVM_ABI const Function * getFunction() const
Return the function this instruction belongs to.
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition Module.h:278
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition Analysis.h:115
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition SetVector.h:151
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
void push_back(const T &Elt)
std::string str() const
str - Get the contents as an std::string.
Definition StringRef.h:222
const STC & getSubtarget(const Function &F) const
This method returns a pointer to the specified type of TargetSubtargetInfo.
LLVM_ABI bool isDroppable() const
A droppable user is a user for which uses can be dropped without affecting correctness and should be ...
Definition User.cpp:119
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
CallInst * Call
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ REGION_ADDRESS
Address space for region memory. (GDS)
@ LOCAL_ADDRESS
Address space for local memory.
@ PRIVATE_ADDRESS
Address space for private memory.
unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI)
unsigned getAMDHSACodeObjectVersion(const Module &M)
unsigned getDefaultQueueImplicitArgPosition(unsigned CodeObjectVersion)
std::tuple< char, unsigned, unsigned > parseAsmPhysRegName(StringRef RegName)
Returns a valid charcode or 0 in the first entry if this is a valid physical register name.
LLVM_READNONE constexpr bool isEntryFunctionCC(CallingConv::ID CC)
std::tuple< char, unsigned, unsigned > parseAsmConstraintPhysReg(StringRef Constraint)
Returns a valid charcode or 0 in the first entry if this is a valid physical register constraint.
unsigned getHostcallImplicitArgPosition(unsigned CodeObjectVersion)
unsigned getCompletionActionImplicitArgPosition(unsigned CodeObjectVersion)
std::pair< unsigned, unsigned > getIntegerPairAttribute(const Function &F, StringRef Name, std::pair< unsigned, unsigned > Default, bool OnlyFirstRequired)
LLVM_READNONE constexpr bool isGraphics(CallingConv::ID CC)
unsigned getMultigridSyncArgImplicitArgPosition(unsigned CodeObjectVersion)
E & operator^=(E &LHS, E RHS)
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ CE
Windows NT (Windows on ARM)
Definition MCAsmInfo.h:48
initializer< Ty > init(const Ty &Val)
NodeAddr< CodeNode * > Code
Definition RDFGraph.h:388
NodeAddr< FuncNode * > Func
Definition RDFGraph.h:393
Context & getContext() const
Definition BasicBlock.h:99
friend class Instruction
Iterator for Instructions in a `BasicBlock.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
InnerAnalysisManagerProxy< FunctionAnalysisManager, Module > FunctionAnalysisManagerModuleProxy
Provide the FunctionAnalysisManager to Module proxy.
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
ThinOrFullLTOPhase
This enumerates the LLVM full LTO or ThinLTO optimization phases.
Definition Pass.h:77
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
const char * to_string(ThinOrFullLTOPhase Phase)
Definition Pass.cpp:301
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
Definition MathExtras.h:394
@ Other
Any other memory.
Definition ModRef.h:68
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition Alignment.h:144
ChangeStatus clampStateAndIndicateChange(StateType &S, const StateType &R)
Helper function to clamp a state S of type StateType with the information in R and indicate/return if...
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
ChangeStatus
{
Definition Attributor.h:496
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
BumpPtrAllocatorImpl<> BumpPtrAllocator
The standard BumpPtrAllocator which just uses the default template parameters.
Definition Allocator.h:383
AnalysisManager< Module > ModuleAnalysisManager
Convenience typedef for the Module analysis manager.
Definition MIRParser.h:39
static LLVM_ABI const char ID
Unique ID (due to the unique address)
static LLVM_ABI const char ID
Unique ID (due to the unique address)
virtual const SetVector< Function * > & getOptimisticEdges() const =0
Get the optimistic edges.
static LLVM_ABI const char ID
Unique ID (due to the unique address)
virtual bool hasNonAsmUnknownCallee() const =0
Is there any call with an unknown callee, excluding any inline asm.
static LLVM_ABI const char ID
Unique ID (due to the unique address)
static LLVM_ABI const char ID
Unique ID (due to the unique address)
Instruction * getRemoteInst() const
Return the actual instruction that causes the access.
static LLVM_ABI const char ID
Unique ID (due to the unique address)
static LLVM_ABI const char ID
Unique ID (due to the unique address)
static LLVM_ABI const char ID
Unique ID (due to the unique address)
static LLVM_ABI const char ID
Unique ID (due to the unique address)
virtual const char * getIdAddr() const =0
This function should return the address of the ID of the AbstractAttribute.
Wrapper for FunctionAnalysisManager.
The fixpoint analysis framework that orchestrates the attribute deduction.
DecIntegerState & takeAssumedMaximum(base_t Value)
Take maximum of assumed and Value.
Helper to describe and deal with positions in the LLVM-IR.
Definition Attributor.h:593
static const IRPosition callsite_returned(const CallBase &CB)
Create a position describing the returned value of CB.
Definition Attributor.h:661
static const IRPosition value(const Value &V, const CallBaseContext *CBContext=nullptr)
Create a position describing the value of V.
Definition Attributor.h:617
@ IRP_FUNCTION
An attribute for a function (scope).
Definition Attributor.h:605
static const IRPosition function(const Function &F, const CallBaseContext *CBContext=nullptr)
Create a position describing the function scope of F.
Definition Attributor.h:636
Kind getPositionKind() const
Return the associated position kind.
Definition Attributor.h:889
static const IRPosition callsite_function(const CallBase &CB)
Create a position describing the function scope of CB.
Definition Attributor.h:656
Data structure to hold cached (LLVM-IR) information.
bool isValidState() const override
See AbstractState::isValidState() NOTE: For now we simply pretend that the worst possible state is in...
ChangeStatus indicatePessimisticFixpoint() override
See AbstractState::indicatePessimisticFixpoint(...)
Helper to tie an abstract state implementation to an abstract attribute.