//===- AMDGPUAttributor.cpp -----------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file This pass uses the Attributor framework to deduce AMDGPU attributes.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/Analysis/CycleAnalysis.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsR600.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/IPO/Attributor.h"

#define DEBUG_TYPE "amdgpu-attributor"

namespace llvm {
void initializeCycleInfoWrapperPassPass(PassRegistry &);
}

using namespace llvm;

#define AMDGPU_ATTRIBUTE(Name, Str) Name##_POS,

enum ImplicitArgumentPositions {
  #include "AMDGPUAttributes.def"
  LAST_ARG_POS
};

#define AMDGPU_ATTRIBUTE(Name, Str) Name = 1 << Name##_POS,

enum ImplicitArgumentMask {
  NOT_IMPLICIT_INPUT = 0,
  #include "AMDGPUAttributes.def"
  ALL_ARGUMENT_MASK = (1 << LAST_ARG_POS) - 1
};

#define AMDGPU_ATTRIBUTE(Name, Str) {Name, Str},
static constexpr std::pair<ImplicitArgumentMask,
                           StringLiteral> ImplicitAttrs[] = {
  #include "AMDGPUAttributes.def"
};
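
// Illustration (a sketch; see AMDGPUAttributes.def for the real entries): each
// AMDGPU_ATTRIBUTE(Name, Str) entry pairs a mask bit with the IR attribute
// string it controls. A hypothetical entry such as
//
//   AMDGPU_ATTRIBUTE(DISPATCH_PTR, "amdgpu-no-dispatch-ptr")
//
// expands to DISPATCH_PTR_POS in ImplicitArgumentPositions, to the bit
// DISPATCH_PTR = 1 << DISPATCH_PTR_POS in ImplicitArgumentMask, and to the
// pair {DISPATCH_PTR, "amdgpu-no-dispatch-ptr"} in ImplicitAttrs above.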

// We do not need to note the x workitem or workgroup id because they are
// always initialized.
//
// TODO: We should not add the attributes if the known compile time workgroup
// size is 1 for y/z.
static ImplicitArgumentMask
intrinsicToAttrMask(Intrinsic::ID ID, bool &NonKernelOnly, bool &NeedsImplicit,
                    bool HasApertureRegs, bool SupportsGetDoorBellID) {
  unsigned CodeObjectVersion = AMDGPU::getAmdhsaCodeObjectVersion();
  switch (ID) {
  case Intrinsic::amdgcn_workitem_id_x:
    NonKernelOnly = true;
    return WORKITEM_ID_X;
  case Intrinsic::amdgcn_workgroup_id_x:
    NonKernelOnly = true;
    return WORKGROUP_ID_X;
  case Intrinsic::amdgcn_workitem_id_y:
  case Intrinsic::r600_read_tidig_y:
    return WORKITEM_ID_Y;
  case Intrinsic::amdgcn_workitem_id_z:
  case Intrinsic::r600_read_tidig_z:
    return WORKITEM_ID_Z;
  case Intrinsic::amdgcn_workgroup_id_y:
  case Intrinsic::r600_read_tgid_y:
    return WORKGROUP_ID_Y;
  case Intrinsic::amdgcn_workgroup_id_z:
  case Intrinsic::r600_read_tgid_z:
    return WORKGROUP_ID_Z;
  case Intrinsic::amdgcn_lds_kernel_id:
    return LDS_KERNEL_ID;
  case Intrinsic::amdgcn_dispatch_ptr:
    return DISPATCH_PTR;
  case Intrinsic::amdgcn_dispatch_id:
    return DISPATCH_ID;
  case Intrinsic::amdgcn_implicitarg_ptr:
    return IMPLICIT_ARG_PTR;
  // Need queue_ptr anyway. But under V5, we also need implicitarg_ptr to
  // access queue_ptr.
  case Intrinsic::amdgcn_queue_ptr:
    NeedsImplicit = (CodeObjectVersion == 5);
    return QUEUE_PTR;
  case Intrinsic::amdgcn_is_shared:
  case Intrinsic::amdgcn_is_private:
    if (HasApertureRegs)
      return NOT_IMPLICIT_INPUT;
    // Under V5, we need implicitarg_ptr + offsets to access private_base or
    // shared_base. For pre-V5, however, we need to access them through
    // queue_ptr + offsets.
    return CodeObjectVersion == 5 ? IMPLICIT_ARG_PTR : QUEUE_PTR;
  case Intrinsic::trap:
    if (SupportsGetDoorBellID) // GetDoorbellID support implemented since V4.
      return CodeObjectVersion >= 4 ? NOT_IMPLICIT_INPUT : QUEUE_PTR;
    NeedsImplicit = (CodeObjectVersion == 5); // Need implicitarg_ptr under V5.
    return QUEUE_PTR;
  default:
    return NOT_IMPLICIT_INPUT;
  }
}
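
// Worked example (a sketch, not taken from a test): for a device function
//
//   define void @f() {
//     %y = call i32 @llvm.amdgcn.workitem.id.y()
//     ret void
//   }
//
// intrinsicToAttrMask() maps the call to WORKITEM_ID_Y, so
// AAAMDAttributesFunction::updateImpl() below clears that assumed bit and the
// corresponding "amdgpu-no-workitem-id-y" attribute is not added when the
// state is manifested; the Y workitem ID therefore stays live for @f.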

static bool castRequiresQueuePtr(unsigned SrcAS) {
  return SrcAS == AMDGPUAS::LOCAL_ADDRESS || SrcAS == AMDGPUAS::PRIVATE_ADDRESS;
}

static bool isDSAddress(const Constant *C) {
  const GlobalValue *GV = dyn_cast<GlobalValue>(C);
  if (!GV)
    return false;
  unsigned AS = GV->getAddressSpace();
  return AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS;
}

/// Returns true if the function requires the implicit argument be passed
/// regardless of the function contents.
static bool funcRequiresHostcallPtr(const Function &F) {
  // Sanitizers require the hostcall buffer passed in the implicit arguments.
  return F.hasFnAttribute(Attribute::SanitizeAddress) ||
         F.hasFnAttribute(Attribute::SanitizeThread) ||
         F.hasFnAttribute(Attribute::SanitizeMemory) ||
         F.hasFnAttribute(Attribute::SanitizeHWAddress) ||
         F.hasFnAttribute(Attribute::SanitizeMemTag);
}

namespace {
class AMDGPUInformationCache : public InformationCache {
public:
  AMDGPUInformationCache(const Module &M, AnalysisGetter &AG,
                         BumpPtrAllocator &Allocator,
                         SetVector<Function *> *CGSCC, TargetMachine &TM)
      : InformationCache(M, AG, Allocator, CGSCC), TM(TM) {}
  TargetMachine &TM;

  enum ConstantStatus { DS_GLOBAL = 1 << 0, ADDR_SPACE_CAST = 1 << 1 };

  /// Check if the subtarget has aperture regs.
  bool hasApertureRegs(Function &F) {
    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
    return ST.hasApertureRegs();
  }

  /// Check if the subtarget supports GetDoorbellID.
  bool supportsGetDoorbellID(Function &F) {
    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
    return ST.supportsGetDoorbellID();
  }

  std::pair<unsigned, unsigned> getFlatWorkGroupSizes(const Function &F) {
    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
    return ST.getFlatWorkGroupSizes(F);
  }

  std::pair<unsigned, unsigned>
  getMaximumFlatWorkGroupRange(const Function &F) {
    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
    return {ST.getMinFlatWorkGroupSize(), ST.getMaxFlatWorkGroupSize()};
  }

private:
  /// Check if the ConstantExpr \p CE requires the queue pointer.
  static bool visitConstExpr(const ConstantExpr *CE) {
    if (CE->getOpcode() == Instruction::AddrSpaceCast) {
      unsigned SrcAS = CE->getOperand(0)->getType()->getPointerAddressSpace();
      return castRequiresQueuePtr(SrcAS);
    }
    return false;
  }

  /// Get the constant access bitmap for \p C.
  uint8_t getConstantAccess(const Constant *C) {
    auto It = ConstantStatus.find(C);
    if (It != ConstantStatus.end())
      return It->second;

    uint8_t Result = 0;
    if (isDSAddress(C))
      Result = DS_GLOBAL;

    if (const auto *CE = dyn_cast<ConstantExpr>(C))
      if (visitConstExpr(CE))
        Result |= ADDR_SPACE_CAST;

    for (const Use &U : C->operands()) {
      const auto *OpC = dyn_cast<Constant>(U);
      if (!OpC)
        continue;

      Result |= getConstantAccess(OpC);
    }
    return Result;
  }

public:
  /// Returns true if \p Fn needs the queue pointer because of \p C.
  bool needsQueuePtr(const Constant *C, Function &Fn) {
    bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(Fn.getCallingConv());
    bool HasAperture = hasApertureRegs(Fn);

    // No need to explore the constants.
    if (!IsNonEntryFunc && HasAperture)
      return false;

    uint8_t Access = getConstantAccess(C);

    // We need to trap on DS globals in non-entry functions.
    if (IsNonEntryFunc && (Access & DS_GLOBAL))
      return true;

    return !HasAperture && (Access & ADDR_SPACE_CAST);
  }
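
  // Illustration (assumed IR, a sketch): for a constant operand such as
  //
  //   @lds = addrspace(3) global i32 0
  //   ... ptr addrspacecast (ptr addrspace(3) @lds to ptr) ...
  //
  // getConstantAccess() reports DS_GLOBAL | ADDR_SPACE_CAST, so
  // needsQueuePtr() returns true for any non-entry function (DS_GLOBAL) and
  // for entry functions without aperture registers (ADDR_SPACE_CAST).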

private:
  /// Used to determine if the Constant needs the queue pointer.
  DenseMap<const Constant *, uint8_t> ConstantStatus;
};

struct AAAMDAttributes
    : public StateWrapper<BitIntegerState<uint32_t, ALL_ARGUMENT_MASK, 0>,
                          AbstractAttribute> {
  using Base = StateWrapper<BitIntegerState<uint32_t, ALL_ARGUMENT_MASK, 0>,
                            AbstractAttribute>;

  AAAMDAttributes(const IRPosition &IRP, Attributor &A) : Base(IRP) {}

  /// Create an abstract attribute view for the position \p IRP.
  static AAAMDAttributes &createForPosition(const IRPosition &IRP,
                                            Attributor &A);

  /// See AbstractAttribute::getName().
  const std::string getName() const override { return "AAAMDAttributes"; }

  /// See AbstractAttribute::getIdAddr().
  const char *getIdAddr() const override { return &ID; }

  /// This function should return true if the type of the \p AA is
  /// AAAMDAttributes.
  static bool classof(const AbstractAttribute *AA) {
    return (AA->getIdAddr() == &ID);
  }

  /// Unique ID (due to the unique address)
  static const char ID;
};
const char AAAMDAttributes::ID = 0;

struct AAUniformWorkGroupSize
    : public StateWrapper<BooleanState, AbstractAttribute> {
  using Base = StateWrapper<BooleanState, AbstractAttribute>;
  AAUniformWorkGroupSize(const IRPosition &IRP, Attributor &A) : Base(IRP) {}

  /// Create an abstract attribute view for the position \p IRP.
  static AAUniformWorkGroupSize &createForPosition(const IRPosition &IRP,
                                                   Attributor &A);

  /// See AbstractAttribute::getName().
  const std::string getName() const override {
    return "AAUniformWorkGroupSize";
  }

  /// See AbstractAttribute::getIdAddr().
  const char *getIdAddr() const override { return &ID; }

  /// This function should return true if the type of the \p AA is
  /// AAUniformWorkGroupSize.
  static bool classof(const AbstractAttribute *AA) {
    return (AA->getIdAddr() == &ID);
  }

  /// Unique ID (due to the unique address)
  static const char ID;
};
const char AAUniformWorkGroupSize::ID = 0;

struct AAUniformWorkGroupSizeFunction : public AAUniformWorkGroupSize {
  AAUniformWorkGroupSizeFunction(const IRPosition &IRP, Attributor &A)
      : AAUniformWorkGroupSize(IRP, A) {}

  void initialize(Attributor &A) override {
    Function *F = getAssociatedFunction();
    CallingConv::ID CC = F->getCallingConv();

    if (CC != CallingConv::AMDGPU_KERNEL)
      return;

    bool InitialValue = false;
    if (F->hasFnAttribute("uniform-work-group-size"))
      InitialValue = F->getFnAttribute("uniform-work-group-size")
                         .getValueAsString()
                         .equals("true");

    if (InitialValue)
      indicateOptimisticFixpoint();
    else
      indicatePessimisticFixpoint();
  }

  ChangeStatus updateImpl(Attributor &A) override {
    ChangeStatus Change = ChangeStatus::UNCHANGED;

    auto CheckCallSite = [&](AbstractCallSite CS) {
      Function *Caller = CS.getInstruction()->getFunction();
      LLVM_DEBUG(dbgs() << "[AAUniformWorkGroupSize] Call " << Caller->getName()
                        << "->" << getAssociatedFunction()->getName() << "\n");

      const auto &CallerInfo = A.getAAFor<AAUniformWorkGroupSize>(
          *this, IRPosition::function(*Caller), DepClassTy::REQUIRED);

      Change = Change | clampStateAndIndicateChange(this->getState(),
                                                    CallerInfo.getState());

      return true;
    };

    bool AllCallSitesKnown = true;
    if (!A.checkForAllCallSites(CheckCallSite, *this, true, AllCallSitesKnown))
      return indicatePessimisticFixpoint();

    return Change;
  }

  ChangeStatus manifest(Attributor &A) override {
    SmallVector<Attribute, 8> AttrList;
    LLVMContext &Ctx = getAssociatedFunction()->getContext();

    AttrList.push_back(Attribute::get(Ctx, "uniform-work-group-size",
                                      getAssumed() ? "true" : "false"));
    return IRAttributeManifest::manifestAttrs(A, getIRPosition(), AttrList,
                                              /* ForceReplace */ true);
  }

  bool isValidState() const override {
    // This state is always valid, even when the state is false.
    return true;
  }

  const std::string getAsStr() const override {
    return "AMDWorkGroupSize[" + std::to_string(getAssumed()) + "]";
  }

  /// See AbstractAttribute::trackStatistics()
  void trackStatistics() const override {}
};
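
// Manifest illustration (a sketch): a kernel carrying
// "uniform-work-group-size"="true" starts at an optimistic fixpoint, and every
// callee reachable only from such kernels ends up with the same attribute,
// e.g.
//
//   attributes #0 = { "uniform-work-group-size"="true" }
//
// A call site in a caller whose state is "false" clamps the callee back to
// "false" during updateImpl().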

AAUniformWorkGroupSize &
AAUniformWorkGroupSize::createForPosition(const IRPosition &IRP,
                                          Attributor &A) {
  if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
    return *new (A.Allocator) AAUniformWorkGroupSizeFunction(IRP, A);
  llvm_unreachable(
      "AAUniformWorkGroupSize is only valid for function position");
}

struct AAAMDAttributesFunction : public AAAMDAttributes {
  AAAMDAttributesFunction(const IRPosition &IRP, Attributor &A)
      : AAAMDAttributes(IRP, A) {}

  void initialize(Attributor &A) override {
    Function *F = getAssociatedFunction();

    // If the function requires the implicit arg pointer due to sanitizers,
    // assume it's needed even if explicitly marked as not requiring it.
    const bool NeedsHostcall = funcRequiresHostcallPtr(*F);
    if (NeedsHostcall) {
      removeAssumedBits(IMPLICIT_ARG_PTR);
      removeAssumedBits(HOSTCALL_PTR);
    }

    for (auto Attr : ImplicitAttrs) {
      if (NeedsHostcall &&
          (Attr.first == IMPLICIT_ARG_PTR || Attr.first == HOSTCALL_PTR))
        continue;

      if (F->hasFnAttribute(Attr.second))
        addKnownBits(Attr.first);
    }

    if (F->isDeclaration())
      return;

    // Ignore functions with graphics calling conventions; these are currently
    // not allowed to have kernel arguments.
    if (AMDGPU::isGraphics(F->getCallingConv())) {
      indicatePessimisticFixpoint();
      return;
    }
  }

  ChangeStatus updateImpl(Attributor &A) override {
    Function *F = getAssociatedFunction();
    // The current assumed state used to determine a change.
    auto OrigAssumed = getAssumed();

    // Check for Intrinsics and propagate attributes.
    const AACallEdges &AAEdges = A.getAAFor<AACallEdges>(
        *this, this->getIRPosition(), DepClassTy::REQUIRED);
    if (AAEdges.hasNonAsmUnknownCallee())
      return indicatePessimisticFixpoint();

    bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(F->getCallingConv());

    bool NeedsImplicit = false;
    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
    bool HasApertureRegs = InfoCache.hasApertureRegs(*F);
    bool SupportsGetDoorbellID = InfoCache.supportsGetDoorbellID(*F);

    for (Function *Callee : AAEdges.getOptimisticEdges()) {
      Intrinsic::ID IID = Callee->getIntrinsicID();
      if (IID == Intrinsic::not_intrinsic) {
        const AAAMDAttributes &AAAMD = A.getAAFor<AAAMDAttributes>(
            *this, IRPosition::function(*Callee), DepClassTy::REQUIRED);
        *this &= AAAMD;
        continue;
      }

      bool NonKernelOnly = false;
      ImplicitArgumentMask AttrMask =
          intrinsicToAttrMask(IID, NonKernelOnly, NeedsImplicit,
                              HasApertureRegs, SupportsGetDoorbellID);
      if (AttrMask != NOT_IMPLICIT_INPUT) {
        if ((IsNonEntryFunc || !NonKernelOnly))
          removeAssumedBits(AttrMask);
      }
    }

    // Need implicitarg_ptr to access queue_ptr, private_base, and shared_base.
    if (NeedsImplicit)
      removeAssumedBits(IMPLICIT_ARG_PTR);

    if (isAssumed(QUEUE_PTR) && checkForQueuePtr(A)) {
      // Under V5, we need implicitarg_ptr + offsets to access private_base or
      // shared_base. We do not actually need queue_ptr.
      if (AMDGPU::getAmdhsaCodeObjectVersion() == 5)
        removeAssumedBits(IMPLICIT_ARG_PTR);
      else
        removeAssumedBits(QUEUE_PTR);
    }

    if (funcRetrievesMultigridSyncArg(A)) {
      assert(!isAssumed(IMPLICIT_ARG_PTR) &&
             "multigrid_sync_arg needs implicitarg_ptr");
      removeAssumedBits(MULTIGRID_SYNC_ARG);
    }

    if (funcRetrievesHostcallPtr(A)) {
      assert(!isAssumed(IMPLICIT_ARG_PTR) && "hostcall needs implicitarg_ptr");
      removeAssumedBits(HOSTCALL_PTR);
    }

    if (funcRetrievesHeapPtr(A)) {
      assert(!isAssumed(IMPLICIT_ARG_PTR) && "heap_ptr needs implicitarg_ptr");
      removeAssumedBits(HEAP_PTR);
    }

    if (isAssumed(QUEUE_PTR) && funcRetrievesQueuePtr(A)) {
      assert(!isAssumed(IMPLICIT_ARG_PTR) && "queue_ptr needs implicitarg_ptr");
      removeAssumedBits(QUEUE_PTR);
    }

    if (isAssumed(LDS_KERNEL_ID) && funcRetrievesLDSKernelId(A)) {
      removeAssumedBits(LDS_KERNEL_ID);
    }

    if (isAssumed(DEFAULT_QUEUE) && funcRetrievesDefaultQueue(A))
      removeAssumedBits(DEFAULT_QUEUE);

    if (isAssumed(COMPLETION_ACTION) && funcRetrievesCompletionAction(A))
      removeAssumedBits(COMPLETION_ACTION);

    return getAssumed() != OrigAssumed ? ChangeStatus::CHANGED
                                       : ChangeStatus::UNCHANGED;
  }

  ChangeStatus manifest(Attributor &A) override {
    SmallVector<Attribute, 8> AttrList;
    LLVMContext &Ctx = getAssociatedFunction()->getContext();

    for (auto Attr : ImplicitAttrs) {
      if (isKnown(Attr.first))
        AttrList.push_back(Attribute::get(Ctx, Attr.second));
    }

    return IRAttributeManifest::manifestAttrs(A, getIRPosition(), AttrList,
                                              /* ForceReplace */ true);
  }
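
  // Manifest illustration (a sketch; the strings come from ImplicitAttrs): a
  // function whose transitive callees never touch the dispatch pointer or the
  // heap pointer keeps those bits known and is annotated roughly as
  //
  //   attributes #0 = { "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" ... }
  //
  // with one "amdgpu-no-*" entry per implicit argument proven unused.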

  const std::string getAsStr() const override {
    std::string Str;
    raw_string_ostream OS(Str);
    OS << "AMDInfo[";
    for (auto Attr : ImplicitAttrs)
      OS << ' ' << Attr.second;
    OS << " ]";
    return OS.str();
  }

  /// See AbstractAttribute::trackStatistics()
  void trackStatistics() const override {}

private:
  bool checkForQueuePtr(Attributor &A) {
    Function *F = getAssociatedFunction();
    bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(F->getCallingConv());

    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());

    bool NeedsQueuePtr = false;

    auto CheckAddrSpaceCasts = [&](Instruction &I) {
      unsigned SrcAS = static_cast<AddrSpaceCastInst &>(I).getSrcAddressSpace();
      if (castRequiresQueuePtr(SrcAS)) {
        NeedsQueuePtr = true;
        return false;
      }
      return true;
    };

    bool HasApertureRegs = InfoCache.hasApertureRegs(*F);

    // `checkForAllInstructions` is much cheaper than going through all
    // instructions, try it first.

    // The queue pointer is not needed if aperture regs are present.
    if (!HasApertureRegs) {
      bool UsedAssumedInformation = false;
      A.checkForAllInstructions(CheckAddrSpaceCasts, *this,
                                {Instruction::AddrSpaceCast},
                                UsedAssumedInformation);
    }

    // If we found that we need the queue pointer, nothing else to do.
    if (NeedsQueuePtr)
      return true;

    if (!IsNonEntryFunc && HasApertureRegs)
      return false;

    for (BasicBlock &BB : *F) {
      for (Instruction &I : BB) {
        for (const Use &U : I.operands()) {
          if (const auto *C = dyn_cast<Constant>(U)) {
            if (InfoCache.needsQueuePtr(C, *F))
              return true;
          }
        }
      }
    }

    return false;
  }

  bool funcRetrievesMultigridSyncArg(Attributor &A) {
    unsigned Pos = AMDGPU::getMultigridSyncArgImplicitArgPosition();
    AA::RangeTy Range(Pos, 8);
    return funcRetrievesImplicitKernelArg(A, Range);
  }

  bool funcRetrievesHostcallPtr(Attributor &A) {
    unsigned Pos = AMDGPU::getHostcallImplicitArgPosition();
    AA::RangeTy Range(Pos, 8);
    return funcRetrievesImplicitKernelArg(A, Range);
  }

  bool funcRetrievesDefaultQueue(Attributor &A) {
    unsigned Pos = AMDGPU::getDefaultQueueImplicitArgPosition();
    AA::RangeTy Range(Pos, 8);
    return funcRetrievesImplicitKernelArg(A, Range);
  }

  bool funcRetrievesCompletionAction(Attributor &A) {
    unsigned Pos = AMDGPU::getCompletionActionImplicitArgPosition();
    AA::RangeTy Range(Pos, 8);
    return funcRetrievesImplicitKernelArg(A, Range);
  }

  bool funcRetrievesHeapPtr(Attributor &A) {
    if (AMDGPU::getAmdhsaCodeObjectVersion() != 5)
      return false;
    AA::RangeTy Range(AMDGPU::ImplicitArg::HEAP_PTR_OFFSET, 8);
    return funcRetrievesImplicitKernelArg(A, Range);
  }

  bool funcRetrievesQueuePtr(Attributor &A) {
    if (AMDGPU::getAmdhsaCodeObjectVersion() != 5)
      return false;
    AA::RangeTy Range(AMDGPU::ImplicitArg::QUEUE_PTR_OFFSET, 8);
    return funcRetrievesImplicitKernelArg(A, Range);
  }

  bool funcRetrievesImplicitKernelArg(Attributor &A, AA::RangeTy Range) {
    // Check if this is a call to the implicitarg_ptr builtin and it
    // is used to retrieve the hostcall pointer. The implicit arg for
    // hostcall is not used only if every use of the implicitarg_ptr
    // is a load that clearly does not retrieve any byte of the
    // hostcall pointer. We check this by tracing all the uses of the
    // initial call to the implicitarg_ptr intrinsic.
    auto DoesNotLeadToKernelArgLoc = [&](Instruction &I) {
      auto &Call = cast<CallBase>(I);
      if (Call.getIntrinsicID() != Intrinsic::amdgcn_implicitarg_ptr)
        return true;

      const auto &PointerInfoAA = A.getAAFor<AAPointerInfo>(
          *this, IRPosition::callsite_returned(Call), DepClassTy::REQUIRED);

      return PointerInfoAA.forallInterferingAccesses(
          Range, [](const AAPointerInfo::Access &Acc, bool IsExact) {
            return Acc.getRemoteInst()->isDroppable();
          });
    };

    bool UsedAssumedInformation = false;
    return !A.checkForAllCallLikeInstructions(DoesNotLeadToKernelArgLoc, *this,
                                              UsedAssumedInformation);
  }
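
  // Illustrative IR (a hedged sketch; the byte offset is whatever the
  // AMDGPU::get*ImplicitArgPosition() helpers return for the code object
  // version in use):
  //
  //   %implicitarg = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
  //   %gep = getelementptr i8, ptr addrspace(4) %implicitarg, i64 <Pos>
  //   %val = load ptr, ptr addrspace(4) %gep
  //
  // The load is a non-droppable access interfering with Range, so the callback
  // above returns false, checkForAllCallLikeInstructions() fails, and
  // funcRetrievesImplicitKernelArg() reports the argument as retrieved.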

  bool funcRetrievesLDSKernelId(Attributor &A) {
    auto DoesNotRetrieve = [&](Instruction &I) {
      auto &Call = cast<CallBase>(I);
      return Call.getIntrinsicID() != Intrinsic::amdgcn_lds_kernel_id;
    };
    bool UsedAssumedInformation = false;
    return !A.checkForAllCallLikeInstructions(DoesNotRetrieve, *this,
                                              UsedAssumedInformation);
  }
};

AAAMDAttributes &AAAMDAttributes::createForPosition(const IRPosition &IRP,
                                                    Attributor &A) {
  if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
    return *new (A.Allocator) AAAMDAttributesFunction(IRP, A);
  llvm_unreachable("AAAMDAttributes is only valid for function position");
}

/// Propagate amdgpu-flat-work-group-size attribute.
struct AAAMDFlatWorkGroupSize
    : public StateWrapper<IntegerRangeState, AbstractAttribute, uint32_t> {
  using Base = StateWrapper<IntegerRangeState, AbstractAttribute, uint32_t>;
  AAAMDFlatWorkGroupSize(const IRPosition &IRP, Attributor &A)
      : Base(IRP, 32) {}

  /// See AbstractAttribute::getState(...).
  IntegerRangeState &getState() override { return *this; }
  const IntegerRangeState &getState() const override { return *this; }

  void initialize(Attributor &A) override {
    Function *F = getAssociatedFunction();
    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
    unsigned MinGroupSize, MaxGroupSize;
    std::tie(MinGroupSize, MaxGroupSize) = InfoCache.getFlatWorkGroupSizes(*F);
    intersectKnown(
        ConstantRange(APInt(32, MinGroupSize), APInt(32, MaxGroupSize + 1)));

    if (AMDGPU::isEntryFunctionCC(F->getCallingConv()))
      indicatePessimisticFixpoint();
  }

  ChangeStatus updateImpl(Attributor &A) override {
    ChangeStatus Change = ChangeStatus::UNCHANGED;

    auto CheckCallSite = [&](AbstractCallSite CS) {
      Function *Caller = CS.getInstruction()->getFunction();
      LLVM_DEBUG(dbgs() << "[AAAMDFlatWorkGroupSize] Call " << Caller->getName()
                        << "->" << getAssociatedFunction()->getName() << '\n');

      const auto &CallerInfo = A.getAAFor<AAAMDFlatWorkGroupSize>(
          *this, IRPosition::function(*Caller), DepClassTy::REQUIRED);

      Change |=
          clampStateAndIndicateChange(this->getState(), CallerInfo.getState());

      return true;
    };

    bool AllCallSitesKnown = true;
    if (!A.checkForAllCallSites(CheckCallSite, *this, true, AllCallSitesKnown))
      return indicatePessimisticFixpoint();

    return Change;
  }

  ChangeStatus manifest(Attributor &A) override {
    SmallVector<Attribute, 8> AttrList;
    Function *F = getAssociatedFunction();
    LLVMContext &Ctx = F->getContext();

    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
    unsigned Min, Max;
    std::tie(Min, Max) = InfoCache.getMaximumFlatWorkGroupRange(*F);

    // Don't add the attribute if it's the implied default.
    if (getAssumed().getLower() == Min && getAssumed().getUpper() - 1 == Max)
      return ChangeStatus::UNCHANGED;

    SmallString<10> Buffer;
    raw_svector_ostream OS(Buffer);
    OS << getAssumed().getLower() << ',' << getAssumed().getUpper() - 1;

    AttrList.push_back(
        Attribute::get(Ctx, "amdgpu-flat-work-group-size", OS.str()));
    return IRAttributeManifest::manifestAttrs(A, getIRPosition(), AttrList,
                                              /* ForceReplace */ true);
  }
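
  // Manifest illustration (a sketch): if the clamped range for a non-entry
  // function is [1, 256] and that differs from the subtarget default, the
  // function is annotated as
  //
  //   attributes #0 = { "amdgpu-flat-work-group-size"="1,256" }
  //
  // i.e. the "<lower>,<upper>" string written to Buffer above.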

  const std::string getAsStr() const override {
    std::string Str;
    raw_string_ostream OS(Str);
    OS << "AMDFlatWorkGroupSize[";
    OS << getAssumed().getLower() << ',' << getAssumed().getUpper() - 1;
    OS << ']';
    return OS.str();
  }

  /// See AbstractAttribute::trackStatistics()
  void trackStatistics() const override {}

  /// Create an abstract attribute view for the position \p IRP.
  static AAAMDFlatWorkGroupSize &createForPosition(const IRPosition &IRP,
                                                   Attributor &A);

  /// See AbstractAttribute::getName()
  const std::string getName() const override {
    return "AAAMDFlatWorkGroupSize";
  }

  /// See AbstractAttribute::getIdAddr()
  const char *getIdAddr() const override { return &ID; }

  /// This function should return true if the type of the \p AA is
  /// AAAMDFlatWorkGroupSize
  static bool classof(const AbstractAttribute *AA) {
    return (AA->getIdAddr() == &ID);
  }

  /// Unique ID (due to the unique address)
  static const char ID;
};

const char AAAMDFlatWorkGroupSize::ID = 0;

AAAMDFlatWorkGroupSize &
AAAMDFlatWorkGroupSize::createForPosition(const IRPosition &IRP,
                                          Attributor &A) {
  if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
    return *new (A.Allocator) AAAMDFlatWorkGroupSize(IRP, A);
  llvm_unreachable(
      "AAAMDFlatWorkGroupSize is only valid for function position");
}

class AMDGPUAttributor : public ModulePass {
public:
  AMDGPUAttributor() : ModulePass(ID) {}

  /// doInitialization - Virtual method overridden by subclasses to do
  /// any necessary initialization before any pass is run.
  bool doInitialization(Module &) override {
    auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
    if (!TPC)
      report_fatal_error("TargetMachine is required");

    TM = &TPC->getTM<TargetMachine>();
    return false;
  }

  bool runOnModule(Module &M) override {
    SetVector<Function *> Functions;
    AnalysisGetter AG(this);
    for (Function &F : M) {
      if (!F.isIntrinsic())
        Functions.insert(&F);
    }

    CallGraphUpdater CGUpdater;
    BumpPtrAllocator Allocator;
    AMDGPUInformationCache InfoCache(M, AG, Allocator, nullptr, *TM);
    DenseSet<const char *> Allowed(
        {&AAAMDAttributes::ID, &AAUniformWorkGroupSize::ID,
         &AAPotentialValues::ID, &AAAMDFlatWorkGroupSize::ID,
         &AACallEdges::ID, &AAPointerInfo::ID, &AAPotentialConstantValues::ID});

    AttributorConfig AC(CGUpdater);
    AC.Allowed = &Allowed;
    AC.IsModulePass = true;
    AC.DefaultInitializeLiveInternals = false;

    Attributor A(Functions, InfoCache, AC);

    for (Function &F : M) {
      if (!F.isIntrinsic()) {
        A.getOrCreateAAFor<AAAMDAttributes>(IRPosition::function(F));
        A.getOrCreateAAFor<AAUniformWorkGroupSize>(IRPosition::function(F));
        if (!AMDGPU::isEntryFunctionCC(F.getCallingConv())) {
          A.getOrCreateAAFor<AAAMDFlatWorkGroupSize>(IRPosition::function(F));
        }
      }
    }

    ChangeStatus Change = A.run();
    return Change == ChangeStatus::CHANGED;
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<CycleInfoWrapperPass>();
  }

  StringRef getPassName() const override { return "AMDGPU Attributor"; }
  TargetMachine *TM;
  static char ID;
};
} // namespace

char AMDGPUAttributor::ID = 0;

Pass *llvm::createAMDGPUAttributorPass() { return new AMDGPUAttributor(); }
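
// Usage sketch (assumes a legacy pass pipeline that also contains a
// TargetPassConfig, e.g. the one set up by the AMDGPU target for code
// generation; without it doInitialization() calls report_fatal_error):
//
//   legacy::PassManager PM;
//   PM.add(createAMDGPUAttributorPass());
//   PM.run(M);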
INITIALIZE_PASS_BEGIN(AMDGPUAttributor, DEBUG_TYPE, "AMDGPU Attributor", false,
                      false)
INITIALIZE_PASS_DEPENDENCY(CycleInfoWrapperPass)
INITIALIZE_PASS_END(AMDGPUAttributor, DEBUG_TYPE, "AMDGPU Attributor", false,
                    false)