//===- AMDGPUAttributor.cpp -----------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file This pass uses the Attributor framework to deduce AMDGPU attributes.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsR600.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/IPO/Attributor.h"

#define DEBUG_TYPE "amdgpu-attributor"

using namespace llvm;

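// Bitmask of the implicit kernel arguments and preloaded registers a function
// may require. Each entry pairs with an "amdgpu-no-*" attribute (see
// ImplicitAttrs below) whose presence means the input is *not* needed.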
enum ImplicitArgumentMask {
  NOT_IMPLICIT_INPUT = 0,

  // SGPRs
  DISPATCH_PTR = 1 << 0,
  QUEUE_PTR = 1 << 1,
  DISPATCH_ID = 1 << 2,
  IMPLICIT_ARG_PTR = 1 << 3,
  WORKGROUP_ID_X = 1 << 4,
  WORKGROUP_ID_Y = 1 << 5,
  WORKGROUP_ID_Z = 1 << 6,

  // VGPRs:
  WORKITEM_ID_X = 1 << 7,
  WORKITEM_ID_Y = 1 << 8,
  WORKITEM_ID_Z = 1 << 9,
  ALL_ARGUMENT_MASK = (1 << 10) - 1
};

static constexpr std::pair<ImplicitArgumentMask,
                           StringLiteral> ImplicitAttrs[] = {
  {DISPATCH_PTR, "amdgpu-no-dispatch-ptr"},
  {QUEUE_PTR, "amdgpu-no-queue-ptr"},
  {DISPATCH_ID, "amdgpu-no-dispatch-id"},
  {IMPLICIT_ARG_PTR, "amdgpu-no-implicitarg-ptr"},
  {WORKGROUP_ID_X, "amdgpu-no-workgroup-id-x"},
  {WORKGROUP_ID_Y, "amdgpu-no-workgroup-id-y"},
  {WORKGROUP_ID_Z, "amdgpu-no-workgroup-id-z"},
  {WORKITEM_ID_X, "amdgpu-no-workitem-id-x"},
  {WORKITEM_ID_Y, "amdgpu-no-workitem-id-y"},
  {WORKITEM_ID_Z, "amdgpu-no-workitem-id-z"}
};

// We do not need to note the x workitem or workgroup id because they are
// always initialized.
//
// TODO: We should not add the attributes if the known compile-time workgroup
// size is 1 for y/z.
static ImplicitArgumentMask
intrinsicToAttrMask(Intrinsic::ID ID, bool &NonKernelOnly, bool &IsQueuePtr) {
  switch (ID) {
  case Intrinsic::amdgcn_workitem_id_x:
    NonKernelOnly = true;
    return WORKITEM_ID_X;
  case Intrinsic::amdgcn_workgroup_id_x:
    NonKernelOnly = true;
    return WORKGROUP_ID_X;
  case Intrinsic::amdgcn_workitem_id_y:
  case Intrinsic::r600_read_tidig_y:
    return WORKITEM_ID_Y;
  case Intrinsic::amdgcn_workitem_id_z:
  case Intrinsic::r600_read_tidig_z:
    return WORKITEM_ID_Z;
  case Intrinsic::amdgcn_workgroup_id_y:
  case Intrinsic::r600_read_tgid_y:
    return WORKGROUP_ID_Y;
  case Intrinsic::amdgcn_workgroup_id_z:
  case Intrinsic::r600_read_tgid_z:
    return WORKGROUP_ID_Z;
  case Intrinsic::amdgcn_dispatch_ptr:
    return DISPATCH_PTR;
  case Intrinsic::amdgcn_dispatch_id:
    return DISPATCH_ID;
  case Intrinsic::amdgcn_implicitarg_ptr:
    return IMPLICIT_ARG_PTR;
  case Intrinsic::amdgcn_queue_ptr:
  case Intrinsic::amdgcn_is_shared:
  case Intrinsic::amdgcn_is_private:
    // TODO: Does not require the queue ptr on gfx9+.
  case Intrinsic::trap:
  case Intrinsic::debugtrap:
    IsQueuePtr = true;
    return QUEUE_PTR;
  default:
    return NOT_IMPLICIT_INPUT;
  }
}

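// Casting a private or local pointer to flat needs the corresponding aperture
// base address, which is read from the queue pointer on subtargets without
// aperture registers.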
static bool castRequiresQueuePtr(unsigned SrcAS) {
  return SrcAS == AMDGPUAS::LOCAL_ADDRESS || SrcAS == AMDGPUAS::PRIVATE_ADDRESS;
}

static bool isDSAddress(const Constant *C) {
  const GlobalValue *GV = dyn_cast<GlobalValue>(C);
  if (!GV)
    return false;
  unsigned AS = GV->getAddressSpace();
  return AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS;
}

/// Returns true if the function requires the implicit argument be passed
/// regardless of the function contents.
static bool funcRequiresImplicitArgPtr(const Function &F) {
  // Sanitizers require the hostcall buffer passed in the implicit arguments.
  return F.hasFnAttribute(Attribute::SanitizeAddress) ||
         F.hasFnAttribute(Attribute::SanitizeThread) ||
         F.hasFnAttribute(Attribute::SanitizeMemory) ||
         F.hasFnAttribute(Attribute::SanitizeHWAddress) ||
         F.hasFnAttribute(Attribute::SanitizeMemTag);
}

namespace {
class AMDGPUInformationCache : public InformationCache {
public:
  AMDGPUInformationCache(const Module &M, AnalysisGetter &AG,
                         BumpPtrAllocator &Allocator,
                         SetVector<Function *> *CGSCC, TargetMachine &TM)
      : InformationCache(M, AG, Allocator, CGSCC), TM(TM) {}
  TargetMachine &TM;

  enum ConstantStatus { DS_GLOBAL = 1 << 0, ADDR_SPACE_CAST = 1 << 1 };

  /// Check if the subtarget has aperture regs.
  bool hasApertureRegs(Function &F) {
    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
    return ST.hasApertureRegs();
  }

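  /// Get the flat workgroup size range for \p F, taking the
  /// "amdgpu-flat-work-group-size" attribute into account.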
  std::pair<unsigned, unsigned> getFlatWorkGroupSizes(const Function &F) {
    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
    return ST.getFlatWorkGroupSizes(F);
  }

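  /// Get the subtarget's absolute minimum and maximum flat workgroup sizes,
  /// independent of any function attribute.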
  std::pair<unsigned, unsigned>
  getMaximumFlatWorkGroupRange(const Function &F) {
    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
    return {ST.getMinFlatWorkGroupSize(), ST.getMaxFlatWorkGroupSize()};
  }

private:
  /// Check if the ConstantExpr \p CE requires the queue ptr attribute.
  static bool visitConstExpr(const ConstantExpr *CE) {
    if (CE->getOpcode() == Instruction::AddrSpaceCast) {
      unsigned SrcAS = CE->getOperand(0)->getType()->getPointerAddressSpace();
      return castRequiresQueuePtr(SrcAS);
    }
    return false;
  }

  /// Get the constant access bitmap for \p C.
  uint8_t getConstantAccess(const Constant *C) {
    auto It = ConstantStatus.find(C);
    if (It != ConstantStatus.end())
      return It->second;

    uint8_t Result = 0;
    if (isDSAddress(C))
      Result = DS_GLOBAL;

    if (const auto *CE = dyn_cast<ConstantExpr>(C))
      if (visitConstExpr(CE))
        Result |= ADDR_SPACE_CAST;

    for (const Use &U : C->operands()) {
      const auto *OpC = dyn_cast<Constant>(U);
      if (!OpC)
        continue;

      Result |= getConstantAccess(OpC);
    }
    return Result;
  }

public:
  /// Returns true if \p Fn needs the queue ptr attribute because of \p C.
  bool needsQueuePtr(const Constant *C, Function &Fn) {
    bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(Fn.getCallingConv());
    bool HasAperture = hasApertureRegs(Fn);

    // An entry function with aperture registers never needs the queue
    // pointer, so there is no need to explore the constant.
    if (!IsNonEntryFunc && HasAperture)
      return false;

    uint8_t Access = getConstantAccess(C);

    // We need to trap on DS globals in non-entry functions.
    if (IsNonEntryFunc && (Access & DS_GLOBAL))
      return true;

    return !HasAperture && (Access & ADDR_SPACE_CAST);
  }

private:
  /// Used to determine if the Constant needs the queue ptr attribute.
  DenseMap<const Constant *, uint8_t> ConstantStatus;
};

struct AAAMDAttributes
    : public StateWrapper<BitIntegerState<uint16_t, ALL_ARGUMENT_MASK, 0>,
                          AbstractAttribute> {
  using Base = StateWrapper<BitIntegerState<uint16_t, ALL_ARGUMENT_MASK, 0>,
                            AbstractAttribute>;

  AAAMDAttributes(const IRPosition &IRP, Attributor &A) : Base(IRP) {}

  /// Create an abstract attribute view for the position \p IRP.
  static AAAMDAttributes &createForPosition(const IRPosition &IRP,
                                            Attributor &A);

  /// See AbstractAttribute::getName().
  const std::string getName() const override { return "AAAMDAttributes"; }

  /// See AbstractAttribute::getIdAddr().
  const char *getIdAddr() const override { return &ID; }

  /// This function should return true if the type of the \p AA is
  /// AAAMDAttributes.
  static bool classof(const AbstractAttribute *AA) {
    return (AA->getIdAddr() == &ID);
  }

  /// Unique ID (due to the unique address)
  static const char ID;
};
const char AAAMDAttributes::ID = 0;

struct AAUniformWorkGroupSize
    : public StateWrapper<BooleanState, AbstractAttribute> {
  using Base = StateWrapper<BooleanState, AbstractAttribute>;
  AAUniformWorkGroupSize(const IRPosition &IRP, Attributor &A) : Base(IRP) {}

  /// Create an abstract attribute view for the position \p IRP.
  static AAUniformWorkGroupSize &createForPosition(const IRPosition &IRP,
                                                   Attributor &A);

  /// See AbstractAttribute::getName().
  const std::string getName() const override {
    return "AAUniformWorkGroupSize";
  }

  /// See AbstractAttribute::getIdAddr().
  const char *getIdAddr() const override { return &ID; }

  /// This function should return true if the type of the \p AA is
  /// AAUniformWorkGroupSize.
  static bool classof(const AbstractAttribute *AA) {
    return (AA->getIdAddr() == &ID);
  }

  /// Unique ID (due to the unique address)
  static const char ID;
};
const char AAUniformWorkGroupSize::ID = 0;

struct AAUniformWorkGroupSizeFunction : public AAUniformWorkGroupSize {
  AAUniformWorkGroupSizeFunction(const IRPosition &IRP, Attributor &A)
      : AAUniformWorkGroupSize(IRP, A) {}

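  // Kernels seed the propagation: a kernel's "uniform-work-group-size"
  // attribute is read and fixed here, while non-kernel functions keep the
  // optimistic default and are clamped by their callers in updateImpl.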
  void initialize(Attributor &A) override {
    Function *F = getAssociatedFunction();
    CallingConv::ID CC = F->getCallingConv();

    if (CC != CallingConv::AMDGPU_KERNEL)
      return;

    bool InitialValue = false;
    if (F->hasFnAttribute("uniform-work-group-size"))
      InitialValue = F->getFnAttribute("uniform-work-group-size")
                         .getValueAsString()
                         .equals("true");

    if (InitialValue)
      indicateOptimisticFixpoint();
    else
      indicatePessimisticFixpoint();
  }

  ChangeStatus updateImpl(Attributor &A) override {
    ChangeStatus Change = ChangeStatus::UNCHANGED;

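    // This function can only keep uniform-work-group-size=true if every
    // caller has it, so clamp our state against each call site's caller.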
    auto CheckCallSite = [&](AbstractCallSite CS) {
      Function *Caller = CS.getInstruction()->getFunction();
      LLVM_DEBUG(dbgs() << "[AAUniformWorkGroupSize] Call " << Caller->getName()
                        << "->" << getAssociatedFunction()->getName() << "\n");

      const auto &CallerInfo = A.getAAFor<AAUniformWorkGroupSize>(
          *this, IRPosition::function(*Caller), DepClassTy::REQUIRED);

      Change = Change | clampStateAndIndicateChange(this->getState(),
                                                    CallerInfo.getState());

      return true;
    };

    bool AllCallSitesKnown = true;
    if (!A.checkForAllCallSites(CheckCallSite, *this, true, AllCallSitesKnown))
      return indicatePessimisticFixpoint();

    return Change;
  }

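  // Write the deduced value back as a string attribute, replacing any
  // existing "uniform-work-group-size" on the function.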
  ChangeStatus manifest(Attributor &A) override {
    SmallVector<Attribute, 8> AttrList;
    LLVMContext &Ctx = getAssociatedFunction()->getContext();

    AttrList.push_back(Attribute::get(Ctx, "uniform-work-group-size",
                                      getAssumed() ? "true" : "false"));
    return IRAttributeManifest::manifestAttrs(A, getIRPosition(), AttrList,
                                              /* ForceReplace */ true);
  }

  bool isValidState() const override {
    // This state is always valid, even when the assumed value is false.
    return true;
  }

  const std::string getAsStr() const override {
    return "AMDWorkGroupSize[" + std::to_string(getAssumed()) + "]";
  }

  /// See AbstractAttribute::trackStatistics()
  void trackStatistics() const override {}
};

AAUniformWorkGroupSize &
AAUniformWorkGroupSize::createForPosition(const IRPosition &IRP,
                                          Attributor &A) {
  if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
    return *new (A.Allocator) AAUniformWorkGroupSizeFunction(IRP, A);
  llvm_unreachable(
      "AAUniformWorkGroupSize is only valid for function position");
}

struct AAAMDAttributesFunction : public AAAMDAttributes {
  AAAMDAttributesFunction(const IRPosition &IRP, Attributor &A)
      : AAAMDAttributes(IRP, A) {}

  void initialize(Attributor &A) override {
    Function *F = getAssociatedFunction();

    // If the function requires the implicit arg pointer due to sanitizers,
    // assume it is needed even if explicitly marked as not requiring it.
    const bool NeedsImplicit = funcRequiresImplicitArgPtr(*F);
    if (NeedsImplicit)
      removeAssumedBits(IMPLICIT_ARG_PTR);

    for (auto Attr : ImplicitAttrs) {
      if (NeedsImplicit && Attr.first == IMPLICIT_ARG_PTR)
        continue;

      if (F->hasFnAttribute(Attr.second))
        addKnownBits(Attr.first);
    }

    if (F->isDeclaration())
      return;

    // Ignore functions with graphics calling conventions; these are currently
    // not allowed to have kernel arguments.
    if (AMDGPU::isGraphics(F->getCallingConv())) {
      indicatePessimisticFixpoint();
      return;
    }
  }

  ChangeStatus updateImpl(Attributor &A) override {
    Function *F = getAssociatedFunction();
    // The current assumed state, used to detect a change at the end.
    auto OrigAssumed = getAssumed();

    // Check for intrinsics and propagate attributes from callees.
    const AACallEdges &AAEdges = A.getAAFor<AACallEdges>(
        *this, this->getIRPosition(), DepClassTy::REQUIRED);
    if (AAEdges.hasNonAsmUnknownCallee())
      return indicatePessimisticFixpoint();

    bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(F->getCallingConv());
    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());

    bool NeedsQueuePtr = false;

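    // Walk all potential callees: merge state from ordinary functions, and
    // clear the assumed "not needed" bits for inputs any intrinsic reads.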
    for (Function *Callee : AAEdges.getOptimisticEdges()) {
      Intrinsic::ID IID = Callee->getIntrinsicID();
      if (IID == Intrinsic::not_intrinsic) {
        const AAAMDAttributes &AAAMD = A.getAAFor<AAAMDAttributes>(
            *this, IRPosition::function(*Callee), DepClassTy::REQUIRED);
        *this &= AAAMD;
        continue;
      }

      bool NonKernelOnly = false;
      ImplicitArgumentMask AttrMask =
          intrinsicToAttrMask(IID, NonKernelOnly, NeedsQueuePtr);
      if (AttrMask != NOT_IMPLICIT_INPUT) {
        if (IsNonEntryFunc || !NonKernelOnly)
          removeAssumedBits(AttrMask);
      }
    }

    // If we found that we need amdgpu-queue-ptr, there is nothing else to do.
    if (NeedsQueuePtr) {
      removeAssumedBits(QUEUE_PTR);
      return getAssumed() != OrigAssumed ? ChangeStatus::CHANGED
                                         : ChangeStatus::UNCHANGED;
    }

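    // Helper for checkForAllInstructions: flag any address space cast from a
    // private or local pointer, which requires the queue pointer.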
    auto CheckAddrSpaceCasts = [&](Instruction &I) {
      unsigned SrcAS = static_cast<AddrSpaceCastInst &>(I).getSrcAddressSpace();
      if (castRequiresQueuePtr(SrcAS)) {
        NeedsQueuePtr = true;
        return false;
      }
      return true;
    };

    bool HasApertureRegs = InfoCache.hasApertureRegs(*F);

    // `checkForAllInstructions` is much cheaper than walking all instructions
    // by hand, so try it first.

    // amdgpu-queue-ptr is not needed if aperture regs are present.
    if (!HasApertureRegs) {
      bool UsedAssumedInformation = false;
      A.checkForAllInstructions(CheckAddrSpaceCasts, *this,
                                {Instruction::AddrSpaceCast},
                                UsedAssumedInformation);
    }

    // If we found that we need amdgpu-queue-ptr, there is nothing else to do.
    if (NeedsQueuePtr) {
      removeAssumedBits(QUEUE_PTR);
      return getAssumed() != OrigAssumed ? ChangeStatus::CHANGED
                                         : ChangeStatus::UNCHANGED;
    }

    if (!IsNonEntryFunc && HasApertureRegs) {
      return getAssumed() != OrigAssumed ? ChangeStatus::CHANGED
                                         : ChangeStatus::UNCHANGED;
    }

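    // As a last resort, scan every constant operand in the body for LDS
    // globals or address space casts that force the queue pointer.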
    for (BasicBlock &BB : *F) {
      for (Instruction &I : BB) {
        for (const Use &U : I.operands()) {
          if (const auto *C = dyn_cast<Constant>(U)) {
            if (InfoCache.needsQueuePtr(C, *F)) {
              removeAssumedBits(QUEUE_PTR);
              return getAssumed() != OrigAssumed ? ChangeStatus::CHANGED
                                                 : ChangeStatus::UNCHANGED;
            }
          }
        }
      }
    }

    return getAssumed() != OrigAssumed ? ChangeStatus::CHANGED
                                       : ChangeStatus::UNCHANGED;
  }

  ChangeStatus manifest(Attributor &A) override {
    SmallVector<Attribute, 8> AttrList;
    LLVMContext &Ctx = getAssociatedFunction()->getContext();

    for (auto Attr : ImplicitAttrs) {
      if (isKnown(Attr.first))
        AttrList.push_back(Attribute::get(Ctx, Attr.second));
    }

    return IRAttributeManifest::manifestAttrs(A, getIRPosition(), AttrList,
                                              /* ForceReplace */ true);
  }

  const std::string getAsStr() const override {
    std::string Str;
    raw_string_ostream OS(Str);
    OS << "AMDInfo[";
    for (auto Attr : ImplicitAttrs)
      OS << ' ' << Attr.second;
    OS << " ]";
    return OS.str();
  }

  /// See AbstractAttribute::trackStatistics()
  void trackStatistics() const override {}
};

AAAMDAttributes &AAAMDAttributes::createForPosition(const IRPosition &IRP,
                                                    Attributor &A) {
  if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
    return *new (A.Allocator) AAAMDAttributesFunction(IRP, A);
  llvm_unreachable("AAAMDAttributes is only valid for function position");
}

/// Propagate the amdgpu-flat-work-group-size attribute.
struct AAAMDFlatWorkGroupSize
    : public StateWrapper<IntegerRangeState, AbstractAttribute, uint32_t> {
  using Base = StateWrapper<IntegerRangeState, AbstractAttribute, uint32_t>;
  AAAMDFlatWorkGroupSize(const IRPosition &IRP, Attributor &A)
      : Base(IRP, 32) {}

  /// See AbstractAttribute::getState(...).
  IntegerRangeState &getState() override { return *this; }
  const IntegerRangeState &getState() const override { return *this; }

  void initialize(Attributor &A) override {
    Function *F = getAssociatedFunction();
    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
    unsigned MinGroupSize, MaxGroupSize;
    std::tie(MinGroupSize, MaxGroupSize) = InfoCache.getFlatWorkGroupSizes(*F);
    intersectKnown(
        ConstantRange(APInt(32, MinGroupSize), APInt(32, MaxGroupSize + 1)));

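    // Entry points are the sources of the range: a kernel keeps the value
    // derived from its attribute/subtarget and is not refined any further.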
    if (AMDGPU::isEntryFunctionCC(F->getCallingConv()))
      indicatePessimisticFixpoint();
  }

  ChangeStatus updateImpl(Attributor &A) override {
    ChangeStatus Change = ChangeStatus::UNCHANGED;

    auto CheckCallSite = [&](AbstractCallSite CS) {
      Function *Caller = CS.getInstruction()->getFunction();
      LLVM_DEBUG(dbgs() << "[AAAMDFlatWorkGroupSize] Call " << Caller->getName()
                        << "->" << getAssociatedFunction()->getName() << '\n');

      const auto &CallerInfo = A.getAAFor<AAAMDFlatWorkGroupSize>(
          *this, IRPosition::function(*Caller), DepClassTy::REQUIRED);

      Change |=
          clampStateAndIndicateChange(this->getState(), CallerInfo.getState());

      return true;
    };

    bool AllCallSitesKnown = true;
    if (!A.checkForAllCallSites(CheckCallSite, *this, true, AllCallSitesKnown))
      return indicatePessimisticFixpoint();

    return Change;
  }

  ChangeStatus manifest(Attributor &A) override {
    SmallVector<Attribute, 8> AttrList;
    Function *F = getAssociatedFunction();
    LLVMContext &Ctx = F->getContext();

    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
    unsigned Min, Max;
    std::tie(Min, Max) = InfoCache.getMaximumFlatWorkGroupRange(*F);

    // Don't add the attribute if it's the implied default.
    if (getAssumed().getLower() == Min && getAssumed().getUpper() - 1 == Max)
      return ChangeStatus::UNCHANGED;

    SmallString<10> Buffer;
    raw_svector_ostream OS(Buffer);
    OS << getAssumed().getLower() << ',' << getAssumed().getUpper() - 1;

    AttrList.push_back(
        Attribute::get(Ctx, "amdgpu-flat-work-group-size", OS.str()));
    return IRAttributeManifest::manifestAttrs(A, getIRPosition(), AttrList,
                                              /* ForceReplace */ true);
  }

  const std::string getAsStr() const override {
    std::string Str;
    raw_string_ostream OS(Str);
    OS << "AMDFlatWorkGroupSize[";
    OS << getAssumed().getLower() << ',' << getAssumed().getUpper() - 1;
    OS << ']';
    return OS.str();
  }

  /// See AbstractAttribute::trackStatistics()
  void trackStatistics() const override {}

  /// Create an abstract attribute view for the position \p IRP.
  static AAAMDFlatWorkGroupSize &createForPosition(const IRPosition &IRP,
                                                   Attributor &A);

  /// See AbstractAttribute::getName()
  const std::string getName() const override {
    return "AAAMDFlatWorkGroupSize";
  }

  /// See AbstractAttribute::getIdAddr()
  const char *getIdAddr() const override { return &ID; }

  /// This function should return true if the type of the \p AA is
  /// AAAMDFlatWorkGroupSize.
  static bool classof(const AbstractAttribute *AA) {
    return (AA->getIdAddr() == &ID);
  }

  /// Unique ID (due to the unique address)
  static const char ID;
};

const char AAAMDFlatWorkGroupSize::ID = 0;

AAAMDFlatWorkGroupSize &
AAAMDFlatWorkGroupSize::createForPosition(const IRPosition &IRP,
                                          Attributor &A) {
  if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
    return *new (A.Allocator) AAAMDFlatWorkGroupSize(IRP, A);
  llvm_unreachable(
      "AAAMDFlatWorkGroupSize is only valid for function position");
}

class AMDGPUAttributor : public ModulePass {
public:
  AMDGPUAttributor() : ModulePass(ID) {}

  /// doInitialization - Virtual method overridden by subclasses to do
  /// any necessary initialization before any pass is run.
  bool doInitialization(Module &) override {
    auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
    if (!TPC)
      report_fatal_error("TargetMachine is required");

    TM = &TPC->getTM<TargetMachine>();
    return false;
  }

  bool runOnModule(Module &M) override {
    SetVector<Function *> Functions;
    AnalysisGetter AG;
    for (Function &F : M) {
      if (!F.isIntrinsic())
        Functions.insert(&F);
    }

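    // Set up the Attributor, restricting it to the abstract attributes this
    // pass actually queries.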
    CallGraphUpdater CGUpdater;
    BumpPtrAllocator Allocator;
    AMDGPUInformationCache InfoCache(M, AG, Allocator, nullptr, *TM);
    DenseSet<const char *> Allowed(
        {&AAAMDAttributes::ID, &AAUniformWorkGroupSize::ID,
         &AAAMDFlatWorkGroupSize::ID, &AACallEdges::ID});

    Attributor A(Functions, InfoCache, CGUpdater, &Allowed);

    for (Function &F : M) {
      if (!F.isIntrinsic()) {
        A.getOrCreateAAFor<AAAMDAttributes>(IRPosition::function(F));
        A.getOrCreateAAFor<AAUniformWorkGroupSize>(IRPosition::function(F));
        if (!AMDGPU::isEntryFunctionCC(F.getCallingConv())) {
          A.getOrCreateAAFor<AAAMDFlatWorkGroupSize>(IRPosition::function(F));
        }
      }
    }

    ChangeStatus Change = A.run();
    return Change == ChangeStatus::CHANGED;
  }

  StringRef getPassName() const override { return "AMDGPU Attributor"; }
  TargetMachine *TM;
  static char ID;
};
} // namespace

char AMDGPUAttributor::ID = 0;

Pass *llvm::createAMDGPUAttributorPass() { return new AMDGPUAttributor(); }
INITIALIZE_PASS(AMDGPUAttributor, DEBUG_TYPE, "AMDGPU Attributor", false, false)