LLVM 23.0.0git
NVVMIntrRange.cpp
Go to the documentation of this file.
1//===- NVVMIntrRange.cpp - Set range attributes for NVVM intrinsics -------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass adds appropriate range attributes for calls to NVVM
10// intrinsics that return a limited range of values.
11//
12//===----------------------------------------------------------------------===//
13
#include "NVPTX.h"
#include "NVVMProperties.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsNVPTX.h"
#include "llvm/IR/PassManager.h"
#include <algorithm>
#include <cstdint>
#include <limits>
23
24using namespace llvm;
25
26#define DEBUG_TYPE "nvvm-intr-range"
27
28namespace {
29class NVVMIntrRange : public FunctionPass {
30public:
31 static char ID;
32 NVVMIntrRange() : FunctionPass(ID) {}
33
34 bool runOnFunction(Function &) override;
35};
36} // namespace
37
38FunctionPass *llvm::createNVVMIntrRangePass() { return new NVVMIntrRange(); }
39
40char NVVMIntrRange::ID = 0;
41INITIALIZE_PASS(NVVMIntrRange, "nvvm-intr-range",
42 "Add !range metadata to NVVM intrinsics.", false, false)
43
44// Adds the passed-in [Low,High) range information as metadata to the
45// passed-in call instruction.
46static bool addRangeAttr(uint64_t Low, uint64_t High, IntrinsicInst *II) {
47 if (II->getMetadata(LLVMContext::MD_range))
48 return false;
49
50 const uint64_t BitWidth = II->getType()->getIntegerBitWidth();
52
53 if (auto CurrentRange = II->getRange())
54 Range = Range.intersectWith(CurrentRange.value());
55
56 II->addRangeRetAttr(Range);
57 return true;
58}
59
61 struct Vector3 {
62 unsigned X, Y, Z;
63 };
64
65 // All these annotations are only valid for kernel functions.
66 if (!isKernelFunction(F))
67 return false;
68
69 auto ReqNTID = getReqNTID(F);
70 const auto OverallMaxNTID = getOverallMaxNTID(F);
71 auto ClusterDim = getClusterDim(F);
72 const auto MaxClusterRank = getMaxClusterRank(F);
73
74 // If this function lacks any range information, do nothing.
75 if (ReqNTID.empty() && !OverallMaxNTID && ClusterDim.empty() &&
76 !MaxClusterRank)
77 return false;
78
79 const unsigned MaxNTID =
80 OverallMaxNTID.value_or(std::numeric_limits<unsigned>::max());
81
82 // When reqntid is specified, block dimensions are exact compile-time
83 // constants. Otherwise, use maxntid (capped at hardware limits) as upper
84 // bounds.
85 Vector3 MinBlockDim, MaxBlockDim;
86 if (!ReqNTID.empty()) {
87 ReqNTID.resize(3, 1);
88 MinBlockDim = MaxBlockDim = {ReqNTID[0], ReqNTID[1], ReqNTID[2]};
89 } else {
90 MinBlockDim = {1, 1, 1};
91 MaxBlockDim = {std::min(1024u, MaxNTID), std::min(1024u, MaxNTID),
92 std::min(64u, MaxNTID)};
93 }
94
95 const bool HasClusterInfo = !ClusterDim.empty() || MaxClusterRank;
96
97 // When cluster_dim is specified, cluster dimensions are exact compile-time
98 // constants. Otherwise, use maxclusterrank (capped at hardware limits) as
99 // upper bounds.
100 Vector3 MinClusterDim, MaxClusterDim;
101 uint64_t MinClusterSize, MaxClusterSize;
102 if (!ClusterDim.empty()) {
103 ClusterDim.resize(3, 1);
104 MinClusterDim =
105 MaxClusterDim = {ClusterDim[0], ClusterDim[1], ClusterDim[2]};
106 MinClusterSize = MaxClusterSize =
107 ClusterDim[0] * ClusterDim[1] * ClusterDim[2];
108 } else {
109 const unsigned MaxNctaPerCluster =
110 MaxClusterRank.value_or(std::numeric_limits<unsigned>::max());
111 MinClusterDim = {1, 1, 1};
112 MaxClusterDim = {std::min(0x7fffffffu, MaxNctaPerCluster),
113 std::min(0xffffu, MaxNctaPerCluster),
114 std::min(0xffffu, MaxNctaPerCluster)};
115 MinClusterSize = 1;
116 MaxClusterSize = MaxNctaPerCluster;
117 }
118
119 const auto ProcessIntrinsic = [&](IntrinsicInst *II) -> bool {
120 switch (II->getIntrinsicID()) {
121 // Index within block
122 case Intrinsic::nvvm_read_ptx_sreg_tid_x:
123 return addRangeAttr(0, MaxBlockDim.X, II);
124 case Intrinsic::nvvm_read_ptx_sreg_tid_y:
125 return addRangeAttr(0, MaxBlockDim.Y, II);
126 case Intrinsic::nvvm_read_ptx_sreg_tid_z:
127 return addRangeAttr(0, MaxBlockDim.Z, II);
128
129 // Block size: use single-value range when reqntid is specified;
130 // InstCombine will fold these to constants later.
131 case Intrinsic::nvvm_read_ptx_sreg_ntid_x:
132 return addRangeAttr(MinBlockDim.X, MaxBlockDim.X + 1, II);
133 case Intrinsic::nvvm_read_ptx_sreg_ntid_y:
134 return addRangeAttr(MinBlockDim.Y, MaxBlockDim.Y + 1, II);
135 case Intrinsic::nvvm_read_ptx_sreg_ntid_z:
136 return addRangeAttr(MinBlockDim.Z, MaxBlockDim.Z + 1, II);
137
138 // Cluster size: use single-value ranges when cluster_dim is specified;
139 // InstCombine will fold cluster_nctaid.* / cluster_nctarank to constants
140 // later.
141 case Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_x:
142 return addRangeAttr(0, MaxClusterDim.X, II);
143 case Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_y:
144 return addRangeAttr(0, MaxClusterDim.Y, II);
145 case Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_z:
146 return addRangeAttr(0, MaxClusterDim.Z, II);
147 case Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_x:
148 return addRangeAttr(MinClusterDim.X, MaxClusterDim.X + 1, II);
149 case Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_y:
150 return addRangeAttr(MinClusterDim.Y, MaxClusterDim.Y + 1, II);
151 case Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_z:
152 return addRangeAttr(MinClusterDim.Z, MaxClusterDim.Z + 1, II);
153
154 case Intrinsic::nvvm_read_ptx_sreg_cluster_ctarank:
155 return HasClusterInfo && addRangeAttr(0, MaxClusterSize, II);
156 case Intrinsic::nvvm_read_ptx_sreg_cluster_nctarank:
157 return HasClusterInfo &&
158 addRangeAttr(MinClusterSize, MaxClusterSize + 1, II);
159 default:
160 return false;
161 }
162 };
163
164 // Go through the calls in this function.
165 bool Changed = false;
166 for (Instruction &I : instructions(F))
168 Changed |= ProcessIntrinsic(II);
169
170 return Changed;
171}
172
173bool NVVMIntrRange::runOnFunction(Function &F) { return runNVVMIntrRange(F); }
174
Expand Atomic instructions
#define X(NUM, ENUM, NAME)
Definition ELF.h:853
static bool runOnFunction(Function &F, bool PostInlining)
This header defines various interfaces for pass management in LLVM.
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
uint64_t High
static bool runNVVMIntrRange(Function &F)
uint64_t IntrinsicInst * II
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition PassSupport.h:56
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
Class for arbitrary precision integers.
Definition APInt.h:78
This class represents a range of values.
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
A wrapper class for inspecting calls to intrinsic functions.
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition Analysis.h:115
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
constexpr bool empty() const
Check if the string is empty.
Definition StringRef.h:141
Changed
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
This is an optimization pass for GlobalISel generic memory operations.
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
Definition Threading.h:280
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
SmallVector< unsigned, 3 > getReqNTID(const Function &F)
std::optional< unsigned > getMaxClusterRank(const Function &F)
constexpr unsigned BitWidth
bool isKernelFunction(const Function &F)
std::optional< uint64_t > getOverallMaxNTID(const Function &F)
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
FunctionPass * createNVVMIntrRangePass()
SmallVector< unsigned, 3 > getClusterDim(const Function &F)
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)