LLVM 22.0.0git
NVPTXSubtarget.cpp
Go to the documentation of this file.
1//===- NVPTXSubtarget.cpp - NVPTX Subtarget Information -------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the NVPTX specific subclass of TargetSubtarget.
10//
11//===----------------------------------------------------------------------===//
12
13#include "NVPTXSubtarget.h"
15#include "NVPTXTargetMachine.h"
18
19using namespace llvm;
20
21#define DEBUG_TYPE "nvptx-subtarget"
22
23#define GET_SUBTARGETINFO_ENUM
24#define GET_SUBTARGETINFO_TARGET_DESC
25#define GET_SUBTARGETINFO_CTOR
26#include "NVPTXGenSubtargetInfo.inc"
27
28static cl::opt<bool>
29 NoF16Math("nvptx-no-f16-math", cl::Hidden,
30 cl::desc("NVPTX Specific: Disable generation of f16 math ops."),
31 cl::init(false));
32
33static cl::opt<bool> NoF32x2("nvptx-no-f32x2", cl::Hidden,
34 cl::desc("NVPTX Specific: Disable generation of "
35 "f32x2 instructions and registers."),
36 cl::init(false));
37
// A FullSmVersion packs an SM number together with an optional suffix into a
// single integer: the SM number is scaled by 10 and the suffix occupies the
// last decimal digit (0 = no suffix, 2 = 'f' suffix, 3 = 'a' suffix).

// Encodes the un-suffixed target, e.g. SM(90) == 900.
static constexpr unsigned SM(unsigned Version) { return 10 * Version; }

// Encodes the 'f'-suffixed target, e.g. SMF(100) == 1002.
static constexpr unsigned SMF(unsigned Version) { return 10 * Version + 2; }

// Encodes the 'a'-suffixed target, e.g. SMA(90) == 903.
static constexpr unsigned SMA(unsigned Version) { return 10 * Version + 3; }
43
44// Pin the vtable to this file.
45void NVPTXSubtarget::anchor() {}
46
47// Returns the minimum PTX version required for a given SM target.
48// This must be kept in sync with the "Supported Targets" column of the
49// "PTX Release History" table in the PTX ISA documentation:
50// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#release-notes-ptx-release-history
51//
52// Note: LLVM's minimum supported PTX version is 3.2 (see FeaturePTX in
53// NVPTX.td), so older SMs that supported earlier PTX versions instead use 3.2
54// as their effective minimum.
55static unsigned getMinPTXVersionForSM(unsigned FullSmVersion) {
56 switch (FullSmVersion) {
57 case SM(20):
58 case SM(21):
59 case SM(30):
60 case SM(35):
61 return 32;
62 case SM(32):
63 case SM(50):
64 return 40;
65 case SM(37):
66 case SM(52):
67 return 41;
68 case SM(53):
69 return 42;
70 case SM(60):
71 case SM(61):
72 case SM(62):
73 return 50;
74 case SM(70):
75 return 60;
76 case SM(72):
77 return 61;
78 case SM(75):
79 return 63;
80 case SM(80):
81 return 70;
82 case SM(86):
83 return 71;
84 case SM(87):
85 return 74;
86 case SM(89):
87 case SM(90):
88 return 78;
89 case SMA(90):
90 return 80;
91 case SM(100):
92 case SMA(100):
93 case SM(101):
94 case SMA(101):
95 return 86;
96 case SM(120):
97 case SMA(120):
98 return 87;
99 case SMF(100):
100 case SMF(101):
101 case SM(103):
102 case SMF(103):
103 case SMA(103):
104 case SMF(120):
105 case SM(121):
106 case SMF(121):
107 case SMA(121):
108 return 88;
109 case SM(88):
110 case SM(110):
111 case SMF(110):
112 case SMA(110):
113 return 90;
114 default:
115 llvm_unreachable("Unknown SM version");
116 }
117}
118
120 StringRef FS) {
121 TargetName = std::string(CPU);
122
124
125 // Re-map SM version numbers, SmVersion carries the regular SMs which do
126 // have relative order, while FullSmVersion allows distinguishing sm_90 from
127 // sm_90a, which would *not* be a subset of sm_91.
128 SmVersion = getSmVersion();
129
130 unsigned MinPTX = getMinPTXVersionForSM(FullSmVersion);
131
132 if (PTXVersion == 0) {
133 // User didn't request a specific PTX version; use the minimum for this SM.
134 PTXVersion = MinPTX;
135 } else if (PTXVersion < MinPTX) {
136 // User explicitly requested an insufficient PTX version.
138 "PTX version {0}.{1} does not support target '{2}'. "
139 "Minimum required PTX version is {3}.{4}. "
140 "Either remove the PTX version to use the default, "
141 "or increase it to at least {3}.{4}.",
142 PTXVersion / 10, PTXVersion % 10, CPU, MinPTX / 10, MinPTX % 10));
143 }
144
145 return *this;
146}
147
148NVPTXSubtarget::NVPTXSubtarget(const Triple &TT, const std::string &CPU,
149 const std::string &FS,
150 const NVPTXTargetMachine &TM)
151 : NVPTXGenSubtargetInfo(TT, CPU, /*TuneCPU*/ CPU, FS), PTXVersion(0),
152 FullSmVersion(200), SmVersion(getSmVersion()),
153 InstrInfo(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM, *this) {
154 TSInfo = std::make_unique<NVPTXSelectionDAGInfo>();
155}
156
158
160 return TSInfo.get();
161}
162
163bool NVPTXSubtarget::hasPTXWithFamilySMs(unsigned PTXVersion,
164 ArrayRef<unsigned> SMVersions) const {
165 unsigned PTXVer = getPTXVersion();
166 if (!hasFamilySpecificFeatures() || PTXVer < PTXVersion)
167 return false;
168
169 unsigned SMVer = getSmVersion();
170 return llvm::any_of(SMVersions, [&](unsigned SM) {
171 // sm_101 is a different family, never group it with sm_10x.
172 if (SMVer == 101 || SM == 101)
173 return SMVer == SM &&
174 // PTX 9.0 and later renamed sm_101 to sm_110, so sm_101 is not
175 // supported.
176 !(PTXVer >= 90 && SMVer == 101);
177
178 return getSmFamilyVersion() == SM / 10 && SMVer >= SM;
179 });
180}
181
182bool NVPTXSubtarget::hasPTXWithAccelSMs(unsigned PTXVersion,
183 ArrayRef<unsigned> SMVersions) const {
184 unsigned PTXVer = getPTXVersion();
185 if (!hasArchAccelFeatures() || PTXVer < PTXVersion)
186 return false;
187
188 unsigned SMVer = getSmVersion();
189 return llvm::any_of(SMVersions, [&](unsigned SM) {
190 return SMVer == SM &&
191 // PTX 9.0 and later renamed sm_101 to sm_110, so sm_101 is not
192 // supported.
193 !(PTXVer >= 90 && SMVer == 101);
194 });
195}
196
198 return hasFP16Math() && NoF16Math == false;
199}
200
202 return SmVersion >= 100 && PTXVersion >= 86 && !NoF32x2;
203}
204
206 if (!hasBF16Math())
207 return false;
208
209 switch (Opcode) {
210 // Several BF16 instructions are available on sm_90 only.
211 case ISD::FADD:
212 case ISD::FMUL:
213 case ISD::FSUB:
214 case ISD::SELECT:
215 case ISD::SELECT_CC:
216 case ISD::SETCC:
217 case ISD::FEXP2:
218 case ISD::FCEIL:
219 case ISD::FFLOOR:
220 case ISD::FNEARBYINT:
221 case ISD::FRINT:
222 case ISD::FROUNDEVEN:
223 case ISD::FTRUNC:
224 return getSmVersion() >= 90 && getPTXVersion() >= 78;
225 // Several BF16 instructions are available on sm_80 only.
226 case ISD::FMINNUM:
227 case ISD::FMAXNUM:
230 case ISD::FMAXIMUM:
231 case ISD::FMINIMUM:
232 return getSmVersion() >= 80 && getPTXVersion() >= 70;
233 }
234 return true;
235}
236
238 std::string const &FailureMessage) const {
239 if (hasClusters())
240 return;
241
243 "NVPTX SM architecture \"{}\" and PTX version \"{}\" do not support {}. "
244 "Requires SM >= 90 and PTX >= 78.",
245 getFullSmVersion(), PTXVersion, FailureMessage));
246}
static constexpr unsigned SMF(unsigned Version)
static constexpr unsigned SMA(unsigned Version)
static cl::opt< bool > NoF32x2("nvptx-no-f32x2", cl::Hidden, cl::desc("NVPTX Specific: Disable generation of " "f32x2 instructions and registers."), cl::init(false))
static cl::opt< bool > NoF16Math("nvptx-no-f16-math", cl::Hidden, cl::desc("NVPTX Specific: Disable generation of f16 math ops."), cl::init(false))
static unsigned getMinPTXVersionForSM(unsigned FullSmVersion)
static constexpr unsigned SM(unsigned Version)
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
void failIfClustersUnsupported(std::string const &FailureMessage) const
bool hasPTXWithAccelSMs(unsigned PTXVersion, ArrayRef< unsigned > SMVersions) const
std::string getTargetName() const
void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS)
unsigned getPTXVersion() const
~NVPTXSubtarget() override
bool hasNativeBF16Support(int Opcode) const
unsigned int getFullSmVersion() const
unsigned int getSmVersion() const
bool hasFamilySpecificFeatures() const
bool hasPTXWithFamilySMs(unsigned PTXVersion, ArrayRef< unsigned > SMVersions) const
bool hasF32x2Instructions() const
unsigned int getSmFamilyVersion() const
NVPTXSubtarget(const Triple &TT, const std::string &CPU, const std::string &FS, const NVPTXTargetMachine &TM)
This constructor initializes the data members to match that of the specified module.
bool hasArchAccelFeatures() const
NVPTXSubtarget & initializeSubtargetDependencies(StringRef CPU, StringRef FS)
const SelectionDAGTargetInfo * getSelectionDAGInfo() const override
Targets can subclass this to parameterize the SelectionDAG lowering and instruction selection process...
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
Triple - Helper class for working with autoconf configuration names.
Definition Triple.h:47
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition ISDOpcodes.h:818
@ FADD
Simple binary floating point operators.
Definition ISDOpcodes.h:417
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition ISDOpcodes.h:795
@ FMINNUM_IEEE
FMINNUM_IEEE/FMAXNUM_IEEE - Perform floating-point minimumNumber or maximumNumber on two values,...
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition ISDOpcodes.h:810
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum maximum on two values, following IEEE-754 definition...
@ FMINIMUM
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0....
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1744
auto formatv(bool Validate, const char *Fmt, Ts &&...Vals)
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:167
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
Definition Error.cpp:180