clang  9.0.0
AMDGPU.cpp
Go to the documentation of this file.
1 //===--- AMDGPU.cpp - Implement AMDGPU target feature support -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements AMDGPU TargetInfo objects.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "AMDGPU.h"
14 #include "clang/Basic/Builtins.h"
19 #include "llvm/ADT/StringSwitch.h"
20 
21 using namespace clang;
22 using namespace clang::targets;
23 
24 namespace clang {
25 namespace targets {
26 
27 // If you edit the data layout description strings below, make sure you also
28 // update getPointerWidthV().
29 
// LLVM data layout description string for R600 targets. Read with the LLVM
// data layout syntax: "e" = little-endian, "p:32:32" = 32-bit pointers with
// 32-bit alignment, "i64:64" / "vN:M" = alignments for i64 and vector types,
// "n32:64" = native integer widths, "S32" = stack alignment, "A5" = allocas
// live in address space 5.
30 static const char *const DataLayoutStringR600 =
31  "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
32  "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5";
33 
// LLVM data layout description string for AMDGCN targets. Differs from the
// R600 string in its pointer specifications: the default address space and
// address spaces 1 and 4 use 64-bit pointers, address spaces 2, 3, 5 and 6 use
// 32-bit pointers, and "ni:7" marks address space 7 as non-integral.
34 static const char *const DataLayoutStringAMDGCN =
35  "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32"
36  "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
37  "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"
38  "-ni:7";
39 
// Language-to-target address space table in which the language's Default
// address space maps to the target's Generic address space. Each entry's
// trailing comment names the language address space that the slot stands for.
// setAddressSpaceMap() selects this table when DefaultIsPrivate is false.
40 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = {
41  Generic, // Default
42  Global, // opencl_global
43  Local, // opencl_local
44  Constant, // opencl_constant
45  Private, // opencl_private
46  Generic, // opencl_generic
47  Global, // cuda_device
48  Constant, // cuda_constant
49  Local // cuda_shared
50 };
51 
// Language-to-target address space table in which the language's Default
// address space maps to the target's Private address space. All other slots
// are identical to AMDGPUDefIsGenMap. setAddressSpaceMap() selects this table
// when DefaultIsPrivate is true.
52 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsPrivMap = {
53  Private, // Default
54  Global, // opencl_global
55  Local, // opencl_local
56  Constant, // opencl_constant
57  Private, // opencl_private
58  Generic, // opencl_generic
59  Global, // cuda_device
60  Constant, // cuda_constant
61  Local // cuda_shared
62 };
63 } // namespace targets
64 } // namespace clang
65 
// Table of AMDGPU builtins, generated by including BuiltinsAMDGPU.def with the
// two macros below. Each macro expands to one initializer whose first field is
// the stringized builtin identifier (#ID). BUILTIN entries leave the last
// field null; TARGET_BUILTIN entries store the FEATURE string there instead.
66 const Builtin::Info AMDGPUTargetInfo::BuiltinInfo[] = {
67 #define BUILTIN(ID, TYPE, ATTRS) \
68  {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr},
69 #define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) \
70  {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, FEATURE},
71 #include "clang/Basic/BuiltinsAMDGPU.def"
72 };
73 
// Register names exposed through getGCCRegNames(): "v0".."v255", then
// "s0".."s127", followed by the named registers exec, vcc, scc, m0 and
// flat_scratch together with the _lo/_hi halves of exec, vcc and flat_scratch.
74 const char *const AMDGPUTargetInfo::GCCRegNames[] = {
75  "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8",
76  "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
77  "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26",
78  "v27", "v28", "v29", "v30", "v31", "v32", "v33", "v34", "v35",
79  "v36", "v37", "v38", "v39", "v40", "v41", "v42", "v43", "v44",
80  "v45", "v46", "v47", "v48", "v49", "v50", "v51", "v52", "v53",
81  "v54", "v55", "v56", "v57", "v58", "v59", "v60", "v61", "v62",
82  "v63", "v64", "v65", "v66", "v67", "v68", "v69", "v70", "v71",
83  "v72", "v73", "v74", "v75", "v76", "v77", "v78", "v79", "v80",
84  "v81", "v82", "v83", "v84", "v85", "v86", "v87", "v88", "v89",
85  "v90", "v91", "v92", "v93", "v94", "v95", "v96", "v97", "v98",
86  "v99", "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107",
87  "v108", "v109", "v110", "v111", "v112", "v113", "v114", "v115", "v116",
88  "v117", "v118", "v119", "v120", "v121", "v122", "v123", "v124", "v125",
89  "v126", "v127", "v128", "v129", "v130", "v131", "v132", "v133", "v134",
90  "v135", "v136", "v137", "v138", "v139", "v140", "v141", "v142", "v143",
91  "v144", "v145", "v146", "v147", "v148", "v149", "v150", "v151", "v152",
92  "v153", "v154", "v155", "v156", "v157", "v158", "v159", "v160", "v161",
93  "v162", "v163", "v164", "v165", "v166", "v167", "v168", "v169", "v170",
94  "v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", "v179",
95  "v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188",
96  "v189", "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197",
97  "v198", "v199", "v200", "v201", "v202", "v203", "v204", "v205", "v206",
98  "v207", "v208", "v209", "v210", "v211", "v212", "v213", "v214", "v215",
99  "v216", "v217", "v218", "v219", "v220", "v221", "v222", "v223", "v224",
100  "v225", "v226", "v227", "v228", "v229", "v230", "v231", "v232", "v233",
101  "v234", "v235", "v236", "v237", "v238", "v239", "v240", "v241", "v242",
102  "v243", "v244", "v245", "v246", "v247", "v248", "v249", "v250", "v251",
103  "v252", "v253", "v254", "v255", "s0", "s1", "s2", "s3", "s4",
104  "s5", "s6", "s7", "s8", "s9", "s10", "s11", "s12", "s13",
105  "s14", "s15", "s16", "s17", "s18", "s19", "s20", "s21", "s22",
106  "s23", "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31",
107  "s32", "s33", "s34", "s35", "s36", "s37", "s38", "s39", "s40",
108  "s41", "s42", "s43", "s44", "s45", "s46", "s47", "s48", "s49",
109  "s50", "s51", "s52", "s53", "s54", "s55", "s56", "s57", "s58",
110  "s59", "s60", "s61", "s62", "s63", "s64", "s65", "s66", "s67",
111  "s68", "s69", "s70", "s71", "s72", "s73", "s74", "s75", "s76",
112  "s77", "s78", "s79", "s80", "s81", "s82", "s83", "s84", "s85",
113  "s86", "s87", "s88", "s89", "s90", "s91", "s92", "s93", "s94",
114  "s95", "s96", "s97", "s98", "s99", "s100", "s101", "s102", "s103",
115  "s104", "s105", "s106", "s107", "s108", "s109", "s110", "s111", "s112",
116  "s113", "s114", "s115", "s116", "s117", "s118", "s119", "s120", "s121",
117  "s122", "s123", "s124", "s125", "s126", "s127", "exec", "vcc", "scc",
118  "m0", "flat_scratch", "exec_lo", "exec_hi", "vcc_lo", "vcc_hi",
119  "flat_scratch_lo", "flat_scratch_hi"
120 };
121 
// NOTE(review): the signature line (listing line 122) is missing from this
// listing. Per the cross-reference notes at the end of the file, this is the
// body of `ArrayRef<const char *> getGCCRegNames() const override`.
// Returns a view over the whole GCCRegNames array.
123  return llvm::makeArrayRef(GCCRegNames);
124 }
125 
// NOTE(review): the first signature line (listing line 126) is missing from
// this listing. Per the cross-reference notes at the end of the file, this is
// the parameter list and body of `bool initFeatureMap(...) const override`.
//
// Seeds Features with the default feature flags for CPU and then delegates to
// TargetInfo::initFeatureMap(), whose result is returned. Returns false
// without delegating when an AMDGCN CPU name parses to GK_NONE.
127  llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU,
128  const std::vector<std::string> &FeatureVec) const {
129 
130  using namespace llvm::AMDGPU;
131 
132  // XXX - What does the member GPU mean if device name string passed here?
133  if (isAMDGCN(getTriple())) {
// An empty CPU name defaults to "gfx600".
134  if (CPU.empty())
135  CPU = "gfx600";
136 
// The cases rely on fallthrough: a GPU picks up its own features and then
// every feature set by the cases below it, until a break is reached.
137  switch (llvm::AMDGPU::parseArchAMDGCN(CPU)) {
138  case GK_GFX1012:
139  case GK_GFX1011:
140  Features["dot1-insts"] = true;
141  Features["dot2-insts"] = true;
142  Features["dot5-insts"] = true;
143  Features["dot6-insts"] = true;
144  LLVM_FALLTHROUGH;
// The GFX10 group ends with a break instead of falling into the GFX9 chain,
// so it lists its gfx8/gfx9/ci features explicitly.
145  case GK_GFX1010:
146  Features["dl-insts"] = true;
147  Features["ci-insts"] = true;
148  Features["16-bit-insts"] = true;
149  Features["dpp"] = true;
150  Features["gfx8-insts"] = true;
151  Features["gfx9-insts"] = true;
152  Features["gfx10-insts"] = true;
153  Features["s-memrealtime"] = true;
154  break;
155  case GK_GFX908:
156  Features["dot3-insts"] = true;
157  Features["dot4-insts"] = true;
158  Features["dot5-insts"] = true;
159  Features["dot6-insts"] = true;
160  LLVM_FALLTHROUGH;
161  case GK_GFX906:
162  Features["dl-insts"] = true;
163  Features["dot1-insts"] = true;
164  Features["dot2-insts"] = true;
165  LLVM_FALLTHROUGH;
166  case GK_GFX909:
167  case GK_GFX904:
168  case GK_GFX902:
169  case GK_GFX900:
170  Features["gfx9-insts"] = true;
171  LLVM_FALLTHROUGH;
172  case GK_GFX810:
173  case GK_GFX803:
174  case GK_GFX802:
175  case GK_GFX801:
176  Features["gfx8-insts"] = true;
177  Features["16-bit-insts"] = true;
178  Features["dpp"] = true;
179  Features["s-memrealtime"] = true;
180  LLVM_FALLTHROUGH;
181  case GK_GFX704:
182  case GK_GFX703:
183  case GK_GFX702:
184  case GK_GFX701:
185  case GK_GFX700:
186  Features["ci-insts"] = true;
187  LLVM_FALLTHROUGH;
// GFX6 parts get no default features from this switch.
188  case GK_GFX601:
189  case GK_GFX600:
190  break;
// A CPU name that parses to GK_NONE makes the whole call fail.
191  case GK_NONE:
192  return false;
193  default:
194  llvm_unreachable("Unhandled GPU!");
195  }
196  } else {
// Non-AMDGCN (R600) path: an empty CPU name defaults to "r600", and no case
// below sets any feature.
197  if (CPU.empty())
198  CPU = "r600";
199 
// NOTE(review): unlike the AMDGCN switch above there is no GK_NONE case here,
// so a name that parseArchR600() does not recognise would land in
// llvm_unreachable (assuming it yields GK_NONE for unknown names). Confirm
// that callers validate the CPU name before reaching this point.
200  switch (llvm::AMDGPU::parseArchR600(CPU)) {
201  case GK_CAYMAN:
202  case GK_CYPRESS:
203  case GK_RV770:
204  case GK_RV670:
205  // TODO: Add fp64 when implemented.
206  break;
207  case GK_TURKS:
208  case GK_CAICOS:
209  case GK_BARTS:
210  case GK_SUMO:
211  case GK_REDWOOD:
212  case GK_JUNIPER:
213  case GK_CEDAR:
214  case GK_RV730:
215  case GK_RV710:
216  case GK_RS880:
217  case GK_R630:
218  case GK_R600:
219  break;
220  default:
221  llvm_unreachable("Unhandled GPU!");
222  }
223  }
224 
// CPU may have been replaced by its default above; the base class sees that
// adjusted name.
225  return TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec);
226 }
227 
// NOTE(review): the first signature line (listing line 228) is missing from
// this listing. Per the cross-reference notes at the end of the file, this is
// `void adjustTargetOptions(const CodeGenOptions &CGOpts,
// TargetOptions &TargetOpts) const override`.
//
// Appends default denormal-handling features to TargetOpts.Features for any
// denormal feature that the user did not spell out in FeaturesAsWritten.
229  TargetOptions &TargetOpts) const {
// Despite their names, these flags record whether the feature was written
// explicitly (with either '+' or '-'), not whether it is enabled.
230  bool hasFP32Denormals = false;
231  bool hasFP64Denormals = false;
232 
233  for (auto &I : TargetOpts.FeaturesAsWritten) {
234  if (I == "+fp32-denormals" || I == "-fp32-denormals")
235  hasFP32Denormals = true;
236  if (I == "+fp64-fp16-denormals" || I == "-fp64-fp16-denormals")
237  hasFP64Denormals = true;
238  }
// Default for fp32: enable denormals only when hasFastFMAF() and
// hasFullRateDenormalsF32() both hold and CGOpts.FlushDenorm is not set;
// otherwise push the feature with a '-' prefix.
239  if (!hasFP32Denormals)
240  TargetOpts.Features.push_back(
241  (Twine(hasFastFMAF() && hasFullRateDenormalsF32() && !CGOpts.FlushDenorm
242  ? '+' : '-') + Twine("fp32-denormals"))
243  .str());
244  // Never flush fp64 or fp16 denormals by default.
245  if (!hasFP64Denormals && hasFP64())
246  TargetOpts.Features.push_back("+fp64-fp16-denormals");
247 }
248 
// NOTE(review): the first signature line (listing line 249) is missing from
// this listing. Per the cross-reference notes at the end of the file, this is
// `void fillValidCPUList(SmallVectorImpl<StringRef> &Values) const override`.
//
// Fills Values with the architecture list that matches the triple, using the
// AMDGCN list for AMDGCN triples and the R600 list otherwise.
250  SmallVectorImpl<StringRef> &Values) const {
251  if (isAMDGCN(getTriple()))
252  llvm::AMDGPU::fillValidArchListAMDGCN(Values);
253  else
254  llvm::AMDGPU::fillValidArchListR600(Values);
255 }
256 
// Points AddrSpaceMap at one of the two static tables: AMDGPUDefIsPrivMap when
// DefaultIsPrivate is true (Default -> Private), otherwise AMDGPUDefIsGenMap
// (Default -> Generic).
257 void AMDGPUTargetInfo::setAddressSpaceMap(bool DefaultIsPrivate) {
258  AddrSpaceMap = DefaultIsPrivate ? &AMDGPUDefIsPrivMap : &AMDGPUDefIsGenMap;
259 }
260 
// Constructs the target description for the given triple and target options.
// GPUKind is parsed from Opts.CPU with the AMDGCN or R600 parser depending on
// the triple, and GPUFeatures is looked up from that GPUKind.
//
// NOTE(review): listing lines 270-271, 276, 285-287 and 290 are missing from
// this listing, so some statements of the constructor body are not shown
// here. Do not treat the body below as complete.
261 AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple,
262  const TargetOptions &Opts)
263  : TargetInfo(Triple),
264  GPUKind(isAMDGCN(Triple) ?
265  llvm::AMDGPU::parseArchAMDGCN(Opts.CPU) :
266  llvm::AMDGPU::parseArchR600(Opts.CPU)),
267  GPUFeatures(isAMDGCN(Triple) ?
268  llvm::AMDGPU::getArchAttrAMDGCN(GPUKind) :
269  llvm::AMDGPU::getArchAttrR600(GPUKind)) {
// The data layout's alloca address space must be the Private address space.
272  assert(DataLayout->getAllocaAddrSpace() == Private);
273 
// Default maps to Private on Mesa3D or on non-AMDGCN triples, and to Generic
// otherwise. adjust() may later replace this choice.
274  setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D ||
275  !isAMDGCN(Triple));
277 
278  HasLegalHalfType = true;
279  HasFloat16 = true;
280 
281  // Set pointer width and alignment for target address space 0.
282  PointerWidth = PointerAlign = DataLayout->getPointerSizeInBits();
// With 64-bit pointers, long is 64 bits wide as well.
283  if (getMaxPointerWidth() == 64) {
284  LongWidth = LongAlign = 64;
288  }
289 
291 }
292 
// NOTE(review): the signature line (listing line 293) is missing from this
// listing. Per the cross-reference notes at the end of the file, this is the
// body of `void adjust(LangOptions &Opts) override`.
//
// Runs the base-class adjustment, then re-selects the address space map:
// Default maps to Private for OpenCL or for non-AMDGCN triples, and to Generic
// otherwise. This replaces the map chosen in the constructor.
294  TargetInfo::adjust(Opts);
295  // TODO: There are still a few places that use the default address space as
296  // the private address space in OpenCL. Once those are cleaned up,
297  // Opts.OpenCL can be removed from the following line.
298  setAddressSpaceMap(/*DefaultIsPrivate=*/Opts.OpenCL ||
299  !isAMDGCN(getTriple()));
300 }
301 
// NOTE(review): listing lines 302 (the signature) and 304 (the rest of the
// subtraction) are missing from this listing, so the return expression below
// is incomplete as shown. Per the cross-reference notes at the end of the
// file, this is `ArrayRef<Builtin::Info> getTargetBuiltins() const override`.
// It returns a view over BuiltinInfo whose length is computed from
// clang::AMDGPU::LastTSBuiltin.
303  return llvm::makeArrayRef(BuiltinInfo, clang::AMDGPU::LastTSBuiltin -
305 }
306 
// NOTE(review): the first signature line (listing line 307) is missing from
// this listing. Per the cross-reference notes at the end of the file, this is
// `void getTargetDefines(const LangOptions &Opts,
// MacroBuilder &Builder) const override`.
//
// Emits the predefined macros for this target into Builder.
308  MacroBuilder &Builder) const {
// Defined for every AMDGPU target.
309  Builder.defineMacro("__AMD__");
310  Builder.defineMacro("__AMDGPU__");
311 
// Exactly one of the two architecture-family macros is defined.
312  if (isAMDGCN(getTriple()))
313  Builder.defineMacro("__AMDGCN__");
314  else
315  Builder.defineMacro("__R600__");
316 
// When a specific GPU is known, define "__<canonical arch name>__".
317  if (GPUKind != llvm::AMDGPU::GK_NONE) {
318  StringRef CanonName = isAMDGCN(getTriple()) ?
319  getArchNameAMDGCN(GPUKind) : getArchNameR600(GPUKind);
320  Builder.defineMacro(Twine("__") + Twine(CanonName) + Twine("__"));
321  }
322 
323  // TODO: __HAS_FMAF__, __HAS_LDEXPF__, __HAS_FP64__ are deprecated and will be
324  // removed in the near future.
// Capability macros, each guarded by the matching has*() query.
325  if (hasFMAF())
326  Builder.defineMacro("__HAS_FMAF__");
327  if (hasFastFMAF())
328  Builder.defineMacro("FP_FAST_FMAF");
329  if (hasLDEXPF())
330  Builder.defineMacro("__HAS_LDEXPF__");
331  if (hasFP64())
332  Builder.defineMacro("__HAS_FP64__");
333  if (hasFastFMA())
334  Builder.defineMacro("FP_FAST_FMA");
335 }
336 
// NOTE(review): the signature line (listing line 337) is missing from this
// listing. Per the cross-reference notes at the end of the file, this is the
// body of `void setAuxTarget(const TargetInfo *Aux) override`.
//
// Copies type and layout information from the auxiliary target Aux while
// keeping this target's own long double and float128 formats.
// The half, float and double formats must already agree with Aux.
338  assert(HalfFormat == Aux->HalfFormat);
339  assert(FloatFormat == Aux->FloatFormat);
340  assert(DoubleFormat == Aux->DoubleFormat);
341 
342  // On x86_64 long double is 80-bit extended precision format, which is
343  // not supported by AMDGPU. 128-bit floating point format is also not
344  // supported by AMDGPU. Therefore keep its own format for these two types.
// Save the two formats, let copyAuxTarget() overwrite everything, then restore.
345  auto SaveLongDoubleFormat = LongDoubleFormat;
346  auto SaveFloat128Format = Float128Format;
347  copyAuxTarget(Aux);
348  LongDoubleFormat = SaveLongDoubleFormat;
349  Float128Format = SaveFloat128Format;
350 }
bool initFeatureMap(llvm::StringMap< bool > &Features, DiagnosticsEngine &Diags, StringRef CPU, const std::vector< std::string > &FeatureVec) const override
Initialize the map with the default set of target features for the CPU; this should include all legal ...
Definition: AMDGPU.cpp:126
Defines the clang::MacroBuilder utility class.
virtual void adjust(LangOptions &Opts)
Set forced language options.
Definition: TargetInfo.cpp:324
const llvm::fltSemantics * FloatFormat
Definition: TargetInfo.h:100
Specialize PointerLikeTypeTraits to allow LazyGenerationalUpdatePtr to be placed into a PointerUnion...
Definition: Dominators.h:30
const llvm::Triple & getTriple() const
Returns the target triple of the primary target.
Definition: TargetInfo.h:985
Options for controlling the target.
Definition: TargetOptions.h:26
void getTargetDefines(const LangOptions &Opts, MacroBuilder &Builder) const override
===---- Other target property query methods -------------------------===//
Definition: AMDGPU.cpp:307
void fillValidCPUList(SmallVectorImpl< StringRef > &Values) const override
Fill a SmallVectorImpl with the valid values to setCPU.
Definition: AMDGPU.cpp:249
Keeps track of the various options that can be enabled, which controls the dialect of C or C++ that i...
Definition: LangOptions.h:49
unsigned char MaxAtomicPromoteWidth
Definition: TargetInfo.h:177
const llvm::fltSemantics * HalfFormat
Definition: TargetInfo.h:100
const llvm::fltSemantics * Float128Format
Definition: TargetInfo.h:100
Concrete class used by the front-end to report problems and issues.
Definition: Diagnostic.h:149
ArrayRef< const char * > getGCCRegNames() const override
Definition: AMDGPU.cpp:122
void setAddressSpaceMap(bool DefaultIsPrivate)
Definition: AMDGPU.cpp:257
static const char *const GCCRegNames[]
Definition: X86.cpp:43
Exposes information about the current target.
Definition: TargetInfo.h:161
virtual bool initFeatureMap(llvm::StringMap< bool > &Features, DiagnosticsEngine &Diags, StringRef CPU, const std::vector< std::string > &FeatureVec) const
Initialize the map with the default set of target features for the CPU; this should include all legal ...
Definition: TargetInfo.cpp:396
Defines the clang::LangOptions interface.
void adjustTargetOptions(const CodeGenOptions &CGOpts, TargetOptions &TargetOpts) const override
Adjust target options based on codegen options.
Definition: AMDGPU.cpp:228
void resetDataLayout(StringRef DL)
Definition: TargetInfo.h:199
void setAuxTarget(const TargetInfo *Aux) override
Definition: AMDGPU.cpp:337
const llvm::fltSemantics * LongDoubleFormat
Definition: TargetInfo.h:100
Enumerates target-specific builtins in their own namespaces within namespace clang.
std::vector< std::string > Features
The list of target-specific features to enable or disable; this should be a list of strings starting...
Definition: TargetOptions.h:55
static const char *const DataLayoutStringR600
Definition: AMDGPU.cpp:30
std::vector< std::string > FeaturesAsWritten
The list of target specific features to enable or disable, as written on the command line...
Definition: TargetOptions.h:51
AMDGPUTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
Definition: AMDGPU.cpp:261
Dataflow Directional Tag Classes.
static const char *const DataLayoutStringAMDGCN
Definition: AMDGPU.cpp:34
unsigned[(unsigned) LangAS::FirstTargetAddressSpace] LangASMap
The type of a lookup table which maps from language-specific address spaces to target-specific ones...
Definition: AddressSpaces.h:53
ArrayRef< Builtin::Info > getTargetBuiltins() const override
Return information about target-specific builtins for the current primary target, and info about whic...
Definition: AMDGPU.cpp:302
CodeGenOptions - Track various options which control how the code is optimized and passed to the back...
void copyAuxTarget(const TargetInfo *Aux)
Copy type and layout related info.
Definition: TargetInfo.cpp:811
void adjust(LangOptions &Opts) override
Set forced language options.
Definition: AMDGPU.cpp:293
const llvm::fltSemantics * DoubleFormat
Definition: TargetInfo.h:100
unsigned char MaxAtomicInlineWidth
Definition: TargetInfo.h:177
void defineMacro(const Twine &Name, const Twine &Value="1")
Append a #define line for macro of the form "\#define Name Value\n".
Definition: MacroBuilder.h:29
std::unique_ptr< llvm::DataLayout > DataLayout
Definition: TargetInfo.h:179
Defines enum values for all the target-independent builtin functions.
uint64_t getMaxPointerWidth() const override
Return the maximum width of pointers on this target.
Definition: AMDGPU.h:102
bool UseAddrSpaceMapMangling
Specify if mangling based on address space map should be used or not for language specific address sp...
Definition: TargetInfo.h:265