LLVM 23.0.0git
TargetParser.cpp
Go to the documentation of this file.
1//===-- TargetParser - Parser for target features ---------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements a target parser to recognise hardware features such as
10// FPU/CPU/ARCH names as well as specific support such as HDIV, etc.
11//
12//===----------------------------------------------------------------------===//
13
15#include "llvm/ADT/ArrayRef.h"
17
18using namespace llvm;
19using namespace AMDGPU;
20
21/// Find the entry of A whose Key equals S, using a binary search.
///
/// Returns a pointer to the matching entry, or nullptr when no entry has
/// exactly that key.
/// NOTE(review): the declarator line (function name and parameter list) is
/// missing from this listing; the later call `::find(CPU, ProcDesc)` suggests
/// the parameters are (S, A) in that order -- confirm against the real file.
22static const BasicSubtargetSubTypeKV *
24 // Binary search the array. Presumably A is sorted by Key, which
// lower_bound needs for a meaningful result -- TODO confirm.
25 auto F = llvm::lower_bound(A, S);
26 // lower_bound only yields the first entry not ordered before S, so the key
// still has to be compared for equality; end() means there is no candidate.
27 if (F == A.end() || StringRef(F->Key) != S)
28 return nullptr;
29 // Return the found array item
30 return F;
31}
32
33/// For each feature that is (transitively) implied by this feature, set it.
///
/// \p Bits accumulates the result. \p Implies is the set of bits to add
/// together with everything those bits imply in turn.
/// NOTE(review): the line declaring the third parameter is missing from this
/// listing; the body uses it as FeatureTable, a range of entries exposing
/// `Value` (the entry's own bit index) and `Implies`.
34static void setImpliedBits(FeatureBitset &Bits, const FeatureBitset &Implies,
36 // OR the Implies bits in outside the loop. This allows the Implies for CPUs
37 // which might imply features not in FeatureTable to use this.
38 Bits |= Implies;
// Recurse into every table entry whose own bit is present in Implies so that
// the bits it implies are merged into Bits as well.
39 for (const auto &FE : FeatureTable)
40 if (Implies.test(FE.Value))
41 setImpliedBits(Bits, FE.Implies.getAsBitset(), FeatureTable);
42}
43
/// Build the map of features that are enabled by default for CPU.
///
/// Returns std::nullopt when CPU is empty or has no entry in ProcDesc.
/// Otherwise returns a map holding `true` for every feature in ProcFeatures
/// whose bit is (transitively) implied by the CPU entry; features that are not
/// implied are left out of the map rather than stored as `false`.
/// NOTE(review): the parameter lines are missing from this listing; the body
/// uses CPU, ProcDesc and ProcFeatures.
44std::optional<llvm::StringMap<bool>> llvm::getCPUDefaultTargetFeatures(
47 if (CPU.empty())
48 return std::nullopt;
49
50 const BasicSubtargetSubTypeKV *CPUEntry = ::find(CPU, ProcDesc);
51 if (!CPUEntry)
52 return std::nullopt;
53
54 // Set the features implied by this CPU feature if there is a match.
55 FeatureBitset Bits;
56 llvm::StringMap<bool> DefaultFeatures;
57 setImpliedBits(Bits, CPUEntry->Implies.getAsBitset(), ProcFeatures);
58
// BitSize is read only by the assert below, hence [[maybe_unused]].
59 [[maybe_unused]] unsigned BitSize = Bits.size();
60 for (const BasicSubtargetFeatureKV &FE : ProcFeatures) {
61 assert(FE.Value < BitSize && "Target Feature is out of range");
// Translate each set bit back into its feature name.
62 if (Bits[FE.Value])
63 DefaultFeatures[FE.Key] = true;
64 }
65 return DefaultFeatures;
66}
67
68namespace {
69
/// One row of a GPU name table. Rows are written as positional aggregate
/// initialisers, so the member order here must not change.
70struct GPUInfo {
// Spelling that is compared against the requested CPU string when parsing.
71 StringLiteral Name;
// Name reported back for the row's Kind; several rows may share one.
72 StringLiteral CanonicalName;
// Kind this row belongs to; also the key used for table lookups.
73 AMDGPU::GPUKind Kind;
// Feature flags for the row (FEATURE_* values, combined with `|`).
74 unsigned Features;
75};
76
// Name table for the R600 family. Lookups by kind pass this table to
// getArchEntry(), which binary-searches on Kind, so rows sharing a Kind must
// stay adjacent and the rows must stay ordered by Kind.
// NOTE(review): the numeric order of the GPUKind enumerators is not visible
// here -- confirm the row order against the enum when adding entries.
77constexpr GPUInfo R600GPUs[] = {
78 // Columns: Name, Canonical Name, Kind, Features
79 //
80 {{"r600"}, {"r600"}, GK_R600, FEATURE_NONE },
81 {{"rv630"}, {"r600"}, GK_R600, FEATURE_NONE },
82 {{"rv635"}, {"r600"}, GK_R600, FEATURE_NONE },
83 {{"r630"}, {"r630"}, GK_R630, FEATURE_NONE },
84 {{"rs780"}, {"rs880"}, GK_RS880, FEATURE_NONE },
85 {{"rs880"}, {"rs880"}, GK_RS880, FEATURE_NONE },
86 {{"rv610"}, {"rs880"}, GK_RS880, FEATURE_NONE },
87 {{"rv620"}, {"rs880"}, GK_RS880, FEATURE_NONE },
88 {{"rv670"}, {"rv670"}, GK_RV670, FEATURE_NONE },
89 {{"rv710"}, {"rv710"}, GK_RV710, FEATURE_NONE },
90 {{"rv730"}, {"rv730"}, GK_RV730, FEATURE_NONE },
91 {{"rv740"}, {"rv770"}, GK_RV770, FEATURE_NONE },
92 {{"rv770"}, {"rv770"}, GK_RV770, FEATURE_NONE },
93 {{"cedar"}, {"cedar"}, GK_CEDAR, FEATURE_NONE },
94 {{"palm"}, {"cedar"}, GK_CEDAR, FEATURE_NONE },
95 {{"cypress"}, {"cypress"}, GK_CYPRESS, FEATURE_FMA },
96 {{"hemlock"}, {"cypress"}, GK_CYPRESS, FEATURE_FMA },
97 {{"juniper"}, {"juniper"}, GK_JUNIPER, FEATURE_NONE },
98 {{"redwood"}, {"redwood"}, GK_REDWOOD, FEATURE_NONE },
99 {{"sumo"}, {"sumo"}, GK_SUMO, FEATURE_NONE },
100 {{"sumo2"}, {"sumo"}, GK_SUMO, FEATURE_NONE },
101 {{"barts"}, {"barts"}, GK_BARTS, FEATURE_NONE },
102 {{"caicos"}, {"caicos"}, GK_CAICOS, FEATURE_NONE },
103 {{"aruba"}, {"cayman"}, GK_CAYMAN, FEATURE_FMA },
104 {{"cayman"}, {"cayman"}, GK_CAYMAN, FEATURE_FMA },
105 {{"turks"}, {"turks"}, GK_TURKS, FEATURE_NONE }
106};
107
108// This table should be sorted by the value of GPUKind, because lookups by
// kind pass it to getArchEntry(), which binary-searches on Kind.
109// Don't bother listing the implicitly true features
// NOTE(review): several rows are missing from this listing (the embedded
// line numbers jump); the real table has more entries than shown here.
110constexpr GPUInfo AMDGCNGPUs[] = {
111 // clang-format off
112 // Columns: Name, Canonical Name, Kind, Features
113 //
114 {{"gfx600"}, {"gfx600"}, GK_GFX600, FEATURE_FAST_FMA_F32},
115 {{"tahiti"}, {"gfx600"}, GK_GFX600, FEATURE_FAST_FMA_F32},
116 {{"gfx601"}, {"gfx601"}, GK_GFX601, FEATURE_NONE},
117 {{"pitcairn"}, {"gfx601"}, GK_GFX601, FEATURE_NONE},
118 {{"verde"}, {"gfx601"}, GK_GFX601, FEATURE_NONE},
119 {{"gfx602"}, {"gfx602"}, GK_GFX602, FEATURE_NONE},
120 {{"hainan"}, {"gfx602"}, GK_GFX602, FEATURE_NONE},
121 {{"oland"}, {"gfx602"}, GK_GFX602, FEATURE_NONE},
122 {{"gfx700"}, {"gfx700"}, GK_GFX700, FEATURE_NONE},
123 {{"kaveri"}, {"gfx700"}, GK_GFX700, FEATURE_NONE},
124 {{"gfx701"}, {"gfx701"}, GK_GFX701, FEATURE_FAST_FMA_F32},
125 {{"hawaii"}, {"gfx701"}, GK_GFX701, FEATURE_FAST_FMA_F32},
126 {{"gfx702"}, {"gfx702"}, GK_GFX702, FEATURE_FAST_FMA_F32},
127 {{"gfx703"}, {"gfx703"}, GK_GFX703, FEATURE_NONE},
128 {{"kabini"}, {"gfx703"}, GK_GFX703, FEATURE_NONE},
129 {{"mullins"}, {"gfx703"}, GK_GFX703, FEATURE_NONE},
130 {{"gfx704"}, {"gfx704"}, GK_GFX704, FEATURE_NONE},
131 {{"bonaire"}, {"gfx704"}, GK_GFX704, FEATURE_NONE},
132 {{"gfx705"}, {"gfx705"}, GK_GFX705, FEATURE_NONE},
135 {{"gfx802"}, {"gfx802"}, GK_GFX802, FEATURE_FAST_DENORMAL_F32},
136 {{"iceland"}, {"gfx802"}, GK_GFX802, FEATURE_FAST_DENORMAL_F32},
137 {{"tonga"}, {"gfx802"}, GK_GFX802, FEATURE_FAST_DENORMAL_F32},
138 {{"gfx803"}, {"gfx803"}, GK_GFX803, FEATURE_FAST_DENORMAL_F32},
139 {{"fiji"}, {"gfx803"}, GK_GFX803, FEATURE_FAST_DENORMAL_F32},
140 {{"polaris10"}, {"gfx803"}, GK_GFX803, FEATURE_FAST_DENORMAL_F32},
141 {{"polaris11"}, {"gfx803"}, GK_GFX803, FEATURE_FAST_DENORMAL_F32},
142 {{"gfx805"}, {"gfx805"}, GK_GFX805, FEATURE_FAST_DENORMAL_F32},
143 {{"tongapro"}, {"gfx805"}, GK_GFX805, FEATURE_FAST_DENORMAL_F32},
144 {{"gfx810"}, {"gfx810"}, GK_GFX810, FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK},
145 {{"stoney"}, {"gfx810"}, GK_GFX810, FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK},
181
188 {{"gfx12-5-generic"}, {"gfx12-5-generic"}, GK_GFX12_5_GENERIC, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32},
189 // clang-format on
190};
191
192const GPUInfo *getArchEntry(AMDGPU::GPUKind AK, ArrayRef<GPUInfo> Table) {
193 GPUInfo Search = { {""}, {""}, AK, AMDGPU::FEATURE_NONE };
194
195 auto I =
196 llvm::lower_bound(Table, Search, [](const GPUInfo &A, const GPUInfo &B) {
197 return A.Kind < B.Kind;
198 });
199
200 if (I == Table.end() || I->Kind != Search.Kind)
201 return nullptr;
202 return I;
203}
204
205} // namespace
206
// NOTE(review): the function header and the `case` labels in front of each
// literal return are missing from this listing; this is presumably the body
// of getArchFamilyNameAMDGCN -- confirm against the real file.
208 switch (AK) {
211 return "gfx9";
214 return "gfx10";
216 return "gfx11";
219 return "gfx12";
220 default: {
// Every other kind: take the canonical arch name and drop its last two
// characters. A kind without a name (empty string) yields "".
221 StringRef ArchName = getArchNameAMDGCN(AK);
222 return ArchName.empty() ? "" : ArchName.drop_back(2);
223 }
224 }
225}
226
// Canonical name recorded for AK in the AMDGCN table, or "" when AK has no
// row there.
// NOTE(review): header line missing from this listing; presumably
// getArchNameAMDGCN.
228 if (const auto *Entry = getArchEntry(AK, AMDGCNGPUs))
229 return Entry->CanonicalName;
230 return "";
231}
232
// Canonical name recorded for AK in the R600 table, or "" when AK has no row
// there.
// NOTE(review): header line missing from this listing; presumably
// getArchNameR600.
234 if (const auto *Entry = getArchEntry(AK, R600GPUs))
235 return Entry->CanonicalName;
236 return "";
237}
238
// Linear scan of the AMDGCN table for a row whose Name equals CPU exactly;
// the Kind of the first such row is returned.
// NOTE(review): the header line and the final return statement (the result
// for an unmatched name) are missing from this listing; presumably
// parseArchAMDGCN.
240 for (const auto &C : AMDGCNGPUs) {
241 if (CPU == C.Name)
242 return C.Kind;
243 }
244
246}
247
// Linear scan of the R600 table for a row whose Name equals CPU exactly; the
// Kind of the first such row is returned.
// NOTE(review): the header line and the final return statement (the result
// for an unmatched name) are missing from this listing; presumably
// parseArchR600.
249 for (const auto &C : R600GPUs) {
250 if (CPU == C.Name)
251 return C.Kind;
252 }
253
255}
256
// Feature flags recorded for AK in the AMDGCN table, or FEATURE_NONE when AK
// has no row there.
// NOTE(review): header line missing from this listing; presumably
// getArchAttrAMDGCN.
258 if (const auto *Entry = getArchEntry(AK, AMDGCNGPUs))
259 return Entry->Features;
260 return FEATURE_NONE;
261}
262
// Feature flags recorded for AK in the R600 table, or FEATURE_NONE when AK has
// no row there.
// NOTE(review): header line missing from this listing; presumably
// getArchAttrR600.
264 if (const auto *Entry = getArchEntry(AK, R600GPUs))
265 return Entry->Features;
266 return FEATURE_NONE;
267}
268
// Appends the Name of every AMDGCN table row to Values, in table order. Each
// row contributes its own Name, so alternative spellings that share a
// canonical name are all listed.
// NOTE(review): header line missing from this listing; presumably
// fillValidArchListAMDGCN.
270 // XXX: Should this only report unique canonical names?
271 for (const auto &C : AMDGCNGPUs)
272 Values.push_back(C.Name);
273}
274
// Appends the Name of every R600 table row to Values, in table order.
// NOTE(review): header line missing from this listing; presumably
// fillValidArchListR600.
276 for (const auto &C : R600GPUs)
277 Values.push_back(C.Name);
278}
279
// Maps a GPU to a three-component version triple.
// NOTE(review): the header line and the statement that derives AK from GPU
// are missing from this listing; presumably this is getIsaVersion.
//
// Names that did not resolve to a kind: two special spellings get a fixed
// version, anything else gets {0, 0, 0}.
282 if (AK == AMDGPU::GPUKind::GK_NONE) {
283 if (GPU == "generic-hsa")
284 return {7, 0, 0};
285 if (GPU == "generic")
286 return {6, 0, 0};
287 return {0, 0, 0};
288 }
289
290 // clang-format off
291 switch (AK) {
292 case GK_GFX600: return {6, 0, 0};
293 case GK_GFX601: return {6, 0, 1};
294 case GK_GFX602: return {6, 0, 2};
295 case GK_GFX700: return {7, 0, 0};
296 case GK_GFX701: return {7, 0, 1};
297 case GK_GFX702: return {7, 0, 2};
298 case GK_GFX703: return {7, 0, 3};
299 case GK_GFX704: return {7, 0, 4};
300 case GK_GFX705: return {7, 0, 5};
301 case GK_GFX801: return {8, 0, 1};
302 case GK_GFX802: return {8, 0, 2};
303 case GK_GFX803: return {8, 0, 3};
304 case GK_GFX805: return {8, 0, 5};
305 case GK_GFX810: return {8, 1, 0};
306 case GK_GFX900: return {9, 0, 0};
307 case GK_GFX902: return {9, 0, 2};
308 case GK_GFX904: return {9, 0, 4};
309 case GK_GFX906: return {9, 0, 6};
310 case GK_GFX908: return {9, 0, 8};
311 case GK_GFX909: return {9, 0, 9};
// The last component is numeric, so a hexadecimal suffix in the kind name is
// written as its decimal value (A -> 10, C -> 12).
312 case GK_GFX90A: return {9, 0, 10};
313 case GK_GFX90C: return {9, 0, 12};
314 case GK_GFX942: return {9, 4, 2};
315 case GK_GFX950: return {9, 5, 0};
316 case GK_GFX1010: return {10, 1, 0};
317 case GK_GFX1011: return {10, 1, 1};
318 case GK_GFX1012: return {10, 1, 2};
319 case GK_GFX1013: return {10, 1, 3};
320 case GK_GFX1030: return {10, 3, 0};
321 case GK_GFX1031: return {10, 3, 1};
322 case GK_GFX1032: return {10, 3, 2};
323 case GK_GFX1033: return {10, 3, 3};
324 case GK_GFX1034: return {10, 3, 4};
325 case GK_GFX1035: return {10, 3, 5};
326 case GK_GFX1036: return {10, 3, 6};
327 case GK_GFX1100: return {11, 0, 0};
328 case GK_GFX1101: return {11, 0, 1};
329 case GK_GFX1102: return {11, 0, 2};
330 case GK_GFX1103: return {11, 0, 3};
331 case GK_GFX1150: return {11, 5, 0};
332 case GK_GFX1151: return {11, 5, 1};
333 case GK_GFX1152: return {11, 5, 2};
334 case GK_GFX1153: return {11, 5, 3};
335 case GK_GFX1170: return {11, 7, 0};
336 case GK_GFX1200: return {12, 0, 0};
337 case GK_GFX1201: return {12, 0, 1};
338 case GK_GFX1250: return {12, 5, 0};
339 case GK_GFX1251: return {12, 5, 1};
340 case GK_GFX1310: return {13, 1, 0};
341
342 // Generic targets return the lowest common denominator
343 // within their family. That is, the ISA that is the most
344 // restricted in terms of features.
345 //
346 // gfx9-generic is tricky because there is no lowest
347 // common denominator, so we return gfx900 which has mad-mix
348 // but this family doesn't have it.
349 //
350 // This API should never be used to check for a particular
351 // feature anyway.
352 //
353 // TODO: Split up this API depending on its caller so
354 // generic target handling is more obvious and less risky.
355 case GK_GFX9_GENERIC: return {9, 0, 0};
356 case GK_GFX9_4_GENERIC: return {9, 4, 0};
357 case GK_GFX10_1_GENERIC: return {10, 1, 0};
358 case GK_GFX10_3_GENERIC: return {10, 3, 0};
359 case GK_GFX11_GENERIC: return {11, 0, 3};
360 case GK_GFX12_GENERIC: return {12, 0, 0};
361 case GK_GFX12_5_GENERIC: return {12, 5, 0};
// Any kind without an explicit case reports the all-zero version.
362 default: return {0, 0, 0};
363 }
364 // clang-format on
365}
366
// Resolves Arch to its canonical name using the AMDGCN tables when T is an
// AMDGCN triple and the R600 tables otherwise. Returns an empty StringRef
// when Arch does not parse to a known kind.
// NOTE(review): header line missing from this listing; presumably
// getCanonicalArchName.
368 assert(T.isAMDGPU());
369 auto ProcKind = T.isAMDGCN() ? parseArchAMDGCN(Arch) : parseArchR600(Arch);
370 if (ProcKind == GK_NONE)
371 return StringRef();
372
373 return T.isAMDGCN() ? getArchNameAMDGCN(ProcKind) : getArchNameR600(ProcKind);
374}
375
/// Validate the wavefront-size entries requested in \p Features against
/// \p DefaultFeatures, then merge the defaults into \p Features.
///
/// Returns {NO_ERROR, ""} on success. On failure the second element is the
/// offending feature string or a message describing the conflict. Entries
/// already present in \p Features are never overwritten by a default.
/// NOTE(review): the line carrying the function name and first parameters,
/// and the opening lines of the two "mutually exclusive" returns, are missing
/// from this listing.
376static std::pair<FeatureError, StringRef>
378 const StringMap<bool> &DefaultFeatures,
379 StringMap<bool> &Features) {
380 const bool IsNullGPU = GPU.empty();
// count() tests only for presence of the key, not for the mapped value.
381 const bool TargetHasWave32 = DefaultFeatures.count("wavefrontsize32");
382 const bool TargetHasWave64 = DefaultFeatures.count("wavefrontsize64");
383
// Split each request into three states: explicitly enabled (entry is true),
// explicitly disabled (entry is false), or not mentioned (no entry).
384 auto Wave32Itr = Features.find("wavefrontsize32");
385 auto Wave64Itr = Features.find("wavefrontsize64");
386 const bool EnableWave32 =
387 Wave32Itr != Features.end() && Wave32Itr->getValue();
388 const bool EnableWave64 =
389 Wave64Itr != Features.end() && Wave64Itr->getValue();
390 const bool DisableWave32 =
391 Wave32Itr != Features.end() && !Wave32Itr->getValue();
392 const bool DisableWave64 =
393 Wave64Itr != Features.end() && !Wave64Itr->getValue();
394
// Requests that contradict each other are rejected regardless of the GPU.
395 if (EnableWave32 && EnableWave64)
397 "'+wavefrontsize32' and '+wavefrontsize64' are mutually exclusive"};
398 if (DisableWave32 && DisableWave64)
400 "'-wavefrontsize32' and '-wavefrontsize64' are mutually exclusive"};
401
// With a named GPU whose defaults list a wave size, the request may neither
// turn that size off nor turn the other size on.
402 if (!IsNullGPU) {
403 if (TargetHasWave64) {
404 if (EnableWave32)
405 return {AMDGPU::UNSUPPORTED_TARGET_FEATURE, "+wavefrontsize32"};
406 if (DisableWave64)
407 return {AMDGPU::UNSUPPORTED_TARGET_FEATURE, "-wavefrontsize64"};
408 }
409
410 if (TargetHasWave32) {
411 if (EnableWave64)
412 return {AMDGPU::UNSUPPORTED_TARGET_FEATURE, "+wavefrontsize64"};
413 if (DisableWave32)
414 return {AMDGPU::UNSUPPORTED_TARGET_FEATURE, "-wavefrontsize32"};
415 }
416 }
417
418 // Don't assume any wavesize with an unknown subtarget.
419 // Default to wave32 if target supports both.
// Here that means: a GPU was named, neither size was explicitly enabled, and
// the defaults list neither size.
420 if (!IsNullGPU && !EnableWave32 && !EnableWave64 && !TargetHasWave32 &&
421 !TargetHasWave64)
422 Features.insert(std::make_pair("wavefrontsize32", true));
423
// Copy over every default the caller did not already set.
424 for (const auto &Entry : DefaultFeatures) {
425 if (!Features.count(Entry.getKey()))
426 Features[Entry.getKey()] = Entry.getValue();
427 }
428
429 return {NO_ERROR, StringRef()};
430}
431
432/// Fills Features map with default values for given target GPU.
433/// This function only ever assigns `true` to entries of \p Features; it does
434/// not read or remove entries that are already there.
///
/// The feature set is chosen by a switch on Kind. Several cases rely on
/// [[fallthrough]] so that a kind also receives the features of the cases
/// below it, which makes the order of the cases significant.
/// NOTE(review): the statement that defines Kind and a few `case` labels are
/// missing from this listing (the embedded line numbers jump).
435static void fillAMDGCNFeatureMap(StringRef GPU, const Triple &T,
436 StringMap<bool> &Features) {
438 switch (Kind) {
439 case GK_GFX1310:
440 case GK_GFX1251:
441 case GK_GFX1250:
443 Features["ci-insts"] = true;
444 Features["dot7-insts"] = true;
445 Features["dot8-insts"] = true;
446 Features["dl-insts"] = true;
447 Features["16-bit-insts"] = true;
448 Features["dpp"] = true;
449 Features["gfx8-insts"] = true;
450 Features["gfx9-insts"] = true;
451 Features["gfx10-insts"] = true;
452 Features["gfx10-3-insts"] = true;
453 Features["gfx11-insts"] = true;
454 Features["gfx12-insts"] = true;
455 Features["gfx1250-insts"] = true;
456 Features["bitop3-insts"] = true;
457 Features["prng-inst"] = true;
458 Features["tanh-insts"] = true;
459 Features["tensor-cvt-lut-insts"] = true;
460 Features["transpose-load-f4f6-insts"] = true;
461 Features["bf16-trans-insts"] = true;
462 Features["bf16-cvt-insts"] = true;
463 Features["bf16-pk-insts"] = true;
464 Features["fp8-conversion-insts"] = true;
465 Features["fp8e5m3-insts"] = true;
466 Features["permlane16-swap"] = true;
467 Features["ashr-pk-insts"] = true;
468 Features["add-min-max-insts"] = true;
469 Features["pk-add-min-max-insts"] = true;
470 Features["atomic-buffer-pk-add-bf16-inst"] = true;
471 Features["vmem-pref-insts"] = true;
472 Features["atomic-fadd-rtn-insts"] = true;
473 Features["atomic-buffer-global-pk-add-f16-insts"] = true;
474 Features["atomic-flat-pk-add-16-insts"] = true;
475 Features["atomic-global-pk-add-bf16-inst"] = true;
476 Features["atomic-ds-pk-add-16-insts"] = true;
477 Features["setprio-inc-wg-inst"] = true;
478 Features["s-wakeup-barrier-inst"] = true;
479 Features["atomic-fmin-fmax-global-f32"] = true;
480 Features["atomic-fmin-fmax-global-f64"] = true;
481 Features["wavefrontsize32"] = true;
482 Features["clusters"] = true;
483 Features["mcast-load-insts"] = true;
484 Features["cube-insts"] = true;
485 Features["lerp-inst"] = true;
486 Features["sad-insts"] = true;
487 Features["qsad-insts"] = true;
488 Features["cvt-pknorm-vop2-insts"] = true;
489 break;
490 case GK_GFX1201:
491 case GK_GFX1200:
492 case GK_GFX12_GENERIC:
493 Features["ci-insts"] = true;
494 Features["dot7-insts"] = true;
495 Features["dot8-insts"] = true;
496 Features["dot9-insts"] = true;
497 Features["dot10-insts"] = true;
498 Features["dot11-insts"] = true;
499 Features["dot12-insts"] = true;
500 Features["dl-insts"] = true;
501 Features["atomic-ds-pk-add-16-insts"] = true;
502 Features["atomic-flat-pk-add-16-insts"] = true;
503 Features["atomic-buffer-global-pk-add-f16-insts"] = true;
504 Features["atomic-buffer-pk-add-bf16-inst"] = true;
505 Features["atomic-global-pk-add-bf16-inst"] = true;
506 Features["16-bit-insts"] = true;
507 Features["dpp"] = true;
508 Features["gfx8-insts"] = true;
509 Features["gfx9-insts"] = true;
510 Features["gfx10-insts"] = true;
511 Features["gfx10-3-insts"] = true;
512 Features["gfx11-insts"] = true;
513 Features["gfx12-insts"] = true;
514 Features["atomic-fadd-rtn-insts"] = true;
515 Features["image-insts"] = true;
516 Features["cube-insts"] = true;
517 Features["lerp-inst"] = true;
518 Features["sad-insts"] = true;
519 Features["qsad-insts"] = true;
520 Features["cvt-pknorm-vop2-insts"] = true;
521 Features["fp8-conversion-insts"] = true;
522 Features["wmma-128b-insts"] = true;
523 Features["atomic-fmin-fmax-global-f32"] = true;
524 break;
525 case GK_GFX1170:
526 Features["ci-insts"] = true;
527 Features["dot7-insts"] = true;
528 Features["dot8-insts"] = true;
529 Features["dot9-insts"] = true;
530 Features["dot10-insts"] = true;
531 Features["dot12-insts"] = true;
532 Features["dl-insts"] = true;
533 Features["16-bit-insts"] = true;
534 Features["dpp"] = true;
535 Features["gfx8-insts"] = true;
536 Features["gfx9-insts"] = true;
537 Features["gfx10-insts"] = true;
538 Features["gfx10-3-insts"] = true;
539 Features["gfx11-insts"] = true;
540 Features["atomic-fadd-rtn-insts"] = true;
541 Features["image-insts"] = true;
542 Features["cube-insts"] = true;
543 Features["lerp-inst"] = true;
544 Features["sad-insts"] = true;
545 Features["qsad-insts"] = true;
546 Features["cvt-pknorm-vop2-insts"] = true;
547 Features["gws"] = true;
548 Features["dot11-insts"] = true;
549 Features["fp8-conversion-insts"] = true;
550 Features["wmma-128b-insts"] = true;
551 Features["atomic-fmin-fmax-global-f32"] = true;
552 break;
553 case GK_GFX1153:
554 case GK_GFX1152:
555 case GK_GFX1151:
556 case GK_GFX1150:
557 case GK_GFX1103:
558 case GK_GFX1102:
559 case GK_GFX1101:
560 case GK_GFX1100:
561 case GK_GFX11_GENERIC:
562 Features["ci-insts"] = true;
563 Features["dot5-insts"] = true;
564 Features["dot7-insts"] = true;
565 Features["dot8-insts"] = true;
566 Features["dot9-insts"] = true;
567 Features["dot10-insts"] = true;
568 Features["dot12-insts"] = true;
569 Features["dl-insts"] = true;
570 Features["16-bit-insts"] = true;
571 Features["dpp"] = true;
572 Features["gfx8-insts"] = true;
573 Features["gfx9-insts"] = true;
574 Features["gfx10-insts"] = true;
575 Features["gfx10-3-insts"] = true;
576 Features["gfx11-insts"] = true;
577 Features["atomic-fadd-rtn-insts"] = true;
578 Features["image-insts"] = true;
579 Features["cube-insts"] = true;
580 Features["lerp-inst"] = true;
581 Features["sad-insts"] = true;
582 Features["qsad-insts"] = true;
583 Features["cvt-pknorm-vop2-insts"] = true;
584 Features["gws"] = true;
585 Features["wmma-256b-insts"] = true;
586 Features["atomic-fmin-fmax-global-f32"] = true;
587 break;
588 case GK_GFX1036:
589 case GK_GFX1035:
590 case GK_GFX1034:
591 case GK_GFX1033:
592 case GK_GFX1032:
593 case GK_GFX1031:
594 case GK_GFX1030:
596 Features["ci-insts"] = true;
597 Features["dot1-insts"] = true;
598 Features["dot2-insts"] = true;
599 Features["dot5-insts"] = true;
600 Features["dot6-insts"] = true;
601 Features["dot7-insts"] = true;
602 Features["dot10-insts"] = true;
603 Features["dl-insts"] = true;
604 Features["16-bit-insts"] = true;
605 Features["dpp"] = true;
606 Features["gfx8-insts"] = true;
607 Features["gfx9-insts"] = true;
608 Features["gfx10-insts"] = true;
609 Features["gfx10-3-insts"] = true;
610 Features["image-insts"] = true;
611 Features["s-memrealtime"] = true;
612 Features["s-memtime-inst"] = true;
613 Features["gws"] = true;
614 Features["vmem-to-lds-load-insts"] = true;
615 Features["atomic-fmin-fmax-global-f32"] = true;
616 Features["atomic-fmin-fmax-global-f64"] = true;
617 Features["cube-insts"] = true;
618 Features["lerp-inst"] = true;
619 Features["sad-insts"] = true;
620 Features["qsad-insts"] = true;
621 Features["cvt-pknorm-vop2-insts"] = true;
622 break;
623 case GK_GFX1012:
624 case GK_GFX1011:
625 Features["dot1-insts"] = true;
626 Features["dot2-insts"] = true;
627 Features["dot5-insts"] = true;
628 Features["dot6-insts"] = true;
629 Features["dot7-insts"] = true;
630 Features["dot10-insts"] = true;
// These two kinds add the dot features above and then share everything set
// for GK_GFX1013 / GK_GFX1010 below.
631 [[fallthrough]];
632 case GK_GFX1013:
633 case GK_GFX1010:
635 Features["dl-insts"] = true;
636 Features["ci-insts"] = true;
637 Features["16-bit-insts"] = true;
638 Features["dpp"] = true;
639 Features["gfx8-insts"] = true;
640 Features["gfx9-insts"] = true;
641 Features["gfx10-insts"] = true;
642 Features["image-insts"] = true;
643 Features["s-memrealtime"] = true;
644 Features["s-memtime-inst"] = true;
645 Features["gws"] = true;
646 Features["vmem-to-lds-load-insts"] = true;
647 Features["atomic-fmin-fmax-global-f32"] = true;
648 Features["atomic-fmin-fmax-global-f64"] = true;
649 Features["cube-insts"] = true;
650 Features["lerp-inst"] = true;
651 Features["sad-insts"] = true;
652 Features["qsad-insts"] = true;
653 Features["cvt-pknorm-vop2-insts"] = true;
654 break;
655 case GK_GFX950:
656 Features["bitop3-insts"] = true;
657 Features["fp6bf6-cvt-scale-insts"] = true;
658 Features["fp4-cvt-scale-insts"] = true;
659 Features["bf8-cvt-scale-insts"] = true;
660 Features["fp8-cvt-scale-insts"] = true;
661 Features["f16bf16-to-fp6bf6-cvt-scale-insts"] = true;
662 Features["f32-to-f16bf16-cvt-sr-insts"] = true;
663 Features["prng-inst"] = true;
664 Features["permlane16-swap"] = true;
665 Features["permlane32-swap"] = true;
666 Features["ashr-pk-insts"] = true;
667 Features["dot12-insts"] = true;
668 Features["dot13-insts"] = true;
669 Features["atomic-buffer-pk-add-bf16-inst"] = true;
670 Features["gfx950-insts"] = true;
671 [[fallthrough]];
672 case GK_GFX942:
673 Features["fp8-insts"] = true;
674 Features["fp8-conversion-insts"] = true;
// GK_GFX950 reaches this point by fallthrough; the guard keeps xf32-insts
// from being set for it.
675 if (Kind != GK_GFX950)
676 Features["xf32-insts"] = true;
677 [[fallthrough]];
// NOTE(review): a `case` label appears to be missing from the listing here
// (embedded line 678 is absent).
679 Features["gfx940-insts"] = true;
680 Features["atomic-ds-pk-add-16-insts"] = true;
681 Features["atomic-flat-pk-add-16-insts"] = true;
682 Features["atomic-global-pk-add-bf16-inst"] = true;
683 Features["gfx90a-insts"] = true;
684 Features["atomic-buffer-global-pk-add-f16-insts"] = true;
685 Features["atomic-fadd-rtn-insts"] = true;
686 Features["dot3-insts"] = true;
687 Features["dot4-insts"] = true;
688 Features["dot5-insts"] = true;
689 Features["dot6-insts"] = true;
690 Features["mai-insts"] = true;
691 Features["dl-insts"] = true;
692 Features["dot1-insts"] = true;
693 Features["dot2-insts"] = true;
694 Features["dot7-insts"] = true;
695 Features["dot10-insts"] = true;
696 Features["gfx9-insts"] = true;
697 Features["gfx8-insts"] = true;
698 Features["16-bit-insts"] = true;
699 Features["dpp"] = true;
700 Features["s-memrealtime"] = true;
701 Features["ci-insts"] = true;
702 Features["s-memtime-inst"] = true;
703 Features["gws"] = true;
704 Features["vmem-to-lds-load-insts"] = true;
705 Features["atomic-fmin-fmax-global-f64"] = true;
706 Features["wavefrontsize64"] = true;
707 Features["cube-insts"] = true;
708 Features["lerp-inst"] = true;
709 Features["sad-insts"] = true;
710 Features["qsad-insts"] = true;
711 Features["cvt-pknorm-vop2-insts"] = true;
712 break;
// From here down to the GK_GFX801 group the cases form one fallthrough chain:
// each kind sets its own additions and then receives everything set by the
// cases after it, up to the next break.
713 case GK_GFX90A:
714 Features["gfx90a-insts"] = true;
715 Features["atomic-buffer-global-pk-add-f16-insts"] = true;
716 Features["atomic-fadd-rtn-insts"] = true;
717 Features["atomic-fmin-fmax-global-f64"] = true;
718 [[fallthrough]];
719 case GK_GFX908:
720 Features["dot3-insts"] = true;
721 Features["dot4-insts"] = true;
722 Features["dot5-insts"] = true;
723 Features["dot6-insts"] = true;
724 Features["mai-insts"] = true;
725 [[fallthrough]];
726 case GK_GFX906:
727 Features["dl-insts"] = true;
728 Features["dot1-insts"] = true;
729 Features["dot2-insts"] = true;
730 Features["dot7-insts"] = true;
731 Features["dot10-insts"] = true;
732 [[fallthrough]];
733 case GK_GFX90C:
734 case GK_GFX909:
735 case GK_GFX904:
736 case GK_GFX902:
737 case GK_GFX900:
738 case GK_GFX9_GENERIC:
739 Features["gfx9-insts"] = true;
740 Features["vmem-to-lds-load-insts"] = true;
741 [[fallthrough]];
742 case GK_GFX810:
743 case GK_GFX805:
744 case GK_GFX803:
745 case GK_GFX802:
746 case GK_GFX801:
747 Features["gfx8-insts"] = true;
748 Features["16-bit-insts"] = true;
749 Features["dpp"] = true;
750 Features["s-memrealtime"] = true;
751 Features["ci-insts"] = true;
752 Features["image-insts"] = true;
753 Features["s-memtime-inst"] = true;
754 Features["gws"] = true;
755 Features["wavefrontsize64"] = true;
756 Features["cube-insts"] = true;
757 Features["lerp-inst"] = true;
758 Features["sad-insts"] = true;
759 Features["qsad-insts"] = true;
760 Features["cvt-pknorm-vop2-insts"] = true;
761 break;
762 case GK_GFX705:
763 case GK_GFX704:
764 case GK_GFX703:
765 case GK_GFX702:
766 case GK_GFX701:
767 case GK_GFX700:
768 Features["ci-insts"] = true;
769 Features["cube-insts"] = true;
770 Features["lerp-inst"] = true;
771 Features["sad-insts"] = true;
772 Features["qsad-insts"] = true;
773 Features["cvt-pknorm-vop2-insts"] = true;
774 Features["image-insts"] = true;
775 Features["s-memtime-inst"] = true;
776 Features["gws"] = true;
777 Features["atomic-fmin-fmax-global-f32"] = true;
778 Features["atomic-fmin-fmax-global-f64"] = true;
779 Features["wavefrontsize64"] = true;
780 break;
781 case GK_GFX602:
782 case GK_GFX601:
783 case GK_GFX600:
784 Features["image-insts"] = true;
785 Features["s-memtime-inst"] = true;
786 Features["gws"] = true;
787 Features["atomic-fmin-fmax-global-f32"] = true;
788 Features["atomic-fmin-fmax-global-f64"] = true;
789 Features["wavefrontsize64"] = true;
790 Features["cube-insts"] = true;
791 Features["lerp-inst"] = true;
792 Features["sad-insts"] = true;
793 Features["cvt-pknorm-vop2-insts"] = true;
794 break;
// GK_NONE leaves Features untouched.
795 case GK_NONE:
796 break;
// Any kind without a case above is treated as a programming error.
797 default:
798 llvm_unreachable("Unhandled GPU!");
799 }
800}
801
802/// Fills Features map with default values for given target GPU.
803/// \p Features contains overriding target features and this function returns
804/// default target features with entries overridden by \p Features.
///
/// Three cases are distinguished by the triple \p T:
///  - SPIR-V with the AMDHSA OS: a fixed list of features is set to true.
///  - AMDGCN: defaults are computed for the GPU and merged into \p Features
///    by insertWaveSizeFeature(), whose result (possibly an error) is
///    returned.
///  - otherwise (treated as R600): no features are currently added.
/// Apart from the AMDGCN case the result is always {NO_ERROR, ""}.
/// NOTE(review): the line carrying the function name and first parameters is
/// missing from this listing.
805std::pair<FeatureError, StringRef>
807 StringMap<bool> &Features) {
808 // XXX - What does the member GPU mean if device name string passed here?
809 if (T.isSPIRV() && T.getOS() == Triple::OSType::AMDHSA) {
810 // AMDGCN SPIRV must support the union of all AMDGCN features. This list
811 // should be kept in sorted order and updated whenever new features are
812 // added.
813 Features["16-bit-insts"] = true;
814 Features["ashr-pk-insts"] = true;
815 Features["atomic-buffer-pk-add-bf16-inst"] = true;
816 Features["atomic-buffer-global-pk-add-f16-insts"] = true;
817 Features["atomic-ds-pk-add-16-insts"] = true;
818 Features["atomic-fadd-rtn-insts"] = true;
819 Features["atomic-flat-pk-add-16-insts"] = true;
820 Features["atomic-global-pk-add-bf16-inst"] = true;
821 Features["bf16-trans-insts"] = true;
822 Features["bf16-cvt-insts"] = true;
823 Features["bf8-cvt-scale-insts"] = true;
824 Features["bitop3-insts"] = true;
825 Features["ci-insts"] = true;
826 Features["dl-insts"] = true;
827 Features["dot1-insts"] = true;
828 Features["dot2-insts"] = true;
829 Features["dot3-insts"] = true;
830 Features["dot4-insts"] = true;
831 Features["dot5-insts"] = true;
832 Features["dot6-insts"] = true;
833 Features["dot7-insts"] = true;
834 Features["dot8-insts"] = true;
835 Features["dot9-insts"] = true;
836 Features["dot10-insts"] = true;
837 Features["dot11-insts"] = true;
838 Features["dot12-insts"] = true;
839 Features["dot13-insts"] = true;
840 Features["dpp"] = true;
841 Features["f16bf16-to-fp6bf6-cvt-scale-insts"] = true;
842 Features["f32-to-f16bf16-cvt-sr-insts"] = true;
843 Features["fp4-cvt-scale-insts"] = true;
844 Features["fp6bf6-cvt-scale-insts"] = true;
845 Features["fp8e5m3-insts"] = true;
846 Features["fp8-conversion-insts"] = true;
847 Features["fp8-cvt-scale-insts"] = true;
848 Features["fp8-insts"] = true;
849 Features["gfx8-insts"] = true;
850 Features["gfx9-insts"] = true;
851 Features["gfx90a-insts"] = true;
852 Features["gfx940-insts"] = true;
853 Features["gfx950-insts"] = true;
854 Features["gfx10-insts"] = true;
855 Features["gfx10-3-insts"] = true;
856 Features["gfx11-insts"] = true;
857 Features["gfx12-insts"] = true;
858 Features["gfx1250-insts"] = true;
859 Features["gws"] = true;
860 Features["image-insts"] = true;
861 Features["mai-insts"] = true;
862 Features["permlane16-swap"] = true;
863 Features["permlane32-swap"] = true;
864 Features["prng-inst"] = true;
865 Features["setprio-inc-wg-inst"] = true;
866 Features["s-memrealtime"] = true;
867 Features["s-memtime-inst"] = true;
868 Features["tanh-insts"] = true;
869 Features["tensor-cvt-lut-insts"] = true;
870 Features["transpose-load-f4f6-insts"] = true;
871 Features["vmem-pref-insts"] = true;
872 Features["vmem-to-lds-load-insts"] = true;
// Both wave sizes are enabled together in this branch.
873 Features["wavefrontsize32"] = true;
874 Features["wavefrontsize64"] = true;
875 } else if (T.isAMDGCN()) {
// Defaults go into a scratch map first so that the caller's entries in
// Features can take precedence when the two are merged.
876 StringMap<bool> DefaultFeatures;
877 fillAMDGCNFeatureMap(GPU, T, DefaultFeatures);
878 return insertWaveSizeFeature(GPU, T, DefaultFeatures, Features);
879 } else {
// An empty GPU name is treated as "r600".
880 if (GPU.empty())
881 GPU = "r600";
882
// Every listed kind currently adds nothing; the switch exists so that a kind
// without a case reaches llvm_unreachable.
883 switch (llvm::AMDGPU::parseArchR600(GPU)) {
884 case GK_CAYMAN:
885 case GK_CYPRESS:
886 case GK_RV770:
887 case GK_RV670:
888 // TODO: Add fp64 when implemented.
889 break;
890 case GK_TURKS:
891 case GK_CAICOS:
892 case GK_BARTS:
893 case GK_SUMO:
894 case GK_REDWOOD:
895 case GK_JUNIPER:
896 case GK_CEDAR:
897 case GK_RV730:
898 case GK_RV710:
899 case GK_RS880:
900 case GK_R630:
901 case GK_R600:
902 break;
903 default:
904 llvm_unreachable("Unhandled GPU!");
905 }
906 }
907 return {NO_ERROR, StringRef()};
908}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
#define T
static void fillAMDGCNFeatureMap(StringRef GPU, const Triple &T, StringMap< bool > &Features)
Fills Features map with default values for given target GPU.
static void setImpliedBits(FeatureBitset &Bits, const FeatureBitset &Implies, ArrayRef< BasicSubtargetFeatureKV > FeatureTable)
For each feature that is (transitively) implied by this feature, set it.
static std::pair< FeatureError, StringRef > insertWaveSizeFeature(StringRef GPU, const Triple &T, const StringMap< bool > &DefaultFeatures, StringMap< bool > &Features)
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
iterator end() const
Definition ArrayRef.h:131
const FeatureBitset & getAsBitset() const
Container class for subtarget features.
constexpr bool test(unsigned I) const
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
A wrapper around a string literal that serves as a proxy for constructing global tables of StringRefs...
Definition StringRef.h:882
StringMap - This is an unconventional map that is specialized for handling keys that are "strings",...
Definition StringMap.h:133
iterator end()
Definition StringMap.h:224
iterator find(StringRef Key)
Definition StringMap.h:237
size_type count(StringRef Key) const
count - Return 1 if the element is in the map, 0 otherwise.
Definition StringMap.h:285
bool insert(MapEntryTy *KeyValue)
insert - Insert the specified key/value pair into the map.
Definition StringMap.h:321
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
constexpr bool empty() const
empty - Check if the string is empty.
Definition StringRef.h:140
StringRef drop_back(size_t N=1) const
Return a StringRef equal to 'this' but with the last N elements dropped.
Definition StringRef.h:636
Triple - Helper class for working with autoconf configuration names.
Definition Triple.h:47
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
LLVM_ABI StringRef getArchNameR600(GPUKind AK)
GPUKind
GPU kinds supported by the AMDGPU target.
LLVM_ABI StringRef getCanonicalArchName(const Triple &T, StringRef Arch)
LLVM_ABI void fillValidArchListR600(SmallVectorImpl< StringRef > &Values)
LLVM_ABI StringRef getArchFamilyNameAMDGCN(GPUKind AK)
LLVM_ABI IsaVersion getIsaVersion(StringRef GPU)
LLVM_ABI void fillValidArchListAMDGCN(SmallVectorImpl< StringRef > &Values)
LLVM_ABI GPUKind parseArchAMDGCN(StringRef CPU)
@ UNSUPPORTED_TARGET_FEATURE
@ INVALID_FEATURE_COMBINATION
@ FEATURE_FAST_DENORMAL_F32
LLVM_ABI std::pair< FeatureError, StringRef > fillAMDGPUFeatureMap(StringRef GPU, const Triple &T, StringMap< bool > &Features)
Fills Features map with default values for given target GPU.
LLVM_ABI StringRef getArchNameAMDGCN(GPUKind AK)
LLVM_ABI unsigned getArchAttrAMDGCN(GPUKind AK)
LLVM_ABI unsigned getArchAttrR600(GPUKind AK)
LLVM_ABI GPUKind parseArchR600(StringRef CPU)
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1765
auto lower_bound(R &&Range, T &&Value)
Provide wrappers to std::lower_bound which take ranges instead of having to pass begin/end explicitly...
Definition STLExtras.h:2052
LLVM_ABI std::optional< llvm::StringMap< bool > > getCPUDefaultTargetFeatures(StringRef CPU, ArrayRef< BasicSubtargetSubTypeKV > ProcDesc, ArrayRef< BasicSubtargetFeatureKV > ProcFeatures)
Instruction set architecture version.
Used to provide key value pairs for feature and CPU bit flags.
FeatureBitArray Implies
K-V bit mask.