LLVM 20.0.0git
NVPTXSubtarget.h
Go to the documentation of this file.
1//=====-- NVPTXSubtarget.h - Define Subtarget for the NVPTX ---*- C++ -*--====//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file declares the NVPTX specific subclass of TargetSubtarget.
10//
11//===----------------------------------------------------------------------===//
12
13#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXSUBTARGET_H
14#define LLVM_LIB_TARGET_NVPTX_NVPTXSUBTARGET_H
15
16#include "NVPTX.h"
17#include "NVPTXFrameLowering.h"
18#include "NVPTXISelLowering.h"
19#include "NVPTXInstrInfo.h"
20#include "NVPTXRegisterInfo.h"
22#include "llvm/IR/DataLayout.h"
23#include <string>
24
25#define GET_SUBTARGETINFO_HEADER
26#include "NVPTXGenSubtargetInfo.inc"
27
28namespace llvm {
29
31 virtual void anchor();
32 std::string TargetName;
33
34 // PTX version x.y is represented as 10*x+y, e.g. 3.1 == 31
35 unsigned PTXVersion;
36
37 // Full SM version x.y is represented as 100*x+10*y+feature, e.g. 3.1 == 310
38 // sm_90a == 901
39 unsigned int FullSmVersion;
40
41 // SM version x.y is represented as 10*x+y, e.g. 3.1 == 31. Derived from
42 // FullSmVersion.
43 unsigned int SmVersion;
44
45 NVPTXInstrInfo InstrInfo;
47 std::unique_ptr<const SelectionDAGTargetInfo> TSInfo;
48
49 // NVPTX does not have any call stack frame, but needs an NVPTX-specific
50 // FrameLowering class because TargetFrameLowering is abstract.
51 NVPTXFrameLowering FrameLowering;
52
53public:
54 /// This constructor initializes the data members to match that
55 /// of the specified module.
56 ///
57 NVPTXSubtarget(const Triple &TT, const std::string &CPU,
58 const std::string &FS, const NVPTXTargetMachine &TM);
59
60 ~NVPTXSubtarget() override;
61
62 const TargetFrameLowering *getFrameLowering() const override {
63 return &FrameLowering;
64 }
65 const NVPTXInstrInfo *getInstrInfo() const override { return &InstrInfo; }
66 const NVPTXRegisterInfo *getRegisterInfo() const override {
67 return &InstrInfo.getRegisterInfo();
68 }
69 const NVPTXTargetLowering *getTargetLowering() const override {
70 return &TLInfo;
71 }
72
/// Declared here and defined out of line, so the body is not visible in this
/// header. NOTE(review): presumably hands out the object held by TSInfo —
/// confirm against the implementation file.
73 const SelectionDAGTargetInfo *getSelectionDAGInfo() const override;
74
75 bool hasAtomAddF64() const { return SmVersion >= 60; }
76 bool hasAtomScope() const { return SmVersion >= 60; }
77 bool hasAtomBitwise64() const { return SmVersion >= 32; }
78 bool hasAtomMinMax64() const { return SmVersion >= 32; }
79 bool hasAtomCas16() const { return SmVersion >= 70 && PTXVersion >= 63; }
80 bool hasClusters() const { return SmVersion >= 90 && PTXVersion >= 78; }
81 bool hasLDG() const { return SmVersion >= 32; }
82 bool hasHWROT32() const { return SmVersion >= 32; }
83 bool hasFP16Math() const { return SmVersion >= 53; }
84 bool hasBF16Math() const { return SmVersion >= 80; }
/// Declared here and defined out of line, so the policy it applies is not
/// visible in this header. NOTE(review): presumably refines hasFP16Math()
/// with an additional opt-out — confirm against the implementation file.
85 bool allowFP16Math() const;
86 bool hasMaskOperator() const { return PTXVersion >= 71; }
87 bool hasNoReturn() const { return SmVersion >= 30 && PTXVersion >= 64; }
88 // Does SM & PTX support memory orderings (weak and atomic: relaxed, acquire,
89 // release, acq_rel, sc) ?
90 bool hasMemoryOrdering() const { return SmVersion >= 70 && PTXVersion >= 60; }
91 // Does SM & PTX support atomic relaxed MMIO operations ?
92 bool hasRelaxedMMIO() const { return SmVersion >= 70 && PTXVersion >= 82; }
93 bool hasDotInstructions() const {
94 return SmVersion >= 61 && PTXVersion >= 50;
95 }
96 // Prior to CUDA 12.3 ptxas did not recognize that the trap instruction
97 // terminates a basic block. Instead, it would assume that control flow
98 // continued to the next instruction. The next instruction could be in the
99 // block that's lexically below it. This would lead to phantom CFG edges
100 // being created within ptxas. This issue was fixed in CUDA 12.3. Thus, when
101 // targeting PTX ISA version 8.3 or later we can confidently say that the bug
102 // will not be present.
103 bool hasPTXASUnreachableBug() const { return PTXVersion < 83; }
104 bool hasCvtaParam() const { return SmVersion >= 70 && PTXVersion >= 77; }
105 unsigned int getFullSmVersion() const { return FullSmVersion; }
106 unsigned int getSmVersion() const { return getFullSmVersion() / 10; }
107 // GPUs with the "a" suffix include architecture-accelerated features that
108 // are supported on the specified architecture only, hence such targets do not
109 // follow the onion layer model. hasAAFeatures() allows distinguishing such
110 // GPU variants from the base GPU architecture.
111 // - 0 represents base GPU model,
112 // - non-zero value identifies particular architecture-accelerated variant.
113 bool hasAAFeatures() const { return getFullSmVersion() % 10; }
114 std::string getTargetName() const { return TargetName; }
115
116 // Get maximum value of required alignments among the supported data types.
117 // From the PTX ISA doc, section 8.2.3:
118 // The memory consistency model relates operations executed on memory
119 // locations with scalar data-types, which have a maximum size and alignment
120 // of 64 bits. Memory operations with a vector data-type are modelled as a
121 // set of equivalent memory operations with a scalar data-type, executed in
122 // an unspecified order on the elements in the vector.
123 unsigned getMaxRequiredAlignment() const { return 8; }
124
125 unsigned getPTXVersion() const { return PTXVersion; }
126
129
/// Declared here and defined out of line, so the body is not visible in this
/// header. NOTE(review): per the name, expected to report FailureMessage as an
/// error when hasClusters() is false — confirm against the implementation.
130 void failIfClustersUnsupported(std::string const &FailureMessage) const;
131};
132
133} // End llvm namespace
134
135#endif
unsigned SmVersion
Definition: NVVMReflect.cpp:78
const NVPTXRegisterInfo & getRegisterInfo() const
const NVPTXInstrInfo * getInstrInfo() const override
void failIfClustersUnsupported(std::string const &FailureMessage) const
std::string getTargetName() const
unsigned getMaxRequiredAlignment() const
bool hasAtomMinMax64() const
bool hasAtomAddF64() const
bool hasHWROT32() const
bool hasClusters() const
bool hasMaskOperator() const
bool hasAAFeatures() const
const NVPTXTargetLowering * getTargetLowering() const override
void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS)
unsigned getPTXVersion() const
bool hasCvtaParam() const
~NVPTXSubtarget() override
const NVPTXRegisterInfo * getRegisterInfo() const override
unsigned int getFullSmVersion() const
unsigned int getSmVersion() const
bool hasDotInstructions() const
bool hasAtomBitwise64() const
bool hasRelaxedMMIO() const
bool hasBF16Math() const
bool allowFP16Math() const
const TargetFrameLowering * getFrameLowering() const override
bool hasAtomScope() const
bool hasAtomCas16() const
bool hasMemoryOrdering() const
NVPTXSubtarget & initializeSubtargetDependencies(StringRef CPU, StringRef FS)
const SelectionDAGTargetInfo * getSelectionDAGInfo() const override
bool hasPTXASUnreachableBug() const
bool hasFP16Math() const
bool hasNoReturn() const
Targets can subclass this to parameterize the SelectionDAG lowering and instruction selection process...
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:51
Information about stack frame layout on the target.
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:44
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18