Bug Summary

File: llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
Warning: line 1925, column 13
Value stored to 'RemainderOffset' during its initialization is never read

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name AMDGPUISelDAGToDAG.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -fhalf-no-semantic-interposition -mframe-pointer=none -fmath-errno -fno-rounding-math -mconstructor-aliases -munwind-tables -target-cpu x86-64 -tune-cpu generic -fno-split-dwarf-inlining -debugger-tuning=gdb -ffunction-sections -fdata-sections -resource-dir /usr/lib/llvm-13/lib/clang/13.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I /build/llvm-toolchain-snapshot-13~++20210223111116+16ede0956cb1/build-llvm/lib/Target/AMDGPU -I /build/llvm-toolchain-snapshot-13~++20210223111116+16ede0956cb1/llvm/lib/Target/AMDGPU -I /build/llvm-toolchain-snapshot-13~++20210223111116+16ede0956cb1/build-llvm/include -I /build/llvm-toolchain-snapshot-13~++20210223111116+16ede0956cb1/llvm/include -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0/backward 
-internal-isystem /usr/local/include -internal-isystem /usr/lib/llvm-13/lib/clang/13.0.0/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir=/build/llvm-toolchain-snapshot-13~++20210223111116+16ede0956cb1/build-llvm/lib/Target/AMDGPU -fdebug-prefix-map=/build/llvm-toolchain-snapshot-13~++20210223111116+16ede0956cb1=. -ferror-limit 19 -fvisibility hidden -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -o /tmp/scan-build-2021-02-23-121308-24221-1 -x c++ /build/llvm-toolchain-snapshot-13~++20210223111116+16ede0956cb1/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
1//===-- AMDGPUISelDAGToDAG.cpp - A dag to dag inst selector for AMDGPU ----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//==-----------------------------------------------------------------------===//
8//
9/// \file
10/// Defines an instruction selector for the AMDGPU target.
11//
12//===----------------------------------------------------------------------===//
13
14#include "AMDGPU.h"
15#include "AMDGPUTargetMachine.h"
16#include "SIMachineFunctionInfo.h"
17#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
18#include "llvm/Analysis/ValueTracking.h"
19#include "llvm/CodeGen/FunctionLoweringInfo.h"
20#include "llvm/CodeGen/SelectionDAG.h"
21#include "llvm/CodeGen/SelectionDAGISel.h"
22#include "llvm/CodeGen/SelectionDAGNodes.h"
23#include "llvm/IR/IntrinsicsAMDGPU.h"
24#include "llvm/InitializePasses.h"
25
26#ifdef EXPENSIVE_CHECKS
27#include "llvm/Analysis/LoopInfo.h"
28#include "llvm/IR/Dominators.h"
29#endif
30
31#define DEBUG_TYPE"isel" "isel"
32
33using namespace llvm;
34
35namespace llvm {
36
37class R600InstrInfo;
38
39} // end namespace llvm
40
41//===----------------------------------------------------------------------===//
42// Instruction Selector Implementation
43//===----------------------------------------------------------------------===//
44
45namespace {
46
47static bool isNullConstantOrUndef(SDValue V) {
48 if (V.isUndef())
49 return true;
50
51 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V);
52 return Const != nullptr && Const->isNullValue();
53}
54
55static bool getConstantValue(SDValue N, uint32_t &Out) {
56 // This is only used for packed vectors, where ussing 0 for undef should
57 // always be good.
58 if (N.isUndef()) {
59 Out = 0;
60 return true;
61 }
62
63 if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N)) {
64 Out = C->getAPIntValue().getSExtValue();
65 return true;
66 }
67
68 if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N)) {
69 Out = C->getValueAPF().bitcastToAPInt().getSExtValue();
70 return true;
71 }
72
73 return false;
74}
75
76// TODO: Handle undef as zero
77static SDNode *packConstantV2I16(const SDNode *N, SelectionDAG &DAG,
78 bool Negate = false) {
79 assert(N->getOpcode() == ISD::BUILD_VECTOR && N->getNumOperands() == 2)((N->getOpcode() == ISD::BUILD_VECTOR && N->getNumOperands
() == 2) ? static_cast<void> (0) : __assert_fail ("N->getOpcode() == ISD::BUILD_VECTOR && N->getNumOperands() == 2"
, "/build/llvm-toolchain-snapshot-13~++20210223111116+16ede0956cb1/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp"
, 79, __PRETTY_FUNCTION__))
;
80 uint32_t LHSVal, RHSVal;
81 if (getConstantValue(N->getOperand(0), LHSVal) &&
82 getConstantValue(N->getOperand(1), RHSVal)) {
83 SDLoc SL(N);
84 uint32_t K = Negate ?
85 (-LHSVal & 0xffff) | (-RHSVal << 16) :
86 (LHSVal & 0xffff) | (RHSVal << 16);
87 return DAG.getMachineNode(AMDGPU::S_MOV_B32, SL, N->getValueType(0),
88 DAG.getTargetConstant(K, SL, MVT::i32));
89 }
90
91 return nullptr;
92}
93
94static SDNode *packNegConstantV2I16(const SDNode *N, SelectionDAG &DAG) {
95 return packConstantV2I16(N, DAG, true);
96}
97
98/// AMDGPU specific code to select AMDGPU machine instructions for
99/// SelectionDAG operations.
100class AMDGPUDAGToDAGISel : public SelectionDAGISel {
101 // Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can
102 // make the right decision when generating code for different targets.
103 const GCNSubtarget *Subtarget;
104
105 // Default FP mode for the current function.
106 AMDGPU::SIModeRegisterDefaults Mode;
107
108 bool EnableLateStructurizeCFG;
109
110public:
111 explicit AMDGPUDAGToDAGISel(TargetMachine *TM = nullptr,
112 CodeGenOpt::Level OptLevel = CodeGenOpt::Default)
113 : SelectionDAGISel(*TM, OptLevel) {
114 EnableLateStructurizeCFG = AMDGPUTargetMachine::EnableLateStructurizeCFG;
115 }
116 ~AMDGPUDAGToDAGISel() override = default;
117
118 void getAnalysisUsage(AnalysisUsage &AU) const override {
119 AU.addRequired<AMDGPUArgumentUsageInfo>();
120 AU.addRequired<LegacyDivergenceAnalysis>();
121#ifdef EXPENSIVE_CHECKS
122 AU.addRequired<DominatorTreeWrapperPass>();
123 AU.addRequired<LoopInfoWrapperPass>();
124#endif
125 SelectionDAGISel::getAnalysisUsage(AU);
126 }
127
128 bool matchLoadD16FromBuildVector(SDNode *N) const;
129
130 bool runOnMachineFunction(MachineFunction &MF) override;
131 void PreprocessISelDAG() override;
132 void Select(SDNode *N) override;
133 StringRef getPassName() const override;
134 void PostprocessISelDAG() override;
135
136protected:
137 void SelectBuildVector(SDNode *N, unsigned RegClassID);
138
139private:
140 std::pair<SDValue, SDValue> foldFrameIndex(SDValue N) const;
141 bool isNoNanSrc(SDValue N) const;
142 bool isInlineImmediate(const SDNode *N, bool Negated = false) const;
143 bool isNegInlineImmediate(const SDNode *N) const {
144 return isInlineImmediate(N, true);
145 }
146
147 bool isInlineImmediate16(int64_t Imm) const {
148 return AMDGPU::isInlinableLiteral16(Imm, Subtarget->hasInv2PiInlineImm());
149 }
150
151 bool isInlineImmediate32(int64_t Imm) const {
152 return AMDGPU::isInlinableLiteral32(Imm, Subtarget->hasInv2PiInlineImm());
153 }
154
155 bool isInlineImmediate64(int64_t Imm) const {
156 return AMDGPU::isInlinableLiteral64(Imm, Subtarget->hasInv2PiInlineImm());
157 }
158
159 bool isInlineImmediate(const APFloat &Imm) const {
160 return Subtarget->getInstrInfo()->isInlineConstant(Imm);
161 }
162
163 bool isVGPRImm(const SDNode *N) const;
164 bool isUniformLoad(const SDNode *N) const;
165 bool isUniformBr(const SDNode *N) const;
166
167 bool isBaseWithConstantOffset64(SDValue Addr, SDValue &LHS,
168 SDValue &RHS) const;
169
170 MachineSDNode *buildSMovImm64(SDLoc &DL, uint64_t Val, EVT VT) const;
171
172 SDNode *glueCopyToOp(SDNode *N, SDValue NewChain, SDValue Glue) const;
173 SDNode *glueCopyToM0(SDNode *N, SDValue Val) const;
174 SDNode *glueCopyToM0LDSInit(SDNode *N) const;
175
176 const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const;
177 virtual bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
178 virtual bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset);
179 bool isDSOffsetLegal(SDValue Base, unsigned Offset) const;
180 bool isDSOffset2Legal(SDValue Base, unsigned Offset0, unsigned Offset1,
181 unsigned Size) const;
182 bool SelectDS1Addr1Offset(SDValue Ptr, SDValue &Base, SDValue &Offset) const;
183 bool SelectDS64Bit4ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0,
184 SDValue &Offset1) const;
185 bool SelectDS128Bit8ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0,
186 SDValue &Offset1) const;
187 bool SelectDSReadWrite2(SDValue Ptr, SDValue &Base, SDValue &Offset0,
188 SDValue &Offset1, unsigned Size) const;
189 bool SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
190 SDValue &SOffset, SDValue &Offset, SDValue &Offen,
191 SDValue &Idxen, SDValue &Addr64, SDValue &GLC, SDValue &SLC,
192 SDValue &TFE, SDValue &DLC, SDValue &SWZ,
193 SDValue &SCCB) const;
194 bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
195 SDValue &SOffset, SDValue &Offset, SDValue &GLC,
196 SDValue &SLC, SDValue &TFE, SDValue &DLC,
197 SDValue &SWZ, SDValue &SCCB) const;
198 bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
199 SDValue &VAddr, SDValue &SOffset, SDValue &Offset,
200 SDValue &SLC) const;
201 bool SelectMUBUFScratchOffen(SDNode *Parent,
202 SDValue Addr, SDValue &RSrc, SDValue &VAddr,
203 SDValue &SOffset, SDValue &ImmOffset) const;
204 bool SelectMUBUFScratchOffset(SDNode *Parent,
205 SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
206 SDValue &Offset) const;
207
208 bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &SOffset,
209 SDValue &Offset, SDValue &GLC, SDValue &SLC,
210 SDValue &TFE, SDValue &DLC, SDValue &SWZ,
211 SDValue &SCCB) const;
212 bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
213 SDValue &Offset, SDValue &SLC) const;
214 bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
215 SDValue &Offset) const;
216
217 template <bool IsSigned>
218 bool SelectFlatOffset(SDNode *N, SDValue Addr, SDValue &VAddr,
219 SDValue &Offset) const;
220 bool SelectGlobalSAddr(SDNode *N, SDValue Addr, SDValue &SAddr,
221 SDValue &VOffset, SDValue &Offset) const;
222 bool SelectScratchSAddr(SDNode *N, SDValue Addr, SDValue &SAddr,
223 SDValue &Offset) const;
224
225 bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset,
226 bool &Imm) const;
227 SDValue Expand32BitAddress(SDValue Addr) const;
228 bool SelectSMRD(SDValue Addr, SDValue &SBase, SDValue &Offset,
229 bool &Imm) const;
230 bool SelectSMRDImm(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
231 bool SelectSMRDImm32(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
232 bool SelectSMRDSgpr(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
233 bool SelectSMRDBufferImm(SDValue Addr, SDValue &Offset) const;
234 bool SelectSMRDBufferImm32(SDValue Addr, SDValue &Offset) const;
235 bool SelectMOVRELOffset(SDValue Index, SDValue &Base, SDValue &Offset) const;
236
237 bool SelectVOP3Mods_NNaN(SDValue In, SDValue &Src, SDValue &SrcMods) const;
238 bool SelectVOP3ModsImpl(SDValue In, SDValue &Src, unsigned &SrcMods,
239 bool AllowAbs = true) const;
240 bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
241 bool SelectVOP3BMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
242 bool SelectVOP3NoMods(SDValue In, SDValue &Src) const;
243 bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods,
244 SDValue &Clamp, SDValue &Omod) const;
245 bool SelectVOP3BMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
246 SDValue &Clamp, SDValue &Omod) const;
247 bool SelectVOP3NoMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
248 SDValue &Clamp, SDValue &Omod) const;
249
250 bool SelectVOP3OMods(SDValue In, SDValue &Src,
251 SDValue &Clamp, SDValue &Omod) const;
252
253 bool SelectVOP3PMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
254
255 bool SelectVOP3OpSel(SDValue In, SDValue &Src, SDValue &SrcMods) const;
256
257 bool SelectVOP3OpSelMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
258 bool SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src, unsigned &Mods) const;
259 bool SelectVOP3PMadMixMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
260
261 SDValue getHi16Elt(SDValue In) const;
262
263 SDValue getMaterializedScalarImm32(int64_t Val, const SDLoc &DL) const;
264
265 void SelectADD_SUB_I64(SDNode *N);
266 void SelectAddcSubb(SDNode *N);
267 void SelectUADDO_USUBO(SDNode *N);
268 void SelectDIV_SCALE(SDNode *N);
269 void SelectMAD_64_32(SDNode *N);
270 void SelectFMA_W_CHAIN(SDNode *N);
271 void SelectFMUL_W_CHAIN(SDNode *N);
272
273 SDNode *getS_BFE(unsigned Opcode, const SDLoc &DL, SDValue Val,
274 uint32_t Offset, uint32_t Width);
275 void SelectS_BFEFromShifts(SDNode *N);
276 void SelectS_BFE(SDNode *N);
277 bool isCBranchSCC(const SDNode *N) const;
278 void SelectBRCOND(SDNode *N);
279 void SelectFMAD_FMA(SDNode *N);
280 void SelectATOMIC_CMP_SWAP(SDNode *N);
281 void SelectDSAppendConsume(SDNode *N, unsigned IntrID);
282 void SelectDS_GWS(SDNode *N, unsigned IntrID);
283 void SelectInterpP1F16(SDNode *N);
284 void SelectINTRINSIC_W_CHAIN(SDNode *N);
285 void SelectINTRINSIC_WO_CHAIN(SDNode *N);
286 void SelectINTRINSIC_VOID(SDNode *N);
287
288protected:
289 // Include the pieces autogenerated from the target description.
290#include "AMDGPUGenDAGISel.inc"
291};
292
293class R600DAGToDAGISel : public AMDGPUDAGToDAGISel {
294 const R600Subtarget *Subtarget;
295
296 bool isConstantLoad(const MemSDNode *N, int cbID) const;
297 bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue& IntPtr);
298 bool SelectGlobalValueVariableOffset(SDValue Addr, SDValue &BaseReg,
299 SDValue& Offset);
300public:
301 explicit R600DAGToDAGISel(TargetMachine *TM, CodeGenOpt::Level OptLevel) :
302 AMDGPUDAGToDAGISel(TM, OptLevel) {}
303
304 void Select(SDNode *N) override;
305
306 bool SelectADDRIndirect(SDValue Addr, SDValue &Base,
307 SDValue &Offset) override;
308 bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
309 SDValue &Offset) override;
310
311 bool runOnMachineFunction(MachineFunction &MF) override;
312
313 void PreprocessISelDAG() override {}
314
315protected:
316 // Include the pieces autogenerated from the target description.
317#include "R600GenDAGISel.inc"
318};
319
320static SDValue stripBitcast(SDValue Val) {
321 return Val.getOpcode() == ISD::BITCAST ? Val.getOperand(0) : Val;
322}
323
324// Figure out if this is really an extract of the high 16-bits of a dword.
325static bool isExtractHiElt(SDValue In, SDValue &Out) {
326 In = stripBitcast(In);
327
328 if (In.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
329 if (ConstantSDNode *Idx = dyn_cast<ConstantSDNode>(In.getOperand(1))) {
330 if (!Idx->isOne())
331 return false;
332 Out = In.getOperand(0);
333 return true;
334 }
335 }
336
337 if (In.getOpcode() != ISD::TRUNCATE)
338 return false;
339
340 SDValue Srl = In.getOperand(0);
341 if (Srl.getOpcode() == ISD::SRL) {
342 if (ConstantSDNode *ShiftAmt = dyn_cast<ConstantSDNode>(Srl.getOperand(1))) {
343 if (ShiftAmt->getZExtValue() == 16) {
344 Out = stripBitcast(Srl.getOperand(0));
345 return true;
346 }
347 }
348 }
349
350 return false;
351}
352
353// Look through operations that obscure just looking at the low 16-bits of the
354// same register.
355static SDValue stripExtractLoElt(SDValue In) {
356 if (In.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
357 if (ConstantSDNode *Idx = dyn_cast<ConstantSDNode>(In.getOperand(1))) {
358 if (Idx->isNullValue() && In.getValueSizeInBits() <= 32)
359 return In.getOperand(0);
360 }
361 }
362
363 if (In.getOpcode() == ISD::TRUNCATE) {
364 SDValue Src = In.getOperand(0);
365 if (Src.getValueType().getSizeInBits() == 32)
366 return stripBitcast(Src);
367 }
368
369 return In;
370}
371
372} // end anonymous namespace
373
374INITIALIZE_PASS_BEGIN(AMDGPUDAGToDAGISel, "amdgpu-isel",static void *initializeAMDGPUDAGToDAGISelPassOnce(PassRegistry
&Registry) {
375 "AMDGPU DAG->DAG Pattern Instruction Selection", false, false)static void *initializeAMDGPUDAGToDAGISelPassOnce(PassRegistry
&Registry) {
376INITIALIZE_PASS_DEPENDENCY(AMDGPUArgumentUsageInfo)initializeAMDGPUArgumentUsageInfoPass(Registry);
377INITIALIZE_PASS_DEPENDENCY(AMDGPUPerfHintAnalysis)initializeAMDGPUPerfHintAnalysisPass(Registry);
378INITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis)initializeLegacyDivergenceAnalysisPass(Registry);
379#ifdef EXPENSIVE_CHECKS
380INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)initializeDominatorTreeWrapperPassPass(Registry);
381INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)initializeLoopInfoWrapperPassPass(Registry);
382#endif
383INITIALIZE_PASS_END(AMDGPUDAGToDAGISel, "amdgpu-isel",PassInfo *PI = new PassInfo( "AMDGPU DAG->DAG Pattern Instruction Selection"
, "amdgpu-isel", &AMDGPUDAGToDAGISel::ID, PassInfo::NormalCtor_t
(callDefaultCtor<AMDGPUDAGToDAGISel>), false, false); Registry
.registerPass(*PI, true); return PI; } static llvm::once_flag
InitializeAMDGPUDAGToDAGISelPassFlag; void llvm::initializeAMDGPUDAGToDAGISelPass
(PassRegistry &Registry) { llvm::call_once(InitializeAMDGPUDAGToDAGISelPassFlag
, initializeAMDGPUDAGToDAGISelPassOnce, std::ref(Registry)); }
384 "AMDGPU DAG->DAG Pattern Instruction Selection", false, false)PassInfo *PI = new PassInfo( "AMDGPU DAG->DAG Pattern Instruction Selection"
, "amdgpu-isel", &AMDGPUDAGToDAGISel::ID, PassInfo::NormalCtor_t
(callDefaultCtor<AMDGPUDAGToDAGISel>), false, false); Registry
.registerPass(*PI, true); return PI; } static llvm::once_flag
InitializeAMDGPUDAGToDAGISelPassFlag; void llvm::initializeAMDGPUDAGToDAGISelPass
(PassRegistry &Registry) { llvm::call_once(InitializeAMDGPUDAGToDAGISelPassFlag
, initializeAMDGPUDAGToDAGISelPassOnce, std::ref(Registry)); }
385
386/// This pass converts a legalized DAG into a AMDGPU-specific
387// DAG, ready for instruction scheduling.
388FunctionPass *llvm::createAMDGPUISelDag(TargetMachine *TM,
389 CodeGenOpt::Level OptLevel) {
390 return new AMDGPUDAGToDAGISel(TM, OptLevel);
391}
392
393/// This pass converts a legalized DAG into a R600-specific
394// DAG, ready for instruction scheduling.
395FunctionPass *llvm::createR600ISelDag(TargetMachine *TM,
396 CodeGenOpt::Level OptLevel) {
397 return new R600DAGToDAGISel(TM, OptLevel);
398}
399
400bool AMDGPUDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
401#ifdef EXPENSIVE_CHECKS
402 DominatorTree & DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
403 LoopInfo * LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
404 for (auto &L : LI->getLoopsInPreorder()) {
405 assert(L->isLCSSAForm(DT))((L->isLCSSAForm(DT)) ? static_cast<void> (0) : __assert_fail
("L->isLCSSAForm(DT)", "/build/llvm-toolchain-snapshot-13~++20210223111116+16ede0956cb1/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp"
, 405, __PRETTY_FUNCTION__))
;
406 }
407#endif
408 Subtarget = &MF.getSubtarget<GCNSubtarget>();
409 Mode = AMDGPU::SIModeRegisterDefaults(MF.getFunction());
410 return SelectionDAGISel::runOnMachineFunction(MF);
411}
412
413bool AMDGPUDAGToDAGISel::matchLoadD16FromBuildVector(SDNode *N) const {
414 assert(Subtarget->d16PreservesUnusedBits())((Subtarget->d16PreservesUnusedBits()) ? static_cast<void
> (0) : __assert_fail ("Subtarget->d16PreservesUnusedBits()"
, "/build/llvm-toolchain-snapshot-13~++20210223111116+16ede0956cb1/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp"
, 414, __PRETTY_FUNCTION__))
;
415 MVT VT = N->getValueType(0).getSimpleVT();
416 if (VT != MVT::v2i16 && VT != MVT::v2f16)
417 return false;
418
419 SDValue Lo = N->getOperand(0);
420 SDValue Hi = N->getOperand(1);
421
422 LoadSDNode *LdHi = dyn_cast<LoadSDNode>(stripBitcast(Hi));
423
424 // build_vector lo, (load ptr) -> load_d16_hi ptr, lo
425 // build_vector lo, (zextload ptr from i8) -> load_d16_hi_u8 ptr, lo
426 // build_vector lo, (sextload ptr from i8) -> load_d16_hi_i8 ptr, lo
427
428 // Need to check for possible indirect dependencies on the other half of the
429 // vector to avoid introducing a cycle.
430 if (LdHi && Hi.hasOneUse() && !LdHi->isPredecessorOf(Lo.getNode())) {
431 SDVTList VTList = CurDAG->getVTList(VT, MVT::Other);
432
433 SDValue TiedIn = CurDAG->getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, Lo);
434 SDValue Ops[] = {
435 LdHi->getChain(), LdHi->getBasePtr(), TiedIn
436 };
437
438 unsigned LoadOp = AMDGPUISD::LOAD_D16_HI;
439 if (LdHi->getMemoryVT() == MVT::i8) {
440 LoadOp = LdHi->getExtensionType() == ISD::SEXTLOAD ?
441 AMDGPUISD::LOAD_D16_HI_I8 : AMDGPUISD::LOAD_D16_HI_U8;
442 } else {
443 assert(LdHi->getMemoryVT() == MVT::i16)((LdHi->getMemoryVT() == MVT::i16) ? static_cast<void>
(0) : __assert_fail ("LdHi->getMemoryVT() == MVT::i16", "/build/llvm-toolchain-snapshot-13~++20210223111116+16ede0956cb1/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp"
, 443, __PRETTY_FUNCTION__))
;
444 }
445
446 SDValue NewLoadHi =
447 CurDAG->getMemIntrinsicNode(LoadOp, SDLoc(LdHi), VTList,
448 Ops, LdHi->getMemoryVT(),
449 LdHi->getMemOperand());
450
451 CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), NewLoadHi);
452 CurDAG->ReplaceAllUsesOfValueWith(SDValue(LdHi, 1), NewLoadHi.getValue(1));
453 return true;
454 }
455
456 // build_vector (load ptr), hi -> load_d16_lo ptr, hi
457 // build_vector (zextload ptr from i8), hi -> load_d16_lo_u8 ptr, hi
458 // build_vector (sextload ptr from i8), hi -> load_d16_lo_i8 ptr, hi
459 LoadSDNode *LdLo = dyn_cast<LoadSDNode>(stripBitcast(Lo));
460 if (LdLo && Lo.hasOneUse()) {
461 SDValue TiedIn = getHi16Elt(Hi);
462 if (!TiedIn || LdLo->isPredecessorOf(TiedIn.getNode()))
463 return false;
464
465 SDVTList VTList = CurDAG->getVTList(VT, MVT::Other);
466 unsigned LoadOp = AMDGPUISD::LOAD_D16_LO;
467 if (LdLo->getMemoryVT() == MVT::i8) {
468 LoadOp = LdLo->getExtensionType() == ISD::SEXTLOAD ?
469 AMDGPUISD::LOAD_D16_LO_I8 : AMDGPUISD::LOAD_D16_LO_U8;
470 } else {
471 assert(LdLo->getMemoryVT() == MVT::i16)((LdLo->getMemoryVT() == MVT::i16) ? static_cast<void>
(0) : __assert_fail ("LdLo->getMemoryVT() == MVT::i16", "/build/llvm-toolchain-snapshot-13~++20210223111116+16ede0956cb1/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp"
, 471, __PRETTY_FUNCTION__))
;
472 }
473
474 TiedIn = CurDAG->getNode(ISD::BITCAST, SDLoc(N), VT, TiedIn);
475
476 SDValue Ops[] = {
477 LdLo->getChain(), LdLo->getBasePtr(), TiedIn
478 };
479
480 SDValue NewLoadLo =
481 CurDAG->getMemIntrinsicNode(LoadOp, SDLoc(LdLo), VTList,
482 Ops, LdLo->getMemoryVT(),
483 LdLo->getMemOperand());
484
485 CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), NewLoadLo);
486 CurDAG->ReplaceAllUsesOfValueWith(SDValue(LdLo, 1), NewLoadLo.getValue(1));
487 return true;
488 }
489
490 return false;
491}
492
493void AMDGPUDAGToDAGISel::PreprocessISelDAG() {
494 if (!Subtarget->d16PreservesUnusedBits())
495 return;
496
497 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
498
499 bool MadeChange = false;
500 while (Position != CurDAG->allnodes_begin()) {
501 SDNode *N = &*--Position;
502 if (N->use_empty())
503 continue;
504
505 switch (N->getOpcode()) {
506 case ISD::BUILD_VECTOR:
507 MadeChange |= matchLoadD16FromBuildVector(N);
508 break;
509 default:
510 break;
511 }
512 }
513
514 if (MadeChange) {
515 CurDAG->RemoveDeadNodes();
516 LLVM_DEBUG(dbgs() << "After PreProcess:\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("isel")) { dbgs() << "After PreProcess:\n"; CurDAG->
dump();; } } while (false)
517 CurDAG->dump();)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("isel")) { dbgs() << "After PreProcess:\n"; CurDAG->
dump();; } } while (false)
;
518 }
519}
520
521bool AMDGPUDAGToDAGISel::isNoNanSrc(SDValue N) const {
522 if (TM.Options.NoNaNsFPMath)
523 return true;
524
525 // TODO: Move into isKnownNeverNaN
526 if (N->getFlags().hasNoNaNs())
527 return true;
528
529 return CurDAG->isKnownNeverNaN(N);
530}
531
532bool AMDGPUDAGToDAGISel::isInlineImmediate(const SDNode *N,
533 bool Negated) const {
534 if (N->isUndef())
535 return true;
536
537 const SIInstrInfo *TII = Subtarget->getInstrInfo();
538 if (Negated) {
539 if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N))
540 return TII->isInlineConstant(-C->getAPIntValue());
541
542 if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N))
543 return TII->isInlineConstant(-C->getValueAPF().bitcastToAPInt());
544
545 } else {
546 if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N))
547 return TII->isInlineConstant(C->getAPIntValue());
548
549 if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N))
550 return TII->isInlineConstant(C->getValueAPF().bitcastToAPInt());
551 }
552
553 return false;
554}
555
556/// Determine the register class for \p OpNo
557/// \returns The register class of the virtual register that will be used for
558/// the given operand number \OpNo or NULL if the register class cannot be
559/// determined.
560const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N,
561 unsigned OpNo) const {
562 if (!N->isMachineOpcode()) {
563 if (N->getOpcode() == ISD::CopyToReg) {
564 Register Reg = cast<RegisterSDNode>(N->getOperand(1))->getReg();
565 if (Reg.isVirtual()) {
566 MachineRegisterInfo &MRI = CurDAG->getMachineFunction().getRegInfo();
567 return MRI.getRegClass(Reg);
568 }
569
570 const SIRegisterInfo *TRI
571 = static_cast<const GCNSubtarget *>(Subtarget)->getRegisterInfo();
572 return TRI->getPhysRegClass(Reg);
573 }
574
575 return nullptr;
576 }
577
578 switch (N->getMachineOpcode()) {
579 default: {
580 const MCInstrDesc &Desc =
581 Subtarget->getInstrInfo()->get(N->getMachineOpcode());
582 unsigned OpIdx = Desc.getNumDefs() + OpNo;
583 if (OpIdx >= Desc.getNumOperands())
584 return nullptr;
585 int RegClass = Desc.OpInfo[OpIdx].RegClass;
586 if (RegClass == -1)
587 return nullptr;
588
589 return Subtarget->getRegisterInfo()->getRegClass(RegClass);
590 }
591 case AMDGPU::REG_SEQUENCE: {
592 unsigned RCID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
593 const TargetRegisterClass *SuperRC =
594 Subtarget->getRegisterInfo()->getRegClass(RCID);
595
596 SDValue SubRegOp = N->getOperand(OpNo + 1);
597 unsigned SubRegIdx = cast<ConstantSDNode>(SubRegOp)->getZExtValue();
598 return Subtarget->getRegisterInfo()->getSubClassWithSubReg(SuperRC,
599 SubRegIdx);
600 }
601 }
602}
603
604SDNode *AMDGPUDAGToDAGISel::glueCopyToOp(SDNode *N, SDValue NewChain,
605 SDValue Glue) const {
606 SmallVector <SDValue, 8> Ops;
607 Ops.push_back(NewChain); // Replace the chain.
608 for (unsigned i = 1, e = N->getNumOperands(); i != e; ++i)
609 Ops.push_back(N->getOperand(i));
610
611 Ops.push_back(Glue);
612 return CurDAG->MorphNodeTo(N, N->getOpcode(), N->getVTList(), Ops);
613}
614
615SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N, SDValue Val) const {
616 const SITargetLowering& Lowering =
617 *static_cast<const SITargetLowering*>(getTargetLowering());
618
619 assert(N->getOperand(0).getValueType() == MVT::Other && "Expected chain")((N->getOperand(0).getValueType() == MVT::Other &&
"Expected chain") ? static_cast<void> (0) : __assert_fail
("N->getOperand(0).getValueType() == MVT::Other && \"Expected chain\""
, "/build/llvm-toolchain-snapshot-13~++20210223111116+16ede0956cb1/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp"
, 619, __PRETTY_FUNCTION__))
;
620
621 SDValue M0 = Lowering.copyToM0(*CurDAG, N->getOperand(0), SDLoc(N), Val);
622 return glueCopyToOp(N, M0, M0.getValue(1));
623}
624
625SDNode *AMDGPUDAGToDAGISel::glueCopyToM0LDSInit(SDNode *N) const {
626 unsigned AS = cast<MemSDNode>(N)->getAddressSpace();
627 if (AS == AMDGPUAS::LOCAL_ADDRESS) {
628 if (Subtarget->ldsRequiresM0Init())
629 return glueCopyToM0(N, CurDAG->getTargetConstant(-1, SDLoc(N), MVT::i32));
630 } else if (AS == AMDGPUAS::REGION_ADDRESS) {
631 MachineFunction &MF = CurDAG->getMachineFunction();
632 unsigned Value = MF.getInfo<SIMachineFunctionInfo>()->getGDSSize();
633 return
634 glueCopyToM0(N, CurDAG->getTargetConstant(Value, SDLoc(N), MVT::i32));
635 }
636 return N;
637}
638
639MachineSDNode *AMDGPUDAGToDAGISel::buildSMovImm64(SDLoc &DL, uint64_t Imm,
640 EVT VT) const {
641 SDNode *Lo = CurDAG->getMachineNode(
642 AMDGPU::S_MOV_B32, DL, MVT::i32,
643 CurDAG->getTargetConstant(Imm & 0xFFFFFFFF, DL, MVT::i32));
644 SDNode *Hi =
645 CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
646 CurDAG->getTargetConstant(Imm >> 32, DL, MVT::i32));
647 const SDValue Ops[] = {
648 CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
649 SDValue(Lo, 0), CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
650 SDValue(Hi, 0), CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32)};
651
652 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, VT, Ops);
653}
654
// Select a BUILD_VECTOR / SCALAR_TO_VECTOR into a REG_SEQUENCE over the
// register class RegClassID. Missing trailing elements (scalar_to_vector)
// are filled with IMPLICIT_DEF. Single-element vectors degenerate to a
// COPY_TO_REGCLASS.
void AMDGPUDAGToDAGISel::SelectBuildVector(SDNode *N, unsigned RegClassID) {
  EVT VT = N->getValueType(0);
  unsigned NumVectorElts = VT.getVectorNumElements();
  EVT EltVT = VT.getVectorElementType();
  SDLoc DL(N);
  SDValue RegClass = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);

  // A one-element "vector" is just its scalar copied into the class.
  if (NumVectorElts == 1) {
    CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, EltVT, N->getOperand(0),
                         RegClass);
    return;
  }

  assert(NumVectorElts <= 32 && "Vectors with more than 32 elements not "
                                "supported yet");
  // 32 = Max Num Vector Elements
  // 2 = 2 REG_SEQUENCE operands per element (value, subreg index)
  // 1 = Vector Register Class
  SmallVector<SDValue, 32 * 2 + 1> RegSeqArgs(NumVectorElts * 2 + 1);

  // GCN and R600 number their channel subregisters differently.
  bool IsGCN = CurDAG->getSubtarget().getTargetTriple().getArch() ==
               Triple::amdgcn;
  RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
  bool IsRegSeq = true;
  unsigned NOps = N->getNumOperands();
  for (unsigned i = 0; i < NOps; i++) {
    // XXX: Why is this here?
    if (isa<RegisterSDNode>(N->getOperand(i))) {
      IsRegSeq = false;
      break;
    }
    unsigned Sub = IsGCN ? SIRegisterInfo::getSubRegFromChannel(i)
                         : R600RegisterInfo::getSubRegFromChannel(i);
    RegSeqArgs[1 + (2 * i)] = N->getOperand(i);
    RegSeqArgs[1 + (2 * i) + 1] = CurDAG->getTargetConstant(Sub, DL, MVT::i32);
  }
  if (NOps != NumVectorElts) {
    // Fill in the missing undef elements if this was a scalar_to_vector.
    assert(N->getOpcode() == ISD::SCALAR_TO_VECTOR && NOps < NumVectorElts);
    MachineSDNode *ImpDef = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
                                                   DL, EltVT);
    for (unsigned i = NOps; i < NumVectorElts; ++i) {
      unsigned Sub = IsGCN ? SIRegisterInfo::getSubRegFromChannel(i)
                           : R600RegisterInfo::getSubRegFromChannel(i);
      RegSeqArgs[1 + (2 * i)] = SDValue(ImpDef, 0);
      RegSeqArgs[1 + (2 * i) + 1] =
          CurDAG->getTargetConstant(Sub, DL, MVT::i32);
    }
  }

  if (!IsRegSeq)
    SelectCode(N);
  // NOTE(review): when !IsRegSeq the code falls through here and calls
  // SelectNodeTo on a node SelectCode may already have morphed — presumably
  // the RegisterSDNode case never occurs in practice; confirm before relying
  // on that path.
  CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(), RegSeqArgs);
}
709
// Top-level instruction-selection entry point. Nodes that need custom
// handling are selected here; everything else falls through to the
// tablegen-generated matcher via SelectCode at the bottom.
void AMDGPUDAGToDAGISel::Select(SDNode *N) {
  unsigned int Opc = N->getOpcode();
  if (N->isMachineOpcode()) {
    N->setNodeId(-1);
    return; // Already selected.
  }

  // isa<MemSDNode> almost works but is slightly too permissive for some DS
  // intrinsics.
  if (Opc == ISD::LOAD || Opc == ISD::STORE || isa<AtomicSDNode>(N) ||
      (Opc == AMDGPUISD::ATOMIC_INC || Opc == AMDGPUISD::ATOMIC_DEC ||
       Opc == ISD::ATOMIC_LOAD_FADD ||
       Opc == AMDGPUISD::ATOMIC_LOAD_FMIN ||
       Opc == AMDGPUISD::ATOMIC_LOAD_FMAX)) {
    // Glue in the M0 initialization for LDS accesses before matching.
    N = glueCopyToM0LDSInit(N);
    SelectCode(N);
    return;
  }

  switch (Opc) {
  default:
    break;
  // We are selecting i64 ADD here instead of custom lower it during
  // DAG legalization, so we can fold some i64 ADDs used for address
  // calculation into the LOAD and STORE instructions.
  case ISD::ADDC:
  case ISD::ADDE:
  case ISD::SUBC:
  case ISD::SUBE: {
    if (N->getValueType(0) != MVT::i64)
      break;

    SelectADD_SUB_I64(N);
    return;
  }
  case ISD::ADDCARRY:
  case ISD::SUBCARRY:
    if (N->getValueType(0) != MVT::i32)
      break;

    SelectAddcSubb(N);
    return;
  case ISD::UADDO:
  case ISD::USUBO: {
    SelectUADDO_USUBO(N);
    return;
  }
  case AMDGPUISD::FMUL_W_CHAIN: {
    SelectFMUL_W_CHAIN(N);
    return;
  }
  case AMDGPUISD::FMA_W_CHAIN: {
    SelectFMA_W_CHAIN(N);
    return;
  }

  case ISD::SCALAR_TO_VECTOR:
  case ISD::BUILD_VECTOR: {
    EVT VT = N->getValueType(0);
    unsigned NumVectorElts = VT.getVectorNumElements();
    if (VT.getScalarSizeInBits() == 16) {
      // A v2i16 build_vector of constants can be packed into one 32-bit
      // immediate; other 16-bit vectors go to the generated matcher.
      if (Opc == ISD::BUILD_VECTOR && NumVectorElts == 2) {
        if (SDNode *Packed = packConstantV2I16(N, *CurDAG)) {
          ReplaceNode(N, Packed);
          return;
        }
      }

      break;
    }

    assert(VT.getVectorElementType().bitsEq(MVT::i32));
    unsigned RegClassID =
        SIRegisterInfo::getSGPRClassForBitWidth(NumVectorElts * 32)->getID();
    SelectBuildVector(N, RegClassID);
    return;
  }
  case ISD::BUILD_PAIR: {
    // Assemble the two halves with a REG_SEQUENCE; only i64 and i128 pairs
    // are expected here.
    SDValue RC, SubReg0, SubReg1;
    SDLoc DL(N);
    if (N->getValueType(0) == MVT::i128) {
      RC = CurDAG->getTargetConstant(AMDGPU::SGPR_128RegClassID, DL, MVT::i32);
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, DL, MVT::i32);
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, DL, MVT::i32);
    } else if (N->getValueType(0) == MVT::i64) {
      RC = CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32);
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
    } else {
      llvm_unreachable("Unhandled value type for BUILD_PAIR");
    }
    const SDValue Ops[] = { RC, N->getOperand(0), SubReg0,
                            N->getOperand(1), SubReg1 };
    ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
                                          N->getValueType(0), Ops));
    return;
  }

  case ISD::Constant:
  case ISD::ConstantFP: {
    // Only 64-bit immediates that cannot be encoded inline need the
    // two-instruction materialization below.
    if (N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N))
      break;

    uint64_t Imm;
    if (ConstantFPSDNode *FP = dyn_cast<ConstantFPSDNode>(N))
      Imm = FP->getValueAPF().bitcastToAPInt().getZExtValue();
    else {
      ConstantSDNode *C = cast<ConstantSDNode>(N);
      Imm = C->getZExtValue();
    }

    SDLoc DL(N);
    ReplaceNode(N, buildSMovImm64(DL, Imm, N->getValueType(0)));
    return;
  }
  case AMDGPUISD::BFE_I32:
  case AMDGPUISD::BFE_U32: {
    // There is a scalar version available, but unlike the vector version which
    // has a separate operand for the offset and width, the scalar version packs
    // the width and offset into a single operand. Try to move to the scalar
    // version if the offsets are constant, so that we can try to keep extended
    // loads of kernel arguments in SGPRs.

    // TODO: Technically we could try to pattern match scalar bitshifts of
    // dynamic values, but it's probably not useful.
    ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
    if (!Offset)
      break;

    ConstantSDNode *Width = dyn_cast<ConstantSDNode>(N->getOperand(2));
    if (!Width)
      break;

    bool Signed = Opc == AMDGPUISD::BFE_I32;

    uint32_t OffsetVal = Offset->getZExtValue();
    uint32_t WidthVal = Width->getZExtValue();

    ReplaceNode(N, getS_BFE(Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32,
                            SDLoc(N), N->getOperand(0), OffsetVal, WidthVal));
    return;
  }
  case AMDGPUISD::DIV_SCALE: {
    SelectDIV_SCALE(N);
    return;
  }
  case AMDGPUISD::MAD_I64_I32:
  case AMDGPUISD::MAD_U64_U32: {
    SelectMAD_64_32(N);
    return;
  }
  case ISD::CopyToReg: {
    // Legalize the node's operands, then let the generated matcher handle it
    // (note: break, not return).
    const SITargetLowering& Lowering =
        *static_cast<const SITargetLowering*>(getTargetLowering());
    N = Lowering.legalizeTargetIndependentNode(N, *CurDAG);
    break;
  }
  case ISD::AND:
  case ISD::SRL:
  case ISD::SRA:
  case ISD::SIGN_EXTEND_INREG:
    // These may be expressible as a scalar bitfield extract; only the
    // 32-bit forms qualify.
    if (N->getValueType(0) != MVT::i32)
      break;

    SelectS_BFE(N);
    return;
  case ISD::BRCOND:
    SelectBRCOND(N);
    return;
  case ISD::FMAD:
  case ISD::FMA:
    SelectFMAD_FMA(N);
    return;
  case AMDGPUISD::ATOMIC_CMP_SWAP:
    SelectATOMIC_CMP_SWAP(N);
    return;
  case AMDGPUISD::CVT_PKRTZ_F16_F32:
  case AMDGPUISD::CVT_PKNORM_I16_F32:
  case AMDGPUISD::CVT_PKNORM_U16_F32:
  case AMDGPUISD::CVT_PK_U16_U32:
  case AMDGPUISD::CVT_PK_I16_I32: {
    // Hack around using a legal type if f16 is illegal.
    if (N->getValueType(0) == MVT::i32) {
      MVT NewVT = Opc == AMDGPUISD::CVT_PKRTZ_F16_F32 ? MVT::v2f16 : MVT::v2i16;
      N = CurDAG->MorphNodeTo(N, N->getOpcode(), CurDAG->getVTList(NewVT),
                              { N->getOperand(0), N->getOperand(1) });
      SelectCode(N);
      return;
    }

    break;
  }
  case ISD::INTRINSIC_W_CHAIN: {
    SelectINTRINSIC_W_CHAIN(N);
    return;
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    SelectINTRINSIC_WO_CHAIN(N);
    return;
  }
  case ISD::INTRINSIC_VOID: {
    SelectINTRINSIC_VOID(N);
    return;
  }
  }

  SelectCode(N);
}
918
919bool AMDGPUDAGToDAGISel::isUniformBr(const SDNode *N) const {
920 const BasicBlock *BB = FuncInfo->MBB->getBasicBlock();
921 const Instruction *Term = BB->getTerminator();
922 return Term->getMetadata("amdgpu.uniform") ||
923 Term->getMetadata("structurizecfg.uniform");
924}
925
// Recover a (base, constant-offset) pair from an i64 address whose low-half
// `or` was split into vector-element operations earlier. On success, N0 is
// set to the original 64-bit base and N1 to the constant offset.
static bool getBaseWithOffsetUsingSplitOR(SelectionDAG &DAG, SDValue Addr,
                                          SDValue &N0, SDValue &N1) {
  if (Addr.getValueType() == MVT::i64 && Addr.getOpcode() == ISD::BITCAST &&
      Addr.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
    // As we split 64-bit `or` earlier, it's complicated pattern to match, i.e.
    // (i64 (bitcast (v2i32 (build_vector
    //                        (or (extract_vector_elt V, 0), OFFSET),
    //                        (extract_vector_elt V, 1)))))
    SDValue Lo = Addr.getOperand(0).getOperand(0);
    if (Lo.getOpcode() == ISD::OR && DAG.isBaseWithConstantOffset(Lo)) {
      SDValue BaseLo = Lo.getOperand(0);
      SDValue BaseHi = Addr.getOperand(0).getOperand(1);
      // Check that split base (Lo and Hi) are extracted from the same one.
      if (BaseLo.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
          BaseHi.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
          BaseLo.getOperand(0) == BaseHi.getOperand(0) &&
          // Lo is statically extracted from index 0.
          isa<ConstantSDNode>(BaseLo.getOperand(1)) &&
          BaseLo.getConstantOperandVal(1) == 0 &&
          // Hi is statically extracted from index 1.
          isa<ConstantSDNode>(BaseHi.getOperand(1)) &&
          BaseHi.getConstantOperandVal(1) == 1) {
        N0 = BaseLo.getOperand(0).getOperand(0);
        N1 = Lo.getOperand(1);
        return true;
      }
    }
  }
  return false;
}
956
957bool AMDGPUDAGToDAGISel::isBaseWithConstantOffset64(SDValue Addr, SDValue &LHS,
958 SDValue &RHS) const {
959 if (CurDAG->isBaseWithConstantOffset(Addr)) {
960 LHS = Addr.getOperand(0);
961 RHS = Addr.getOperand(1);
962 return true;
963 }
964
965 if (getBaseWithOffsetUsingSplitOR(*CurDAG, Addr, LHS, RHS)) {
966 assert(LHS && RHS && isa<ConstantSDNode>(RHS))((LHS && RHS && isa<ConstantSDNode>(RHS
)) ? static_cast<void> (0) : __assert_fail ("LHS && RHS && isa<ConstantSDNode>(RHS)"
, "/build/llvm-toolchain-snapshot-13~++20210223111116+16ede0956cb1/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp"
, 966, __PRETTY_FUNCTION__))
;
967 return true;
968 }
969
970 return false;
971}
972
// Human-readable pass name shown by the pass manager and -debug output.
StringRef AMDGPUDAGToDAGISel::getPassName() const {
  return "AMDGPU DAG->DAG Pattern Instruction Selection";
}
976
977//===----------------------------------------------------------------------===//
978// Complex Patterns
979//===----------------------------------------------------------------------===//
980
// Stub complex pattern: never folds anything here — presumably handled by
// the R600-specific selector; confirm before extending.
bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
                                            SDValue &Offset) {
  return false;
}
985
986bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
987 SDValue &Offset) {
988 ConstantSDNode *C;
989 SDLoc DL(Addr);
990
991 if ((C = dyn_cast<ConstantSDNode>(Addr))) {
992 Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
993 Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
994 } else if ((Addr.getOpcode() == AMDGPUISD::DWORDADDR) &&
995 (C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) {
996 Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
997 Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
998 } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
999 (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
1000 Base = Addr.getOperand(0);
1001 Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
1002 } else {
1003 Base = Addr;
1004 Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
1005 }
1006
1007 return true;
1008}
1009
1010SDValue AMDGPUDAGToDAGISel::getMaterializedScalarImm32(int64_t Val,
1011 const SDLoc &DL) const {
1012 SDNode *Mov = CurDAG->getMachineNode(
1013 AMDGPU::S_MOV_B32, DL, MVT::i32,
1014 CurDAG->getTargetConstant(Val, DL, MVT::i32));
1015 return SDValue(Mov, 0);
1016}
1017
// FIXME: Should only handle addcarry/subcarry
// Select a 64-bit add/sub by splitting both operands into 32-bit halves,
// chaining the halves through the carry (glue) result, and reassembling the
// 64-bit value with a REG_SEQUENCE. Scalar or VALU opcodes are chosen based
// on divergence.
void AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
  SDLoc DL(N);
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);

  unsigned Opcode = N->getOpcode();
  // ADDE/SUBE consume an incoming carry operand; all four carry-flavored
  // opcodes produce a carry result.
  bool ConsumeCarry = (Opcode == ISD::ADDE || Opcode == ISD::SUBE);
  bool ProduceCarry =
      ConsumeCarry || Opcode == ISD::ADDC || Opcode == ISD::SUBC;
  // NOTE(review): ISD::ADD is tested although Select() only dispatches
  // ADDC/ADDE/SUBC/SUBE here — presumably a leftover from older callers.
  bool IsAdd = Opcode == ISD::ADD || Opcode == ISD::ADDC || Opcode == ISD::ADDE;

  SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
  SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);

  // Extract the low and high 32-bit halves of both operands.
  SDNode *Lo0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, LHS, Sub0);
  SDNode *Hi0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, LHS, Sub1);

  SDNode *Lo1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, RHS, Sub0);
  SDNode *Hi1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, RHS, Sub1);

  SDVTList VTList = CurDAG->getVTList(MVT::i32, MVT::Glue);

  // Indexed as [consumes-carry][divergent][is-add].
  static const unsigned OpcMap[2][2][2] = {
      {{AMDGPU::S_SUB_U32, AMDGPU::S_ADD_U32},
       {AMDGPU::V_SUB_CO_U32_e32, AMDGPU::V_ADD_CO_U32_e32}},
      {{AMDGPU::S_SUBB_U32, AMDGPU::S_ADDC_U32},
       {AMDGPU::V_SUBB_U32_e32, AMDGPU::V_ADDC_U32_e32}}};

  unsigned Opc = OpcMap[0][N->isDivergent()][IsAdd];
  unsigned CarryOpc = OpcMap[1][N->isDivergent()][IsAdd];

  SDNode *AddLo;
  if (!ConsumeCarry) {
    SDValue Args[] = { SDValue(Lo0, 0), SDValue(Lo1, 0) };
    AddLo = CurDAG->getMachineNode(Opc, DL, VTList, Args);
  } else {
    SDValue Args[] = { SDValue(Lo0, 0), SDValue(Lo1, 0), N->getOperand(2) };
    AddLo = CurDAG->getMachineNode(CarryOpc, DL, VTList, Args);
  }
  // The high half always consumes the carry (glue) produced by the low half.
  SDValue AddHiArgs[] = {
    SDValue(Hi0, 0),
    SDValue(Hi1, 0),
    SDValue(AddLo, 1)
  };
  SDNode *AddHi = CurDAG->getMachineNode(CarryOpc, DL, VTList, AddHiArgs);

  SDValue RegSequenceArgs[] = {
    CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
    SDValue(AddLo,0),
    Sub0,
    SDValue(AddHi,0),
    Sub1,
  };
  SDNode *RegSequence = CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, DL,
                                               MVT::i64, RegSequenceArgs);

  if (ProduceCarry) {
    // Replace the carry-use
    ReplaceUses(SDValue(N, 1), SDValue(AddHi, 1));
  }

  // Replace the remaining uses.
  ReplaceNode(N, RegSequence);
}
1087
1088void AMDGPUDAGToDAGISel::SelectAddcSubb(SDNode *N) {
1089 SDLoc DL(N);
1090 SDValue LHS = N->getOperand(0);
1091 SDValue RHS = N->getOperand(1);
1092 SDValue CI = N->getOperand(2);
1093
1094 if (N->isDivergent()) {
1095 unsigned Opc = N->getOpcode() == ISD::ADDCARRY ? AMDGPU::V_ADDC_U32_e64
1096 : AMDGPU::V_SUBB_U32_e64;
1097 CurDAG->SelectNodeTo(
1098 N, Opc, N->getVTList(),
1099 {LHS, RHS, CI,
1100 CurDAG->getTargetConstant(0, {}, MVT::i1) /*clamp bit*/});
1101 } else {
1102 unsigned Opc = N->getOpcode() == ISD::ADDCARRY ? AMDGPU::S_ADD_CO_PSEUDO
1103 : AMDGPU::S_SUB_CO_PSEUDO;
1104 CurDAG->SelectNodeTo(N, Opc, N->getVTList(), {LHS, RHS, CI});
1105 }
1106}
1107
1108void AMDGPUDAGToDAGISel::SelectUADDO_USUBO(SDNode *N) {
1109 // The name of the opcodes are misleading. v_add_i32/v_sub_i32 have unsigned
1110 // carry out despite the _i32 name. These were renamed in VI to _U32.
1111 // FIXME: We should probably rename the opcodes here.
1112 bool IsAdd = N->getOpcode() == ISD::UADDO;
1113 bool IsVALU = N->isDivergent();
1114
1115 for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end(); UI != E;
1116 ++UI)
1117 if (UI.getUse().getResNo() == 1) {
1118 if ((IsAdd && (UI->getOpcode() != ISD::ADDCARRY)) ||
1119 (!IsAdd && (UI->getOpcode() != ISD::SUBCARRY))) {
1120 IsVALU = true;
1121 break;
1122 }
1123 }
1124
1125 if (IsVALU) {
1126 unsigned Opc = IsAdd ? AMDGPU::V_ADD_CO_U32_e64 : AMDGPU::V_SUB_CO_U32_e64;
1127
1128 CurDAG->SelectNodeTo(
1129 N, Opc, N->getVTList(),
1130 {N->getOperand(0), N->getOperand(1),
1131 CurDAG->getTargetConstant(0, {}, MVT::i1) /*clamp bit*/});
1132 } else {
1133 unsigned Opc = N->getOpcode() == ISD::UADDO ? AMDGPU::S_UADDO_PSEUDO
1134 : AMDGPU::S_USUBO_PSEUDO;
1135
1136 CurDAG->SelectNodeTo(N, Opc, N->getVTList(),
1137 {N->getOperand(0), N->getOperand(1)});
1138 }
1139}
1140
// Select the chained FMA node into V_FMA_F32_e64. The node's operand 0 is
// the chain and operand 4 is carried through as the final operand.
void AMDGPUDAGToDAGISel::SelectFMA_W_CHAIN(SDNode *N) {
  SDLoc SL(N);
  // src0_modifiers, src0, src1_modifiers, src1, src2_modifiers, src2, clamp, omod
  // Ops layout: [0,2,4] = source modifiers, [1,3,5] = sources, [6] = clamp,
  // [7] = omod, [8] = chain, [9] = operand 4 passthrough.
  SDValue Ops[10];

  SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[6], Ops[7]);
  SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
  SelectVOP3Mods(N->getOperand(3), Ops[5], Ops[4]);
  Ops[8] = N->getOperand(0);
  Ops[9] = N->getOperand(4);

  CurDAG->SelectNodeTo(N, AMDGPU::V_FMA_F32_e64, N->getVTList(), Ops);
}
1154
// Select the chained FMUL node into V_MUL_F32_e64. Operand 0 is the chain
// and operand 3 is carried through as the final operand.
void AMDGPUDAGToDAGISel::SelectFMUL_W_CHAIN(SDNode *N) {
  SDLoc SL(N);
  // src0_modifiers, src0, src1_modifiers, src1, clamp, omod
  // Ops layout: [0,2] = source modifiers, [1,3] = sources, [4] = clamp,
  // [5] = omod, [6] = chain, [7] = operand 3 passthrough.
  SDValue Ops[8];

  SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[4], Ops[5]);
  SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
  Ops[6] = N->getOperand(0);
  Ops[7] = N->getOperand(3);

  CurDAG->SelectNodeTo(N, AMDGPU::V_MUL_F32_e64, N->getVTList(), Ops);
}
1167
// We need to handle this here because tablegen doesn't support matching
// instructions with multiple outputs.
// Select DIV_SCALE (f32 or f64) into the corresponding V_DIV_SCALE VOP3B.
void AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
  SDLoc SL(N);
  EVT VT = N->getValueType(0);

  assert(VT == MVT::f32 || VT == MVT::f64);

  unsigned Opc
    = (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64_e64 : AMDGPU::V_DIV_SCALE_F32_e64;

  // src0_modifiers, src0, src1_modifiers, src1, src2_modifiers, src2, clamp,
  // omod
  // Ops layout: [0,2,4] = source modifiers, [1,3,5] = sources, [6] = clamp,
  // [7] = omod.
  SDValue Ops[8];
  SelectVOP3BMods0(N->getOperand(0), Ops[1], Ops[0], Ops[6], Ops[7]);
  SelectVOP3BMods(N->getOperand(1), Ops[3], Ops[2]);
  SelectVOP3BMods(N->getOperand(2), Ops[5], Ops[4]);
  CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
}
1187
1188// We need to handle this here because tablegen doesn't support matching
1189// instructions with multiple outputs.
1190void AMDGPUDAGToDAGISel::SelectMAD_64_32(SDNode *N) {
1191 SDLoc SL(N);
1192 bool Signed = N->getOpcode() == AMDGPUISD::MAD_I64_I32;
1193 unsigned Opc = Signed ? AMDGPU::V_MAD_I64_I32_e64 : AMDGPU::V_MAD_U64_U32_e64;
1194
1195 SDValue Clamp = CurDAG->getTargetConstant(0, SL, MVT::i1);
1196 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
1197 Clamp };
1198 CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
1199}
1200
1201bool AMDGPUDAGToDAGISel::isDSOffsetLegal(SDValue Base, unsigned Offset) const {
1202 if (!isUInt<16>(Offset))
1203 return false;
1204
1205 if (!Base || Subtarget->hasUsableDSOffset() ||
1206 Subtarget->unsafeDSOffsetFoldingEnabled())
1207 return true;
1208
1209 // On Southern Islands instruction with a negative base value and an offset
1210 // don't seem to work.
1211 return CurDAG->SignBitIsZero(Base);
1212}
1213
// Select a DS addressing mode: 32-bit base VGPR plus a 16-bit unsigned
// immediate offset. Always succeeds; the fallback is (Addr, offset 0).
bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base,
                                              SDValue &Offset) const {
  SDLoc DL(Addr);
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    if (isDSOffsetLegal(N0, C1->getSExtValue())) {
      // (add n0, c0)
      Base = N0;
      Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
      return true;
    }
  } else if (Addr.getOpcode() == ISD::SUB) {
    // sub C, x -> add (sub 0, x), C
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
      int64_t ByteOffset = C->getSExtValue();
      if (isDSOffsetLegal(SDValue(), ByteOffset)) {
        SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);

        // XXX - This is kind of hacky. Create a dummy sub node so we can check
        // the known bits in isDSOffsetLegal. We need to emit the selected node
        // here, so this is thrown away.
        SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
                                      Zero, Addr.getOperand(1));

        if (isDSOffsetLegal(Sub, ByteOffset)) {
          SmallVector<SDValue, 3> Opnds;
          Opnds.push_back(Zero);
          Opnds.push_back(Addr.getOperand(1));

          // FIXME: Select to VOP3 version for with-carry.
          unsigned SubOp = AMDGPU::V_SUB_CO_U32_e32;
          if (Subtarget->hasAddNoCarry()) {
            SubOp = AMDGPU::V_SUB_U32_e64;
            Opnds.push_back(
                CurDAG->getTargetConstant(0, {}, MVT::i1)); // clamp bit
          }

          // Emit the real negate of the base.
          MachineSDNode *MachineSub =
              CurDAG->getMachineNode(SubOp, DL, MVT::i32, Opnds);

          Base = SDValue(MachineSub, 0);
          Offset = CurDAG->getTargetConstant(ByteOffset, DL, MVT::i16);
          return true;
        }
      }
    }
  } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    // If we have a constant address, prefer to put the constant into the
    // offset. This can save moves to load the constant address since multiple
    // operations can share the zero base address register, and enables merging
    // into read2 / write2 instructions.

    SDLoc DL(Addr);

    if (isDSOffsetLegal(SDValue(), CAddr->getZExtValue())) {
      SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
      MachineSDNode *MovZero = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                                      DL, MVT::i32, Zero);
      Base = SDValue(MovZero, 0);
      Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16);
      return true;
    }
  }

  // default case
  Base = Addr;
  Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i16);
  return true;
}
1285
1286bool AMDGPUDAGToDAGISel::isDSOffset2Legal(SDValue Base, unsigned Offset0,
1287 unsigned Offset1,
1288 unsigned Size) const {
1289 if (Offset0 % Size != 0 || Offset1 % Size != 0)
1290 return false;
1291 if (!isUInt<8>(Offset0 / Size) || !isUInt<8>(Offset1 / Size))
1292 return false;
1293
1294 if (!Base || Subtarget->hasUsableDSOffset() ||
1295 Subtarget->unsafeDSOffsetFoldingEnabled())
1296 return true;
1297
1298 // On Southern Islands instruction with a negative base value and an offset
1299 // don't seem to work.
1300 return CurDAG->SignBitIsZero(Base);
1301}
1302
// TODO: If offset is too big, put low 16-bit into offset.
// Addressing for ds_read2/ds_write2 with 4-byte elements.
bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base,
                                                   SDValue &Offset0,
                                                   SDValue &Offset1) const {
  return SelectDSReadWrite2(Addr, Base, Offset0, Offset1, 4);
}
1309
// Addressing for ds_read2/ds_write2 with 8-byte elements.
bool AMDGPUDAGToDAGISel::SelectDS128Bit8ByteAligned(SDValue Addr, SDValue &Base,
                                                    SDValue &Offset0,
                                                    SDValue &Offset1) const {
  return SelectDSReadWrite2(Addr, Base, Offset0, Offset1, 8);
}
1315
1316bool AMDGPUDAGToDAGISel::SelectDSReadWrite2(SDValue Addr, SDValue &Base,
1317 SDValue &Offset0, SDValue &Offset1,
1318 unsigned Size) const {
1319 SDLoc DL(Addr);
1320
1321 if (CurDAG->isBaseWithConstantOffset(Addr)) {
1322 SDValue N0 = Addr.getOperand(0);
1323 SDValue N1 = Addr.getOperand(1);
1324 ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
1325 unsigned OffsetValue0 = C1->getZExtValue();
1326 unsigned OffsetValue1 = OffsetValue0 + Size;
1327
1328 // (add n0, c0)
1329 if (isDSOffset2Legal(N0, OffsetValue0, OffsetValue1, Size)) {
1330 Base = N0;
1331 Offset0 = CurDAG->getTargetConstant(OffsetValue0 / Size, DL, MVT::i8);
1332 Offset1 = CurDAG->getTargetConstant(OffsetValue1 / Size, DL, MVT::i8);
1333 return true;
1334 }
1335 } else if (Addr.getOpcode() == ISD::SUB) {
1336 // sub C, x -> add (sub 0, x), C
1337 if (const ConstantSDNode *C =
1338 dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
1339 unsigned OffsetValue0 = C->getZExtValue();
1340 unsigned OffsetValue1 = OffsetValue0 + Size;
1341
1342 if (isDSOffset2Legal(SDValue(), OffsetValue0, OffsetValue1, Size)) {
1343 SDLoc DL(Addr);
1344 SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
1345
1346 // XXX - This is kind of hacky. Create a dummy sub node so we can check
1347 // the known bits in isDSOffsetLegal. We need to emit the selected node
1348 // here, so this is thrown away.
1349 SDValue Sub =
1350 CurDAG->getNode(ISD::SUB, DL, MVT::i32, Zero, Addr.getOperand(1));
1351
1352 if (isDSOffset2Legal(Sub, OffsetValue0, OffsetValue1, Size)) {
1353 SmallVector<SDValue, 3> Opnds;
1354 Opnds.push_back(Zero);
1355 Opnds.push_back(Addr.getOperand(1));
1356 unsigned SubOp = AMDGPU::V_SUB_CO_U32_e32;
1357 if (Subtarget->hasAddNoCarry()) {
1358 SubOp = AMDGPU::V_SUB_U32_e64;
1359 Opnds.push_back(
1360 CurDAG->getTargetConstant(0, {}, MVT::i1)); // clamp bit
1361 }
1362
1363 MachineSDNode *MachineSub = CurDAG->getMachineNode(
1364 SubOp, DL, MVT::getIntegerVT(Size * 8), Opnds);
1365
1366 Base = SDValue(MachineSub, 0);
1367 Offset0 = CurDAG->getTargetConstant(OffsetValue0 / Size, DL, MVT::i8);
1368 Offset1 = CurDAG->getTargetConstant(OffsetValue1 / Size, DL, MVT::i8);
1369 return true;
1370 }
1371 }
1372 }
1373 } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
1374 unsigned OffsetValue0 = CAddr->getZExtValue();
1375 unsigned OffsetValue1 = OffsetValue0 + Size;
1376
1377 if (isDSOffset2Legal(SDValue(), OffsetValue0, OffsetValue1, Size)) {
1378 SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
1379 MachineSDNode *MovZero =
1380 CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32, DL, MVT::i32, Zero);
1381 Base = SDValue(MovZero, 0);
1382 Offset0 = CurDAG->getTargetConstant(OffsetValue0 / Size, DL, MVT::i8);
1383 Offset1 = CurDAG->getTargetConstant(OffsetValue1 / Size, DL, MVT::i8);
1384 return true;
1385 }
1386 }
1387
1388 // default case
1389
1390 Base = Addr;
1391 Offset0 = CurDAG->getTargetConstant(0, DL, MVT::i8);
1392 Offset1 = CurDAG->getTargetConstant(1, DL, MVT::i8);
1393 return true;
1394}
1395
// Split an address into the MUBUF operand set: resource pointer, vector
// address, scalar offset and immediate offset, plus the addr64/offen/idxen
// mode bits and default cache-policy bits. Divergent components go into
// VAddr (addr64 mode); uniform ones into Ptr.
bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
                                     SDValue &VAddr, SDValue &SOffset,
                                     SDValue &Offset, SDValue &Offen,
                                     SDValue &Idxen, SDValue &Addr64,
                                     SDValue &GLC, SDValue &SLC,
                                     SDValue &TFE, SDValue &DLC,
                                     SDValue &SWZ, SDValue &SCCB) const {
  // Subtarget prefers to use flat instruction
  // FIXME: This should be a pattern predicate and not reach here
  if (Subtarget->useFlatForGlobal())
    return false;

  SDLoc DL(Addr);

  // GLC/SLC may be preset by the caller; everything else defaults to 0.
  if (!GLC.getNode())
    GLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
  if (!SLC.getNode())
    SLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
  TFE = CurDAG->getTargetConstant(0, DL, MVT::i1);
  DLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
  SWZ = CurDAG->getTargetConstant(0, DL, MVT::i1);
  SCCB = CurDAG->getTargetConstant(0, DL, MVT::i1);

  Idxen = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Offen = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Addr64 = CurDAG->getTargetConstant(0, DL, MVT::i1);
  SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);

  // Peel off a constant offset if one is present and fits 32 bits.
  ConstantSDNode *C1 = nullptr;
  SDValue N0 = Addr;
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    C1 = cast<ConstantSDNode>(Addr.getOperand(1));
    if (isUInt<32>(C1->getZExtValue()))
      N0 = Addr.getOperand(0);
    else
      C1 = nullptr;
  }

  if (N0.getOpcode() == ISD::ADD) {
    // (add N2, N3) -> addr64, or
    // (add (add N2, N3), C1) -> addr64
    SDValue N2 = N0.getOperand(0);
    SDValue N3 = N0.getOperand(1);
    Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);

    if (N2->isDivergent()) {
      if (N3->isDivergent()) {
        // Both N2 and N3 are divergent. Use N0 (the result of the add) as the
        // addr64, and construct the resource from a 0 address.
        Ptr = SDValue(buildSMovImm64(DL, 0, MVT::v2i32), 0);
        VAddr = N0;
      } else {
        // N2 is divergent, N3 is not.
        Ptr = N3;
        VAddr = N2;
      }
    } else {
      // N2 is not divergent.
      Ptr = N2;
      VAddr = N3;
    }
    Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
  } else if (N0->isDivergent()) {
    // N0 is divergent. Use it as the addr64, and construct the resource from a
    // 0 address.
    Ptr = SDValue(buildSMovImm64(DL, 0, MVT::v2i32), 0);
    VAddr = N0;
    Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
  } else {
    // N0 -> offset, or
    // (N0 + C1) -> offset
    VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
    Ptr = N0;
  }

  if (!C1) {
    // No offset.
    Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
    return true;
  }

  if (SIInstrInfo::isLegalMUBUFImmOffset(C1->getZExtValue())) {
    // Legal offset for instruction.
    Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
    return true;
  }

  // Illegal offset, store it in soffset.
  Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
  SOffset =
      SDValue(CurDAG->getMachineNode(
                  AMDGPU::S_MOV_B32, DL, MVT::i32,
                  CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32)),
              0);
  return true;
}
1492
// Select the MUBUF addr64 addressing mode: succeeds only on subtargets that
// still have addr64 and when SelectMUBUF actually chose the addr64 form, in
// which case the pointer is wrapped into a full 128-bit resource.
bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                                           SDValue &VAddr, SDValue &SOffset,
                                           SDValue &Offset, SDValue &GLC,
                                           SDValue &SLC, SDValue &TFE,
                                           SDValue &DLC, SDValue &SWZ,
                                           SDValue &SCCB) const {
  SDValue Ptr, Offen, Idxen, Addr64;

  // addr64 bit was removed for volcanic islands.
  // FIXME: This should be a pattern predicate and not reach here
  if (!Subtarget->hasAddr64())
    return false;

  if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
                   GLC, SLC, TFE, DLC, SWZ, SCCB))
    return false;

  // Only accept the match if SelectMUBUF set the addr64 bit.
  ConstantSDNode *C = cast<ConstantSDNode>(Addr64);
  if (C->getSExtValue()) {
    SDLoc DL(Addr);

    const SITargetLowering& Lowering =
        *static_cast<const SITargetLowering*>(getTargetLowering());

    // Wrap the 64-bit pointer into a v4i32 buffer resource descriptor.
    SRsrc = SDValue(Lowering.wrapAddr64Rsrc(*CurDAG, DL, Ptr), 0);
    return true;
  }

  return false;
}
1523
// Convenience overload: presets SLC to 0 (so SelectMUBUF keeps it) and
// discards the remaining cache-policy operands.
bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                                           SDValue &VAddr, SDValue &SOffset,
                                           SDValue &Offset,
                                           SDValue &SLC) const {
  SLC = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i1);
  SDValue GLC, TFE, DLC, SWZ, SCCB;

  return SelectMUBUFAddr64(Addr, SRsrc, VAddr, SOffset, Offset, GLC, SLC, TFE, DLC, SWZ, SCCB);
}
1533
1534static bool isStackPtrRelative(const MachinePointerInfo &PtrInfo) {
1535 auto PSV = PtrInfo.V.dyn_cast<const PseudoSourceValue *>();
1536 return PSV && PSV->isStack();
1537}
1538
1539std::pair<SDValue, SDValue> AMDGPUDAGToDAGISel::foldFrameIndex(SDValue N) const {
1540 SDLoc DL(N);
1541
1542 auto *FI = dyn_cast<FrameIndexSDNode>(N);
1543 SDValue TFI =
1544 FI ? CurDAG->getTargetFrameIndex(FI->getIndex(), FI->getValueType(0)) : N;
1545
1546 // We rebase the base address into an absolute stack address and hence
1547 // use constant 0 for soffset. This value must be retained until
1548 // frame elimination and eliminateFrameIndex will choose the appropriate
1549 // frame register if need be.
1550 return std::make_pair(TFI, CurDAG->getTargetConstant(0, DL, MVT::i32));
1551}
1552
1553bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(SDNode *Parent,
1554 SDValue Addr, SDValue &Rsrc,
1555 SDValue &VAddr, SDValue &SOffset,
1556 SDValue &ImmOffset) const {
1557
1558 SDLoc DL(Addr);
1559 MachineFunction &MF = CurDAG->getMachineFunction();
1560 const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
1561
1562 Rsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);
1563
1564 if (ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
1565 int64_t Imm = CAddr->getSExtValue();
1566 const int64_t NullPtr =
1567 AMDGPUTargetMachine::getNullPointerValue(AMDGPUAS::PRIVATE_ADDRESS);
1568 // Don't fold null pointer.
1569 if (Imm != NullPtr) {
1570 SDValue HighBits = CurDAG->getTargetConstant(Imm & ~4095, DL, MVT::i32);
1571 MachineSDNode *MovHighBits = CurDAG->getMachineNode(
1572 AMDGPU::V_MOV_B32_e32, DL, MVT::i32, HighBits);
1573 VAddr = SDValue(MovHighBits, 0);
1574
1575 // In a call sequence, stores to the argument stack area are relative to the
1576 // stack pointer.
1577 const MachinePointerInfo &PtrInfo
1578 = cast<MemSDNode>(Parent)->getPointerInfo();
1579 SOffset = isStackPtrRelative(PtrInfo)
1580 ? CurDAG->getRegister(Info->getStackPtrOffsetReg(), MVT::i32)
1581 : CurDAG->getTargetConstant(0, DL, MVT::i32);
1582 ImmOffset = CurDAG->getTargetConstant(Imm & 4095, DL, MVT::i16);
1583 return true;
1584 }
1585 }
1586
1587 if (CurDAG->isBaseWithConstantOffset(Addr)) {
1588 // (add n0, c1)
1589
1590 SDValue N0 = Addr.getOperand(0);
1591 SDValue N1 = Addr.getOperand(1);
1592
1593 // Offsets in vaddr must be positive if range checking is enabled.
1594 //
1595 // The total computation of vaddr + soffset + offset must not overflow. If
1596 // vaddr is negative, even if offset is 0 the sgpr offset add will end up
1597 // overflowing.
1598 //
1599 // Prior to gfx9, MUBUF instructions with the vaddr offset enabled would
1600 // always perform a range check. If a negative vaddr base index was used,
1601 // this would fail the range check. The overall address computation would
1602 // compute a valid address, but this doesn't happen due to the range
1603 // check. For out-of-bounds MUBUF loads, a 0 is returned.
1604 //
1605 // Therefore it should be safe to fold any VGPR offset on gfx9 into the
1606 // MUBUF vaddr, but not on older subtargets which can only do this if the
1607 // sign bit is known 0.
1608 ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
1609 if (SIInstrInfo::isLegalMUBUFImmOffset(C1->getZExtValue()) &&
1610 (!Subtarget->privateMemoryResourceIsRangeChecked() ||
1611 CurDAG->SignBitIsZero(N0))) {
1612 std::tie(VAddr, SOffset) = foldFrameIndex(N0);
1613 ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
1614 return true;
1615 }
1616 }
1617
1618 // (node)
1619 std::tie(VAddr, SOffset) = foldFrameIndex(Addr);
1620 ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16);
1621 return true;
1622}
1623
1624bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffset(SDNode *Parent,
1625 SDValue Addr,
1626 SDValue &SRsrc,
1627 SDValue &SOffset,
1628 SDValue &Offset) const {
1629 ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr);
1630 if (!CAddr || !SIInstrInfo::isLegalMUBUFImmOffset(CAddr->getZExtValue()))
1631 return false;
1632
1633 SDLoc DL(Addr);
1634 MachineFunction &MF = CurDAG->getMachineFunction();
1635 const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
1636
1637 SRsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);
1638
1639 const MachinePointerInfo &PtrInfo = cast<MemSDNode>(Parent)->getPointerInfo();
1640
1641 // FIXME: Get from MachinePointerInfo? We should only be using the frame
1642 // offset if we know this is in a call sequence.
1643 SOffset = isStackPtrRelative(PtrInfo)
1644 ? CurDAG->getRegister(Info->getStackPtrOffsetReg(), MVT::i32)
1645 : CurDAG->getTargetConstant(0, DL, MVT::i32);
1646
1647 Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16);
1648 return true;
1649}
1650
1651bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
1652 SDValue &SOffset, SDValue &Offset,
1653 SDValue &GLC, SDValue &SLC,
1654 SDValue &TFE, SDValue &DLC,
1655 SDValue &SWZ, SDValue &SCCB) const {
1656 SDValue Ptr, VAddr, Offen, Idxen, Addr64;
1657 const SIInstrInfo *TII =
1658 static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
1659
1660 if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
1661 GLC, SLC, TFE, DLC, SWZ, SCCB))
1662 return false;
1663
1664 if (!cast<ConstantSDNode>(Offen)->getSExtValue() &&
1665 !cast<ConstantSDNode>(Idxen)->getSExtValue() &&
1666 !cast<ConstantSDNode>(Addr64)->getSExtValue()) {
1667 uint64_t Rsrc = TII->getDefaultRsrcDataFormat() |
1668 APInt::getAllOnesValue(32).getZExtValue(); // Size
1669 SDLoc DL(Addr);
1670
1671 const SITargetLowering& Lowering =
1672 *static_cast<const SITargetLowering*>(getTargetLowering());
1673
1674 SRsrc = SDValue(Lowering.buildRSRC(*CurDAG, DL, Ptr, 0, Rsrc), 0);
1675 return true;
1676 }
1677 return false;
1678}
1679
1680bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
1681 SDValue &Soffset, SDValue &Offset
1682 ) const {
1683 SDValue GLC, SLC, TFE, DLC, SWZ, SCCB;
1684
1685 return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE, DLC, SWZ, SCCB);
1686}
1687bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
1688 SDValue &Soffset, SDValue &Offset,
1689 SDValue &SLC) const {
1690 SDValue GLC, TFE, DLC, SWZ, SCCB;
1691
1692 return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE, DLC, SWZ, SCCB);
1693}
1694
1695// Find a load or store from corresponding pattern root.
1696// Roots may be build_vector, bitconvert or their combinations.
1697static MemSDNode* findMemSDNode(SDNode *N) {
1698 N = AMDGPUTargetLowering::stripBitcast(SDValue(N,0)).getNode();
1699 if (MemSDNode *MN = dyn_cast<MemSDNode>(N))
1700 return MN;
1701 assert(isa<BuildVectorSDNode>(N))((isa<BuildVectorSDNode>(N)) ? static_cast<void> (
0) : __assert_fail ("isa<BuildVectorSDNode>(N)", "/build/llvm-toolchain-snapshot-13~++20210223111116+16ede0956cb1/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp"
, 1701, __PRETTY_FUNCTION__))
;
1702 for (SDValue V : N->op_values())
1703 if (MemSDNode *MN =
1704 dyn_cast<MemSDNode>(AMDGPUTargetLowering::stripBitcast(V)))
1705 return MN;
1706 llvm_unreachable("cannot find MemSDNode in the pattern!")::llvm::llvm_unreachable_internal("cannot find MemSDNode in the pattern!"
, "/build/llvm-toolchain-snapshot-13~++20210223111116+16ede0956cb1/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp"
, 1706)
;
1707}
1708
1709template <bool IsSigned>
1710bool AMDGPUDAGToDAGISel::SelectFlatOffset(SDNode *N,
1711 SDValue Addr,
1712 SDValue &VAddr,
1713 SDValue &Offset) const {
1714 int64_t OffsetVal = 0;
1715
1716 unsigned AS = findMemSDNode(N)->getAddressSpace();
1717
1718 if (Subtarget->hasFlatInstOffsets() &&
1719 (!Subtarget->hasFlatSegmentOffsetBug() ||
1720 AS != AMDGPUAS::FLAT_ADDRESS)) {
1721 SDValue N0, N1;
1722 if (isBaseWithConstantOffset64(Addr, N0, N1)) {
1723 uint64_t COffsetVal = cast<ConstantSDNode>(N1)->getSExtValue();
1724
1725 const SIInstrInfo *TII = Subtarget->getInstrInfo();
1726 if (TII->isLegalFLATOffset(COffsetVal, AS, IsSigned)) {
1727 Addr = N0;
1728 OffsetVal = COffsetVal;
1729 } else {
1730 // If the offset doesn't fit, put the low bits into the offset field and
1731 // add the rest.
1732 //
1733 // For a FLAT instruction the hardware decides whether to access
1734 // global/scratch/shared memory based on the high bits of vaddr,
1735 // ignoring the offset field, so we have to ensure that when we add
1736 // remainder to vaddr it still points into the same underlying object.
1737 // The easiest way to do that is to make sure that we split the offset
1738 // into two pieces that are both >= 0 or both <= 0.
1739
1740 SDLoc DL(N);
1741 uint64_t RemainderOffset;
1742
1743 std::tie(OffsetVal, RemainderOffset)
1744 = TII->splitFlatOffset(COffsetVal, AS, IsSigned);
1745
1746 SDValue AddOffsetLo =
1747 getMaterializedScalarImm32(Lo_32(RemainderOffset), DL);
1748 SDValue Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
1749
1750 if (Addr.getValueType().getSizeInBits() == 32) {
1751 SmallVector<SDValue, 3> Opnds;
1752 Opnds.push_back(N0);
1753 Opnds.push_back(AddOffsetLo);
1754 unsigned AddOp = AMDGPU::V_ADD_CO_U32_e32;
1755 if (Subtarget->hasAddNoCarry()) {
1756 AddOp = AMDGPU::V_ADD_U32_e64;
1757 Opnds.push_back(Clamp);
1758 }
1759 Addr = SDValue(CurDAG->getMachineNode(AddOp, DL, MVT::i32, Opnds), 0);
1760 } else {
1761 // TODO: Should this try to use a scalar add pseudo if the base address
1762 // is uniform and saddr is usable?
1763 SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
1764 SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
1765
1766 SDNode *N0Lo = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
1767 DL, MVT::i32, N0, Sub0);
1768 SDNode *N0Hi = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
1769 DL, MVT::i32, N0, Sub1);
1770
1771 SDValue AddOffsetHi =
1772 getMaterializedScalarImm32(Hi_32(RemainderOffset), DL);
1773
1774 SDVTList VTs = CurDAG->getVTList(MVT::i32, MVT::i1);
1775
1776 SDNode *Add =
1777 CurDAG->getMachineNode(AMDGPU::V_ADD_CO_U32_e64, DL, VTs,
1778 {AddOffsetLo, SDValue(N0Lo, 0), Clamp});
1779
1780 SDNode *Addc = CurDAG->getMachineNode(
1781 AMDGPU::V_ADDC_U32_e64, DL, VTs,
1782 {AddOffsetHi, SDValue(N0Hi, 0), SDValue(Add, 1), Clamp});
1783
1784 SDValue RegSequenceArgs[] = {
1785 CurDAG->getTargetConstant(AMDGPU::VReg_64RegClassID, DL, MVT::i32),
1786 SDValue(Add, 0), Sub0, SDValue(Addc, 0), Sub1};
1787
1788 Addr = SDValue(CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, DL,
1789 MVT::i64, RegSequenceArgs),
1790 0);
1791 }
1792 }
1793 }
1794 }
1795
1796 VAddr = Addr;
1797 Offset = CurDAG->getTargetConstant(OffsetVal, SDLoc(), MVT::i16);
1798 return true;
1799}
1800
1801// If this matches zero_extend i32:x, return x
1802static SDValue matchZExtFromI32(SDValue Op) {
1803 if (Op.getOpcode() != ISD::ZERO_EXTEND)
1804 return SDValue();
1805
1806 SDValue ExtSrc = Op.getOperand(0);
1807 return (ExtSrc.getValueType() == MVT::i32) ? ExtSrc : SDValue();
1808}
1809
1810// Match (64-bit SGPR base) + (zext vgpr offset) + sext(imm offset)
1811bool AMDGPUDAGToDAGISel::SelectGlobalSAddr(SDNode *N,
1812 SDValue Addr,
1813 SDValue &SAddr,
1814 SDValue &VOffset,
1815 SDValue &Offset) const {
1816 int64_t ImmOffset = 0;
1817
1818 // Match the immediate offset first, which canonically is moved as low as
1819 // possible.
1820
1821 SDValue LHS, RHS;
1822 if (isBaseWithConstantOffset64(Addr, LHS, RHS)) {
1823 int64_t COffsetVal = cast<ConstantSDNode>(RHS)->getSExtValue();
1824 const SIInstrInfo *TII = Subtarget->getInstrInfo();
1825
1826 if (TII->isLegalFLATOffset(COffsetVal, AMDGPUAS::GLOBAL_ADDRESS, true)) {
1827 Addr = LHS;
1828 ImmOffset = COffsetVal;
1829 } else if (!LHS->isDivergent() && COffsetVal > 0) {
1830 SDLoc SL(N);
1831 // saddr + large_offset -> saddr + (voffset = large_offset & ~MaxOffset) +
1832 // (large_offset & MaxOffset);
1833 int64_t SplitImmOffset, RemainderOffset;
1834 std::tie(SplitImmOffset, RemainderOffset)
1835 = TII->splitFlatOffset(COffsetVal, AMDGPUAS::GLOBAL_ADDRESS, true);
1836
1837 if (isUInt<32>(RemainderOffset)) {
1838 SDNode *VMov = CurDAG->getMachineNode(
1839 AMDGPU::V_MOV_B32_e32, SL, MVT::i32,
1840 CurDAG->getTargetConstant(RemainderOffset, SDLoc(), MVT::i32));
1841 VOffset = SDValue(VMov, 0);
1842 SAddr = LHS;
1843 Offset = CurDAG->getTargetConstant(SplitImmOffset, SDLoc(), MVT::i16);
1844 return true;
1845 }
1846 }
1847 }
1848
1849 // Match the variable offset.
1850 if (Addr.getOpcode() != ISD::ADD) {
1851 if (Addr->isDivergent() || Addr.getOpcode() == ISD::UNDEF ||
1852 isa<ConstantSDNode>(Addr))
1853 return false;
1854
1855 // It's cheaper to materialize a single 32-bit zero for vaddr than the two
1856 // moves required to copy a 64-bit SGPR to VGPR.
1857 SAddr = Addr;
1858 SDNode *VMov = CurDAG->getMachineNode(
1859 AMDGPU::V_MOV_B32_e32, SDLoc(Addr), MVT::i32,
1860 CurDAG->getTargetConstant(0, SDLoc(), MVT::i32));
1861 VOffset = SDValue(VMov, 0);
1862 Offset = CurDAG->getTargetConstant(ImmOffset, SDLoc(), MVT::i16);
1863 return true;
1864 }
1865
1866 LHS = Addr.getOperand(0);
1867 RHS = Addr.getOperand(1);
1868
1869 if (!LHS->isDivergent()) {
1870 // add (i64 sgpr), (zero_extend (i32 vgpr))
1871 if (SDValue ZextRHS = matchZExtFromI32(RHS)) {
1872 SAddr = LHS;
1873 VOffset = ZextRHS;
1874 }
1875 }
1876
1877 if (!SAddr && !RHS->isDivergent()) {
1878 // add (zero_extend (i32 vgpr)), (i64 sgpr)
1879 if (SDValue ZextLHS = matchZExtFromI32(LHS)) {
1880 SAddr = RHS;
1881 VOffset = ZextLHS;
1882 }
1883 }
1884
1885 if (!SAddr)
1886 return false;
1887
1888 Offset = CurDAG->getTargetConstant(ImmOffset, SDLoc(), MVT::i16);
1889 return true;
1890}
1891
1892// Match (32-bit SGPR base) + sext(imm offset)
1893bool AMDGPUDAGToDAGISel::SelectScratchSAddr(SDNode *N,
1894 SDValue Addr,
1895 SDValue &SAddr,
1896 SDValue &Offset) const {
1897 if (Addr->isDivergent())
1898 return false;
1899
1900 SAddr = Addr;
1901 int64_t COffsetVal = 0;
1902
1903 if (CurDAG->isBaseWithConstantOffset(Addr)) {
1904 COffsetVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
1905 SAddr = Addr.getOperand(0);
1906 }
1907
1908 if (auto FI = dyn_cast<FrameIndexSDNode>(SAddr)) {
1909 SAddr = CurDAG->getTargetFrameIndex(FI->getIndex(), FI->getValueType(0));
1910 } else if (SAddr.getOpcode() == ISD::ADD &&
1911 isa<FrameIndexSDNode>(SAddr.getOperand(0))) {
1912 // Materialize this into a scalar move for scalar address to avoid
1913 // readfirstlane.
1914 auto FI = cast<FrameIndexSDNode>(SAddr.getOperand(0));
1915 SDValue TFI = CurDAG->getTargetFrameIndex(FI->getIndex(),
1916 FI->getValueType(0));
1917 SAddr = SDValue(CurDAG->getMachineNode(AMDGPU::S_ADD_U32, SDLoc(SAddr),
1918 MVT::i32, TFI, SAddr.getOperand(1)),
1919 0);
1920 }
1921
1922 const SIInstrInfo *TII = Subtarget->getInstrInfo();
1923
1924 if (!TII->isLegalFLATOffset(COffsetVal, AMDGPUAS::PRIVATE_ADDRESS, true)) {
1925 int64_t RemainderOffset = COffsetVal;
Value stored to 'RemainderOffset' during its initialization is never read
1926 int64_t ImmField = 0;
1927 const unsigned NumBits = AMDGPU::getNumFlatOffsetBits(*Subtarget, true);
1928 // Use signed division by a power of two to truncate towards 0.
1929 int64_t D = 1LL << (NumBits - 1);
1930 RemainderOffset = (COffsetVal / D) * D;
1931 ImmField = COffsetVal - RemainderOffset;
1932
1933 assert(TII->isLegalFLATOffset(ImmField, AMDGPUAS::PRIVATE_ADDRESS, true))((TII->isLegalFLATOffset(ImmField, AMDGPUAS::PRIVATE_ADDRESS
, true)) ? static_cast<void> (0) : __assert_fail ("TII->isLegalFLATOffset(ImmField, AMDGPUAS::PRIVATE_ADDRESS, true)"
, "/build/llvm-toolchain-snapshot-13~++20210223111116+16ede0956cb1/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp"
, 1933, __PRETTY_FUNCTION__))
;
1934 assert(RemainderOffset + ImmField == COffsetVal)((RemainderOffset + ImmField == COffsetVal) ? static_cast<
void> (0) : __assert_fail ("RemainderOffset + ImmField == COffsetVal"
, "/build/llvm-toolchain-snapshot-13~++20210223111116+16ede0956cb1/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp"
, 1934, __PRETTY_FUNCTION__))
;
1935
1936 COffsetVal = ImmField;
1937
1938 SDLoc DL(N);
1939 SDValue AddOffset =
1940 getMaterializedScalarImm32(Lo_32(RemainderOffset), DL);
1941 SAddr = SDValue(CurDAG->getMachineNode(AMDGPU::S_ADD_U32, DL, MVT::i32,
1942 SAddr, AddOffset), 0);
1943 }
1944
1945 Offset = CurDAG->getTargetConstant(COffsetVal, SDLoc(), MVT::i16);
1946
1947 return true;
1948}
1949
1950bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode,
1951 SDValue &Offset, bool &Imm) const {
1952 ConstantSDNode *C = dyn_cast<ConstantSDNode>(ByteOffsetNode);
1953 if (!C) {
1954 if (ByteOffsetNode.getValueType().isScalarInteger() &&
1955 ByteOffsetNode.getValueType().getSizeInBits() == 32) {
1956 Offset = ByteOffsetNode;
1957 Imm = false;
1958 return true;
1959 }
1960 if (ByteOffsetNode.getOpcode() == ISD::ZERO_EXTEND) {
1961 if (ByteOffsetNode.getOperand(0).getValueType().getSizeInBits() == 32) {
1962 Offset = ByteOffsetNode.getOperand(0);
1963 Imm = false;
1964 return true;
1965 }
1966 }
1967 return false;
1968 }
1969
1970 SDLoc SL(ByteOffsetNode);
1971 // GFX9 and GFX10 have signed byte immediate offsets.
1972 int64_t ByteOffset = C->getSExtValue();
1973 Optional<int64_t> EncodedOffset =
1974 AMDGPU::getSMRDEncodedOffset(*Subtarget, ByteOffset, false);
1975 if (EncodedOffset) {
1976 Offset = CurDAG->getTargetConstant(*EncodedOffset, SL, MVT::i32);
1977 Imm = true;
1978 return true;
1979 }
1980
1981 // SGPR and literal offsets are unsigned.
1982 if (ByteOffset < 0)
1983 return false;
1984
1985 EncodedOffset = AMDGPU::getSMRDEncodedLiteralOffset32(*Subtarget, ByteOffset);
1986 if (EncodedOffset) {
1987 Offset = CurDAG->getTargetConstant(*EncodedOffset, SL, MVT::i32);
1988 return true;
1989 }
1990
1991 if (!isUInt<32>(ByteOffset) && !isInt<32>(ByteOffset))
1992 return false;
1993
1994 SDValue C32Bit = CurDAG->getTargetConstant(ByteOffset, SL, MVT::i32);
1995 Offset = SDValue(
1996 CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32, C32Bit), 0);
1997
1998 return true;
1999}
2000
2001SDValue AMDGPUDAGToDAGISel::Expand32BitAddress(SDValue Addr) const {
2002 if (Addr.getValueType() != MVT::i32)
2003 return Addr;
2004
2005 // Zero-extend a 32-bit address.
2006 SDLoc SL(Addr);
2007
2008 const MachineFunction &MF = CurDAG->getMachineFunction();
2009 const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
2010 unsigned AddrHiVal = Info->get32BitAddressHighBits();
2011 SDValue AddrHi = CurDAG->getTargetConstant(AddrHiVal, SL, MVT::i32);
2012
2013 const SDValue Ops[] = {
2014 CurDAG->getTargetConstant(AMDGPU::SReg_64_XEXECRegClassID, SL, MVT::i32),
2015 Addr,
2016 CurDAG->getTargetConstant(AMDGPU::sub0, SL, MVT::i32),
2017 SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32, AddrHi),
2018 0),
2019 CurDAG->getTargetConstant(AMDGPU::sub1, SL, MVT::i32),
2020 };
2021
2022 return SDValue(CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, SL, MVT::i64,
2023 Ops), 0);
2024}
2025
2026bool AMDGPUDAGToDAGISel::SelectSMRD(SDValue Addr, SDValue &SBase,
2027 SDValue &Offset, bool &Imm) const {
2028 SDLoc SL(Addr);
2029
2030 // A 32-bit (address + offset) should not cause unsigned 32-bit integer
2031 // wraparound, because s_load instructions perform the addition in 64 bits.
2032 if ((Addr.getValueType() != MVT::i32 ||
2033 Addr->getFlags().hasNoUnsignedWrap())) {
2034 SDValue N0, N1;
2035 // Extract the base and offset if possible.
2036 if (CurDAG->isBaseWithConstantOffset(Addr) ||
2037 Addr.getOpcode() == ISD::ADD) {
2038 N0 = Addr.getOperand(0);
2039 N1 = Addr.getOperand(1);
2040 } else if (getBaseWithOffsetUsingSplitOR(*CurDAG, Addr, N0, N1)) {
2041 assert(N0 && N1 && isa<ConstantSDNode>(N1))((N0 && N1 && isa<ConstantSDNode>(N1)) ?
static_cast<void> (0) : __assert_fail ("N0 && N1 && isa<ConstantSDNode>(N1)"
, "/build/llvm-toolchain-snapshot-13~++20210223111116+16ede0956cb1/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp"
, 2041, __PRETTY_FUNCTION__))
;
2042 }
2043 if (N0 && N1) {
2044 if (SelectSMRDOffset(N1, Offset, Imm)) {
2045 SBase = Expand32BitAddress(N0);
2046 return true;
2047 }
2048 }
2049 }
2050 SBase = Expand32BitAddress(Addr);
2051 Offset = CurDAG->getTargetConstant(0, SL, MVT::i32);
2052 Imm = true;
2053 return true;
2054}
2055
2056bool AMDGPUDAGToDAGISel::SelectSMRDImm(SDValue Addr, SDValue &SBase,
2057 SDValue &Offset) const {
2058 bool Imm = false;
2059 return SelectSMRD(Addr, SBase, Offset, Imm) && Imm;
2060}
2061
2062bool AMDGPUDAGToDAGISel::SelectSMRDImm32(SDValue Addr, SDValue &SBase,
2063 SDValue &Offset) const {
2064
2065 assert(Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS)((Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS
) ? static_cast<void> (0) : __assert_fail ("Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS"
, "/build/llvm-toolchain-snapshot-13~++20210223111116+16ede0956cb1/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp"
, 2065, __PRETTY_FUNCTION__))
;
2066
2067 bool Imm = false;
2068 if (!SelectSMRD(Addr, SBase, Offset, Imm))
2069 return false;
2070
2071 return !Imm && isa<ConstantSDNode>(Offset);
2072}
2073
2074bool AMDGPUDAGToDAGISel::SelectSMRDSgpr(SDValue Addr, SDValue &SBase,
2075 SDValue &Offset) const {
2076 bool Imm = false;
2077 return SelectSMRD(Addr, SBase, Offset, Imm) && !Imm &&
2078 !isa<ConstantSDNode>(Offset);
2079}
2080
2081bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm(SDValue Addr,
2082 SDValue &Offset) const {
2083 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr)) {
2084 // The immediate offset for S_BUFFER instructions is unsigned.
2085 if (auto Imm =
2086 AMDGPU::getSMRDEncodedOffset(*Subtarget, C->getZExtValue(), true)) {
2087 Offset = CurDAG->getTargetConstant(*Imm, SDLoc(Addr), MVT::i32);
2088 return true;
2089 }
2090 }
2091
2092 return false;
2093}
2094
2095bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm32(SDValue Addr,
2096 SDValue &Offset) const {
2097 assert(Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS)((Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS
) ? static_cast<void> (0) : __assert_fail ("Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS"
, "/build/llvm-toolchain-snapshot-13~++20210223111116+16ede0956cb1/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp"
, 2097, __PRETTY_FUNCTION__))
;
2098
2099 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr)) {
2100 if (auto Imm = AMDGPU::getSMRDEncodedLiteralOffset32(*Subtarget,
2101 C->getZExtValue())) {
2102 Offset = CurDAG->getTargetConstant(*Imm, SDLoc(Addr), MVT::i32);
2103 return true;
2104 }
2105 }
2106
2107 return false;
2108}
2109
2110bool AMDGPUDAGToDAGISel::SelectMOVRELOffset(SDValue Index,
2111 SDValue &Base,
2112 SDValue &Offset) const {
2113 SDLoc DL(Index);
2114
2115 if (CurDAG->isBaseWithConstantOffset(Index)) {
2116 SDValue N0 = Index.getOperand(0);
2117 SDValue N1 = Index.getOperand(1);
2118 ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
2119
2120 // (add n0, c0)
2121 // Don't peel off the offset (c0) if doing so could possibly lead
2122 // the base (n0) to be negative.
2123 // (or n0, |c0|) can never change a sign given isBaseWithConstantOffset.
2124 if (C1->getSExtValue() <= 0 || CurDAG->SignBitIsZero(N0) ||
2125 (Index->getOpcode() == ISD::OR && C1->getSExtValue() >= 0)) {
2126 Base = N0;
2127 Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32);
2128 return true;
2129 }
2130 }
2131
2132 if (isa<ConstantSDNode>(Index))
2133 return false;
2134
2135 Base = Index;
2136 Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
2137 return true;
2138}
2139
2140SDNode *AMDGPUDAGToDAGISel::getS_BFE(unsigned Opcode, const SDLoc &DL,
2141 SDValue Val, uint32_t Offset,
2142 uint32_t Width) {
2143 // Transformation function, pack the offset and width of a BFE into
2144 // the format expected by the S_BFE_I32 / S_BFE_U32. In the second
2145 // source, bits [5:0] contain the offset and bits [22:16] the width.
2146 uint32_t PackedVal = Offset | (Width << 16);
2147 SDValue PackedConst = CurDAG->getTargetConstant(PackedVal, DL, MVT::i32);
2148
2149 return CurDAG->getMachineNode(Opcode, DL, MVT::i32, Val, PackedConst);
2150}
2151
2152void AMDGPUDAGToDAGISel::SelectS_BFEFromShifts(SDNode *N) {
2153 // "(a << b) srl c)" ---> "BFE_U32 a, (c-b), (32-c)
2154 // "(a << b) sra c)" ---> "BFE_I32 a, (c-b), (32-c)
2155 // Predicate: 0 < b <= c < 32
2156
2157 const SDValue &Shl = N->getOperand(0);
2158 ConstantSDNode *B = dyn_cast<ConstantSDNode>(Shl->getOperand(1));
2159 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
2160
2161 if (B && C) {
2162 uint32_t BVal = B->getZExtValue();
2163 uint32_t CVal = C->getZExtValue();
2164
2165 if (0 < BVal && BVal <= CVal && CVal < 32) {
2166 bool Signed = N->getOpcode() == ISD::SRA;
2167 unsigned Opcode = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;
2168
2169 ReplaceNode(N, getS_BFE(Opcode, SDLoc(N), Shl.getOperand(0), CVal - BVal,
2170 32 - CVal));
2171 return;
2172 }
2173 }
2174 SelectCode(N);
2175}
2176
2177void AMDGPUDAGToDAGISel::SelectS_BFE(SDNode *N) {
2178 switch (N->getOpcode()) {
2179 case ISD::AND:
2180 if (N->getOperand(0).getOpcode() == ISD::SRL) {
2181 // "(a srl b) & mask" ---> "BFE_U32 a, b, popcount(mask)"
2182 // Predicate: isMask(mask)
2183 const SDValue &Srl = N->getOperand(0);
2184 ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(Srl.getOperand(1));
2185 ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
2186
2187 if (Shift && Mask) {
2188 uint32_t ShiftVal = Shift->getZExtValue();
2189 uint32_t MaskVal = Mask->getZExtValue();
2190
2191 if (isMask_32(MaskVal)) {
2192 uint32_t WidthVal = countPopulation(MaskVal);
2193
2194 ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N),
2195 Srl.getOperand(0), ShiftVal, WidthVal));
2196 return;
2197 }
2198 }
2199 }
2200 break;
2201 case ISD::SRL:
2202 if (N->getOperand(0).getOpcode() == ISD::AND) {
2203 // "(a & mask) srl b)" ---> "BFE_U32 a, b, popcount(mask >> b)"
2204 // Predicate: isMask(mask >> b)
2205 const SDValue &And = N->getOperand(0);
2206 ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(N->getOperand(1));
2207 ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(And->getOperand(1));
2208
2209 if (Shift && Mask) {
2210 uint32_t ShiftVal = Shift->getZExtValue();
2211 uint32_t MaskVal = Mask->getZExtValue() >> ShiftVal;
2212
2213 if (isMask_32(MaskVal)) {
2214 uint32_t WidthVal = countPopulation(MaskVal);
2215
2216 ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N),
2217 And.getOperand(0), ShiftVal, WidthVal));
2218 return;
2219 }
2220 }
2221 } else if (N->getOperand(0).getOpcode() == ISD::SHL) {
2222 SelectS_BFEFromShifts(N);
2223 return;
2224 }
2225 break;
2226 case ISD::SRA:
2227 if (N->getOperand(0).getOpcode() == ISD::SHL) {
2228 SelectS_BFEFromShifts(N);
2229 return;
2230 }
2231 break;
2232
2233 case ISD::SIGN_EXTEND_INREG: {
2234 // sext_inreg (srl x, 16), i8 -> bfe_i32 x, 16, 8
2235 SDValue Src = N->getOperand(0);
2236 if (Src.getOpcode() != ISD::SRL)
2237 break;
2238
2239 const ConstantSDNode *Amt = dyn_cast<ConstantSDNode>(Src.getOperand(1));
2240 if (!Amt)
2241 break;
2242
2243 unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
2244 ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_I32, SDLoc(N), Src.getOperand(0),
2245 Amt->getZExtValue(), Width));
2246 return;
2247 }
2248 }
2249
2250 SelectCode(N);
2251}
2252
2253bool AMDGPUDAGToDAGISel::isCBranchSCC(const SDNode *N) const {
2254 assert(N->getOpcode() == ISD::BRCOND)((N->getOpcode() == ISD::BRCOND) ? static_cast<void>
(0) : __assert_fail ("N->getOpcode() == ISD::BRCOND", "/build/llvm-toolchain-snapshot-13~++20210223111116+16ede0956cb1/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp"
, 2254, __PRETTY_FUNCTION__))
;
2255 if (!N->hasOneUse())
2256 return false;
2257
2258 SDValue Cond = N->getOperand(1);
2259 if (Cond.getOpcode() == ISD::CopyToReg)
2260 Cond = Cond.getOperand(2);
2261
2262 if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse())
2263 return false;
2264
2265 MVT VT = Cond.getOperand(0).getSimpleValueType();
2266 if (VT == MVT::i32)
2267 return true;
2268
2269 if (VT == MVT::i64) {
2270 auto ST = static_cast<const GCNSubtarget *>(Subtarget);
2271
2272 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
2273 return (CC == ISD::SETEQ || CC == ISD::SETNE) && ST->hasScalarCompareEq64();
2274 }
2275
2276 return false;
2277}
2278
2279void AMDGPUDAGToDAGISel::SelectBRCOND(SDNode *N) {
2280 SDValue Cond = N->getOperand(1);
2281
2282 if (Cond.isUndef()) {
2283 CurDAG->SelectNodeTo(N, AMDGPU::SI_BR_UNDEF, MVT::Other,
2284 N->getOperand(2), N->getOperand(0));
2285 return;
2286 }
2287
2288 const GCNSubtarget *ST = static_cast<const GCNSubtarget *>(Subtarget);
2289 const SIRegisterInfo *TRI = ST->getRegisterInfo();
2290
2291 bool UseSCCBr = isCBranchSCC(N) && isUniformBr(N);
2292 unsigned BrOp = UseSCCBr ? AMDGPU::S_CBRANCH_SCC1 : AMDGPU::S_CBRANCH_VCCNZ;
2293 Register CondReg = UseSCCBr ? AMDGPU::SCC : TRI->getVCC();
2294 SDLoc SL(N);
2295
2296 if (!UseSCCBr) {
2297 // This is the case that we are selecting to S_CBRANCH_VCCNZ. We have not
2298 // analyzed what generates the vcc value, so we do not know whether vcc
2299 // bits for disabled lanes are 0. Thus we need to mask out bits for
2300 // disabled lanes.
2301 //
2302 // For the case that we select S_CBRANCH_SCC1 and it gets
2303 // changed to S_CBRANCH_VCCNZ in SIFixSGPRCopies, SIFixSGPRCopies calls
2304 // SIInstrInfo::moveToVALU which inserts the S_AND).
2305 //
2306 // We could add an analysis of what generates the vcc value here and omit
2307 // the S_AND when is unnecessary. But it would be better to add a separate
2308 // pass after SIFixSGPRCopies to do the unnecessary S_AND removal, so it
2309 // catches both cases.
2310 Cond = SDValue(CurDAG->getMachineNode(ST->isWave32() ? AMDGPU::S_AND_B32
2311 : AMDGPU::S_AND_B64,
2312 SL, MVT::i1,
2313 CurDAG->getRegister(ST->isWave32() ? AMDGPU::EXEC_LO
2314 : AMDGPU::EXEC,
2315 MVT::i1),
2316 Cond),
2317 0);
2318 }
2319
2320 SDValue VCC = CurDAG->getCopyToReg(N->getOperand(0), SL, CondReg, Cond);
2321 CurDAG->SelectNodeTo(N, BrOp, MVT::Other,
2322 N->getOperand(2), // Basic Block
2323 VCC.getValue(0));
2324}
2325
2326void AMDGPUDAGToDAGISel::SelectFMAD_FMA(SDNode *N) {
2327 MVT VT = N->getSimpleValueType(0);
2328 bool IsFMA = N->getOpcode() == ISD::FMA;
2329 if (VT != MVT::f32 || (!Subtarget->hasMadMixInsts() &&
2330 !Subtarget->hasFmaMixInsts()) ||
2331 ((IsFMA && Subtarget->hasMadMixInsts()) ||
2332 (!IsFMA && Subtarget->hasFmaMixInsts()))) {
2333 SelectCode(N);
2334 return;
2335 }
2336
2337 SDValue Src0 = N->getOperand(0);
2338 SDValue Src1 = N->getOperand(1);
2339 SDValue Src2 = N->getOperand(2);
2340 unsigned Src0Mods, Src1Mods, Src2Mods;
2341
2342 // Avoid using v_mad_mix_f32/v_fma_mix_f32 unless there is actually an operand
2343 // using the conversion from f16.
2344 bool Sel0 = SelectVOP3PMadMixModsImpl(Src0, Src0, Src0Mods);
2345 bool Sel1 = SelectVOP3PMadMixModsImpl(Src1, Src1, Src1Mods);
2346 bool Sel2 = SelectVOP3PMadMixModsImpl(Src2, Src2, Src2Mods);
2347
2348 assert((IsFMA || !Mode.allFP32Denormals()) &&(((IsFMA || !Mode.allFP32Denormals()) && "fmad selected with denormals enabled"
) ? static_cast<void> (0) : __assert_fail ("(IsFMA || !Mode.allFP32Denormals()) && \"fmad selected with denormals enabled\""
, "/build/llvm-toolchain-snapshot-13~++20210223111116+16ede0956cb1/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp"
, 2349, __PRETTY_FUNCTION__))
2349 "fmad selected with denormals enabled")(((IsFMA || !Mode.allFP32Denormals()) && "fmad selected with denormals enabled"
) ? static_cast<void> (0) : __assert_fail ("(IsFMA || !Mode.allFP32Denormals()) && \"fmad selected with denormals enabled\""
, "/build/llvm-toolchain-snapshot-13~++20210223111116+16ede0956cb1/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp"
, 2349, __PRETTY_FUNCTION__))
;
2350 // TODO: We can select this with f32 denormals enabled if all the sources are
2351 // converted from f16 (in which case fmad isn't legal).
2352
2353 if (Sel0 || Sel1 || Sel2) {
2354 // For dummy operands.
2355 SDValue Zero = CurDAG->getTargetConstant(0, SDLoc(), MVT::i32);
2356 SDValue Ops[] = {
2357 CurDAG->getTargetConstant(Src0Mods, SDLoc(), MVT::i32), Src0,
2358 CurDAG->getTargetConstant(Src1Mods, SDLoc(), MVT::i32), Src1,
2359 CurDAG->getTargetConstant(Src2Mods, SDLoc(), MVT::i32), Src2,
2360 CurDAG->getTargetConstant(0, SDLoc(), MVT::i1),
2361 Zero, Zero
2362 };
2363
2364 CurDAG->SelectNodeTo(N,
2365 IsFMA ? AMDGPU::V_FMA_MIX_F32 : AMDGPU::V_MAD_MIX_F32,
2366 MVT::f32, Ops);
2367 } else {
2368 SelectCode(N);
2369 }
2370}
2371
2372// This is here because there isn't a way to use the generated sub0_sub1 as the
2373// subreg index to EXTRACT_SUBREG in tablegen.
2374void AMDGPUDAGToDAGISel::SelectATOMIC_CMP_SWAP(SDNode *N) {
2375  MemSDNode *Mem = cast<MemSDNode>(N);
2376  unsigned AS = Mem->getAddressSpace();
  // Flat cmpswap is matched by the generated tables; only the buffer (MUBUF)
  // forms need the manual sub-register extraction below.
2377  if (AS == AMDGPUAS::FLAT_ADDRESS) {
2378    SelectCode(N);
2379    return;
2380  }
2381
2382  MVT VT = N->getSimpleValueType(0);
2383  bool Is32 = (VT == MVT::i32);
2384  SDLoc SL(N);
2385
2386  MachineSDNode *CmpSwap = nullptr;
  // Prefer the ADDR64 (vaddr-based) MUBUF addressing form when the target
  // supports it.
2387  if (Subtarget->hasAddr64()) {
2388    SDValue SRsrc, VAddr, SOffset, Offset, SLC;
2389
2390    if (SelectMUBUFAddr64(Mem->getBasePtr(), SRsrc, VAddr, SOffset, Offset, SLC)) {
2391      unsigned Opcode = Is32 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN :
2392        AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN;
2393      SDValue CmpVal = Mem->getOperand(2);
      // GLC = 1 selects the returning (_RTN) behavior of the atomic.
2394      SDValue GLC = CurDAG->getTargetConstant(1, SL, MVT::i1);
2395
2396      // XXX - Do we care about glue operands?
2397
2398      SDValue Ops[] = {
2399        CmpVal, VAddr, SRsrc, SOffset, Offset, GLC, SLC, Mem->getChain()
2400      };
2401
2402      CmpSwap = CurDAG->getMachineNode(Opcode, SL, Mem->getVTList(), Ops);
2403    }
2404  }
2405
  // Fall back to the offset-only MUBUF addressing form.
2406  if (!CmpSwap) {
2407    SDValue SRsrc, SOffset, Offset, SLC;
2408    if (SelectMUBUFOffset(Mem->getBasePtr(), SRsrc, SOffset, Offset, SLC)) {
2409      unsigned Opcode = Is32 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN :
2410        AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_OFFSET_RTN;
2411
2412      SDValue CmpVal = Mem->getOperand(2);
2413      SDValue GLC = CurDAG->getTargetConstant(1, SL, MVT::i1);
2414      SDValue Ops[] = {
2415        CmpVal, SRsrc, SOffset, Offset, GLC, SLC, Mem->getChain()
2416      };
2417
2418      CmpSwap = CurDAG->getMachineNode(Opcode, SL, Mem->getVTList(), Ops);
2419    }
2420  }
2421
  // Neither addressing mode matched; let the generated matcher handle it.
2422  if (!CmpSwap) {
2423    SelectCode(N);
2424    return;
2425  }
2426
2427  MachineMemOperand *MMO = Mem->getMemOperand();
2428  CurDAG->setNodeMemRefs(CmpSwap, {MMO});
2429
  // The machine instruction's result register is wider than the returned
  // value; extract the low half (sub0 for i32, sub0_sub1 for i64) — this is
  // the extraction tablegen cannot express (see comment at the top).
2430  unsigned SubReg = Is32 ? AMDGPU::sub0 : AMDGPU::sub0_sub1;
2431  SDValue Extract
2432    = CurDAG->getTargetExtractSubreg(SubReg, SL, VT, SDValue(CmpSwap, 0));
2433
  // Rewire value and chain uses of the original node, then delete it.
2434  ReplaceUses(SDValue(N, 0), Extract);
2435  ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 1));
2436  CurDAG->RemoveDeadNode(N);
2437}
2438
2439void AMDGPUDAGToDAGISel::SelectDSAppendConsume(SDNode *N, unsigned IntrID) {
2440  // The address is assumed to be uniform, so if it ends up in a VGPR, it will
2441  // be copied to an SGPR with readfirstlane.
2442  unsigned Opc = IntrID == Intrinsic::amdgcn_ds_append ?
2443    AMDGPU::DS_APPEND : AMDGPU::DS_CONSUME;
2444
2445  SDValue Chain = N->getOperand(0);
2446  SDValue Ptr = N->getOperand(2);
2447  MemIntrinsicSDNode *M = cast<MemIntrinsicSDNode>(N);
2448  MachineMemOperand *MMO = M->getMemOperand();
  // Region address space selects the GDS form of the instruction.
2449  bool IsGDS = M->getAddressSpace() == AMDGPUAS::REGION_ADDRESS;
2450
  // If the pointer splits into base + legal immediate offset, copy only the
  // base into m0 and fold the offset into the instruction's offset field.
2451  SDValue Offset;
2452  if (CurDAG->isBaseWithConstantOffset(Ptr)) {
2453    SDValue PtrBase = Ptr.getOperand(0);
2454    SDValue PtrOffset = Ptr.getOperand(1);
2455
2456    const APInt &OffsetVal = cast<ConstantSDNode>(PtrOffset)->getAPIntValue();
2457    if (isDSOffsetLegal(PtrBase, OffsetVal.getZExtValue())) {
2458      N = glueCopyToM0(N, PtrBase);
2459      Offset = CurDAG->getTargetConstant(OffsetVal, SDLoc(), MVT::i32);
2460    }
2461  }
2462
  // Otherwise the whole pointer goes to m0 and the offset field is zero.
2463  if (!Offset) {
2464    N = glueCopyToM0(N, Ptr);
2465    Offset = CurDAG->getTargetConstant(0, SDLoc(), MVT::i32);
2466  }
2467
2468  SDValue Ops[] = {
2469    Offset,
2470    CurDAG->getTargetConstant(IsGDS, SDLoc(), MVT::i32),
2471    Chain,
    // glueCopyToM0 appended a glue operand; forward it so the m0 write stays
    // ordered before the DS instruction.
2472    N->getOperand(N->getNumOperands() - 1) // New glue
2473  };
2474
2475  SDNode *Selected = CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
2476  CurDAG->setNodeMemRefs(cast<MachineSDNode>(Selected), {MMO});
2477}
2478
2479static unsigned gwsIntrinToOpcode(unsigned IntrID) {
2480 switch (IntrID) {
2481 case Intrinsic::amdgcn_ds_gws_init:
2482 return AMDGPU::DS_GWS_INIT;
2483 case Intrinsic::amdgcn_ds_gws_barrier:
2484 return AMDGPU::DS_GWS_BARRIER;
2485 case Intrinsic::amdgcn_ds_gws_sema_v:
2486 return AMDGPU::DS_GWS_SEMA_V;
2487 case Intrinsic::amdgcn_ds_gws_sema_br:
2488 return AMDGPU::DS_GWS_SEMA_BR;
2489 case Intrinsic::amdgcn_ds_gws_sema_p:
2490 return AMDGPU::DS_GWS_SEMA_P;
2491 case Intrinsic::amdgcn_ds_gws_sema_release_all:
2492 return AMDGPU::DS_GWS_SEMA_RELEASE_ALL;
2493 default:
2494 llvm_unreachable("not a gws intrinsic")::llvm::llvm_unreachable_internal("not a gws intrinsic", "/build/llvm-toolchain-snapshot-13~++20210223111116+16ede0956cb1/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp"
, 2494)
;
2495 }
2496}
2497
// Select a GWS (global wave sync) intrinsic: set up m0 with the resource-id
// base and emit the DS_GWS_* instruction with an immediate offset field.
2498void AMDGPUDAGToDAGISel::SelectDS_GWS(SDNode *N, unsigned IntrID) {
2499  if (IntrID == Intrinsic::amdgcn_ds_gws_sema_release_all &&
2500      !Subtarget->hasGWSSemaReleaseAll()) {
2501    // Let this error.
2502    SelectCode(N);
2503    return;
2504  }
2505
2506  // Chain, intrinsic ID, vsrc, offset
2507  const bool HasVSrc = N->getNumOperands() == 4;
2508  assert(HasVSrc || N->getNumOperands() == 3)((HasVSrc || N->getNumOperands() == 3) ? static_cast<void
> (0) : __assert_fail ("HasVSrc || N->getNumOperands() == 3"
, "/build/llvm-toolchain-snapshot-13~++20210223111116+16ede0956cb1/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp"
, 2508, __PRETTY_FUNCTION__))
;
2509
2510  SDLoc SL(N);
2511  SDValue BaseOffset = N->getOperand(HasVSrc ? 3 : 2);
2512  int ImmOffset = 0;
2513  MemIntrinsicSDNode *M = cast<MemIntrinsicSDNode>(N);
2514  MachineMemOperand *MMO = M->getMemOperand();
2515
2516  // Don't worry if the offset ends up in a VGPR. Only one lane will have
2517  // effect, so SIFixSGPRCopies will validly insert readfirstlane.
2518
2519  // The resource id offset is computed as (<isa opaque base> + M0[21:16] +
2520  // offset field) % 64. Some versions of the programming guide omit the m0
2521  // part, or claim it's from offset 0.
2522  if (ConstantSDNode *ConstOffset = dyn_cast<ConstantSDNode>(BaseOffset)) {
2523    // If we have a constant offset, try to use the 0 in m0 as the base.
2524    // TODO: Look into changing the default m0 initialization value. If the
2525    // default -1 only set the low 16-bits, we could leave it as-is and add 1 to
2526    // the immediate offset.
2527    glueCopyToM0(N, CurDAG->getTargetConstant(0, SL, MVT::i32));
2528    ImmOffset = ConstOffset->getZExtValue();
2529  } else {
    // Non-constant offset: peel off any constant addend into the immediate
    // field, then place the variable base into m0 bits [21:16].
2530    if (CurDAG->isBaseWithConstantOffset(BaseOffset)) {
2531      ImmOffset = BaseOffset.getConstantOperandVal(1);
2532      BaseOffset = BaseOffset.getOperand(0);
2533    }
2534
2535    // Prefer to do the shift in an SGPR since it should be possible to use m0
2536    // as the result directly. If it's already an SGPR, it will be eliminated
2537    // later.
2538    SDNode *SGPROffset
2539      = CurDAG->getMachineNode(AMDGPU::V_READFIRSTLANE_B32, SL, MVT::i32,
2540                               BaseOffset);
2541    // Shift to offset in m0
2542    SDNode *M0Base
2543      = CurDAG->getMachineNode(AMDGPU::S_LSHL_B32, SL, MVT::i32,
2544                               SDValue(SGPROffset, 0),
2545                               CurDAG->getTargetConstant(16, SL, MVT::i32));
2546    glueCopyToM0(N, SDValue(M0Base, 0));
2547  }
2548
2549  SDValue Chain = N->getOperand(0);
2550  SDValue OffsetField = CurDAG->getTargetConstant(ImmOffset, SL, MVT::i32);
2551
2552  const unsigned Opc = gwsIntrinToOpcode(IntrID);
2553  SmallVector<SDValue, 5> Ops;
2554  if (HasVSrc)
2555    Ops.push_back(N->getOperand(2));
2556  Ops.push_back(OffsetField);
2557  Ops.push_back(Chain);
2558
2559  SDNode *Selected = CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
2560  CurDAG->setNodeMemRefs(cast<MachineSDNode>(Selected), {MMO});
2561}
2562
// Select llvm.amdgcn.interp.p1.f16. On 16-bank-LDS subtargets this expands to
// V_INTERP_MOV_F32 + V_INTERP_P1LV_F16 with an explicit m0 copy glued between
// them; otherwise a single tablegen pattern handles it.
2563void AMDGPUDAGToDAGISel::SelectInterpP1F16(SDNode *N) {
2564  if (Subtarget->getLDSBankCount() != 16) {
2565    // This is a single instruction with a pattern.
2566    SelectCode(N);
2567    return;
2568  }
2569
2570  SDLoc DL(N);
2571
2572  // This requires 2 instructions. It is possible to write a pattern to support
2573  // this, but the generated isel emitter doesn't correctly deal with multiple
2574  // output instructions using the same physical register input. The copy to m0
2575  // is incorrectly placed before the second instruction.
2576  //
2577  // TODO: Match source modifiers.
2578  //
2579  // def : Pat <
2580  //   (int_amdgcn_interp_p1_f16
2581  //    (VOP3Mods f32:$src0, i32:$src0_modifiers),
2582  //                             (i32 timm:$attrchan), (i32 timm:$attr),
2583  //                             (i1 timm:$high), M0),
2584  //   (V_INTERP_P1LV_F16 $src0_modifiers, VGPR_32:$src0, timm:$attr,
2585  //       timm:$attrchan, 0,
2586  //       (V_INTERP_MOV_F32 2, timm:$attr, timm:$attrchan), timm:$high)> {
2587  //   let Predicates = [has16BankLDS];
2588  // }
2589
2590  // 16 bank LDS
  // Operand 5 of the intrinsic is the m0 value; glue the copy so it stays
  // adjacent to the instructions that read M0.
2591  SDValue ToM0 = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL, AMDGPU::M0,
2592                                      N->getOperand(5), SDValue());
2593
2594  SDVTList VTs = CurDAG->getVTList(MVT::f32, MVT::Other);
2595
2596  SDNode *InterpMov =
2597    CurDAG->getMachineNode(AMDGPU::V_INTERP_MOV_F32, DL, VTs, {
2598        CurDAG->getTargetConstant(2, DL, MVT::i32), // P0
2599        N->getOperand(3),  // Attr
2600        N->getOperand(2),  // Attrchan
2601        ToM0.getValue(1) // In glue
2602  });
2603
2604  SDNode *InterpP1LV =
2605    CurDAG->getMachineNode(AMDGPU::V_INTERP_P1LV_F16, DL, MVT::f32, {
2606        CurDAG->getTargetConstant(0, DL, MVT::i32), // $src0_modifiers
2607        N->getOperand(1), // Src0
2608        N->getOperand(3), // Attr
2609        N->getOperand(2), // Attrchan
2610        CurDAG->getTargetConstant(0, DL, MVT::i32), // $src2_modifiers
2611        SDValue(InterpMov, 0), // Src2 - holds two f16 values selected by high
2612        N->getOperand(4), // high
2613        CurDAG->getTargetConstant(0, DL, MVT::i1), // $clamp
2614        CurDAG->getTargetConstant(0, DL, MVT::i32), // $omod
        // Glue from InterpMov keeps the m0 read ordered with the first
        // instruction.
2615        SDValue(InterpMov, 1)
2616  });
2617
2618  CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), SDValue(InterpP1LV, 0));
2619}
2620
2621void AMDGPUDAGToDAGISel::SelectINTRINSIC_W_CHAIN(SDNode *N) {
2622 unsigned IntrID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
2623 switch (IntrID) {
2624 case Intrinsic::amdgcn_ds_append:
2625 case Intrinsic::amdgcn_ds_consume: {
2626 if (N->getValueType(0) != MVT::i32)
2627 break;
2628 SelectDSAppendConsume(N, IntrID);
2629 return;
2630 }
2631 }
2632
2633 SelectCode(N);
2634}
2635
2636void AMDGPUDAGToDAGISel::SelectINTRINSIC_WO_CHAIN(SDNode *N) {
2637 unsigned IntrID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
2638 unsigned Opcode;
2639 switch (IntrID) {
2640 case Intrinsic::amdgcn_wqm:
2641 Opcode = AMDGPU::WQM;
2642 break;
2643 case Intrinsic::amdgcn_softwqm:
2644 Opcode = AMDGPU::SOFT_WQM;
2645 break;
2646 case Intrinsic::amdgcn_wwm:
2647 Opcode = AMDGPU::WWM;
2648 break;
2649 case Intrinsic::amdgcn_interp_p1_f16:
2650 SelectInterpP1F16(N);
2651 return;
2652 default:
2653 SelectCode(N);
2654 return;
2655 }
2656
2657 SDValue Src = N->getOperand(1);
2658 CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), {Src});
2659}
2660
2661void AMDGPUDAGToDAGISel::SelectINTRINSIC_VOID(SDNode *N) {
2662 unsigned IntrID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
2663 switch (IntrID) {
2664 case Intrinsic::amdgcn_ds_gws_init:
2665 case Intrinsic::amdgcn_ds_gws_barrier:
2666 case Intrinsic::amdgcn_ds_gws_sema_v:
2667 case Intrinsic::amdgcn_ds_gws_sema_br:
2668 case Intrinsic::amdgcn_ds_gws_sema_p:
2669 case Intrinsic::amdgcn_ds_gws_sema_release_all:
2670 SelectDS_GWS(N, IntrID);
2671 return;
2672 default:
2673 break;
2674 }
2675
2676 SelectCode(N);
2677}
2678
2679bool AMDGPUDAGToDAGISel::SelectVOP3ModsImpl(SDValue In, SDValue &Src,
2680 unsigned &Mods,
2681 bool AllowAbs) const {
2682 Mods = 0;
2683 Src = In;
2684
2685 if (Src.getOpcode() == ISD::FNEG) {
2686 Mods |= SISrcMods::NEG;
2687 Src = Src.getOperand(0);
2688 }
2689
2690 if (AllowAbs && Src.getOpcode() == ISD::FABS) {
2691 Mods |= SISrcMods::ABS;
2692 Src = Src.getOperand(0);
2693 }
2694
2695 return true;
2696}
2697
2698bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src,
2699 SDValue &SrcMods) const {
2700 unsigned Mods;
2701 if (SelectVOP3ModsImpl(In, Src, Mods)) {
2702 SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
2703 return true;
2704 }
2705
2706 return false;
2707}
2708
2709bool AMDGPUDAGToDAGISel::SelectVOP3BMods(SDValue In, SDValue &Src,
2710 SDValue &SrcMods) const {
2711 unsigned Mods;
2712 if (SelectVOP3ModsImpl(In, Src, Mods, /* AllowAbs */ false)) {
2713 SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
2714 return true;
2715 }
2716
2717 return false;
2718}
2719
2720bool AMDGPUDAGToDAGISel::SelectVOP3Mods_NNaN(SDValue In, SDValue &Src,
2721 SDValue &SrcMods) const {
2722 SelectVOP3Mods(In, Src, SrcMods);
2723 return isNoNanSrc(Src);
2724}
2725
2726bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(SDValue In, SDValue &Src) const {
2727 if (In.getOpcode() == ISD::FABS || In.getOpcode() == ISD::FNEG)
2728 return false;
2729
2730 Src = In;
2731 return true;
2732}
2733
2734bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(SDValue In, SDValue &Src,
2735 SDValue &SrcMods, SDValue &Clamp,
2736 SDValue &Omod) const {
2737 SDLoc DL(In);
2738 Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
2739 Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);
2740
2741 return SelectVOP3Mods(In, Src, SrcMods);
2742}
2743
2744bool AMDGPUDAGToDAGISel::SelectVOP3BMods0(SDValue In, SDValue &Src,
2745 SDValue &SrcMods, SDValue &Clamp,
2746 SDValue &Omod) const {
2747 SDLoc DL(In);
2748 Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
2749 Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);
2750
2751 return SelectVOP3BMods(In, Src, SrcMods);
2752}
2753
2754bool AMDGPUDAGToDAGISel::SelectVOP3OMods(SDValue In, SDValue &Src,
2755 SDValue &Clamp, SDValue &Omod) const {
2756 Src = In;
2757
2758 SDLoc DL(In);
2759 Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
2760 Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);
2761
2762 return true;
2763}
2764
2765bool AMDGPUDAGToDAGISel::SelectVOP3PMods(SDValue In, SDValue &Src,
2766 SDValue &SrcMods) const {
2767 unsigned Mods = 0;
2768 Src = In;
2769
2770 if (Src.getOpcode() == ISD::FNEG) {
2771 Mods ^= (SISrcMods::NEG | SISrcMods::NEG_HI);
2772 Src = Src.getOperand(0);
2773 }
2774
2775 if (Src.getOpcode() == ISD::BUILD_VECTOR) {
2776 unsigned VecMods = Mods;
2777
2778 SDValue Lo = stripBitcast(Src.getOperand(0));
2779 SDValue Hi = stripBitcast(Src.getOperand(1));
2780
2781 if (Lo.getOpcode() == ISD::FNEG) {
2782 Lo = stripBitcast(Lo.getOperand(0));
2783 Mods ^= SISrcMods::NEG;
2784 }
2785
2786 if (Hi.getOpcode() == ISD::FNEG) {
2787 Hi = stripBitcast(Hi.getOperand(0));
2788 Mods ^= SISrcMods::NEG_HI;
2789 }
2790
2791 if (isExtractHiElt(Lo, Lo))
2792 Mods |= SISrcMods::OP_SEL_0;
2793
2794 if (isExtractHiElt(Hi, Hi))
2795 Mods |= SISrcMods::OP_SEL_1;
2796
2797 unsigned VecSize = Src.getValueSizeInBits();
2798 Lo = stripExtractLoElt(Lo);
2799 Hi = stripExtractLoElt(Hi);
2800
2801 if (Lo.getValueSizeInBits() > VecSize) {
2802 Lo = CurDAG->getTargetExtractSubreg(
2803 (VecSize > 32) ? AMDGPU::sub0_sub1 : AMDGPU::sub0, SDLoc(In),
2804 MVT::getIntegerVT(VecSize), Lo);
2805 }
2806
2807 if (Hi.getValueSizeInBits() > VecSize) {
2808 Hi = CurDAG->getTargetExtractSubreg(
2809 (VecSize > 32) ? AMDGPU::sub0_sub1 : AMDGPU::sub0, SDLoc(In),
2810 MVT::getIntegerVT(VecSize), Hi);
2811 }
2812
2813 assert(Lo.getValueSizeInBits() <= VecSize &&((Lo.getValueSizeInBits() <= VecSize && Hi.getValueSizeInBits
() <= VecSize) ? static_cast<void> (0) : __assert_fail
("Lo.getValueSizeInBits() <= VecSize && Hi.getValueSizeInBits() <= VecSize"
, "/build/llvm-toolchain-snapshot-13~++20210223111116+16ede0956cb1/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp"
, 2814, __PRETTY_FUNCTION__))
2814 Hi.getValueSizeInBits() <= VecSize)((Lo.getValueSizeInBits() <= VecSize && Hi.getValueSizeInBits
() <= VecSize) ? static_cast<void> (0) : __assert_fail
("Lo.getValueSizeInBits() <= VecSize && Hi.getValueSizeInBits() <= VecSize"
, "/build/llvm-toolchain-snapshot-13~++20210223111116+16ede0956cb1/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp"
, 2814, __PRETTY_FUNCTION__))
;
2815
2816 if (Lo == Hi && !isInlineImmediate(Lo.getNode())) {
2817 // Really a scalar input. Just select from the low half of the register to
2818 // avoid packing.
2819
2820 if (VecSize == 32 || VecSize == Lo.getValueSizeInBits()) {
2821 Src = Lo;
2822 } else {
2823 assert(Lo.getValueSizeInBits() == 32 && VecSize == 64)((Lo.getValueSizeInBits() == 32 && VecSize == 64) ? static_cast
<void> (0) : __assert_fail ("Lo.getValueSizeInBits() == 32 && VecSize == 64"
, "/build/llvm-toolchain-snapshot-13~++20210223111116+16ede0956cb1/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp"
, 2823, __PRETTY_FUNCTION__))
;
2824
2825 SDLoc SL(In);
2826 SDValue Undef = SDValue(
2827 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, SL,
2828 Lo.getValueType()), 0);
2829 auto RC = Lo->isDivergent() ? AMDGPU::VReg_64RegClassID
2830 : AMDGPU::SReg_64RegClassID;
2831 const SDValue Ops[] = {
2832 CurDAG->getTargetConstant(RC, SL, MVT::i32),
2833 Lo, CurDAG->getTargetConstant(AMDGPU::sub0, SL, MVT::i32),
2834 Undef, CurDAG->getTargetConstant(AMDGPU::sub1, SL, MVT::i32) };
2835
2836 Src = SDValue(CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, SL,
2837 Src.getValueType(), Ops), 0);
2838 }
2839 SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
2840 return true;
2841 }
2842
2843 if (VecSize == 64 && Lo == Hi && isa<ConstantFPSDNode>(Lo)) {
2844 uint64_t Lit = cast<ConstantFPSDNode>(Lo)->getValueAPF()
2845 .bitcastToAPInt().getZExtValue();
2846 if (AMDGPU::isInlinableLiteral32(Lit, Subtarget->hasInv2PiInlineImm())) {
2847 Src = CurDAG->getTargetConstant(Lit, SDLoc(In), MVT::i64);;
2848 SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
2849 return true;
2850 }
2851 }
2852
2853 Mods = VecMods;
2854 }
2855
2856 // Packed instructions do not have abs modifiers.
2857 Mods |= SISrcMods::OP_SEL_1;
2858
2859 SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
2860 return true;
2861}
2862
2863bool AMDGPUDAGToDAGISel::SelectVOP3OpSel(SDValue In, SDValue &Src,
2864 SDValue &SrcMods) const {
2865 Src = In;
2866 // FIXME: Handle op_sel
2867 SrcMods = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
2868 return true;
2869}
2870
2871bool AMDGPUDAGToDAGISel::SelectVOP3OpSelMods(SDValue In, SDValue &Src,
2872 SDValue &SrcMods) const {
2873 // FIXME: Handle op_sel
2874 return SelectVOP3Mods(In, Src, SrcMods);
2875}
2876
2877// The return value is not whether the match is possible (which it always is),
2878// but whether or not it a conversion is really used.
2879bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src,
2880                                                   unsigned &Mods) const {
2881  Mods = 0;
  // Strip an outer fneg/fabs into Mods first.
2882  SelectVOP3ModsImpl(In, Src, Mods);
2883
  // If what remains is an f16 -> f32 extension, the mix instruction can read
  // the f16 source directly; fold the extension away.
2884  if (Src.getOpcode() == ISD::FP_EXTEND) {
2885    Src = Src.getOperand(0);
2886    assert(Src.getValueType() == MVT::f16)((Src.getValueType() == MVT::f16) ? static_cast<void> (
0) : __assert_fail ("Src.getValueType() == MVT::f16", "/build/llvm-toolchain-snapshot-13~++20210223111116+16ede0956cb1/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp"
, 2886, __PRETTY_FUNCTION__))
;
2887    Src = stripBitcast(Src);
2888
2889    // Be careful about folding modifiers if we already have an abs. fneg is
2890    // applied last, so we don't want to apply an earlier fneg.
2891    if ((Mods & SISrcMods::ABS) == 0) {
2892      unsigned ModsTmp;
2893      SelectVOP3ModsImpl(Src, Src, ModsTmp);
2894
      // Inner negs compose with the outer one (XOR); abs just turns on.
2895      if ((ModsTmp & SISrcMods::NEG) != 0)
2896        Mods ^= SISrcMods::NEG;
2897
2898      if ((ModsTmp & SISrcMods::ABS) != 0)
2899        Mods |= SISrcMods::ABS;
2900    }
2901
2902    // op_sel/op_sel_hi decide the source type and source.
2903    // If the source's op_sel_hi is set, it indicates to do a conversion from fp16.
2904    // If the sources's op_sel is set, it picks the high half of the source
2905    // register.
2906
2907    Mods |= SISrcMods::OP_SEL_1;
2908    if (isExtractHiElt(Src, Src)) {
2909      Mods |= SISrcMods::OP_SEL_0;
2910
2911      // TODO: Should we try to look for neg/abs here?
2912    }
2913
2914    return true;
2915  }
2916
  // No conversion folded; caller sees Mods from the outer fneg/fabs only.
2917  return false;
2918}
2919
2920bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixMods(SDValue In, SDValue &Src,
2921 SDValue &SrcMods) const {
2922 unsigned Mods = 0;
2923 SelectVOP3PMadMixModsImpl(In, Src, Mods);
2924 SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
2925 return true;
2926}
2927
2928SDValue AMDGPUDAGToDAGISel::getHi16Elt(SDValue In) const {
2929 if (In.isUndef())
2930 return CurDAG->getUNDEF(MVT::i32);
2931
2932 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(In)) {
2933 SDLoc SL(In);
2934 return CurDAG->getConstant(C->getZExtValue() << 16, SL, MVT::i32);
2935 }
2936
2937 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(In)) {
2938 SDLoc SL(In);
2939 return CurDAG->getConstant(
2940 C->getValueAPF().bitcastToAPInt().getZExtValue() << 16, SL, MVT::i32);
2941 }
2942
2943 SDValue Src;
2944 if (isExtractHiElt(In, Src))
2945 return Src;
2946
2947 return SDValue();
2948}
2949
// Decide whether an immediate should be materialized in a VGPR: returns true
// when at least one examined use strictly requires a VGPR operand (even after
// trying to commute it away) and we inspected at most 10 uses.
2950bool AMDGPUDAGToDAGISel::isVGPRImm(const SDNode * N) const {
2951  assert(CurDAG->getTarget().getTargetTriple().getArch() == Triple::amdgcn)((CurDAG->getTarget().getTargetTriple().getArch() == Triple
::amdgcn) ? static_cast<void> (0) : __assert_fail ("CurDAG->getTarget().getTargetTriple().getArch() == Triple::amdgcn"
, "/build/llvm-toolchain-snapshot-13~++20210223111116+16ede0956cb1/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp"
, 2951, __PRETTY_FUNCTION__))
;
2952
2953  const SIRegisterInfo *SIRI =
2954    static_cast<const SIRegisterInfo *>(Subtarget->getRegisterInfo());
2955  const SIInstrInfo * SII =
2956    static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
2957
  // Cap the scan at 10 uses; past that we conservatively answer false.
2958  unsigned Limit = 0;
2959  bool AllUsesAcceptSReg = true;
2960  for (SDNode::use_iterator U = N->use_begin(), E = SDNode::use_end();
2961    Limit < 10 && U != E; ++U, ++Limit) {
2962    const TargetRegisterClass *RC = getOperandRegClass(*U, U.getOperandNo());
2963
2964    // If the register class is unknown, it could be an unknown
2965    // register class that needs to be an SGPR, e.g. an inline asm
2966    // constraint
2967    if (!RC || SIRI->isSGPRClass(RC))
2968      return false;
2969
2970    if (RC != &AMDGPU::VS_32RegClass) {
2971      AllUsesAcceptSReg = false;
      // Before giving up on this use, see if commuting the instruction would
      // move the immediate into a VS_32-compatible operand slot.
2972      SDNode * User = *U;
2973      if (User->isMachineOpcode()) {
2974        unsigned Opc = User->getMachineOpcode();
2975        MCInstrDesc Desc = SII->get(Opc);
2976        if (Desc.isCommutable()) {
2977          unsigned OpIdx = Desc.getNumDefs() + U.getOperandNo();
2978          unsigned CommuteIdx1 = TargetInstrInfo::CommuteAnyOperandIndex;
2979          if (SII->findCommutedOpIndices(Desc, OpIdx, CommuteIdx1)) {
2980            unsigned CommutedOpNo = CommuteIdx1 - Desc.getNumDefs();
2981            const TargetRegisterClass *CommutedRC = getOperandRegClass(*U, CommutedOpNo);
2982            if (CommutedRC == &AMDGPU::VS_32RegClass)
2983              AllUsesAcceptSReg = true;
2984          }
2985        }
2986      }
2987      // If "AllUsesAcceptSReg == false" so far we haven't suceeded
2988      // commuting current user. This means have at least one use
2989      // that strictly require VGPR. Thus, we will not attempt to commute
2990      // other user instructions.
2991      if (!AllUsesAcceptSReg)
2992        break;
2993    }
2994  }
  // Limit < 10 distinguishes "found a VGPR-only use" from "gave up scanning".
2995  return !AllUsesAcceptSReg && (Limit < 10);
2996}
2997
2998bool AMDGPUDAGToDAGISel::isUniformLoad(const SDNode * N) const {
2999 auto Ld = cast<LoadSDNode>(N);
3000
3001 return Ld->getAlignment() >= 4 &&
3002 (
3003 (
3004 (
3005 Ld->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS ||
3006 Ld->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT
3007 )
3008 &&
3009 !N->isDivergent()
3010 )
3011 ||
3012 (
3013 Subtarget->getScalarizeGlobalBehavior() &&
3014 Ld->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS &&
3015 Ld->isSimple() &&
3016 !N->isDivergent() &&
3017 static_cast<const SITargetLowering *>(
3018 getTargetLowering())->isMemOpHasNoClobberedMemOperand(N)
3019 )
3020 );
3021}
3022
3023void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
3024 const AMDGPUTargetLowering& Lowering =
3025 *static_cast<const AMDGPUTargetLowering*>(getTargetLowering());
3026 bool IsModified = false;
3027 do {
3028 IsModified = false;
3029
3030 // Go over all selected nodes and try to fold them a bit more
3031 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_begin();
3032 while (Position != CurDAG->allnodes_end()) {
3033 SDNode *Node = &*Position++;
3034 MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(Node);
3035 if (!MachineNode)
3036 continue;
3037
3038 SDNode *ResNode = Lowering.PostISelFolding(MachineNode, *CurDAG);
3039 if (ResNode != Node) {
3040 if (ResNode)
3041 ReplaceUses(Node, ResNode);
3042 IsModified = true;
3043 }
3044 }
3045 CurDAG->RemoveDeadNodes();
3046 } while (IsModified);
3047}
3048
3049bool R600DAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
3050 Subtarget = &MF.getSubtarget<R600Subtarget>();
3051 return SelectionDAGISel::runOnMachineFunction(MF);
3052}
3053
3054bool R600DAGToDAGISel::isConstantLoad(const MemSDNode *N, int CbId) const {
3055 if (!N->readMem())
3056 return false;
3057 if (CbId == -1)
3058 return N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS ||
3059 N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT;
3060
3061 return N->getAddressSpace() == AMDGPUAS::CONSTANT_BUFFER_0 + CbId;
3062}
3063
3064bool R600DAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr,
3065 SDValue& IntPtr) {
3066 if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Addr)) {
3067 IntPtr = CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, SDLoc(Addr),
3068 true);
3069 return true;
3070 }
3071 return false;
3072}
3073
3074bool R600DAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr,
3075 SDValue& BaseReg, SDValue &Offset) {
3076 if (!isa<ConstantSDNode>(Addr)) {
3077 BaseReg = Addr;
3078 Offset = CurDAG->getIntPtrConstant(0, SDLoc(Addr), true);
3079 return true;
3080 }
3081 return false;
3082}
3083
// R600 top-level node selection: custom-handle vector builds (which need a
// target register class chosen up front); everything else goes to the
// generated matcher.
3084void R600DAGToDAGISel::Select(SDNode *N) {
3085  unsigned int Opc = N->getOpcode();
3086  if (N->isMachineOpcode()) {
3087    N->setNodeId(-1);
3088    return;   // Already selected.
3089  }
3090
3091  switch (Opc) {
3092  default: break;
3093  case AMDGPUISD::BUILD_VERTICAL_VECTOR:
3094  case ISD::SCALAR_TO_VECTOR:
3095  case ISD::BUILD_VECTOR: {
3096    EVT VT = N->getValueType(0);
3097    unsigned NumVectorElts = VT.getVectorNumElements();
3098    unsigned RegClassID;
3099    // BUILD_VECTOR was lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG
3100    // that adds a  128 bits reg copy when going through TwoAddressInstructions
3101    // pass. We want to avoid 128 bits copies as much as possible because they
3102    // can't be bundled by our scheduler.
3103    switch(NumVectorElts) {
3104    case 2: RegClassID = R600::R600_Reg64RegClassID; break;
3105    case 4:
      // Vertical vectors use a distinct 128-bit register class.
3106      if (Opc == AMDGPUISD::BUILD_VERTICAL_VECTOR)
3107        RegClassID = R600::R600_Reg128VerticalRegClassID;
3108      else
3109        RegClassID = R600::R600_Reg128RegClassID;
3110      break;
3111    default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR")::llvm::llvm_unreachable_internal("Do not know how to lower this BUILD_VECTOR"
, "/build/llvm-toolchain-snapshot-13~++20210223111116+16ede0956cb1/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp"
, 3111)
;
3112    }
3113    SelectBuildVector(N, RegClassID);
3114    return;
3115  }
3116  }
3117
3118  SelectCode(N);
3119}
3120
3121bool R600DAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
3122 SDValue &Offset) {
3123 ConstantSDNode *C;
3124 SDLoc DL(Addr);
3125
3126 if ((C = dyn_cast<ConstantSDNode>(Addr))) {
3127 Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
3128 Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
3129 } else if ((Addr.getOpcode() == AMDGPUISD::DWORDADDR) &&
3130 (C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) {
3131 Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
3132 Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
3133 } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
3134 (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
3135 Base = Addr.getOperand(0);
3136 Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
3137 } else {
3138 Base = Addr;
3139 Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
3140 }
3141
3142 return true;
3143}
3144
3145bool R600DAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
3146 SDValue &Offset) {
3147 ConstantSDNode *IMMOffset;
3148
3149 if (Addr.getOpcode() == ISD::ADD
3150 && (IMMOffset = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
3151 && isInt<16>(IMMOffset->getZExtValue())) {
3152
3153 Base = Addr.getOperand(0);
3154 Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
3155 MVT::i32);
3156 return true;
3157 // If the pointer address is constant, we can move it to the offset field.
3158 } else if ((IMMOffset = dyn_cast<ConstantSDNode>(Addr))
3159 && isInt<16>(IMMOffset->getZExtValue())) {
3160 Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
3161 SDLoc(CurDAG->getEntryNode()),
3162 R600::ZERO, MVT::i32);
3163 Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
3164 MVT::i32);
3165 return true;
3166 }
3167
3168 // Default case, no offset
3169 Base = Addr;
3170 Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
3171 return true;
3172}