Bug Summary

File: llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
Warning: line 1929, column 13
Value stored to 'RemainderOffset' during its initialization is never read
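
This message comes from the analyzer's dead-store check (enabled above via -analyzer-checker=deadcode): the value computed into 'RemainderOffset' at its declaration is never read before the variable is overwritten or goes out of scope. A minimal sketch of the flagged pattern (hypothetical names and arithmetic, not the actual code at line 1929):

    #include <cstdint>

    int64_t computeRemainder(int64_t Offset) { return Offset & 0xfff; }

    int64_t example(int64_t Offset) {
      int64_t RemainderOffset = Offset % 4096;    // value stored here is never read ...
      RemainderOffset = computeRemainder(Offset); // ... because it is overwritten before any use
      return RemainderOffset;
    }

The usual fix is to drop the unused initializer, or to declare the variable at the point where it is first genuinely assigned.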

Annotated Source Code

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name AMDGPUISelDAGToDAG.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mframe-pointer=none -fmath-errno -fno-rounding-math -mconstructor-aliases -munwind-tables -target-cpu x86-64 -tune-cpu generic -fno-split-dwarf-inlining -debugger-tuning=gdb -ffunction-sections -fdata-sections -resource-dir /usr/lib/llvm-12/lib/clang/12.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I /build/llvm-toolchain-snapshot-12~++20201124111112+7b5254223ac/build-llvm/lib/Target/AMDGPU -I /build/llvm-toolchain-snapshot-12~++20201124111112+7b5254223ac/llvm/lib/Target/AMDGPU -I /build/llvm-toolchain-snapshot-12~++20201124111112+7b5254223ac/build-llvm/include -I /build/llvm-toolchain-snapshot-12~++20201124111112+7b5254223ac/llvm/include -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0/backward -internal-isystem /usr/local/include -internal-isystem /usr/lib/llvm-12/lib/clang/12.0.0/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir /build/llvm-toolchain-snapshot-12~++20201124111112+7b5254223ac/build-llvm/lib/Target/AMDGPU -fdebug-prefix-map=/build/llvm-toolchain-snapshot-12~++20201124111112+7b5254223ac=. -ferror-limit 19 -fvisibility hidden -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -o /tmp/scan-build-2020-11-24-172238-38865-1 -x c++ /build/llvm-toolchain-snapshot-12~++20201124111112+7b5254223ac/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
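
The -cc1 line above is the exact invocation scan-build recorded for this translation unit. For reference, a much shorter driver-level command runs the same family of checks by hand (a sketch; the include paths are assumptions that depend on the build tree):

    clang --analyze \
      -Xclang -analyzer-checker=core,deadcode \
      -I build-llvm/include -I llvm/include \
      -I build-llvm/lib/Target/AMDGPU -I llvm/lib/Target/AMDGPU \
      -std=c++14 llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp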
1//===-- AMDGPUISelDAGToDAG.cpp - A dag to dag inst selector for AMDGPU ----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//==-----------------------------------------------------------------------===//
8//
9/// \file
10/// Defines an instruction selector for the AMDGPU target.
11//
12//===----------------------------------------------------------------------===//
13
14#include "AMDGPU.h"
15#include "AMDGPUArgumentUsageInfo.h"
16#include "AMDGPUISelLowering.h" // For AMDGPUISD
17#include "AMDGPUInstrInfo.h"
18#include "AMDGPUPerfHintAnalysis.h"
19#include "AMDGPUSubtarget.h"
20#include "AMDGPUTargetMachine.h"
21#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
22#include "SIDefines.h"
23#include "SIISelLowering.h"
24#include "SIInstrInfo.h"
25#include "SIMachineFunctionInfo.h"
26#include "SIRegisterInfo.h"
27#include "llvm/ADT/APInt.h"
28#include "llvm/ADT/SmallVector.h"
29#include "llvm/ADT/StringRef.h"
30#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
31#include "llvm/Analysis/LoopInfo.h"
32#include "llvm/Analysis/ValueTracking.h"
33#include "llvm/CodeGen/FunctionLoweringInfo.h"
34#include "llvm/CodeGen/ISDOpcodes.h"
35#include "llvm/CodeGen/MachineFunction.h"
36#include "llvm/CodeGen/MachineRegisterInfo.h"
37#include "llvm/CodeGen/SelectionDAG.h"
38#include "llvm/CodeGen/SelectionDAGISel.h"
39#include "llvm/CodeGen/SelectionDAGNodes.h"
40#include "llvm/CodeGen/ValueTypes.h"
41#include "llvm/IR/BasicBlock.h"
42#include "llvm/InitializePasses.h"
43#ifdef EXPENSIVE_CHECKS
44#include "llvm/IR/Dominators.h"
45#endif
46#include "llvm/IR/Instruction.h"
47#include "llvm/MC/MCInstrDesc.h"
48#include "llvm/Support/Casting.h"
49#include "llvm/Support/CodeGen.h"
50#include "llvm/Support/ErrorHandling.h"
51#include "llvm/Support/MachineValueType.h"
52#include "llvm/Support/MathExtras.h"
53#include <cassert>
54#include <cstdint>
55#include <new>
56#include <vector>
57
58#define DEBUG_TYPE "isel"
59
60using namespace llvm;
61
62namespace llvm {
63
64class R600InstrInfo;
65
66} // end namespace llvm
67
68//===----------------------------------------------------------------------===//
69// Instruction Selector Implementation
70//===----------------------------------------------------------------------===//
71
72namespace {
73
74static bool isNullConstantOrUndef(SDValue V) {
75 if (V.isUndef())
76 return true;
77
78 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V);
79 return Const != nullptr && Const->isNullValue();
80}
81
82static bool getConstantValue(SDValue N, uint32_t &Out) {
83 // This is only used for packed vectors, where using 0 for undef should
84 // always be good.
85 if (N.isUndef()) {
86 Out = 0;
87 return true;
88 }
89
90 if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N)) {
91 Out = C->getAPIntValue().getSExtValue();
92 return true;
93 }
94
95 if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N)) {
96 Out = C->getValueAPF().bitcastToAPInt().getSExtValue();
97 return true;
98 }
99
100 return false;
101}
102
103// TODO: Handle undef as zero
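// For example, a v2i16 build_vector of the constants 1 and 2 packs to
// (1 & 0xffff) | (2 << 16) = 0x00020001, which one S_MOV_B32 materializes.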
104static SDNode *packConstantV2I16(const SDNode *N, SelectionDAG &DAG,
105 bool Negate = false) {
106 assert(N->getOpcode() == ISD::BUILD_VECTOR && N->getNumOperands() == 2);
107 uint32_t LHSVal, RHSVal;
108 if (getConstantValue(N->getOperand(0), LHSVal) &&
109 getConstantValue(N->getOperand(1), RHSVal)) {
110 SDLoc SL(N);
111 uint32_t K = Negate ?
112 (-LHSVal & 0xffff) | (-RHSVal << 16) :
113 (LHSVal & 0xffff) | (RHSVal << 16);
114 return DAG.getMachineNode(AMDGPU::S_MOV_B32, SL, N->getValueType(0),
115 DAG.getTargetConstant(K, SL, MVT::i32));
116 }
117
118 return nullptr;
119}
120
121static SDNode *packNegConstantV2I16(const SDNode *N, SelectionDAG &DAG) {
122 return packConstantV2I16(N, DAG, true);
123}
124
125/// AMDGPU specific code to select AMDGPU machine instructions for
126/// SelectionDAG operations.
127class AMDGPUDAGToDAGISel : public SelectionDAGISel {
128 // Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can
129 // make the right decision when generating code for different targets.
130 const GCNSubtarget *Subtarget;
131
132 // Default FP mode for the current function.
133 AMDGPU::SIModeRegisterDefaults Mode;
134
135 bool EnableLateStructurizeCFG;
136
137public:
138 explicit AMDGPUDAGToDAGISel(TargetMachine *TM = nullptr,
139 CodeGenOpt::Level OptLevel = CodeGenOpt::Default)
140 : SelectionDAGISel(*TM, OptLevel) {
141 EnableLateStructurizeCFG = AMDGPUTargetMachine::EnableLateStructurizeCFG;
142 }
143 ~AMDGPUDAGToDAGISel() override = default;
144
145 void getAnalysisUsage(AnalysisUsage &AU) const override {
146 AU.addRequired<AMDGPUArgumentUsageInfo>();
147 AU.addRequired<LegacyDivergenceAnalysis>();
148#ifdef EXPENSIVE_CHECKS
149 AU.addRequired<DominatorTreeWrapperPass>();
150 AU.addRequired<LoopInfoWrapperPass>();
151#endif
152 SelectionDAGISel::getAnalysisUsage(AU);
153 }
154
155 bool matchLoadD16FromBuildVector(SDNode *N) const;
156
157 bool runOnMachineFunction(MachineFunction &MF) override;
158 void PreprocessISelDAG() override;
159 void Select(SDNode *N) override;
160 StringRef getPassName() const override;
161 void PostprocessISelDAG() override;
162
163protected:
164 void SelectBuildVector(SDNode *N, unsigned RegClassID);
165
166private:
167 std::pair<SDValue, SDValue> foldFrameIndex(SDValue N) const;
168 bool isNoNanSrc(SDValue N) const;
169 bool isInlineImmediate(const SDNode *N, bool Negated = false) const;
170 bool isNegInlineImmediate(const SDNode *N) const {
171 return isInlineImmediate(N, true);
172 }
173
174 bool isInlineImmediate16(int64_t Imm) const {
175 return AMDGPU::isInlinableLiteral16(Imm, Subtarget->hasInv2PiInlineImm());
176 }
177
178 bool isInlineImmediate32(int64_t Imm) const {
179 return AMDGPU::isInlinableLiteral32(Imm, Subtarget->hasInv2PiInlineImm());
180 }
181
182 bool isInlineImmediate64(int64_t Imm) const {
183 return AMDGPU::isInlinableLiteral64(Imm, Subtarget->hasInv2PiInlineImm());
184 }
185
186 bool isInlineImmediate(const APFloat &Imm) const {
187 return Subtarget->getInstrInfo()->isInlineConstant(Imm);
188 }
189
190 bool isVGPRImm(const SDNode *N) const;
191 bool isUniformLoad(const SDNode *N) const;
192 bool isUniformBr(const SDNode *N) const;
193
194 bool isBaseWithConstantOffset64(SDValue Addr, SDValue &LHS,
195 SDValue &RHS) const;
196
197 MachineSDNode *buildSMovImm64(SDLoc &DL, uint64_t Val, EVT VT) const;
198
199 SDNode *glueCopyToOp(SDNode *N, SDValue NewChain, SDValue Glue) const;
200 SDNode *glueCopyToM0(SDNode *N, SDValue Val) const;
201 SDNode *glueCopyToM0LDSInit(SDNode *N) const;
202
203 const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const;
204 virtual bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
205 virtual bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset);
206 bool isDSOffsetLegal(SDValue Base, unsigned Offset) const;
207 bool isDSOffset2Legal(SDValue Base, unsigned Offset0, unsigned Offset1,
208 unsigned Size) const;
209 bool SelectDS1Addr1Offset(SDValue Ptr, SDValue &Base, SDValue &Offset) const;
210 bool SelectDS64Bit4ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0,
211 SDValue &Offset1) const;
212 bool SelectDS128Bit8ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0,
213 SDValue &Offset1) const;
214 bool SelectDSReadWrite2(SDValue Ptr, SDValue &Base, SDValue &Offset0,
215 SDValue &Offset1, unsigned Size) const;
216 bool SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
217 SDValue &SOffset, SDValue &Offset, SDValue &Offen,
218 SDValue &Idxen, SDValue &Addr64, SDValue &GLC, SDValue &SLC,
219 SDValue &TFE, SDValue &DLC, SDValue &SWZ) const;
220 bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
221 SDValue &SOffset, SDValue &Offset, SDValue &GLC,
222 SDValue &SLC, SDValue &TFE, SDValue &DLC,
223 SDValue &SWZ) const;
224 bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
225 SDValue &VAddr, SDValue &SOffset, SDValue &Offset,
226 SDValue &SLC) const;
227 bool SelectMUBUFScratchOffen(SDNode *Parent,
228 SDValue Addr, SDValue &RSrc, SDValue &VAddr,
229 SDValue &SOffset, SDValue &ImmOffset) const;
230 bool SelectMUBUFScratchOffset(SDNode *Parent,
231 SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
232 SDValue &Offset) const;
233
234 bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &SOffset,
235 SDValue &Offset, SDValue &GLC, SDValue &SLC,
236 SDValue &TFE, SDValue &DLC, SDValue &SWZ) const;
237 bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
238 SDValue &Offset, SDValue &SLC) const;
239 bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
240 SDValue &Offset) const;
241
242 template <bool IsSigned>
243 bool SelectFlatOffset(SDNode *N, SDValue Addr, SDValue &VAddr,
244 SDValue &Offset) const;
245 bool SelectGlobalSAddr(SDNode *N, SDValue Addr, SDValue &SAddr,
246 SDValue &VOffset, SDValue &Offset) const;
247 bool SelectScratchSAddr(SDNode *N, SDValue Addr, SDValue &SAddr,
248 SDValue &Offset) const;
249
250 bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset,
251 bool &Imm) const;
252 SDValue Expand32BitAddress(SDValue Addr) const;
253 bool SelectSMRD(SDValue Addr, SDValue &SBase, SDValue &Offset,
254 bool &Imm) const;
255 bool SelectSMRDImm(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
256 bool SelectSMRDImm32(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
257 bool SelectSMRDSgpr(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
258 bool SelectSMRDBufferImm(SDValue Addr, SDValue &Offset) const;
259 bool SelectSMRDBufferImm32(SDValue Addr, SDValue &Offset) const;
260 bool SelectMOVRELOffset(SDValue Index, SDValue &Base, SDValue &Offset) const;
261
262 bool SelectVOP3Mods_NNaN(SDValue In, SDValue &Src, SDValue &SrcMods) const;
263 bool SelectVOP3ModsImpl(SDValue In, SDValue &Src, unsigned &SrcMods,
264 bool AllowAbs = true) const;
265 bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
266 bool SelectVOP3BMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
267 bool SelectVOP3NoMods(SDValue In, SDValue &Src) const;
268 bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods,
269 SDValue &Clamp, SDValue &Omod) const;
270 bool SelectVOP3BMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
271 SDValue &Clamp, SDValue &Omod) const;
272 bool SelectVOP3NoMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
273 SDValue &Clamp, SDValue &Omod) const;
274
275 bool SelectVOP3OMods(SDValue In, SDValue &Src,
276 SDValue &Clamp, SDValue &Omod) const;
277
278 bool SelectVOP3PMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
279
280 bool SelectVOP3OpSel(SDValue In, SDValue &Src, SDValue &SrcMods) const;
281
282 bool SelectVOP3OpSelMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
283 bool SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src, unsigned &Mods) const;
284 bool SelectVOP3PMadMixMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
285
286 SDValue getHi16Elt(SDValue In) const;
287
288 SDValue getMaterializedScalarImm32(int64_t Val, const SDLoc &DL) const;
289
290 void SelectADD_SUB_I64(SDNode *N);
291 void SelectAddcSubb(SDNode *N);
292 void SelectUADDO_USUBO(SDNode *N);
293 void SelectDIV_SCALE(SDNode *N);
294 void SelectMAD_64_32(SDNode *N);
295 void SelectFMA_W_CHAIN(SDNode *N);
296 void SelectFMUL_W_CHAIN(SDNode *N);
297
298 SDNode *getS_BFE(unsigned Opcode, const SDLoc &DL, SDValue Val,
299 uint32_t Offset, uint32_t Width);
300 void SelectS_BFEFromShifts(SDNode *N);
301 void SelectS_BFE(SDNode *N);
302 bool isCBranchSCC(const SDNode *N) const;
303 void SelectBRCOND(SDNode *N);
304 void SelectFMAD_FMA(SDNode *N);
305 void SelectATOMIC_CMP_SWAP(SDNode *N);
306 void SelectDSAppendConsume(SDNode *N, unsigned IntrID);
307 void SelectDS_GWS(SDNode *N, unsigned IntrID);
308 void SelectInterpP1F16(SDNode *N);
309 void SelectINTRINSIC_W_CHAIN(SDNode *N);
310 void SelectINTRINSIC_WO_CHAIN(SDNode *N);
311 void SelectINTRINSIC_VOID(SDNode *N);
312
313protected:
314 // Include the pieces autogenerated from the target description.
315#include "AMDGPUGenDAGISel.inc"
316};
317
318class R600DAGToDAGISel : public AMDGPUDAGToDAGISel {
319 const R600Subtarget *Subtarget;
320
321 bool isConstantLoad(const MemSDNode *N, int cbID) const;
322 bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue& IntPtr);
323 bool SelectGlobalValueVariableOffset(SDValue Addr, SDValue &BaseReg,
324 SDValue& Offset);
325public:
326 explicit R600DAGToDAGISel(TargetMachine *TM, CodeGenOpt::Level OptLevel) :
327 AMDGPUDAGToDAGISel(TM, OptLevel) {}
328
329 void Select(SDNode *N) override;
330
331 bool SelectADDRIndirect(SDValue Addr, SDValue &Base,
332 SDValue &Offset) override;
333 bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
334 SDValue &Offset) override;
335
336 bool runOnMachineFunction(MachineFunction &MF) override;
337
338 void PreprocessISelDAG() override {}
339
340protected:
341 // Include the pieces autogenerated from the target description.
342#include "R600GenDAGISel.inc"
343};
344
345static SDValue stripBitcast(SDValue Val) {
346 return Val.getOpcode() == ISD::BITCAST ? Val.getOperand(0) : Val;
347}
348
349// Figure out if this is really an extract of the high 16-bits of a dword.
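// The matched shape is (trunc (srl X, 16)), possibly through bitcasts; on
// success Out is set to X.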
350static bool isExtractHiElt(SDValue In, SDValue &Out) {
351 In = stripBitcast(In);
352 if (In.getOpcode() != ISD::TRUNCATE)
353 return false;
354
355 SDValue Srl = In.getOperand(0);
356 if (Srl.getOpcode() == ISD::SRL) {
357 if (ConstantSDNode *ShiftAmt = dyn_cast<ConstantSDNode>(Srl.getOperand(1))) {
358 if (ShiftAmt->getZExtValue() == 16) {
359 Out = stripBitcast(Srl.getOperand(0));
360 return true;
361 }
362 }
363 }
364
365 return false;
366}
367
368// Look through operations that obscure just looking at the low 16-bits of the
369// same register.
370static SDValue stripExtractLoElt(SDValue In) {
371 if (In.getOpcode() == ISD::TRUNCATE) {
372 SDValue Src = In.getOperand(0);
373 if (Src.getValueType().getSizeInBits() == 32)
374 return stripBitcast(Src);
375 }
376
377 return In;
378}
379
380} // end anonymous namespace
381
382INITIALIZE_PASS_BEGIN(AMDGPUDAGToDAGISel, "amdgpu-isel",
383 "AMDGPU DAG->DAG Pattern Instruction Selection", false, false)
384INITIALIZE_PASS_DEPENDENCY(AMDGPUArgumentUsageInfo)
385INITIALIZE_PASS_DEPENDENCY(AMDGPUPerfHintAnalysis)
386INITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis)
387#ifdef EXPENSIVE_CHECKS
388INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
389INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
390#endif
391INITIALIZE_PASS_END(AMDGPUDAGToDAGISel, "amdgpu-isel",
392 "AMDGPU DAG->DAG Pattern Instruction Selection", false, false)
393
394/// This pass converts a legalized DAG into an AMDGPU-specific
395/// DAG, ready for instruction scheduling.
396FunctionPass *llvm::createAMDGPUISelDag(TargetMachine *TM,
397 CodeGenOpt::Level OptLevel) {
398 return new AMDGPUDAGToDAGISel(TM, OptLevel);
399}
400
401/// This pass converts a legalized DAG into an R600-specific
402/// DAG, ready for instruction scheduling.
403FunctionPass *llvm::createR600ISelDag(TargetMachine *TM,
404 CodeGenOpt::Level OptLevel) {
405 return new R600DAGToDAGISel(TM, OptLevel);
406}
407
408bool AMDGPUDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
409#ifdef EXPENSIVE_CHECKS
410 DominatorTree & DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
411 LoopInfo * LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
412 for (auto &L : LI->getLoopsInPreorder()) {
413 assert(L->isLCSSAForm(DT));
414 }
415#endif
416 Subtarget = &MF.getSubtarget<GCNSubtarget>();
417 Mode = AMDGPU::SIModeRegisterDefaults(MF.getFunction());
418 return SelectionDAGISel::runOnMachineFunction(MF);
419}
420
421bool AMDGPUDAGToDAGISel::matchLoadD16FromBuildVector(SDNode *N) const {
422 assert(Subtarget->d16PreservesUnusedBits());
423 MVT VT = N->getValueType(0).getSimpleVT();
424 if (VT != MVT::v2i16 && VT != MVT::v2f16)
425 return false;
426
427 SDValue Lo = N->getOperand(0);
428 SDValue Hi = N->getOperand(1);
429
430 LoadSDNode *LdHi = dyn_cast<LoadSDNode>(stripBitcast(Hi));
431
432 // build_vector lo, (load ptr) -> load_d16_hi ptr, lo
433 // build_vector lo, (zextload ptr from i8) -> load_d16_hi_u8 ptr, lo
434 // build_vector lo, (sextload ptr from i8) -> load_d16_hi_i8 ptr, lo
435
436 // Need to check for possible indirect dependencies on the other half of the
437 // vector to avoid introducing a cycle.
438 if (LdHi && Hi.hasOneUse() && !LdHi->isPredecessorOf(Lo.getNode())) {
439 SDVTList VTList = CurDAG->getVTList(VT, MVT::Other);
440
441 SDValue TiedIn = CurDAG->getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, Lo);
442 SDValue Ops[] = {
443 LdHi->getChain(), LdHi->getBasePtr(), TiedIn
444 };
445
446 unsigned LoadOp = AMDGPUISD::LOAD_D16_HI;
447 if (LdHi->getMemoryVT() == MVT::i8) {
448 LoadOp = LdHi->getExtensionType() == ISD::SEXTLOAD ?
449 AMDGPUISD::LOAD_D16_HI_I8 : AMDGPUISD::LOAD_D16_HI_U8;
450 } else {
451 assert(LdHi->getMemoryVT() == MVT::i16);
452 }
453
454 SDValue NewLoadHi =
455 CurDAG->getMemIntrinsicNode(LoadOp, SDLoc(LdHi), VTList,
456 Ops, LdHi->getMemoryVT(),
457 LdHi->getMemOperand());
458
459 CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), NewLoadHi);
460 CurDAG->ReplaceAllUsesOfValueWith(SDValue(LdHi, 1), NewLoadHi.getValue(1));
461 return true;
462 }
463
464 // build_vector (load ptr), hi -> load_d16_lo ptr, hi
465 // build_vector (zextload ptr from i8), hi -> load_d16_lo_u8 ptr, hi
466 // build_vector (sextload ptr from i8), hi -> load_d16_lo_i8 ptr, hi
467 LoadSDNode *LdLo = dyn_cast<LoadSDNode>(stripBitcast(Lo));
468 if (LdLo && Lo.hasOneUse()) {
469 SDValue TiedIn = getHi16Elt(Hi);
470 if (!TiedIn || LdLo->isPredecessorOf(TiedIn.getNode()))
471 return false;
472
473 SDVTList VTList = CurDAG->getVTList(VT, MVT::Other);
474 unsigned LoadOp = AMDGPUISD::LOAD_D16_LO;
475 if (LdLo->getMemoryVT() == MVT::i8) {
476 LoadOp = LdLo->getExtensionType() == ISD::SEXTLOAD ?
477 AMDGPUISD::LOAD_D16_LO_I8 : AMDGPUISD::LOAD_D16_LO_U8;
478 } else {
479 assert(LdLo->getMemoryVT() == MVT::i16);
480 }
481
482 TiedIn = CurDAG->getNode(ISD::BITCAST, SDLoc(N), VT, TiedIn);
483
484 SDValue Ops[] = {
485 LdLo->getChain(), LdLo->getBasePtr(), TiedIn
486 };
487
488 SDValue NewLoadLo =
489 CurDAG->getMemIntrinsicNode(LoadOp, SDLoc(LdLo), VTList,
490 Ops, LdLo->getMemoryVT(),
491 LdLo->getMemOperand());
492
493 CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), NewLoadLo);
494 CurDAG->ReplaceAllUsesOfValueWith(SDValue(LdLo, 1), NewLoadLo.getValue(1));
495 return true;
496 }
497
498 return false;
499}
500
501void AMDGPUDAGToDAGISel::PreprocessISelDAG() {
502 if (!Subtarget->d16PreservesUnusedBits())
503 return;
504
505 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
506
507 bool MadeChange = false;
508 while (Position != CurDAG->allnodes_begin()) {
509 SDNode *N = &*--Position;
510 if (N->use_empty())
511 continue;
512
513 switch (N->getOpcode()) {
514 case ISD::BUILD_VECTOR:
515 MadeChange |= matchLoadD16FromBuildVector(N);
516 break;
517 default:
518 break;
519 }
520 }
521
522 if (MadeChange) {
523 CurDAG->RemoveDeadNodes();
524 LLVM_DEBUG(dbgs() << "After PreProcess:\n";
525 CurDAG->dump(););
526 }
527}
528
529bool AMDGPUDAGToDAGISel::isNoNanSrc(SDValue N) const {
530 if (TM.Options.NoNaNsFPMath)
531 return true;
532
533 // TODO: Move into isKnownNeverNaN
534 if (N->getFlags().hasNoNaNs())
535 return true;
536
537 return CurDAG->isKnownNeverNaN(N);
538}
539
540bool AMDGPUDAGToDAGISel::isInlineImmediate(const SDNode *N,
541 bool Negated) const {
542 if (N->isUndef())
543 return true;
544
545 const SIInstrInfo *TII = Subtarget->getInstrInfo();
546 if (Negated) {
547 if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N))
548 return TII->isInlineConstant(-C->getAPIntValue());
549
550 if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N))
551 return TII->isInlineConstant(-C->getValueAPF().bitcastToAPInt());
552
553 } else {
554 if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N))
555 return TII->isInlineConstant(C->getAPIntValue());
556
557 if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N))
558 return TII->isInlineConstant(C->getValueAPF().bitcastToAPInt());
559 }
560
561 return false;
562}
563
564/// Determine the register class for \p OpNo
565/// \returns The register class of the virtual register that will be used for
566/// the given operand number \p OpNo, or nullptr if the register class cannot be
567/// determined.
568const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N,
569 unsigned OpNo) const {
570 if (!N->isMachineOpcode()) {
571 if (N->getOpcode() == ISD::CopyToReg) {
572 Register Reg = cast<RegisterSDNode>(N->getOperand(1))->getReg();
573 if (Reg.isVirtual()) {
574 MachineRegisterInfo &MRI = CurDAG->getMachineFunction().getRegInfo();
575 return MRI.getRegClass(Reg);
576 }
577
578 const SIRegisterInfo *TRI
579 = static_cast<const GCNSubtarget *>(Subtarget)->getRegisterInfo();
580 return TRI->getPhysRegClass(Reg);
581 }
582
583 return nullptr;
584 }
585
586 switch (N->getMachineOpcode()) {
587 default: {
588 const MCInstrDesc &Desc =
589 Subtarget->getInstrInfo()->get(N->getMachineOpcode());
590 unsigned OpIdx = Desc.getNumDefs() + OpNo;
591 if (OpIdx >= Desc.getNumOperands())
592 return nullptr;
593 int RegClass = Desc.OpInfo[OpIdx].RegClass;
594 if (RegClass == -1)
595 return nullptr;
596
597 return Subtarget->getRegisterInfo()->getRegClass(RegClass);
598 }
599 case AMDGPU::REG_SEQUENCE: {
600 unsigned RCID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
601 const TargetRegisterClass *SuperRC =
602 Subtarget->getRegisterInfo()->getRegClass(RCID);
603
604 SDValue SubRegOp = N->getOperand(OpNo + 1);
605 unsigned SubRegIdx = cast<ConstantSDNode>(SubRegOp)->getZExtValue();
606 return Subtarget->getRegisterInfo()->getSubClassWithSubReg(SuperRC,
607 SubRegIdx);
608 }
609 }
610}
611
612SDNode *AMDGPUDAGToDAGISel::glueCopyToOp(SDNode *N, SDValue NewChain,
613 SDValue Glue) const {
614 SmallVector <SDValue, 8> Ops;
615 Ops.push_back(NewChain); // Replace the chain.
616 for (unsigned i = 1, e = N->getNumOperands(); i != e; ++i)
617 Ops.push_back(N->getOperand(i));
618
619 Ops.push_back(Glue);
620 return CurDAG->MorphNodeTo(N, N->getOpcode(), N->getVTList(), Ops);
621}
622
623SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N, SDValue Val) const {
624 const SITargetLowering& Lowering =
625 *static_cast<const SITargetLowering*>(getTargetLowering());
626
627 assert(N->getOperand(0).getValueType() == MVT::Other && "Expected chain");
628
629 SDValue M0 = Lowering.copyToM0(*CurDAG, N->getOperand(0), SDLoc(N), Val);
630 return glueCopyToOp(N, M0, M0.getValue(1));
631}
632
633SDNode *AMDGPUDAGToDAGISel::glueCopyToM0LDSInit(SDNode *N) const {
634 unsigned AS = cast<MemSDNode>(N)->getAddressSpace();
635 if (AS == AMDGPUAS::LOCAL_ADDRESS) {
636 if (Subtarget->ldsRequiresM0Init())
637 return glueCopyToM0(N, CurDAG->getTargetConstant(-1, SDLoc(N), MVT::i32));
638 } else if (AS == AMDGPUAS::REGION_ADDRESS) {
639 MachineFunction &MF = CurDAG->getMachineFunction();
640 unsigned Value = MF.getInfo<SIMachineFunctionInfo>()->getGDSSize();
641 return
642 glueCopyToM0(N, CurDAG->getTargetConstant(Value, SDLoc(N), MVT::i32));
643 }
644 return N;
645}
646
647MachineSDNode *AMDGPUDAGToDAGISel::buildSMovImm64(SDLoc &DL, uint64_t Imm,
648 EVT VT) const {
649 SDNode *Lo = CurDAG->getMachineNode(
650 AMDGPU::S_MOV_B32, DL, MVT::i32,
651 CurDAG->getTargetConstant(Imm & 0xFFFFFFFF, DL, MVT::i32));
652 SDNode *Hi =
653 CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
654 CurDAG->getTargetConstant(Imm >> 32, DL, MVT::i32));
655 const SDValue Ops[] = {
656 CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
657 SDValue(Lo, 0), CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
658 SDValue(Hi, 0), CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32)};
659
660 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, VT, Ops);
661}
662
663void AMDGPUDAGToDAGISel::SelectBuildVector(SDNode *N, unsigned RegClassID) {
664 EVT VT = N->getValueType(0);
665 unsigned NumVectorElts = VT.getVectorNumElements();
666 EVT EltVT = VT.getVectorElementType();
667 SDLoc DL(N);
668 SDValue RegClass = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
669
670 if (NumVectorElts == 1) {
671 CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, EltVT, N->getOperand(0),
672 RegClass);
673 return;
674 }
675
676 assert(NumVectorElts <= 32 && "Vectors with more than 32 elements not "
677 "supported yet");
678 // 32 = Max Num Vector Elements
679 // 2 = 2 REG_SEQUENCE operands per element (value, subreg index)
680 // 1 = Vector Register Class
681 SmallVector<SDValue, 32 * 2 + 1> RegSeqArgs(NumVectorElts * 2 + 1);
682
683 bool IsGCN = CurDAG->getSubtarget().getTargetTriple().getArch() ==
684 Triple::amdgcn;
685 RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
686 bool IsRegSeq = true;
687 unsigned NOps = N->getNumOperands();
688 for (unsigned i = 0; i < NOps; i++) {
689 // XXX: Why is this here?
690 if (isa<RegisterSDNode>(N->getOperand(i))) {
691 IsRegSeq = false;
692 break;
693 }
694 unsigned Sub = IsGCN ? SIRegisterInfo::getSubRegFromChannel(i)
695 : R600RegisterInfo::getSubRegFromChannel(i);
696 RegSeqArgs[1 + (2 * i)] = N->getOperand(i);
697 RegSeqArgs[1 + (2 * i) + 1] = CurDAG->getTargetConstant(Sub, DL, MVT::i32);
698 }
699 if (NOps != NumVectorElts) {
700 // Fill in the missing undef elements if this was a scalar_to_vector.
701 assert(N->getOpcode() == ISD::SCALAR_TO_VECTOR && NOps < NumVectorElts);
702 MachineSDNode *ImpDef = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
703 DL, EltVT);
704 for (unsigned i = NOps; i < NumVectorElts; ++i) {
705 unsigned Sub = IsGCN ? SIRegisterInfo::getSubRegFromChannel(i)
706 : R600RegisterInfo::getSubRegFromChannel(i);
707 RegSeqArgs[1 + (2 * i)] = SDValue(ImpDef, 0);
708 RegSeqArgs[1 + (2 * i) + 1] =
709 CurDAG->getTargetConstant(Sub, DL, MVT::i32);
710 }
711 }
712
713 if (!IsRegSeq)
714 SelectCode(N);
715 CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(), RegSeqArgs);
716}
717
718void AMDGPUDAGToDAGISel::Select(SDNode *N) {
719 unsigned int Opc = N->getOpcode();
720 if (N->isMachineOpcode()) {
721 N->setNodeId(-1);
722 return; // Already selected.
723 }
724
725 // isa<MemSDNode> almost works but is slightly too permissive for some DS
726 // intrinsics.
727 if (Opc == ISD::LOAD || Opc == ISD::STORE || isa<AtomicSDNode>(N) ||
728 (Opc == AMDGPUISD::ATOMIC_INC || Opc == AMDGPUISD::ATOMIC_DEC ||
729 Opc == ISD::ATOMIC_LOAD_FADD ||
730 Opc == AMDGPUISD::ATOMIC_LOAD_FMIN ||
731 Opc == AMDGPUISD::ATOMIC_LOAD_FMAX)) {
732 N = glueCopyToM0LDSInit(N);
733 SelectCode(N);
734 return;
735 }
736
737 switch (Opc) {
738 default:
739 break;
740 // We are selecting i64 ADD here instead of custom lowering it during
741 // DAG legalization, so we can fold some i64 ADDs used for address
742 // calculation into the LOAD and STORE instructions.
743 case ISD::ADDC:
744 case ISD::ADDE:
745 case ISD::SUBC:
746 case ISD::SUBE: {
747 if (N->getValueType(0) != MVT::i64)
748 break;
749
750 SelectADD_SUB_I64(N);
751 return;
752 }
753 case ISD::ADDCARRY:
754 case ISD::SUBCARRY:
755 if (N->getValueType(0) != MVT::i32)
756 break;
757
758 SelectAddcSubb(N);
759 return;
760 case ISD::UADDO:
761 case ISD::USUBO: {
762 SelectUADDO_USUBO(N);
763 return;
764 }
765 case AMDGPUISD::FMUL_W_CHAIN: {
766 SelectFMUL_W_CHAIN(N);
767 return;
768 }
769 case AMDGPUISD::FMA_W_CHAIN: {
770 SelectFMA_W_CHAIN(N);
771 return;
772 }
773
774 case ISD::SCALAR_TO_VECTOR:
775 case ISD::BUILD_VECTOR: {
776 EVT VT = N->getValueType(0);
777 unsigned NumVectorElts = VT.getVectorNumElements();
778 if (VT.getScalarSizeInBits() == 16) {
779 if (Opc == ISD::BUILD_VECTOR && NumVectorElts == 2) {
780 if (SDNode *Packed = packConstantV2I16(N, *CurDAG)) {
781 ReplaceNode(N, Packed);
782 return;
783 }
784 }
785
786 break;
787 }
788
789 assert(VT.getVectorElementType().bitsEq(MVT::i32));
790 unsigned RegClassID =
791 SIRegisterInfo::getSGPRClassForBitWidth(NumVectorElts * 32)->getID();
792 SelectBuildVector(N, RegClassID);
793 return;
794 }
795 case ISD::BUILD_PAIR: {
796 SDValue RC, SubReg0, SubReg1;
797 SDLoc DL(N);
798 if (N->getValueType(0) == MVT::i128) {
799 RC = CurDAG->getTargetConstant(AMDGPU::SGPR_128RegClassID, DL, MVT::i32);
800 SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, DL, MVT::i32);
801 SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, DL, MVT::i32);
802 } else if (N->getValueType(0) == MVT::i64) {
803 RC = CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32);
804 SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
805 SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
806 } else {
807 llvm_unreachable("Unhandled value type for BUILD_PAIR");
808 }
809 const SDValue Ops[] = { RC, N->getOperand(0), SubReg0,
810 N->getOperand(1), SubReg1 };
811 ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
812 N->getValueType(0), Ops));
813 return;
814 }
815
816 case ISD::Constant:
817 case ISD::ConstantFP: {
818 if (N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N))
819 break;
820
821 uint64_t Imm;
822 if (ConstantFPSDNode *FP = dyn_cast<ConstantFPSDNode>(N))
823 Imm = FP->getValueAPF().bitcastToAPInt().getZExtValue();
824 else {
825 ConstantSDNode *C = cast<ConstantSDNode>(N);
826 Imm = C->getZExtValue();
827 }
828
829 SDLoc DL(N);
830 ReplaceNode(N, buildSMovImm64(DL, Imm, N->getValueType(0)));
831 return;
832 }
833 case AMDGPUISD::BFE_I32:
834 case AMDGPUISD::BFE_U32: {
835 // There is a scalar version available, but unlike the vector version which
836 // has a separate operand for the offset and width, the scalar version packs
837 // the width and offset into a single operand. Try to move to the scalar
838 // version if the offsets are constant, so that we can try to keep extended
839 // loads of kernel arguments in SGPRs.
840
841 // TODO: Technically we could try to pattern match scalar bitshifts of
842 // dynamic values, but it's probably not useful.
843 ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
844 if (!Offset)
845 break;
846
847 ConstantSDNode *Width = dyn_cast<ConstantSDNode>(N->getOperand(2));
848 if (!Width)
849 break;
850
851 bool Signed = Opc == AMDGPUISD::BFE_I32;
852
853 uint32_t OffsetVal = Offset->getZExtValue();
854 uint32_t WidthVal = Width->getZExtValue();
855
856 ReplaceNode(N, getS_BFE(Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32,
857 SDLoc(N), N->getOperand(0), OffsetVal, WidthVal));
858 return;
859 }
860 case AMDGPUISD::DIV_SCALE: {
861 SelectDIV_SCALE(N);
862 return;
863 }
864 case AMDGPUISD::MAD_I64_I32:
865 case AMDGPUISD::MAD_U64_U32: {
866 SelectMAD_64_32(N);
867 return;
868 }
869 case ISD::CopyToReg: {
870 const SITargetLowering& Lowering =
871 *static_cast<const SITargetLowering*>(getTargetLowering());
872 N = Lowering.legalizeTargetIndependentNode(N, *CurDAG);
873 break;
874 }
875 case ISD::AND:
876 case ISD::SRL:
877 case ISD::SRA:
878 case ISD::SIGN_EXTEND_INREG:
879 if (N->getValueType(0) != MVT::i32)
880 break;
881
882 SelectS_BFE(N);
883 return;
884 case ISD::BRCOND:
885 SelectBRCOND(N);
886 return;
887 case ISD::FMAD:
888 case ISD::FMA:
889 SelectFMAD_FMA(N);
890 return;
891 case AMDGPUISD::ATOMIC_CMP_SWAP:
892 SelectATOMIC_CMP_SWAP(N);
893 return;
894 case AMDGPUISD::CVT_PKRTZ_F16_F32:
895 case AMDGPUISD::CVT_PKNORM_I16_F32:
896 case AMDGPUISD::CVT_PKNORM_U16_F32:
897 case AMDGPUISD::CVT_PK_U16_U32:
898 case AMDGPUISD::CVT_PK_I16_I32: {
899 // Hack around using a legal type if f16 is illegal.
900 if (N->getValueType(0) == MVT::i32) {
901 MVT NewVT = Opc == AMDGPUISD::CVT_PKRTZ_F16_F32 ? MVT::v2f16 : MVT::v2i16;
902 N = CurDAG->MorphNodeTo(N, N->getOpcode(), CurDAG->getVTList(NewVT),
903 { N->getOperand(0), N->getOperand(1) });
904 SelectCode(N);
905 return;
906 }
907
908 break;
909 }
910 case ISD::INTRINSIC_W_CHAIN: {
911 SelectINTRINSIC_W_CHAIN(N);
912 return;
913 }
914 case ISD::INTRINSIC_WO_CHAIN: {
915 SelectINTRINSIC_WO_CHAIN(N);
916 return;
917 }
918 case ISD::INTRINSIC_VOID: {
919 SelectINTRINSIC_VOID(N);
920 return;
921 }
922 }
923
924 SelectCode(N);
925}
926
927bool AMDGPUDAGToDAGISel::isUniformBr(const SDNode *N) const {
928 const BasicBlock *BB = FuncInfo->MBB->getBasicBlock();
929 const Instruction *Term = BB->getTerminator();
930 return Term->getMetadata("amdgpu.uniform") ||
931 Term->getMetadata("structurizecfg.uniform");
932}
933
934static bool getBaseWithOffsetUsingSplitOR(SelectionDAG &DAG, SDValue Addr,
935 SDValue &N0, SDValue &N1) {
936 if (Addr.getValueType() == MVT::i64 && Addr.getOpcode() == ISD::BITCAST &&
937 Addr.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
938 // Because we split the 64-bit `or` earlier, the pattern to match is complicated, i.e.
939 // (i64 (bitcast (v2i32 (build_vector
940 // (or (extract_vector_elt V, 0), OFFSET),
941 // (extract_vector_elt V, 1)))))
942 SDValue Lo = Addr.getOperand(0).getOperand(0);
943 if (Lo.getOpcode() == ISD::OR && DAG.isBaseWithConstantOffset(Lo)) {
944 SDValue BaseLo = Lo.getOperand(0);
945 SDValue BaseHi = Addr.getOperand(0).getOperand(1);
946 // Check that the split base halves (Lo and Hi) are extracted from the same vector.
947 if (BaseLo.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
948 BaseHi.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
949 BaseLo.getOperand(0) == BaseHi.getOperand(0) &&
950 // Lo is statically extracted from index 0.
951 isa<ConstantSDNode>(BaseLo.getOperand(1)) &&
952 BaseLo.getConstantOperandVal(1) == 0 &&
953 // Hi is statically extracted from index 1.
954 isa<ConstantSDNode>(BaseHi.getOperand(1)) &&
955 BaseHi.getConstantOperandVal(1) == 1) {
956 N0 = BaseLo.getOperand(0).getOperand(0);
957 N1 = Lo.getOperand(1);
958 return true;
959 }
960 }
961 }
962 return false;
963}
964
965bool AMDGPUDAGToDAGISel::isBaseWithConstantOffset64(SDValue Addr, SDValue &LHS,
966 SDValue &RHS) const {
967 if (CurDAG->isBaseWithConstantOffset(Addr)) {
968 LHS = Addr.getOperand(0);
969 RHS = Addr.getOperand(1);
970 return true;
971 }
972
973 if (getBaseWithOffsetUsingSplitOR(*CurDAG, Addr, LHS, RHS)) {
974 assert(LHS && RHS && isa<ConstantSDNode>(RHS));
975 return true;
976 }
977
978 return false;
979}
980
981StringRef AMDGPUDAGToDAGISel::getPassName() const {
982 return "AMDGPU DAG->DAG Pattern Instruction Selection";
983}
984
985//===----------------------------------------------------------------------===//
986// Complex Patterns
987//===----------------------------------------------------------------------===//
988
989bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
990 SDValue &Offset) {
991 return false;
992}
993
994bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
995 SDValue &Offset) {
996 ConstantSDNode *C;
997 SDLoc DL(Addr);
998
999 if ((C = dyn_cast<ConstantSDNode>(Addr))) {
1000 Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
1001 Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
1002 } else if ((Addr.getOpcode() == AMDGPUISD::DWORDADDR) &&
1003 (C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) {
1004 Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
1005 Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
1006 } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
1007 (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
1008 Base = Addr.getOperand(0);
1009 Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
1010 } else {
1011 Base = Addr;
1012 Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
1013 }
1014
1015 return true;
1016}
1017
1018SDValue AMDGPUDAGToDAGISel::getMaterializedScalarImm32(int64_t Val,
1019 const SDLoc &DL) const {
1020 SDNode *Mov = CurDAG->getMachineNode(
1021 AMDGPU::S_MOV_B32, DL, MVT::i32,
1022 CurDAG->getTargetConstant(Val, DL, MVT::i32));
1023 return SDValue(Mov, 0);
1024}
1025
1026// FIXME: Should only handle addcarry/subcarry
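// The 64-bit operation is split into a 32-bit low half that produces a carry
// and a 32-bit high half that consumes it (glued through the MVT::Glue
// result), then recombined with a REG_SEQUENCE.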
1027void AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
1028 SDLoc DL(N);
1029 SDValue LHS = N->getOperand(0);
1030 SDValue RHS = N->getOperand(1);
1031
1032 unsigned Opcode = N->getOpcode();
1033 bool ConsumeCarry = (Opcode == ISD::ADDE || Opcode == ISD::SUBE);
1034 bool ProduceCarry =
1035 ConsumeCarry || Opcode == ISD::ADDC || Opcode == ISD::SUBC;
1036 bool IsAdd = Opcode == ISD::ADD || Opcode == ISD::ADDC || Opcode == ISD::ADDE;
1037
1038 SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
1039 SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
1040
1041 SDNode *Lo0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
1042 DL, MVT::i32, LHS, Sub0);
1043 SDNode *Hi0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
1044 DL, MVT::i32, LHS, Sub1);
1045
1046 SDNode *Lo1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
1047 DL, MVT::i32, RHS, Sub0);
1048 SDNode *Hi1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
1049 DL, MVT::i32, RHS, Sub1);
1050
1051 SDVTList VTList = CurDAG->getVTList(MVT::i32, MVT::Glue);
1052
1053 static const unsigned OpcMap[2][2][2] = {
1054 {{AMDGPU::S_SUB_U32, AMDGPU::S_ADD_U32},
1055 {AMDGPU::V_SUB_CO_U32_e32, AMDGPU::V_ADD_CO_U32_e32}},
1056 {{AMDGPU::S_SUBB_U32, AMDGPU::S_ADDC_U32},
1057 {AMDGPU::V_SUBB_U32_e32, AMDGPU::V_ADDC_U32_e32}}};
1058
1059 unsigned Opc = OpcMap[0][N->isDivergent()][IsAdd];
1060 unsigned CarryOpc = OpcMap[1][N->isDivergent()][IsAdd];
1061
1062 SDNode *AddLo;
1063 if (!ConsumeCarry) {
1064 SDValue Args[] = { SDValue(Lo0, 0), SDValue(Lo1, 0) };
1065 AddLo = CurDAG->getMachineNode(Opc, DL, VTList, Args);
1066 } else {
1067 SDValue Args[] = { SDValue(Lo0, 0), SDValue(Lo1, 0), N->getOperand(2) };
1068 AddLo = CurDAG->getMachineNode(CarryOpc, DL, VTList, Args);
1069 }
1070 SDValue AddHiArgs[] = {
1071 SDValue(Hi0, 0),
1072 SDValue(Hi1, 0),
1073 SDValue(AddLo, 1)
1074 };
1075 SDNode *AddHi = CurDAG->getMachineNode(CarryOpc, DL, VTList, AddHiArgs);
1076
1077 SDValue RegSequenceArgs[] = {
1078 CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
1079 SDValue(AddLo,0),
1080 Sub0,
1081 SDValue(AddHi,0),
1082 Sub1,
1083 };
1084 SDNode *RegSequence = CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, DL,
1085 MVT::i64, RegSequenceArgs);
1086
1087 if (ProduceCarry) {
1088 // Replace the carry-use
1089 ReplaceUses(SDValue(N, 1), SDValue(AddHi, 1));
1090 }
1091
1092 // Replace the remaining uses.
1093 ReplaceNode(N, RegSequence);
1094}
1095
1096void AMDGPUDAGToDAGISel::SelectAddcSubb(SDNode *N) {
1097 SDLoc DL(N);
1098 SDValue LHS = N->getOperand(0);
1099 SDValue RHS = N->getOperand(1);
1100 SDValue CI = N->getOperand(2);
1101
1102 if (N->isDivergent()) {
1103 unsigned Opc = N->getOpcode() == ISD::ADDCARRY ? AMDGPU::V_ADDC_U32_e64
1104 : AMDGPU::V_SUBB_U32_e64;
1105 CurDAG->SelectNodeTo(
1106 N, Opc, N->getVTList(),
1107 {LHS, RHS, CI,
1108 CurDAG->getTargetConstant(0, {}, MVT::i1) /*clamp bit*/});
1109 } else {
1110 unsigned Opc = N->getOpcode() == ISD::ADDCARRY ? AMDGPU::S_ADD_CO_PSEUDO
1111 : AMDGPU::S_SUB_CO_PSEUDO;
1112 CurDAG->SelectNodeTo(N, Opc, N->getVTList(), {LHS, RHS, CI});
1113 }
1114}
1115
1116void AMDGPUDAGToDAGISel::SelectUADDO_USUBO(SDNode *N) {
1117 // The names of the opcodes are misleading. v_add_i32/v_sub_i32 have unsigned
1118 // carry out despite the _i32 name. These were renamed in VI to _U32.
1119 // FIXME: We should probably rename the opcodes here.
1120 bool IsAdd = N->getOpcode() == ISD::UADDO;
1121 bool IsVALU = N->isDivergent();
1122
1123 for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end(); UI != E;
1124 ++UI)
1125 if (UI.getUse().getResNo() == 1) {
1126 if ((IsAdd && (UI->getOpcode() != ISD::ADDCARRY)) ||
1127 (!IsAdd && (UI->getOpcode() != ISD::SUBCARRY))) {
1128 IsVALU = true;
1129 break;
1130 }
1131 }
1132
1133 if (IsVALU) {
1134 unsigned Opc = IsAdd ? AMDGPU::V_ADD_CO_U32_e64 : AMDGPU::V_SUB_CO_U32_e64;
1135
1136 CurDAG->SelectNodeTo(
1137 N, Opc, N->getVTList(),
1138 {N->getOperand(0), N->getOperand(1),
1139 CurDAG->getTargetConstant(0, {}, MVT::i1) /*clamp bit*/});
1140 } else {
1141 unsigned Opc = N->getOpcode() == ISD::UADDO ? AMDGPU::S_UADDO_PSEUDO
1142 : AMDGPU::S_USUBO_PSEUDO;
1143
1144 CurDAG->SelectNodeTo(N, Opc, N->getVTList(),
1145 {N->getOperand(0), N->getOperand(1)});
1146 }
1147}
1148
1149void AMDGPUDAGToDAGISel::SelectFMA_W_CHAIN(SDNode *N) {
1150 SDLoc SL(N);
1151 // src0_modifiers, src0, src1_modifiers, src1, src2_modifiers, src2, clamp, omod
1152 SDValue Ops[10];
1153
1154 SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[6], Ops[7]);
1155 SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
1156 SelectVOP3Mods(N->getOperand(3), Ops[5], Ops[4]);
1157 Ops[8] = N->getOperand(0);
1158 Ops[9] = N->getOperand(4);
1159
1160 CurDAG->SelectNodeTo(N, AMDGPU::V_FMA_F32, N->getVTList(), Ops);
1161}
1162
1163void AMDGPUDAGToDAGISel::SelectFMUL_W_CHAIN(SDNode *N) {
1164 SDLoc SL(N);
1165 // src0_modifiers, src0, src1_modifiers, src1, clamp, omod
1166 SDValue Ops[8];
1167
1168 SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[4], Ops[5]);
1169 SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
1170 Ops[6] = N->getOperand(0);
1171 Ops[7] = N->getOperand(3);
1172
1173 CurDAG->SelectNodeTo(N, AMDGPU::V_MUL_F32_e64, N->getVTList(), Ops);
1174}
1175
1176// We need to handle this here because tablegen doesn't support matching
1177// instructions with multiple outputs.
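// (DIV_SCALE yields two results: the scaled value and the condition flag
// consumed by the rest of the division expansion.)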
1178void AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
1179 SDLoc SL(N);
1180 EVT VT = N->getValueType(0);
1181
1182 assert(VT == MVT::f32 || VT == MVT::f64);
1183
1184 unsigned Opc
1185 = (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64 : AMDGPU::V_DIV_SCALE_F32;
1186
1187 // src0_modifiers, src0, src1_modifiers, src1, src2_modifiers, src2, clamp,
1188 // omod
1189 SDValue Ops[8];
1190 SelectVOP3BMods0(N->getOperand(0), Ops[1], Ops[0], Ops[6], Ops[7]);
1191 SelectVOP3BMods(N->getOperand(1), Ops[3], Ops[2]);
1192 SelectVOP3BMods(N->getOperand(2), Ops[5], Ops[4]);
1193 CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
1194}
1195
1196// We need to handle this here because tablegen doesn't support matching
1197// instructions with multiple outputs.
1198void AMDGPUDAGToDAGISel::SelectMAD_64_32(SDNode *N) {
1199 SDLoc SL(N);
1200 bool Signed = N->getOpcode() == AMDGPUISD::MAD_I64_I32;
1201 unsigned Opc = Signed ? AMDGPU::V_MAD_I64_I32 : AMDGPU::V_MAD_U64_U32;
1202
1203 SDValue Clamp = CurDAG->getTargetConstant(0, SL, MVT::i1);
1204 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
1205 Clamp };
1206 CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
1207}
1208
1209bool AMDGPUDAGToDAGISel::isDSOffsetLegal(SDValue Base, unsigned Offset) const {
1210 if (!isUInt<16>(Offset))
1211 return false;
1212
1213 if (!Base || Subtarget->hasUsableDSOffset() ||
1214 Subtarget->unsafeDSOffsetFoldingEnabled())
1215 return true;
1216
1217 // On Southern Islands, instructions with a negative base value and an offset
1218 // don't seem to work.
1219 return CurDAG->SignBitIsZero(Base);
1220}
1221
1222bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base,
1223 SDValue &Offset) const {
1224 SDLoc DL(Addr);
1225 if (CurDAG->isBaseWithConstantOffset(Addr)) {
1226 SDValue N0 = Addr.getOperand(0);
1227 SDValue N1 = Addr.getOperand(1);
1228 ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
1229 if (isDSOffsetLegal(N0, C1->getSExtValue())) {
1230 // (add n0, c0)
1231 Base = N0;
1232 Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
1233 return true;
1234 }
1235 } else if (Addr.getOpcode() == ISD::SUB) {
1236 // sub C, x -> add (sub 0, x), C
1237 if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
1238 int64_t ByteOffset = C->getSExtValue();
1239 if (isDSOffsetLegal(SDValue(), ByteOffset)) {
1240 SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
1241
1242 // XXX - This is kind of hacky. Create a dummy sub node so we can check
1243 // the known bits in isDSOffsetLegal. We need to emit the selected node
1244 // here, so this is thrown away.
1245 SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
1246 Zero, Addr.getOperand(1));
1247
1248 if (isDSOffsetLegal(Sub, ByteOffset)) {
1249 SmallVector<SDValue, 3> Opnds;
1250 Opnds.push_back(Zero);
1251 Opnds.push_back(Addr.getOperand(1));
1252
1253 // FIXME: Select the VOP3 version for the with-carry case.
1254 unsigned SubOp = AMDGPU::V_SUB_CO_U32_e32;
1255 if (Subtarget->hasAddNoCarry()) {
1256 SubOp = AMDGPU::V_SUB_U32_e64;
1257 Opnds.push_back(
1258 CurDAG->getTargetConstant(0, {}, MVT::i1)); // clamp bit
1259 }
1260
1261 MachineSDNode *MachineSub =
1262 CurDAG->getMachineNode(SubOp, DL, MVT::i32, Opnds);
1263
1264 Base = SDValue(MachineSub, 0);
1265 Offset = CurDAG->getTargetConstant(ByteOffset, DL, MVT::i16);
1266 return true;
1267 }
1268 }
1269 }
1270 } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
1271 // If we have a constant address, prefer to put the constant into the
1272 // offset. This can save moves to load the constant address since multiple
1273 // operations can share the zero base address register, and enables merging
1274 // into read2 / write2 instructions.
1275
1276 SDLoc DL(Addr);
1277
1278 if (isDSOffsetLegal(SDValue(), CAddr->getZExtValue())) {
1279 SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
1280 MachineSDNode *MovZero = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
1281 DL, MVT::i32, Zero);
1282 Base = SDValue(MovZero, 0);
1283 Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16);
1284 return true;
1285 }
1286 }
1287
1288 // default case
1289 Base = Addr;
1290 Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i16);
1291 return true;
1292}
1293
1294bool AMDGPUDAGToDAGISel::isDSOffset2Legal(SDValue Base, unsigned Offset0,
1295 unsigned Offset1,
1296 unsigned Size) const {
1297 if (Offset0 % Size != 0 || Offset1 % Size != 0)
1298 return false;
1299 if (!isUInt<8>(Offset0 / Size) || !isUInt<8>(Offset1 / Size))
1300 return false;
1301
1302 if (!Base || Subtarget->hasUsableDSOffset() ||
1303 Subtarget->unsafeDSOffsetFoldingEnabled())
1304 return true;
1305
1306 // On Southern Islands, instructions with a negative base value and an offset
1307 // don't seem to work.
1308 return CurDAG->SignBitIsZero(Base);
1309}
1310
1311// TODO: If the offset is too big, put the low 16 bits into the offset.
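// The two read2/write2 offsets are encoded in units of the access size and
// must each fit in 8 bits; e.g. for Size = 4 the reachable byte offsets are
// 0, 4, ..., 1020.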
1312bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base,
1313 SDValue &Offset0,
1314 SDValue &Offset1) const {
1315 return SelectDSReadWrite2(Addr, Base, Offset0, Offset1, 4);
1316}
1317
1318bool AMDGPUDAGToDAGISel::SelectDS128Bit8ByteAligned(SDValue Addr, SDValue &Base,
1319 SDValue &Offset0,
1320 SDValue &Offset1) const {
1321 return SelectDSReadWrite2(Addr, Base, Offset0, Offset1, 8);
1322}
1323
1324bool AMDGPUDAGToDAGISel::SelectDSReadWrite2(SDValue Addr, SDValue &Base,
1325 SDValue &Offset0, SDValue &Offset1,
1326 unsigned Size) const {
1327 SDLoc DL(Addr);
1328
1329 if (CurDAG->isBaseWithConstantOffset(Addr)) {
1330 SDValue N0 = Addr.getOperand(0);
1331 SDValue N1 = Addr.getOperand(1);
1332 ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
1333 unsigned OffsetValue0 = C1->getZExtValue();
1334 unsigned OffsetValue1 = OffsetValue0 + Size;
1335
1336 // (add n0, c0)
1337 if (isDSOffset2Legal(N0, OffsetValue0, OffsetValue1, Size)) {
1338 Base = N0;
1339 Offset0 = CurDAG->getTargetConstant(OffsetValue0 / Size, DL, MVT::i8);
1340 Offset1 = CurDAG->getTargetConstant(OffsetValue1 / Size, DL, MVT::i8);
1341 return true;
1342 }
1343 } else if (Addr.getOpcode() == ISD::SUB) {
1344 // sub C, x -> add (sub 0, x), C
1345 if (const ConstantSDNode *C =
1346 dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
1347 unsigned OffsetValue0 = C->getZExtValue();
1348 unsigned OffsetValue1 = OffsetValue0 + Size;
1349
1350 if (isDSOffset2Legal(SDValue(), OffsetValue0, OffsetValue1, Size)) {
1351 SDLoc DL(Addr);
1352 SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
1353
1354 // XXX - This is kind of hacky. Create a dummy sub node so we can check
1355 // the known bits in isDSOffset2Legal. We need to emit the selected node
1356 // here, so this is thrown away.
1357 SDValue Sub =
1358 CurDAG->getNode(ISD::SUB, DL, MVT::i32, Zero, Addr.getOperand(1));
1359
1360 if (isDSOffset2Legal(Sub, OffsetValue0, OffsetValue1, Size)) {
1361 SmallVector<SDValue, 3> Opnds;
1362 Opnds.push_back(Zero);
1363 Opnds.push_back(Addr.getOperand(1));
1364 unsigned SubOp = AMDGPU::V_SUB_CO_U32_e32;
1365 if (Subtarget->hasAddNoCarry()) {
1366 SubOp = AMDGPU::V_SUB_U32_e64;
1367 Opnds.push_back(
1368 CurDAG->getTargetConstant(0, {}, MVT::i1)); // clamp bit
1369 }
1370
1371 MachineSDNode *MachineSub = CurDAG->getMachineNode(
1372 SubOp, DL, MVT::getIntegerVT(Size * 8), Opnds);
1373
1374 Base = SDValue(MachineSub, 0);
1375 Offset0 = CurDAG->getTargetConstant(OffsetValue0 / Size, DL, MVT::i8);
1376 Offset1 = CurDAG->getTargetConstant(OffsetValue1 / Size, DL, MVT::i8);
1377 return true;
1378 }
1379 }
1380 }
1381 } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
1382 unsigned OffsetValue0 = CAddr->getZExtValue();
1383 unsigned OffsetValue1 = OffsetValue0 + Size;
1384
1385 if (isDSOffset2Legal(SDValue(), OffsetValue0, OffsetValue1, Size)) {
1386 SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
1387 MachineSDNode *MovZero =
1388 CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32, DL, MVT::i32, Zero);
1389 Base = SDValue(MovZero, 0);
1390 Offset0 = CurDAG->getTargetConstant(OffsetValue0 / Size, DL, MVT::i8);
1391 Offset1 = CurDAG->getTargetConstant(OffsetValue1 / Size, DL, MVT::i8);
1392 return true;
1393 }
1394 }
1395
1396 // default case
1397
1398 Base = Addr;
1399 Offset0 = CurDAG->getTargetConstant(0, DL, MVT::i8);
1400 Offset1 = CurDAG->getTargetConstant(1, DL, MVT::i8);
1401 return true;
1402}
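
A worked example of the (add n0, c0) path above (illustration only):

// SelectDSReadWrite2 with Size = 4 and Addr = (add n0, 8):
//   OffsetValue0 = 8, OffsetValue1 = 12
//   offset0 = 8 / 4 = 2, offset1 = 12 / 4 = 3
// so the paired access touches n0 + 2*4 and n0 + 3*4, i.e. n0+8 and n0+12.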
1403
1404bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
1405 SDValue &VAddr, SDValue &SOffset,
1406 SDValue &Offset, SDValue &Offen,
1407 SDValue &Idxen, SDValue &Addr64,
1408 SDValue &GLC, SDValue &SLC,
1409 SDValue &TFE, SDValue &DLC,
1410 SDValue &SWZ) const {
1411 // The subtarget prefers to use flat instructions.
1412 // FIXME: This should be a pattern predicate and not reach here
1413 if (Subtarget->useFlatForGlobal())
1414 return false;
1415
1416 SDLoc DL(Addr);
1417
1418 if (!GLC.getNode())
1419 GLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
1420 if (!SLC.getNode())
1421 SLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
1422 TFE = CurDAG->getTargetConstant(0, DL, MVT::i1);
1423 DLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
1424 SWZ = CurDAG->getTargetConstant(0, DL, MVT::i1);
1425
1426 Idxen = CurDAG->getTargetConstant(0, DL, MVT::i1);
1427 Offen = CurDAG->getTargetConstant(0, DL, MVT::i1);
1428 Addr64 = CurDAG->getTargetConstant(0, DL, MVT::i1);
1429 SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);
1430
1431 ConstantSDNode *C1 = nullptr;
1432 SDValue N0 = Addr;
1433 if (CurDAG->isBaseWithConstantOffset(Addr)) {
1434 C1 = cast<ConstantSDNode>(Addr.getOperand(1));
1435 if (isUInt<32>(C1->getZExtValue()))
1436 N0 = Addr.getOperand(0);
1437 else
1438 C1 = nullptr;
1439 }
1440
1441 if (N0.getOpcode() == ISD::ADD) {
1442 // (add N2, N3) -> addr64, or
1443 // (add (add N2, N3), C1) -> addr64
1444 SDValue N2 = N0.getOperand(0);
1445 SDValue N3 = N0.getOperand(1);
1446 Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
1447
1448 if (N2->isDivergent()) {
1449 if (N3->isDivergent()) {
1450 // Both N2 and N3 are divergent. Use N0 (the result of the add) as the
1451 // addr64, and construct the resource from a 0 address.
1452 Ptr = SDValue(buildSMovImm64(DL, 0, MVT::v2i32), 0);
1453 VAddr = N0;
1454 } else {
1455 // N2 is divergent, N3 is not.
1456 Ptr = N3;
1457 VAddr = N2;
1458 }
1459 } else {
1460 // N2 is not divergent.
1461 Ptr = N2;
1462 VAddr = N3;
1463 }
1464 Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
1465 } else if (N0->isDivergent()) {
1466 // N0 is divergent. Use it as the addr64, and construct the resource from a
1467 // 0 address.
1468 Ptr = SDValue(buildSMovImm64(DL, 0, MVT::v2i32), 0);
1469 VAddr = N0;
1470 Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
1471 } else {
1472 // N0 -> offset, or
1473 // (N0 + C1) -> offset
1474 VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
1475 Ptr = N0;
1476 }
1477
1478 if (!C1) {
1479 // No offset.
1480 Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
1481 return true;
1482 }
1483
1484 if (SIInstrInfo::isLegalMUBUFImmOffset(C1->getZExtValue())) {
1485 // Legal offset for instruction.
1486 Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
1487 return true;
1488 }
1489
1490 // Illegal offset, store it in soffset.
1491 Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
1492 SOffset =
1493 SDValue(CurDAG->getMachineNode(
1494 AMDGPU::S_MOV_B32, DL, MVT::i32,
1495 CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32)),
1496 0);
1497 return true;
1498}
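
A minimal sketch of the offset placement decision above (our own names; it assumes, as holds for these subtargets, that a legal MUBUF immediate offset is an unsigned 12-bit value, and that the caller already verified the constant fits in 32 bits):

#include <cstdint>

struct MUBUFOffsetParts { uint32_t ImmOffset; uint32_t SOffset; };

static MUBUFOffsetParts placeMUBUFOffset(uint32_t C) {
  if (C < (1u << 12))
    return {C, 0}; // fits the 12-bit immediate offset field
  return {0, C};   // too large: materialize the whole value into soffset
}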
1499
1500bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
1501 SDValue &VAddr, SDValue &SOffset,
1502 SDValue &Offset, SDValue &GLC,
1503 SDValue &SLC, SDValue &TFE,
1504 SDValue &DLC, SDValue &SWZ) const {
1505 SDValue Ptr, Offen, Idxen, Addr64;
1506
1507 // The addr64 bit was removed on Volcanic Islands.
1508 // FIXME: This should be a pattern predicate and not reach here
1509 if (!Subtarget->hasAddr64())
1510 return false;
1511
1512 if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
1513 GLC, SLC, TFE, DLC, SWZ))
1514 return false;
1515
1516 ConstantSDNode *C = cast<ConstantSDNode>(Addr64);
1517 if (C->getSExtValue()) {
1518 SDLoc DL(Addr);
1519
1520 const SITargetLowering& Lowering =
1521 *static_cast<const SITargetLowering*>(getTargetLowering());
1522
1523 SRsrc = SDValue(Lowering.wrapAddr64Rsrc(*CurDAG, DL, Ptr), 0);
1524 return true;
1525 }
1526
1527 return false;
1528}
1529
1530bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
1531 SDValue &VAddr, SDValue &SOffset,
1532 SDValue &Offset,
1533 SDValue &SLC) const {
1534 SLC = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i1);
1535 SDValue GLC, TFE, DLC, SWZ;
1536
1537 return SelectMUBUFAddr64(Addr, SRsrc, VAddr, SOffset, Offset, GLC, SLC, TFE, DLC, SWZ);
1538}
1539
1540static bool isStackPtrRelative(const MachinePointerInfo &PtrInfo) {
1541 auto PSV = PtrInfo.V.dyn_cast<const PseudoSourceValue *>();
1542 return PSV && PSV->isStack();
1543}
1544
1545std::pair<SDValue, SDValue> AMDGPUDAGToDAGISel::foldFrameIndex(SDValue N) const {
1546 SDLoc DL(N);
1547
1548 auto *FI = dyn_cast<FrameIndexSDNode>(N);
1549 SDValue TFI =
1550 FI ? CurDAG->getTargetFrameIndex(FI->getIndex(), FI->getValueType(0)) : N;
1551
1552 // We rebase the base address into an absolute stack address and hence
1553 // use constant 0 for soffset.
1554 return std::make_pair(TFI, CurDAG->getTargetConstant(0, DL, MVT::i32));
1555}
1556
1557bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(SDNode *Parent,
1558 SDValue Addr, SDValue &Rsrc,
1559 SDValue &VAddr, SDValue &SOffset,
1560 SDValue &ImmOffset) const {
1561
1562 SDLoc DL(Addr);
1563 MachineFunction &MF = CurDAG->getMachineFunction();
1564 const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
1565
1566 Rsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);
1567
1568 if (ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
1569 int64_t Imm = CAddr->getSExtValue();
1570 const int64_t NullPtr =
1571 AMDGPUTargetMachine::getNullPointerValue(AMDGPUAS::PRIVATE_ADDRESS);
1572 // Don't fold null pointer.
1573 if (Imm != NullPtr) {
1574 SDValue HighBits = CurDAG->getTargetConstant(Imm & ~4095, DL, MVT::i32);
1575 MachineSDNode *MovHighBits = CurDAG->getMachineNode(
1576 AMDGPU::V_MOV_B32_e32, DL, MVT::i32, HighBits);
1577 VAddr = SDValue(MovHighBits, 0);
1578
1579 // In a call sequence, stores to the argument stack area are relative to the
1580 // stack pointer.
1581 const MachinePointerInfo &PtrInfo
1582 = cast<MemSDNode>(Parent)->getPointerInfo();
1583 SOffset = isStackPtrRelative(PtrInfo)
1584 ? CurDAG->getRegister(Info->getStackPtrOffsetReg(), MVT::i32)
1585 : CurDAG->getTargetConstant(0, DL, MVT::i32);
1586 ImmOffset = CurDAG->getTargetConstant(Imm & 4095, DL, MVT::i16);
1587 return true;
1588 }
1589 }
1590
1591 if (CurDAG->isBaseWithConstantOffset(Addr)) {
1592 // (add n0, c1)
1593
1594 SDValue N0 = Addr.getOperand(0);
1595 SDValue N1 = Addr.getOperand(1);
1596
1597 // Offsets in vaddr must be positive if range checking is enabled.
1598 //
1599 // The total computation of vaddr + soffset + offset must not overflow. If
1600 // vaddr is negative, even if offset is 0 the sgpr offset add will end up
1601 // overflowing.
1602 //
1603 // Prior to gfx9, MUBUF instructions with the vaddr offset enabled would
1604 // always perform a range check. If a negative vaddr base index was used,
1605 // the range check would fail even though the overall address computation
1606 // would produce a valid address. For out-of-bounds MUBUF loads, a 0 is
1607 // returned.
1608 //
1609 // Therefore it should be safe to fold any VGPR offset on gfx9 into the
1610 // MUBUF vaddr, but not on older subtargets which can only do this if the
1611 // sign bit is known 0.
1612 ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
1613 if (SIInstrInfo::isLegalMUBUFImmOffset(C1->getZExtValue()) &&
1614 (!Subtarget->privateMemoryResourceIsRangeChecked() ||
1615 CurDAG->SignBitIsZero(N0))) {
1616 std::tie(VAddr, SOffset) = foldFrameIndex(N0);
1617 ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
1618 return true;
1619 }
1620 }
1621
1622 // (node)
1623 std::tie(VAddr, SOffset) = foldFrameIndex(Addr);
1624 ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16);
1625 return true;
1626}
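
The constant-address case above splits the immediate on a 4096-byte boundary; a worked example (illustrative value):

//   Imm       = 0x12345
//   HighBits  = Imm & ~4095 = 0x12000  (v_mov_b32 into vaddr)
//   ImmOffset = Imm &  4095 = 0x345    (12-bit offset field)
// vaddr + offset = 0x12000 + 0x345 = 0x12345, recovering the address.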
1627
1628bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffset(SDNode *Parent,
1629 SDValue Addr,
1630 SDValue &SRsrc,
1631 SDValue &SOffset,
1632 SDValue &Offset) const {
1633 ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr);
1634 if (!CAddr || !SIInstrInfo::isLegalMUBUFImmOffset(CAddr->getZExtValue()))
1635 return false;
1636
1637 SDLoc DL(Addr);
1638 MachineFunction &MF = CurDAG->getMachineFunction();
1639 const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
1640
1641 SRsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);
1642
1643 const MachinePointerInfo &PtrInfo = cast<MemSDNode>(Parent)->getPointerInfo();
1644
1645 // FIXME: Get from MachinePointerInfo? We should only be using the frame
1646 // offset if we know this is in a call sequence.
1647 SOffset = isStackPtrRelative(PtrInfo)
1648 ? CurDAG->getRegister(Info->getStackPtrOffsetReg(), MVT::i32)
1649 : CurDAG->getTargetConstant(0, DL, MVT::i32);
1650
1651 Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16);
1652 return true;
1653}
1654
1655bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
1656 SDValue &SOffset, SDValue &Offset,
1657 SDValue &GLC, SDValue &SLC,
1658 SDValue &TFE, SDValue &DLC,
1659 SDValue &SWZ) const {
1660 SDValue Ptr, VAddr, Offen, Idxen, Addr64;
1661 const SIInstrInfo *TII =
1662 static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
1663
1664 if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
1665 GLC, SLC, TFE, DLC, SWZ))
1666 return false;
1667
1668 if (!cast<ConstantSDNode>(Offen)->getSExtValue() &&
1669 !cast<ConstantSDNode>(Idxen)->getSExtValue() &&
1670 !cast<ConstantSDNode>(Addr64)->getSExtValue()) {
1671 uint64_t Rsrc = TII->getDefaultRsrcDataFormat() |
1672 APInt::getAllOnesValue(32).getZExtValue(); // Size
1673 SDLoc DL(Addr);
1674
1675 const SITargetLowering& Lowering =
1676 *static_cast<const SITargetLowering*>(getTargetLowering());
1677
1678 SRsrc = SDValue(Lowering.buildRSRC(*CurDAG, DL, Ptr, 0, Rsrc), 0);
1679 return true;
1680 }
1681 return false;
1682}
1683
1684bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
1685 SDValue &Soffset, SDValue &Offset
1686 ) const {
1687 SDValue GLC, SLC, TFE, DLC, SWZ;
1688
1689 return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE, DLC, SWZ);
1690}
1691bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
1692 SDValue &Soffset, SDValue &Offset,
1693 SDValue &SLC) const {
1694 SDValue GLC, TFE, DLC, SWZ;
1695
1696 return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE, DLC, SWZ);
1697}
1698
1699 // Find a load or store from the corresponding pattern root.
1700 // Roots may be build_vector, bitconvert, or combinations thereof.
1701static MemSDNode* findMemSDNode(SDNode *N) {
1702 N = AMDGPUTargetLowering::stripBitcast(SDValue(N,0)).getNode();
1703 if (MemSDNode *MN = dyn_cast<MemSDNode>(N))
1704 return MN;
1705  assert(isa<BuildVectorSDNode>(N));
1706 for (SDValue V : N->op_values())
1707 if (MemSDNode *MN =
1708 dyn_cast<MemSDNode>(AMDGPUTargetLowering::stripBitcast(V)))
1709 return MN;
1710 llvm_unreachable("cannot find MemSDNode in the pattern!")::llvm::llvm_unreachable_internal("cannot find MemSDNode in the pattern!"
, "/build/llvm-toolchain-snapshot-12~++20201124111112+7b5254223ac/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp"
, 1710)
;
1711}
1712
1713template <bool IsSigned>
1714bool AMDGPUDAGToDAGISel::SelectFlatOffset(SDNode *N,
1715 SDValue Addr,
1716 SDValue &VAddr,
1717 SDValue &Offset) const {
1718 int64_t OffsetVal = 0;
1719
1720 unsigned AS = findMemSDNode(N)->getAddressSpace();
1721
1722 if (Subtarget->hasFlatInstOffsets() &&
1723 (!Subtarget->hasFlatSegmentOffsetBug() ||
1724 AS != AMDGPUAS::FLAT_ADDRESS)) {
1725 SDValue N0, N1;
1726 if (isBaseWithConstantOffset64(Addr, N0, N1)) {
1727 uint64_t COffsetVal = cast<ConstantSDNode>(N1)->getSExtValue();
1728
1729 const SIInstrInfo *TII = Subtarget->getInstrInfo();
1730 if (TII->isLegalFLATOffset(COffsetVal, AS, IsSigned)) {
1731 Addr = N0;
1732 OffsetVal = COffsetVal;
1733 } else {
1734 // If the offset doesn't fit, put the low bits into the offset field and
1735 // add the rest.
1736 //
1737 // For a FLAT instruction the hardware decides whether to access
1738 // global/scratch/shared memory based on the high bits of vaddr,
1739 // ignoring the offset field, so we have to ensure that when we add
1740 // remainder to vaddr it still points into the same underlying object.
1741 // The easiest way to do that is to make sure that we split the offset
1742 // into two pieces that are both >= 0 or both <= 0.
1743
1744 SDLoc DL(N);
1745 uint64_t RemainderOffset;
1746
1747 std::tie(OffsetVal, RemainderOffset)
1748 = TII->splitFlatOffset(COffsetVal, AS, IsSigned);
1749
1750 SDValue AddOffsetLo =
1751 getMaterializedScalarImm32(Lo_32(RemainderOffset), DL);
1752 SDValue Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
1753
1754 if (Addr.getValueType().getSizeInBits() == 32) {
1755 SmallVector<SDValue, 3> Opnds;
1756 Opnds.push_back(N0);
1757 Opnds.push_back(AddOffsetLo);
1758 unsigned AddOp = AMDGPU::V_ADD_CO_U32_e32;
1759 if (Subtarget->hasAddNoCarry()) {
1760 AddOp = AMDGPU::V_ADD_U32_e64;
1761 Opnds.push_back(Clamp);
1762 }
1763 Addr = SDValue(CurDAG->getMachineNode(AddOp, DL, MVT::i32, Opnds), 0);
1764 } else {
1765 // TODO: Should this try to use a scalar add pseudo if the base address
1766 // is uniform and saddr is usable?
1767 SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
1768 SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
1769
1770 SDNode *N0Lo = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
1771 DL, MVT::i32, N0, Sub0);
1772 SDNode *N0Hi = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
1773 DL, MVT::i32, N0, Sub1);
1774
1775 SDValue AddOffsetHi =
1776 getMaterializedScalarImm32(Hi_32(RemainderOffset), DL);
1777
1778 SDVTList VTs = CurDAG->getVTList(MVT::i32, MVT::i1);
1779
1780 SDNode *Add =
1781 CurDAG->getMachineNode(AMDGPU::V_ADD_CO_U32_e64, DL, VTs,
1782 {AddOffsetLo, SDValue(N0Lo, 0), Clamp});
1783
1784 SDNode *Addc = CurDAG->getMachineNode(
1785 AMDGPU::V_ADDC_U32_e64, DL, VTs,
1786 {AddOffsetHi, SDValue(N0Hi, 0), SDValue(Add, 1), Clamp});
1787
1788 SDValue RegSequenceArgs[] = {
1789 CurDAG->getTargetConstant(AMDGPU::VReg_64RegClassID, DL, MVT::i32),
1790 SDValue(Add, 0), Sub0, SDValue(Addc, 0), Sub1};
1791
1792 Addr = SDValue(CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, DL,
1793 MVT::i64, RegSequenceArgs),
1794 0);
1795 }
1796 }
1797 }
1798 }
1799
1800 VAddr = Addr;
1801 Offset = CurDAG->getTargetConstant(OffsetVal, SDLoc(), MVT::i16);
1802 return true;
1803}
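
To illustrate the same-sign requirement described in the comment above, assume a hypothetical 13-bit signed offset field (range -4096..4095); the numbers are illustrative only:

//   COffsetVal = 4607 (does not fit the field)
//   Good split: vaddr += 4096, offset = 511   (both pieces >= 0)
//   Bad split:  vaddr += 5120, offset = -513  (also sums to 4607, but vaddr
//               overshoots the object, so the aperture check on the high
//               vaddr bits can classify the address into the wrong space)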
1804
1805// If this matches zero_extend i32:x, return x
1806static SDValue matchZExtFromI32(SDValue Op) {
1807 if (Op.getOpcode() != ISD::ZERO_EXTEND)
1808 return SDValue();
1809
1810 SDValue ExtSrc = Op.getOperand(0);
1811 return (ExtSrc.getValueType() == MVT::i32) ? ExtSrc : SDValue();
1812}
1813
1814// Match (64-bit SGPR base) + (zext vgpr offset) + sext(imm offset)
1815bool AMDGPUDAGToDAGISel::SelectGlobalSAddr(SDNode *N,
1816 SDValue Addr,
1817 SDValue &SAddr,
1818 SDValue &VOffset,
1819 SDValue &Offset) const {
1820 int64_t ImmOffset = 0;
1821
1822 // Match the immediate offset first, which canonically is moved as low as
1823 // possible.
1824
1825 SDValue LHS, RHS;
1826 if (isBaseWithConstantOffset64(Addr, LHS, RHS)) {
1827 int64_t COffsetVal = cast<ConstantSDNode>(RHS)->getSExtValue();
1828 const SIInstrInfo *TII = Subtarget->getInstrInfo();
1829
1830 if (TII->isLegalFLATOffset(COffsetVal, AMDGPUAS::GLOBAL_ADDRESS, true)) {
1831 Addr = LHS;
1832 ImmOffset = COffsetVal;
1833 } else if (!LHS->isDivergent() && COffsetVal > 0) {
1834 SDLoc SL(N);
1835 // saddr + large_offset -> saddr + (voffset = large_offset & ~MaxOffset) +
1836 // (large_offset & MaxOffset);
1837 int64_t SplitImmOffset, RemainderOffset;
1838 std::tie(SplitImmOffset, RemainderOffset)
1839 = TII->splitFlatOffset(COffsetVal, AMDGPUAS::GLOBAL_ADDRESS, true);
1840
1841 if (isUInt<32>(RemainderOffset)) {
1842 SDNode *VMov = CurDAG->getMachineNode(
1843 AMDGPU::V_MOV_B32_e32, SL, MVT::i32,
1844 CurDAG->getTargetConstant(RemainderOffset, SDLoc(), MVT::i32));
1845 VOffset = SDValue(VMov, 0);
1846 SAddr = LHS;
1847 Offset = CurDAG->getTargetConstant(SplitImmOffset, SDLoc(), MVT::i16);
1848 return true;
1849 }
1850 }
1851 }
1852
1853 // Match the variable offset.
1854 if (Addr.getOpcode() != ISD::ADD) {
1855 if (Addr->isDivergent() || Addr.getOpcode() == ISD::UNDEF ||
1856 isa<ConstantSDNode>(Addr))
1857 return false;
1858
1859 // It's cheaper to materialize a single 32-bit zero for vaddr than the two
1860 // moves required to copy a 64-bit SGPR to VGPR.
1861 SAddr = Addr;
1862 SDNode *VMov = CurDAG->getMachineNode(
1863 AMDGPU::V_MOV_B32_e32, SDLoc(Addr), MVT::i32,
1864 CurDAG->getTargetConstant(0, SDLoc(), MVT::i32));
1865 VOffset = SDValue(VMov, 0);
1866 Offset = CurDAG->getTargetConstant(ImmOffset, SDLoc(), MVT::i16);
1867 return true;
1868 }
1869
1870 LHS = Addr.getOperand(0);
1871 RHS = Addr.getOperand(1);
1872
1873 if (!LHS->isDivergent()) {
1874 // add (i64 sgpr), (zero_extend (i32 vgpr))
1875 if (SDValue ZextRHS = matchZExtFromI32(RHS)) {
1876 SAddr = LHS;
1877 VOffset = ZextRHS;
1878 }
1879 }
1880
1881 if (!SAddr && !RHS->isDivergent()) {
1882 // add (zero_extend (i32 vgpr)), (i64 sgpr)
1883 if (SDValue ZextLHS = matchZExtFromI32(LHS)) {
1884 SAddr = RHS;
1885 VOffset = ZextLHS;
1886 }
1887 }
1888
1889 if (!SAddr)
1890 return false;
1891
1892 Offset = CurDAG->getTargetConstant(ImmOffset, SDLoc(), MVT::i16);
1893 return true;
1894}
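
A worked example of the saddr + large_offset split above, assuming (as with the truncating split used for scratch offsets later in this file) a 13-bit signed immediate field; the values are illustrative:

//   COffsetVal      = 10000
//   D               = 1 << 12 = 4096
//   RemainderOffset = (10000 / 4096) * 4096 = 8192  -> v_mov_b32 voffset
//   SplitImmOffset  = 10000 - 8192 = 1808           -> offset field
// The access becomes saddr + voffset(8192) + imm(1808).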
1895
1896// Match (32-bit SGPR base) + sext(imm offset)
1897bool AMDGPUDAGToDAGISel::SelectScratchSAddr(SDNode *N,
1898 SDValue Addr,
1899 SDValue &SAddr,
1900 SDValue &Offset) const {
1901 if (Addr->isDivergent())
1902 return false;
1903
1904 SAddr = Addr;
1905 int64_t COffsetVal = 0;
1906
1907 if (CurDAG->isBaseWithConstantOffset(Addr)) {
1908 COffsetVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
1909 SAddr = Addr.getOperand(0);
1910 }
1911
1912 if (auto FI = dyn_cast<FrameIndexSDNode>(SAddr)) {
1913 SAddr = CurDAG->getTargetFrameIndex(FI->getIndex(), FI->getValueType(0));
1914 } else if (SAddr.getOpcode() == ISD::ADD &&
1915 isa<FrameIndexSDNode>(SAddr.getOperand(0))) {
1916 // Materialize this into a scalar move for the scalar address to avoid a
1917 // readfirstlane.
1918 auto FI = cast<FrameIndexSDNode>(SAddr.getOperand(0));
1919 SDValue TFI = CurDAG->getTargetFrameIndex(FI->getIndex(),
1920 FI->getValueType(0));
1921 SAddr = SDValue(CurDAG->getMachineNode(AMDGPU::S_ADD_U32, SDLoc(SAddr),
1922 MVT::i32, TFI, SAddr.getOperand(1)),
1923 0);
1924 }
1925
1926 const SIInstrInfo *TII = Subtarget->getInstrInfo();
1927
1928 if (!TII->isLegalFLATOffset(COffsetVal, AMDGPUAS::PRIVATE_ADDRESS, true)) {
1929 int64_t RemainderOffset = COffsetVal;
Value stored to 'RemainderOffset' during its initialization is never read
1930 int64_t ImmField = 0;
1931 const unsigned NumBits = TII->getNumFlatOffsetBits(true);
1932 // Use signed division by a power of two to truncate towards 0.
1933 int64_t D = 1LL << (NumBits - 1);
1934 RemainderOffset = (COffsetVal / D) * D;
1935 ImmField = COffsetVal - RemainderOffset;
1936
1937    assert(TII->isLegalFLATOffset(ImmField, AMDGPUAS::PRIVATE_ADDRESS, true));
1938    assert(RemainderOffset + ImmField == COffsetVal);
1939
1940 COffsetVal = ImmField;
1941
1942 SDLoc DL(N);
1943 SDValue AddOffset =
1944 getMaterializedScalarImm32(Lo_32(RemainderOffset), DL);
1945 SAddr = SDValue(CurDAG->getMachineNode(AMDGPU::S_ADD_U32, DL, MVT::i32,
1946 SAddr, AddOffset), 0);
1947 }
1948
1949 Offset = CurDAG->getTargetConstant(COffsetVal, SDLoc(), MVT::i16);
1950
1951 return true;
1952}
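
The split above relies on C++ signed division truncating toward zero, which keeps both pieces on the same side of zero. A self-contained sketch of the same arithmetic (our own names, not part of the source):

#include <cassert>
#include <cstdint>
#include <utility>

// Returns {ImmField, RemainderOffset} with ImmField + RemainderOffset == C
// and both pieces having the same sign as C.
static std::pair<int64_t, int64_t> splitTowardZero(int64_t C, unsigned NumBits) {
  int64_t D = int64_t(1) << (NumBits - 1);
  int64_t Remainder = (C / D) * D; // C / D truncates toward zero in C++
  int64_t Imm = C - Remainder;     // |Imm| < D, same sign as C
  assert(Imm + Remainder == C);
  return {Imm, Remainder};
}

// e.g. splitTowardZero(-5000, 12): D = 2048, Remainder = -4096, Imm = -904.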
1953
1954bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode,
1955 SDValue &Offset, bool &Imm) const {
1956 ConstantSDNode *C = dyn_cast<ConstantSDNode>(ByteOffsetNode);
1957 if (!C) {
1958 if (ByteOffsetNode.getValueType().isScalarInteger() &&
1959 ByteOffsetNode.getValueType().getSizeInBits() == 32) {
1960 Offset = ByteOffsetNode;
1961 Imm = false;
1962 return true;
1963 }
1964 if (ByteOffsetNode.getOpcode() == ISD::ZERO_EXTEND) {
1965 if (ByteOffsetNode.getOperand(0).getValueType().getSizeInBits() == 32) {
1966 Offset = ByteOffsetNode.getOperand(0);
1967 Imm = false;
1968 return true;
1969 }
1970 }
1971 return false;
1972 }
1973
1974 SDLoc SL(ByteOffsetNode);
1975 // GFX9 and GFX10 have signed byte immediate offsets.
1976 int64_t ByteOffset = C->getSExtValue();
1977 Optional<int64_t> EncodedOffset =
1978 AMDGPU::getSMRDEncodedOffset(*Subtarget, ByteOffset, false);
1979 if (EncodedOffset) {
1980 Offset = CurDAG->getTargetConstant(*EncodedOffset, SL, MVT::i32);
1981 Imm = true;
1982 return true;
1983 }
1984
1985 // SGPR and literal offsets are unsigned.
1986 if (ByteOffset < 0)
1987 return false;
1988
1989 EncodedOffset = AMDGPU::getSMRDEncodedLiteralOffset32(*Subtarget, ByteOffset);
1990 if (EncodedOffset) {
1991 Offset = CurDAG->getTargetConstant(*EncodedOffset, SL, MVT::i32);
1992 return true;
1993 }
1994
1995 if (!isUInt<32>(ByteOffset) && !isInt<32>(ByteOffset))
1996 return false;
1997
1998 SDValue C32Bit = CurDAG->getTargetConstant(ByteOffset, SL, MVT::i32);
1999 Offset = SDValue(
2000 CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32, C32Bit), 0);
2001
2002 return true;
2003}
2004
2005SDValue AMDGPUDAGToDAGISel::Expand32BitAddress(SDValue Addr) const {
2006 if (Addr.getValueType() != MVT::i32)
2007 return Addr;
2008
2009 // Zero-extend a 32-bit address.
2010 SDLoc SL(Addr);
2011
2012 const MachineFunction &MF = CurDAG->getMachineFunction();
2013 const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
2014 unsigned AddrHiVal = Info->get32BitAddressHighBits();
2015 SDValue AddrHi = CurDAG->getTargetConstant(AddrHiVal, SL, MVT::i32);
2016
2017 const SDValue Ops[] = {
2018 CurDAG->getTargetConstant(AMDGPU::SReg_64_XEXECRegClassID, SL, MVT::i32),
2019 Addr,
2020 CurDAG->getTargetConstant(AMDGPU::sub0, SL, MVT::i32),
2021 SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32, AddrHi),
2022 0),
2023 CurDAG->getTargetConstant(AMDGPU::sub1, SL, MVT::i32),
2024 };
2025
2026 return SDValue(CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, SL, MVT::i64,
2027 Ops), 0);
2028}
2029
2030bool AMDGPUDAGToDAGISel::SelectSMRD(SDValue Addr, SDValue &SBase,
2031 SDValue &Offset, bool &Imm) const {
2032 SDLoc SL(Addr);
2033
2034 // A 32-bit (address + offset) should not cause unsigned 32-bit integer
2035 // wraparound, because s_load instructions perform the addition in 64 bits.
2036 if ((Addr.getValueType() != MVT::i32 ||
2037 Addr->getFlags().hasNoUnsignedWrap())) {
2038 SDValue N0, N1;
2039 // Extract the base and offset if possible.
2040 if (CurDAG->isBaseWithConstantOffset(Addr) ||
2041 Addr.getOpcode() == ISD::ADD) {
2042 N0 = Addr.getOperand(0);
2043 N1 = Addr.getOperand(1);
2044 } else if (getBaseWithOffsetUsingSplitOR(*CurDAG, Addr, N0, N1)) {
2045      assert(N0 && N1 && isa<ConstantSDNode>(N1));
2046 }
2047 if (N0 && N1) {
2048 if (SelectSMRDOffset(N1, Offset, Imm)) {
2049 SBase = Expand32BitAddress(N0);
2050 return true;
2051 }
2052 }
2053 }
2054 SBase = Expand32BitAddress(Addr);
2055 Offset = CurDAG->getTargetConstant(0, SL, MVT::i32);
2056 Imm = true;
2057 return true;
2058}
2059
2060bool AMDGPUDAGToDAGISel::SelectSMRDImm(SDValue Addr, SDValue &SBase,
2061 SDValue &Offset) const {
2062 bool Imm = false;
2063 return SelectSMRD(Addr, SBase, Offset, Imm) && Imm;
2064}
2065
2066bool AMDGPUDAGToDAGISel::SelectSMRDImm32(SDValue Addr, SDValue &SBase,
2067 SDValue &Offset) const {
2068
2069  assert(Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS);
2070
2071 bool Imm = false;
2072 if (!SelectSMRD(Addr, SBase, Offset, Imm))
2073 return false;
2074
2075 return !Imm && isa<ConstantSDNode>(Offset);
2076}
2077
2078bool AMDGPUDAGToDAGISel::SelectSMRDSgpr(SDValue Addr, SDValue &SBase,
2079 SDValue &Offset) const {
2080 bool Imm = false;
2081 return SelectSMRD(Addr, SBase, Offset, Imm) && !Imm &&
2082 !isa<ConstantSDNode>(Offset);
2083}
2084
2085bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm(SDValue Addr,
2086 SDValue &Offset) const {
2087 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr)) {
2088 // The immediate offset for S_BUFFER instructions is unsigned.
2089 if (auto Imm =
2090 AMDGPU::getSMRDEncodedOffset(*Subtarget, C->getZExtValue(), true)) {
2091 Offset = CurDAG->getTargetConstant(*Imm, SDLoc(Addr), MVT::i32);
2092 return true;
2093 }
2094 }
2095
2096 return false;
2097}
2098
2099bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm32(SDValue Addr,
2100 SDValue &Offset) const {
2101  assert(Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS);
2102
2103 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr)) {
2104 if (auto Imm = AMDGPU::getSMRDEncodedLiteralOffset32(*Subtarget,
2105 C->getZExtValue())) {
2106 Offset = CurDAG->getTargetConstant(*Imm, SDLoc(Addr), MVT::i32);
2107 return true;
2108 }
2109 }
2110
2111 return false;
2112}
2113
2114bool AMDGPUDAGToDAGISel::SelectMOVRELOffset(SDValue Index,
2115 SDValue &Base,
2116 SDValue &Offset) const {
2117 SDLoc DL(Index);
2118
2119 if (CurDAG->isBaseWithConstantOffset(Index)) {
2120 SDValue N0 = Index.getOperand(0);
2121 SDValue N1 = Index.getOperand(1);
2122 ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
2123
2124 // (add n0, c0)
2125 // Don't peel off the offset (c0) if doing so could possibly lead
2126 // the base (n0) to be negative.
2127 // (or n0, |c0|) can never change a sign given isBaseWithConstantOffset.
2128 if (C1->getSExtValue() <= 0 || CurDAG->SignBitIsZero(N0) ||
2129 (Index->getOpcode() == ISD::OR && C1->getSExtValue() >= 0)) {
2130 Base = N0;
2131 Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32);
2132 return true;
2133 }
2134 }
2135
2136 if (isa<ConstantSDNode>(Index))
2137 return false;
2138
2139 Base = Index;
2140 Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
2141 return true;
2142}
2143
2144SDNode *AMDGPUDAGToDAGISel::getS_BFE(unsigned Opcode, const SDLoc &DL,
2145 SDValue Val, uint32_t Offset,
2146 uint32_t Width) {
2147 // Transformation function: pack the offset and width of a BFE into
2148 // the format expected by S_BFE_I32 / S_BFE_U32. In the second
2149 // source, bits [5:0] contain the offset and bits [22:16] the width.
2150 uint32_t PackedVal = Offset | (Width << 16);
2151 SDValue PackedConst = CurDAG->getTargetConstant(PackedVal, DL, MVT::i32);
2152
2153 return CurDAG->getMachineNode(Opcode, DL, MVT::i32, Val, PackedConst);
2154}
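
Spelling out the packing above with concrete numbers: to extract 8 bits starting at bit 16,

//   PackedVal = Offset | (Width << 16) = 16 | (8 << 16) = 0x00080010
// i.e. the offset (16) lands in bits [5:0] and the width (8) in bits [22:16].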
2155
2156void AMDGPUDAGToDAGISel::SelectS_BFEFromShifts(SDNode *N) {
2157 // "(a << b) srl c)" ---> "BFE_U32 a, (c-b), (32-c)
2158 // "(a << b) sra c)" ---> "BFE_I32 a, (c-b), (32-c)
2159 // Predicate: 0 < b <= c < 32
2160
2161 const SDValue &Shl = N->getOperand(0);
2162 ConstantSDNode *B = dyn_cast<ConstantSDNode>(Shl->getOperand(1));
2163 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
2164
2165 if (B && C) {
2166 uint32_t BVal = B->getZExtValue();
2167 uint32_t CVal = C->getZExtValue();
2168
2169 if (0 < BVal && BVal <= CVal && CVal < 32) {
2170 bool Signed = N->getOpcode() == ISD::SRA;
2171 unsigned Opcode = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;
2172
2173 ReplaceNode(N, getS_BFE(Opcode, SDLoc(N), Shl.getOperand(0), CVal - BVal,
2174 32 - CVal));
2175 return;
2176 }
2177 }
2178 SelectCode(N);
2179}
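
A small reference model of the unsigned BFE these rewrites target, with a spot check of the shift-pair identity (a sketch under the predicate 0 < b <= c < 32, with our own names; not a hardware specification):

#include <cassert>
#include <cstdint>

static uint32_t bfeU32(uint32_t Val, uint32_t Offset, uint32_t Width) {
  uint32_t Mask = (Width < 32) ? ((1u << Width) - 1) : ~0u;
  return (Val >> Offset) & Mask;
}

static void checkShiftPairIdentity() {
  uint32_t A = 0xAABBCCDD, B = 8, C = 16;
  // "((a << b) srl c)" == BFE_U32 a, (c - b), (32 - c)
  assert(((A << B) >> C) == bfeU32(A, C - B, 32 - C)); // both are 0xBBCC
}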
2180
2181void AMDGPUDAGToDAGISel::SelectS_BFE(SDNode *N) {
2182 switch (N->getOpcode()) {
2183 case ISD::AND:
2184 if (N->getOperand(0).getOpcode() == ISD::SRL) {
2185 // "(a srl b) & mask" ---> "BFE_U32 a, b, popcount(mask)"
2186 // Predicate: isMask(mask)
2187 const SDValue &Srl = N->getOperand(0);
2188 ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(Srl.getOperand(1));
2189 ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
2190
2191 if (Shift && Mask) {
2192 uint32_t ShiftVal = Shift->getZExtValue();
2193 uint32_t MaskVal = Mask->getZExtValue();
2194
2195 if (isMask_32(MaskVal)) {
2196 uint32_t WidthVal = countPopulation(MaskVal);
2197
2198 ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N),
2199 Srl.getOperand(0), ShiftVal, WidthVal));
2200 return;
2201 }
2202 }
2203 }
2204 break;
2205 case ISD::SRL:
2206 if (N->getOperand(0).getOpcode() == ISD::AND) {
2207 // "(a & mask) srl b)" ---> "BFE_U32 a, b, popcount(mask >> b)"
2208 // Predicate: isMask(mask >> b)
2209 const SDValue &And = N->getOperand(0);
2210 ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(N->getOperand(1));
2211 ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(And->getOperand(1));
2212
2213 if (Shift && Mask) {
2214 uint32_t ShiftVal = Shift->getZExtValue();
2215 uint32_t MaskVal = Mask->getZExtValue() >> ShiftVal;
2216
2217 if (isMask_32(MaskVal)) {
2218 uint32_t WidthVal = countPopulation(MaskVal);
2219
2220 ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N),
2221 And.getOperand(0), ShiftVal, WidthVal));
2222 return;
2223 }
2224 }
2225 } else if (N->getOperand(0).getOpcode() == ISD::SHL) {
2226 SelectS_BFEFromShifts(N);
2227 return;
2228 }
2229 break;
2230 case ISD::SRA:
2231 if (N->getOperand(0).getOpcode() == ISD::SHL) {
2232 SelectS_BFEFromShifts(N);
2233 return;
2234 }
2235 break;
2236
2237 case ISD::SIGN_EXTEND_INREG: {
2238 // sext_inreg (srl x, 16), i8 -> bfe_i32 x, 16, 8
2239 SDValue Src = N->getOperand(0);
2240 if (Src.getOpcode() != ISD::SRL)
2241 break;
2242
2243 const ConstantSDNode *Amt = dyn_cast<ConstantSDNode>(Src.getOperand(1));
2244 if (!Amt)
2245 break;
2246
2247 unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
2248 ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_I32, SDLoc(N), Src.getOperand(0),
2249 Amt->getZExtValue(), Width));
2250 return;
2251 }
2252 }
2253
2254 SelectCode(N);
2255}
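
The AND forms handled above fit the same model as the bfeU32 sketch earlier:

// e.g. (a >> 4) & 0xFF: isMask_32(0xFF) holds, countPopulation(0xFF) == 8,
// so the node becomes BFE_U32 a, 4, 8 -- an extract of bits [11:4].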
2256
2257bool AMDGPUDAGToDAGISel::isCBranchSCC(const SDNode *N) const {
2258  assert(N->getOpcode() == ISD::BRCOND);
2259 if (!N->hasOneUse())
2260 return false;
2261
2262 SDValue Cond = N->getOperand(1);
2263 if (Cond.getOpcode() == ISD::CopyToReg)
2264 Cond = Cond.getOperand(2);
2265
2266 if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse())
2267 return false;
2268
2269 MVT VT = Cond.getOperand(0).getSimpleValueType();
2270 if (VT == MVT::i32)
2271 return true;
2272
2273 if (VT == MVT::i64) {
2274 auto ST = static_cast<const GCNSubtarget *>(Subtarget);
2275
2276 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
2277 return (CC == ISD::SETEQ || CC == ISD::SETNE) && ST->hasScalarCompareEq64();
2278 }
2279
2280 return false;
2281}
2282
2283void AMDGPUDAGToDAGISel::SelectBRCOND(SDNode *N) {
2284 SDValue Cond = N->getOperand(1);
2285
2286 if (Cond.isUndef()) {
2287 CurDAG->SelectNodeTo(N, AMDGPU::SI_BR_UNDEF, MVT::Other,
2288 N->getOperand(2), N->getOperand(0));
2289 return;
2290 }
2291
2292 const GCNSubtarget *ST = static_cast<const GCNSubtarget *>(Subtarget);
2293 const SIRegisterInfo *TRI = ST->getRegisterInfo();
2294
2295 bool UseSCCBr = isCBranchSCC(N) && isUniformBr(N);
2296 unsigned BrOp = UseSCCBr ? AMDGPU::S_CBRANCH_SCC1 : AMDGPU::S_CBRANCH_VCCNZ;
2297 Register CondReg = UseSCCBr ? AMDGPU::SCC : TRI->getVCC();
2298 SDLoc SL(N);
2299
2300 if (!UseSCCBr) {
2301 // This is the case that we are selecting to S_CBRANCH_VCCNZ. We have not
2302 // analyzed what generates the vcc value, so we do not know whether vcc
2303 // bits for disabled lanes are 0. Thus we need to mask out bits for
2304 // disabled lanes.
2305 //
2306 // For the case that we select S_CBRANCH_SCC1 and it gets changed to
2307 // S_CBRANCH_VCCNZ in SIFixSGPRCopies, SIFixSGPRCopies itself calls
2308 // SIInstrInfo::moveToVALU, which inserts the S_AND.
2309 //
2310 // We could add an analysis of what generates the vcc value here and omit
2311 // the S_AND when it is unnecessary. But it would be better to add a
2312 // separate pass after SIFixSGPRCopies to remove unnecessary S_ANDs, so it
2313 // catches both cases.
2314 Cond = SDValue(CurDAG->getMachineNode(ST->isWave32() ? AMDGPU::S_AND_B32
2315 : AMDGPU::S_AND_B64,
2316 SL, MVT::i1,
2317 CurDAG->getRegister(ST->isWave32() ? AMDGPU::EXEC_LO
2318 : AMDGPU::EXEC,
2319 MVT::i1),
2320 Cond),
2321 0);
2322 }
2323
2324 SDValue VCC = CurDAG->getCopyToReg(N->getOperand(0), SL, CondReg, Cond);
2325 CurDAG->SelectNodeTo(N, BrOp, MVT::Other,
2326 N->getOperand(2), // Basic Block
2327 VCC.getValue(0));
2328}
2329
2330void AMDGPUDAGToDAGISel::SelectFMAD_FMA(SDNode *N) {
2331 MVT VT = N->getSimpleValueType(0);
2332 bool IsFMA = N->getOpcode() == ISD::FMA;
2333 if (VT != MVT::f32 || (!Subtarget->hasMadMixInsts() &&
2334 !Subtarget->hasFmaMixInsts()) ||
2335 ((IsFMA && Subtarget->hasMadMixInsts()) ||
2336 (!IsFMA && Subtarget->hasFmaMixInsts()))) {
2337 SelectCode(N);
2338 return;
2339 }
2340
2341 SDValue Src0 = N->getOperand(0);
2342 SDValue Src1 = N->getOperand(1);
2343 SDValue Src2 = N->getOperand(2);
2344 unsigned Src0Mods, Src1Mods, Src2Mods;
2345
2346 // Avoid using v_mad_mix_f32/v_fma_mix_f32 unless there is actually an operand
2347 // using the conversion from f16.
2348 bool Sel0 = SelectVOP3PMadMixModsImpl(Src0, Src0, Src0Mods);
2349 bool Sel1 = SelectVOP3PMadMixModsImpl(Src1, Src1, Src1Mods);
2350 bool Sel2 = SelectVOP3PMadMixModsImpl(Src2, Src2, Src2Mods);
2351
2352  assert((IsFMA || !Mode.allFP32Denormals()) &&
2353         "fmad selected with denormals enabled");
2354 // TODO: We can select this with f32 denormals enabled if all the sources are
2355 // converted from f16 (in which case fmad isn't legal).
2356
2357 if (Sel0 || Sel1 || Sel2) {
2358 // For dummy operands.
2359 SDValue Zero = CurDAG->getTargetConstant(0, SDLoc(), MVT::i32);
2360 SDValue Ops[] = {
2361 CurDAG->getTargetConstant(Src0Mods, SDLoc(), MVT::i32), Src0,
2362 CurDAG->getTargetConstant(Src1Mods, SDLoc(), MVT::i32), Src1,
2363 CurDAG->getTargetConstant(Src2Mods, SDLoc(), MVT::i32), Src2,
2364 CurDAG->getTargetConstant(0, SDLoc(), MVT::i1),
2365 Zero, Zero
2366 };
2367
2368 CurDAG->SelectNodeTo(N,
2369 IsFMA ? AMDGPU::V_FMA_MIX_F32 : AMDGPU::V_MAD_MIX_F32,
2370 MVT::f32, Ops);
2371 } else {
2372 SelectCode(N);
2373 }
2374}
2375
2376// This is here because there isn't a way to use the generated sub0_sub1 as the
2377// subreg index to EXTRACT_SUBREG in tablegen.
2378void AMDGPUDAGToDAGISel::SelectATOMIC_CMP_SWAP(SDNode *N) {
2379 MemSDNode *Mem = cast<MemSDNode>(N);
2380 unsigned AS = Mem->getAddressSpace();
2381 if (AS == AMDGPUAS::FLAT_ADDRESS) {
2382 SelectCode(N);
2383 return;
2384 }
2385
2386 MVT VT = N->getSimpleValueType(0);
2387 bool Is32 = (VT == MVT::i32);
2388 SDLoc SL(N);
2389
2390 MachineSDNode *CmpSwap = nullptr;
2391 if (Subtarget->hasAddr64()) {
2392 SDValue SRsrc, VAddr, SOffset, Offset, SLC;
2393
2394 if (SelectMUBUFAddr64(Mem->getBasePtr(), SRsrc, VAddr, SOffset, Offset, SLC)) {
2395 unsigned Opcode = Is32 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN :
2396 AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN;
2397 SDValue CmpVal = Mem->getOperand(2);
2398 SDValue GLC = CurDAG->getTargetConstant(1, SL, MVT::i1);
2399
2400 // XXX - Do we care about glue operands?
2401
2402 SDValue Ops[] = {
2403 CmpVal, VAddr, SRsrc, SOffset, Offset, GLC, SLC, Mem->getChain()
2404 };
2405
2406 CmpSwap = CurDAG->getMachineNode(Opcode, SL, Mem->getVTList(), Ops);
2407 }
2408 }
2409
2410 if (!CmpSwap) {
2411 SDValue SRsrc, SOffset, Offset, SLC;
2412 if (SelectMUBUFOffset(Mem->getBasePtr(), SRsrc, SOffset, Offset, SLC)) {
2413 unsigned Opcode = Is32 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN :
2414 AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_OFFSET_RTN;
2415
2416 SDValue CmpVal = Mem->getOperand(2);
2417 SDValue GLC = CurDAG->getTargetConstant(1, SL, MVT::i1);
2418 SDValue Ops[] = {
2419 CmpVal, SRsrc, SOffset, Offset, GLC, SLC, Mem->getChain()
2420 };
2421
2422 CmpSwap = CurDAG->getMachineNode(Opcode, SL, Mem->getVTList(), Ops);
2423 }
2424 }
2425
2426 if (!CmpSwap) {
2427 SelectCode(N);
2428 return;
2429 }
2430
2431 MachineMemOperand *MMO = Mem->getMemOperand();
2432 CurDAG->setNodeMemRefs(CmpSwap, {MMO});
2433
2434 unsigned SubReg = Is32 ? AMDGPU::sub0 : AMDGPU::sub0_sub1;
2435 SDValue Extract
2436 = CurDAG->getTargetExtractSubreg(SubReg, SL, VT, SDValue(CmpSwap, 0));
2437
2438 ReplaceUses(SDValue(N, 0), Extract);
2439 ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 1));
2440 CurDAG->RemoveDeadNode(N);
2441}
2442
2443void AMDGPUDAGToDAGISel::SelectDSAppendConsume(SDNode *N, unsigned IntrID) {
2444 // The address is assumed to be uniform, so if it ends up in a VGPR, it will
2445 // be copied to an SGPR with readfirstlane.
2446 unsigned Opc = IntrID == Intrinsic::amdgcn_ds_append ?
2447 AMDGPU::DS_APPEND : AMDGPU::DS_CONSUME;
2448
2449 SDValue Chain = N->getOperand(0);
2450 SDValue Ptr = N->getOperand(2);
2451 MemIntrinsicSDNode *M = cast<MemIntrinsicSDNode>(N);
2452 MachineMemOperand *MMO = M->getMemOperand();
2453 bool IsGDS = M->getAddressSpace() == AMDGPUAS::REGION_ADDRESS;
2454
2455 SDValue Offset;
2456 if (CurDAG->isBaseWithConstantOffset(Ptr)) {
2457 SDValue PtrBase = Ptr.getOperand(0);
2458 SDValue PtrOffset = Ptr.getOperand(1);
2459
2460 const APInt &OffsetVal = cast<ConstantSDNode>(PtrOffset)->getAPIntValue();
2461 if (isDSOffsetLegal(PtrBase, OffsetVal.getZExtValue())) {
2462 N = glueCopyToM0(N, PtrBase);
2463 Offset = CurDAG->getTargetConstant(OffsetVal, SDLoc(), MVT::i32);
2464 }
2465 }
2466
2467 if (!Offset) {
2468 N = glueCopyToM0(N, Ptr);
2469 Offset = CurDAG->getTargetConstant(0, SDLoc(), MVT::i32);
2470 }
2471
2472 SDValue Ops[] = {
2473 Offset,
2474 CurDAG->getTargetConstant(IsGDS, SDLoc(), MVT::i32),
2475 Chain,
2476 N->getOperand(N->getNumOperands() - 1) // New glue
2477 };
2478
2479 SDNode *Selected = CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
2480 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Selected), {MMO});
2481}
2482
2483static unsigned gwsIntrinToOpcode(unsigned IntrID) {
2484 switch (IntrID) {
2485 case Intrinsic::amdgcn_ds_gws_init:
2486 return AMDGPU::DS_GWS_INIT;
2487 case Intrinsic::amdgcn_ds_gws_barrier:
2488 return AMDGPU::DS_GWS_BARRIER;
2489 case Intrinsic::amdgcn_ds_gws_sema_v:
2490 return AMDGPU::DS_GWS_SEMA_V;
2491 case Intrinsic::amdgcn_ds_gws_sema_br:
2492 return AMDGPU::DS_GWS_SEMA_BR;
2493 case Intrinsic::amdgcn_ds_gws_sema_p:
2494 return AMDGPU::DS_GWS_SEMA_P;
2495 case Intrinsic::amdgcn_ds_gws_sema_release_all:
2496 return AMDGPU::DS_GWS_SEMA_RELEASE_ALL;
2497 default:
2498 llvm_unreachable("not a gws intrinsic")::llvm::llvm_unreachable_internal("not a gws intrinsic", "/build/llvm-toolchain-snapshot-12~++20201124111112+7b5254223ac/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp"
, 2498)
;
2499 }
2500}
2501
2502void AMDGPUDAGToDAGISel::SelectDS_GWS(SDNode *N, unsigned IntrID) {
2503 if (IntrID == Intrinsic::amdgcn_ds_gws_sema_release_all &&
2504 !Subtarget->hasGWSSemaReleaseAll()) {
2505 // Let this error.
2506 SelectCode(N);
2507 return;
2508 }
2509
2510 // Chain, intrinsic ID, vsrc, offset
2511 const bool HasVSrc = N->getNumOperands() == 4;
2512  assert(HasVSrc || N->getNumOperands() == 3);
2513
2514 SDLoc SL(N);
2515 SDValue BaseOffset = N->getOperand(HasVSrc ? 3 : 2);
2516 int ImmOffset = 0;
2517 MemIntrinsicSDNode *M = cast<MemIntrinsicSDNode>(N);
2518 MachineMemOperand *MMO = M->getMemOperand();
2519
2520 // Don't worry if the offset ends up in a VGPR. Only one lane will have an
2521 // effect, so SIFixSGPRCopies will validly insert a readfirstlane.
2522
2523 // The resource id offset is computed as (<isa opaque base> + M0[21:16] +
2524 // offset field) % 64. Some versions of the programming guide omit the m0
2525 // part, or claim it's from offset 0.
2526 if (ConstantSDNode *ConstOffset = dyn_cast<ConstantSDNode>(BaseOffset)) {
2527 // If we have a constant offset, try to use the 0 in m0 as the base.
2528 // TODO: Look into changing the default m0 initialization value. If the
2529 // default -1 only set the low 16-bits, we could leave it as-is and add 1 to
2530 // the immediate offset.
2531 glueCopyToM0(N, CurDAG->getTargetConstant(0, SL, MVT::i32));
2532 ImmOffset = ConstOffset->getZExtValue();
2533 } else {
2534 if (CurDAG->isBaseWithConstantOffset(BaseOffset)) {
2535 ImmOffset = BaseOffset.getConstantOperandVal(1);
2536 BaseOffset = BaseOffset.getOperand(0);
2537 }
2538
2539 // Prefer to do the shift in an SGPR since it should be possible to use m0
2540 // as the result directly. If it's already an SGPR, it will be eliminated
2541 // later.
2542 SDNode *SGPROffset
2543 = CurDAG->getMachineNode(AMDGPU::V_READFIRSTLANE_B32, SL, MVT::i32,
2544 BaseOffset);
2545 // Shift to offset in m0
2546 SDNode *M0Base
2547 = CurDAG->getMachineNode(AMDGPU::S_LSHL_B32, SL, MVT::i32,
2548 SDValue(SGPROffset, 0),
2549 CurDAG->getTargetConstant(16, SL, MVT::i32));
2550 glueCopyToM0(N, SDValue(M0Base, 0));
2551 }
2552
2553 SDValue Chain = N->getOperand(0);
2554 SDValue OffsetField = CurDAG->getTargetConstant(ImmOffset, SL, MVT::i32);
2555
2556 const unsigned Opc = gwsIntrinToOpcode(IntrID);
2557 SmallVector<SDValue, 5> Ops;
2558 if (HasVSrc)
2559 Ops.push_back(N->getOperand(2));
2560 Ops.push_back(OffsetField);
2561 Ops.push_back(Chain);
2562
2563 SDNode *Selected = CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
2564 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Selected), {MMO});
2565}
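
A worked example of how the dynamic offset above reaches M0[21:16] (illustrative value):

//   BaseOffset = 5 (after v_readfirstlane into an SGPR)
//   m0 = 5 << 16 = 0x00050000  // s_lshl_b32 places 5 into bits [21:16]
// The resource id is then (<isa opaque base> + 5 + ImmOffset) % 64, as the
// comment at the top of this function describes.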
2566
2567void AMDGPUDAGToDAGISel::SelectInterpP1F16(SDNode *N) {
2568 if (Subtarget->getLDSBankCount() != 16) {
2569 // This is a single instruction with a pattern.
2570 SelectCode(N);
2571 return;
2572 }
2573
2574 SDLoc DL(N);
2575
2576 // This requires 2 instructions. It is possible to write a pattern to support
2577 // this, but the generated isel emitter doesn't correctly deal with multiple
2578 // output instructions using the same physical register input. The copy to m0
2579 // is incorrectly placed before the second instruction.
2580 //
2581 // TODO: Match source modifiers.
2582 //
2583 // def : Pat <
2584 // (int_amdgcn_interp_p1_f16
2585 // (VOP3Mods f32:$src0, i32:$src0_modifiers),
2586 // (i32 timm:$attrchan), (i32 timm:$attr),
2587 // (i1 timm:$high), M0),
2588 // (V_INTERP_P1LV_F16 $src0_modifiers, VGPR_32:$src0, timm:$attr,
2589 // timm:$attrchan, 0,
2590 // (V_INTERP_MOV_F32 2, timm:$attr, timm:$attrchan), timm:$high)> {
2591 // let Predicates = [has16BankLDS];
2592 // }
2593
2594 // 16 bank LDS
2595 SDValue ToM0 = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL, AMDGPU::M0,
2596 N->getOperand(5), SDValue());
2597
2598 SDVTList VTs = CurDAG->getVTList(MVT::f32, MVT::Other);
2599
2600 SDNode *InterpMov =
2601 CurDAG->getMachineNode(AMDGPU::V_INTERP_MOV_F32, DL, VTs, {
2602 CurDAG->getTargetConstant(2, DL, MVT::i32), // P0
2603 N->getOperand(3), // Attr
2604 N->getOperand(2), // Attrchan
2605 ToM0.getValue(1) // In glue
2606 });
2607
2608 SDNode *InterpP1LV =
2609 CurDAG->getMachineNode(AMDGPU::V_INTERP_P1LV_F16, DL, MVT::f32, {
2610 CurDAG->getTargetConstant(0, DL, MVT::i32), // $src0_modifiers
2611 N->getOperand(1), // Src0
2612 N->getOperand(3), // Attr
2613 N->getOperand(2), // Attrchan
2614 CurDAG->getTargetConstant(0, DL, MVT::i32), // $src2_modifiers
2615 SDValue(InterpMov, 0), // Src2 - holds two f16 values selected by high
2616 N->getOperand(4), // high
2617 CurDAG->getTargetConstant(0, DL, MVT::i1), // $clamp
2618 CurDAG->getTargetConstant(0, DL, MVT::i32), // $omod
2619 SDValue(InterpMov, 1)
2620 });
2621
2622 CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), SDValue(InterpP1LV, 0));
2623}
2624
2625void AMDGPUDAGToDAGISel::SelectINTRINSIC_W_CHAIN(SDNode *N) {
2626 unsigned IntrID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
2627 switch (IntrID) {
2628 case Intrinsic::amdgcn_ds_append:
2629 case Intrinsic::amdgcn_ds_consume: {
2630 if (N->getValueType(0) != MVT::i32)
2631 break;
2632 SelectDSAppendConsume(N, IntrID);
2633 return;
2634 }
2635 }
2636
2637 SelectCode(N);
2638}
2639
2640void AMDGPUDAGToDAGISel::SelectINTRINSIC_WO_CHAIN(SDNode *N) {
2641 unsigned IntrID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
2642 unsigned Opcode;
2643 switch (IntrID) {
2644 case Intrinsic::amdgcn_wqm:
2645 Opcode = AMDGPU::WQM;
2646 break;
2647 case Intrinsic::amdgcn_softwqm:
2648 Opcode = AMDGPU::SOFT_WQM;
2649 break;
2650 case Intrinsic::amdgcn_wwm:
2651 Opcode = AMDGPU::WWM;
2652 break;
2653 case Intrinsic::amdgcn_interp_p1_f16:
2654 SelectInterpP1F16(N);
2655 return;
2656 default:
2657 SelectCode(N);
2658 return;
2659 }
2660
2661 SDValue Src = N->getOperand(1);
2662 CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), {Src});
2663}
2664
2665void AMDGPUDAGToDAGISel::SelectINTRINSIC_VOID(SDNode *N) {
2666 unsigned IntrID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
2667 switch (IntrID) {
2668 case Intrinsic::amdgcn_ds_gws_init:
2669 case Intrinsic::amdgcn_ds_gws_barrier:
2670 case Intrinsic::amdgcn_ds_gws_sema_v:
2671 case Intrinsic::amdgcn_ds_gws_sema_br:
2672 case Intrinsic::amdgcn_ds_gws_sema_p:
2673 case Intrinsic::amdgcn_ds_gws_sema_release_all:
2674 SelectDS_GWS(N, IntrID);
2675 return;
2676 default:
2677 break;
2678 }
2679
2680 SelectCode(N);
2681}
2682
2683bool AMDGPUDAGToDAGISel::SelectVOP3ModsImpl(SDValue In, SDValue &Src,
2684 unsigned &Mods,
2685 bool AllowAbs) const {
2686 Mods = 0;
2687 Src = In;
2688
2689 if (Src.getOpcode() == ISD::FNEG) {
2690 Mods |= SISrcMods::NEG;
2691 Src = Src.getOperand(0);
2692 }
2693
2694 if (AllowAbs && Src.getOpcode() == ISD::FABS) {
2695 Mods |= SISrcMods::ABS;
2696 Src = Src.getOperand(0);
2697 }
2698
2699 return true;
2700}
2701
2702bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src,
2703 SDValue &SrcMods) const {
2704 unsigned Mods;
2705 if (SelectVOP3ModsImpl(In, Src, Mods)) {
2706 SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
2707 return true;
2708 }
2709
2710 return false;
2711}
2712
2713bool AMDGPUDAGToDAGISel::SelectVOP3BMods(SDValue In, SDValue &Src,
2714 SDValue &SrcMods) const {
2715 unsigned Mods;
2716 if (SelectVOP3ModsImpl(In, Src, Mods, /* AllowAbs */ false)) {
2717 SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
2718 return true;
2719 }
2720
2721 return false;
2722}
2723
2724bool AMDGPUDAGToDAGISel::SelectVOP3Mods_NNaN(SDValue In, SDValue &Src,
2725 SDValue &SrcMods) const {
2726 SelectVOP3Mods(In, Src, SrcMods);
2727 return isNoNanSrc(Src);
2728}
2729
2730bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(SDValue In, SDValue &Src) const {
2731 if (In.getOpcode() == ISD::FABS || In.getOpcode() == ISD::FNEG)
2732 return false;
2733
2734 Src = In;
2735 return true;
2736}
2737
2738bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(SDValue In, SDValue &Src,
2739 SDValue &SrcMods, SDValue &Clamp,
2740 SDValue &Omod) const {
2741 SDLoc DL(In);
2742 Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
2743 Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);
2744
2745 return SelectVOP3Mods(In, Src, SrcMods);
2746}
2747
2748bool AMDGPUDAGToDAGISel::SelectVOP3BMods0(SDValue In, SDValue &Src,
2749 SDValue &SrcMods, SDValue &Clamp,
2750 SDValue &Omod) const {
2751 SDLoc DL(In);
2752 Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
2753 Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);
2754
2755 return SelectVOP3BMods(In, Src, SrcMods);
2756}
2757
2758bool AMDGPUDAGToDAGISel::SelectVOP3OMods(SDValue In, SDValue &Src,
2759 SDValue &Clamp, SDValue &Omod) const {
2760 Src = In;
2761
2762 SDLoc DL(In);
2763 Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
2764 Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);
2765
2766 return true;
2767}
2768
2769bool AMDGPUDAGToDAGISel::SelectVOP3PMods(SDValue In, SDValue &Src,
2770 SDValue &SrcMods) const {
2771 unsigned Mods = 0;
2772 Src = In;
2773
2774 if (Src.getOpcode() == ISD::FNEG) {
2775 Mods ^= (SISrcMods::NEG | SISrcMods::NEG_HI);
2776 Src = Src.getOperand(0);
2777 }
2778
2779 if (Src.getOpcode() == ISD::BUILD_VECTOR) {
2780 unsigned VecMods = Mods;
2781
2782 SDValue Lo = stripBitcast(Src.getOperand(0));
2783 SDValue Hi = stripBitcast(Src.getOperand(1));
2784
2785 if (Lo.getOpcode() == ISD::FNEG) {
2786 Lo = stripBitcast(Lo.getOperand(0));
2787 Mods ^= SISrcMods::NEG;
2788 }
2789
2790 if (Hi.getOpcode() == ISD::FNEG) {
2791 Hi = stripBitcast(Hi.getOperand(0));
2792 Mods ^= SISrcMods::NEG_HI;
2793 }
2794
2795 if (isExtractHiElt(Lo, Lo))
2796 Mods |= SISrcMods::OP_SEL_0;
2797
2798 if (isExtractHiElt(Hi, Hi))
2799 Mods |= SISrcMods::OP_SEL_1;
2800
2801 Lo = stripExtractLoElt(Lo);
2802 Hi = stripExtractLoElt(Hi);
2803
2804 if (Lo == Hi && !isInlineImmediate(Lo.getNode())) {
2805 // Really a scalar input. Just select from the low half of the register to
2806 // avoid packing.
2807
2808 Src = Lo;
2809 SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
2810 return true;
2811 }
2812
2813 Mods = VecMods;
2814 }
2815
2816 // Packed instructions do not have abs modifiers.
2817 Mods |= SISrcMods::OP_SEL_1;
2818
2819 SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
2820 return true;
2821}
2822
2823bool AMDGPUDAGToDAGISel::SelectVOP3OpSel(SDValue In, SDValue &Src,
2824 SDValue &SrcMods) const {
2825 Src = In;
2826 // FIXME: Handle op_sel
2827 SrcMods = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
2828 return true;
2829}
2830
2831bool AMDGPUDAGToDAGISel::SelectVOP3OpSelMods(SDValue In, SDValue &Src,
2832 SDValue &SrcMods) const {
2833 // FIXME: Handle op_sel
2834 return SelectVOP3Mods(In, Src, SrcMods);
2835}
2836
2837 // The return value is not whether the match is possible (which it always is),
2838 // but whether or not a conversion is really used.
2839bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src,
2840 unsigned &Mods) const {
2841 Mods = 0;
2842 SelectVOP3ModsImpl(In, Src, Mods);
2843
2844 if (Src.getOpcode() == ISD::FP_EXTEND) {
2845 Src = Src.getOperand(0);
2846    assert(Src.getValueType() == MVT::f16);
2847 Src = stripBitcast(Src);
2848
2849 // Be careful about folding modifiers if we already have an abs. fneg is
2850 // applied last, so we don't want to apply an earlier fneg.
2851 if ((Mods & SISrcMods::ABS) == 0) {
2852 unsigned ModsTmp;
2853 SelectVOP3ModsImpl(Src, Src, ModsTmp);
2854
2855 if ((ModsTmp & SISrcMods::NEG) != 0)
2856 Mods ^= SISrcMods::NEG;
2857
2858 if ((ModsTmp & SISrcMods::ABS) != 0)
2859 Mods |= SISrcMods::ABS;
2860 }
2861
2862 // op_sel/op_sel_hi decide the source type and source.
2863 // If the source's op_sel_hi is set, it indicates to do a conversion from fp16.
2864 // If the source's op_sel is set, it picks the high half of the source
2865 // register.
2866
2867 Mods |= SISrcMods::OP_SEL_1;
2868 if (isExtractHiElt(Src, Src)) {
2869 Mods |= SISrcMods::OP_SEL_0;
2870
2871 // TODO: Should we try to look for neg/abs here?
2872 }
2873
2874 return true;
2875 }
2876
2877 return false;
2878}
2879
2880bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixMods(SDValue In, SDValue &Src,
2881 SDValue &SrcMods) const {
2882 unsigned Mods = 0;
2883 SelectVOP3PMadMixModsImpl(In, Src, Mods);
2884 SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
2885 return true;
2886}
2887
2888SDValue AMDGPUDAGToDAGISel::getHi16Elt(SDValue In) const {
2889 if (In.isUndef())
2890 return CurDAG->getUNDEF(MVT::i32);
2891
2892 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(In)) {
2893 SDLoc SL(In);
2894 return CurDAG->getConstant(C->getZExtValue() << 16, SL, MVT::i32);
2895 }
2896
2897 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(In)) {
2898 SDLoc SL(In);
2899 return CurDAG->getConstant(
2900 C->getValueAPF().bitcastToAPInt().getZExtValue() << 16, SL, MVT::i32);
2901 }
2902
2903 SDValue Src;
2904 if (isExtractHiElt(In, Src))
2905 return Src;
2906
2907 return SDValue();
2908}
2909
2910bool AMDGPUDAGToDAGISel::isVGPRImm(const SDNode * N) const {
2911  assert(CurDAG->getTarget().getTargetTriple().getArch() == Triple::amdgcn);
2912
2913 const SIRegisterInfo *SIRI =
2914 static_cast<const SIRegisterInfo *>(Subtarget->getRegisterInfo());
2915 const SIInstrInfo * SII =
2916 static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
2917
2918 unsigned Limit = 0;
2919 bool AllUsesAcceptSReg = true;
2920 for (SDNode::use_iterator U = N->use_begin(), E = SDNode::use_end();
2921 Limit < 10 && U != E; ++U, ++Limit) {
2922 const TargetRegisterClass *RC = getOperandRegClass(*U, U.getOperandNo());
2923
2924 // If the register class is unknown, it could be a register class
2925 // that needs to be an SGPR, e.g. from an inline asm
2926 // constraint.
2927 if (!RC || SIRI->isSGPRClass(RC))
2928 return false;
2929
2930 if (RC != &AMDGPU::VS_32RegClass) {
2931 AllUsesAcceptSReg = false;
2932 SDNode * User = *U;
2933 if (User->isMachineOpcode()) {
2934 unsigned Opc = User->getMachineOpcode();
2935 const MCInstrDesc &Desc = SII->get(Opc);
2936 if (Desc.isCommutable()) {
2937 unsigned OpIdx = Desc.getNumDefs() + U.getOperandNo();
2938 unsigned CommuteIdx1 = TargetInstrInfo::CommuteAnyOperandIndex;
2939 if (SII->findCommutedOpIndices(Desc, OpIdx, CommuteIdx1)) {
2940 unsigned CommutedOpNo = CommuteIdx1 - Desc.getNumDefs();
2941 const TargetRegisterClass *CommutedRC = getOperandRegClass(*U, CommutedOpNo);
2942 if (CommutedRC == &AMDGPU::VS_32RegClass)
2943 AllUsesAcceptSReg = true;
2944 }
2945 }
2946 }
2947 // If AllUsesAcceptSReg is still false, we have not succeeded in
2948 // commuting the current user. This means we have at least one use
2949 // that strictly requires a VGPR, so we will not attempt to commute
2950 // any other user instructions.
2951 if (!AllUsesAcceptSReg)
2952 break;
2953 }
2954 }
2955 return !AllUsesAcceptSReg && (Limit < 10);
2956}
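// In short: isVGPRImm returns true only when at least one scanned use
// strictly requires a VGPR and cannot be commuted into a VS_32
// (VGPR-or-SGPR) operand slot. The Limit < 10 cap keeps the scan cheap on
// nodes with many uses; if the cap is reached without finding such a use,
// the function conservatively returns false.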
2957
2958bool AMDGPUDAGToDAGISel::isUniformLoad(const SDNode * N) const {
2959 auto Ld = cast<LoadSDNode>(N);
2960
2961 return Ld->getAlignment() >= 4 &&
2962 (
2963 (
2964 (
2965 Ld->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS ||
2966 Ld->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT
2967 )
2968 &&
2969 !N->isDivergent()
2970 )
2971 ||
2972 (
2973 Subtarget->getScalarizeGlobalBehavior() &&
2974 Ld->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS &&
2975 Ld->isSimple() &&
2976 !N->isDivergent() &&
2977 static_cast<const SITargetLowering *>(
2978 getTargetLowering())->isMemOpHasNoClobberedMemOperand(N)
2979 )
2980 );
2981}
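// Roughly: a load counts as uniform when it is at least 4-byte aligned and
// either (a) it reads the constant address space (full or 32-bit) and is
// not divergent, or (b) scalarize-global behavior is enabled and it is a
// simple, non-divergent global-address load whose memory operand is known
// not to be clobbered.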
2982
2983void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
2984 const AMDGPUTargetLowering& Lowering =
2985 *static_cast<const AMDGPUTargetLowering*>(getTargetLowering());
2986 bool IsModified = false;
2987 do {
2988 IsModified = false;
2989
2990 // Go over all selected nodes and try to fold them a bit more
2991 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_begin();
2992 while (Position != CurDAG->allnodes_end()) {
2993 SDNode *Node = &*Position++;
2994 MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(Node);
2995 if (!MachineNode)
2996 continue;
2997
2998 SDNode *ResNode = Lowering.PostISelFolding(MachineNode, *CurDAG);
2999 if (ResNode != Node) {
3000 if (ResNode)
3001 ReplaceUses(Node, ResNode);
3002 IsModified = true;
3003 }
3004 }
3005 CurDAG->RemoveDeadNodes();
3006 } while (IsModified);
3007}
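// The loop above is a fixed-point iteration: each pass walks every machine
// node, lets PostISelFolding rewrite it, and the whole walk repeats until a
// pass makes no change. RemoveDeadNodes runs after each pass so nodes
// orphaned by ReplaceUses are not revisited on the next round.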
3008
3009bool R600DAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
3010 Subtarget = &MF.getSubtarget<R600Subtarget>();
3011 return SelectionDAGISel::runOnMachineFunction(MF);
3012}
3013
3014bool R600DAGToDAGISel::isConstantLoad(const MemSDNode *N, int CbId) const {
3015 if (!N->readMem())
3016 return false;
3017 if (CbId == -1)
3018 return N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS ||
3019 N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT;
3020
3021 return N->getAddressSpace() == AMDGPUAS::CONSTANT_BUFFER_0 + CbId;
3022}
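// In short: with CbId == -1 this accepts any read from the generic constant
// address space; otherwise it matches exactly one constant buffer, relying
// on the R600 constant-buffer address spaces being numbered consecutively
// from AMDGPUAS::CONSTANT_BUFFER_0.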
3023
3024bool R600DAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr,
3025 SDValue& IntPtr) {
3026 if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Addr)) {
3027 IntPtr = CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, SDLoc(Addr),
3028 true);
3029 return true;
3030 }
3031 return false;
3032}
3033
3034bool R600DAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr,
3035 SDValue& BaseReg, SDValue &Offset) {
3036 if (!isa<ConstantSDNode>(Addr)) {
3037 BaseReg = Addr;
3038 Offset = CurDAG->getIntPtrConstant(0, SDLoc(Addr), true);
3039 return true;
3040 }
3041 return false;
3042}
3043
3044void R600DAGToDAGISel::Select(SDNode *N) {
3045 unsigned int Opc = N->getOpcode();
3046 if (N->isMachineOpcode()) {
3047 N->setNodeId(-1);
3048 return; // Already selected.
3049 }
3050
3051 switch (Opc) {
3052 default: break;
3053 case AMDGPUISD::BUILD_VERTICAL_VECTOR:
3054 case ISD::SCALAR_TO_VECTOR:
3055 case ISD::BUILD_VECTOR: {
3056 EVT VT = N->getValueType(0);
3057 unsigned NumVectorElts = VT.getVectorNumElements();
3058 unsigned RegClassID;
3059 // BUILD_VECTOR was lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG,
3060 // which adds a 128-bit reg copy when going through the
3061 // TwoAddressInstructions pass. We want to avoid 128-bit copies as much as
3062 // possible because they can't be bundled by our scheduler.
3063 switch(NumVectorElts) {
3064 case 2: RegClassID = R600::R600_Reg64RegClassID; break;
3065 case 4:
3066 if (Opc == AMDGPUISD::BUILD_VERTICAL_VECTOR)
3067 RegClassID = R600::R600_Reg128VerticalRegClassID;
3068 else
3069 RegClassID = R600::R600_Reg128RegClassID;
3070 break;
3071 default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR");
3072 }
3073 SelectBuildVector(N, RegClassID);
3074 return;
3075 }
3076 }
3077
3078 SelectCode(N);
3079}
3080
3081bool R600DAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
3082 SDValue &Offset) {
3083 ConstantSDNode *C;
3084 SDLoc DL(Addr);
3085
3086 if ((C = dyn_cast<ConstantSDNode>(Addr))) {
3087 Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
3088 Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
3089 } else if ((Addr.getOpcode() == AMDGPUISD::DWORDADDR) &&
3090 (C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) {
3091 Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
3092 Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
3093 } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
3094 (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
3095 Base = Addr.getOperand(0);
3096 Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
3097 } else {
3098 Base = Addr;
3099 Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
3100 }
3101
3102 return true;
3103}
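// For example (illustrative; $base is a placeholder operand): an address of
// the form (add/or $base, C) selects Base = $base and Offset = C; a bare
// constant, or a DWORDADDR of one, selects the INDIRECT_BASE_ADDR register
// with the constant as the offset; anything else falls back to Base = Addr,
// Offset = 0. Since every case is handled, the pattern always matches and
// the function always returns true.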
3104
3105bool R600DAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
3106 SDValue &Offset) {
3107 ConstantSDNode *IMMOffset;
3108
3109 if (Addr.getOpcode() == ISD::ADD
3110 && (IMMOffset = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
3111 && isInt<16>(IMMOffset->getZExtValue())) {
3112
3113 Base = Addr.getOperand(0);
3114 Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
3115 MVT::i32);
3116 return true;
3117 // If the pointer address is constant, we can move it to the offset field.
3118 } else if ((IMMOffset = dyn_cast<ConstantSDNode>(Addr))
3119 && isInt<16>(IMMOffset->getZExtValue())) {
3120 Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
3121 SDLoc(CurDAG->getEntryNode()),
3122 R600::ZERO, MVT::i32);
3123 Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
3124 MVT::i32);
3125 return true;
3126 }
3127
3128 // Default case, no offset
3129 Base = Addr;
3130 Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
3131 return true;
3132}
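// For example (illustrative; $ptr is a placeholder operand): (add $ptr, 16)
// yields Base = $ptr and Offset = 16, since 16 fits in a signed 16-bit
// immediate; a bare constant address such as 32 yields Base = R600::ZERO
// (read via CopyFromReg) and Offset = 32; any other address, or an offset
// outside the isInt<16> range, takes the default Base = Addr, Offset = 0
// path.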