File: llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
Warning: line 1925, column 13: Value stored to 'RemainderOffset' during its initialization is never read
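
The flagged line (1925) is past the end of this excerpt, but the complaint corresponds to a dead initializer: a variable is assigned at its declaration and then unconditionally overwritten before that value is ever read. A minimal sketch of the pattern, with hypothetical names (this is not the code at line 1925):

  #include <cstdint>

  // The value stored into 'RemainderOffset' during its initialization is
  // never read: it is overwritten before any use, so the analyzer flags
  // the initializer as a dead store.
  static int64_t splitImmOffset(int64_t COffsetVal, int64_t MaxImm) {
    int64_t RemainderOffset = COffsetVal;  // dead store
    RemainderOffset = COffsetVal % MaxImm; // overwrites the initializer
    return RemainderOffset;
  }

The usual fix is to drop the redundant initializer or fold the final computation into the declaration.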
1 | //===-- AMDGPUISelDAGToDAG.cpp - A dag to dag inst selector for AMDGPU ----===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //==-----------------------------------------------------------------------===// |
8 | // |
9 | /// \file |
10 | /// Defines an instruction selector for the AMDGPU target. |
11 | // |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #include "AMDGPU.h" |
15 | #include "AMDGPUTargetMachine.h" |
16 | #include "SIMachineFunctionInfo.h" |
17 | #include "llvm/Analysis/LegacyDivergenceAnalysis.h" |
18 | #include "llvm/Analysis/ValueTracking.h" |
19 | #include "llvm/CodeGen/FunctionLoweringInfo.h" |
20 | #include "llvm/CodeGen/SelectionDAG.h" |
21 | #include "llvm/CodeGen/SelectionDAGISel.h" |
22 | #include "llvm/CodeGen/SelectionDAGNodes.h" |
23 | #include "llvm/IR/IntrinsicsAMDGPU.h" |
24 | #include "llvm/InitializePasses.h" |
25 | |
26 | #ifdef EXPENSIVE_CHECKS |
27 | #include "llvm/Analysis/LoopInfo.h" |
28 | #include "llvm/IR/Dominators.h" |
29 | #endif |
30 | |
31 | #define DEBUG_TYPE "isel" |
32 | |
33 | using namespace llvm; |
34 | |
35 | namespace llvm { |
36 | |
37 | class R600InstrInfo; |
38 | |
39 | } // end namespace llvm |
40 | |
41 | //===----------------------------------------------------------------------===// |
42 | // Instruction Selector Implementation |
43 | //===----------------------------------------------------------------------===// |
44 | |
45 | namespace { |
46 | |
47 | static bool isNullConstantOrUndef(SDValue V) { |
48 | if (V.isUndef()) |
49 | return true; |
50 | |
51 | ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V); |
52 | return Const != nullptr && Const->isNullValue(); |
53 | } |
54 | |
55 | static bool getConstantValue(SDValue N, uint32_t &Out) { |
56 | // This is only used for packed vectors, where using 0 for undef should |
57 | // always be good. |
58 | if (N.isUndef()) { |
59 | Out = 0; |
60 | return true; |
61 | } |
62 | |
63 | if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N)) { |
64 | Out = C->getAPIntValue().getSExtValue(); |
65 | return true; |
66 | } |
67 | |
68 | if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N)) { |
69 | Out = C->getValueAPF().bitcastToAPInt().getSExtValue(); |
70 | return true; |
71 | } |
72 | |
73 | return false; |
74 | } |
75 | |
76 | // TODO: Handle undef as zero |
77 | static SDNode *packConstantV2I16(const SDNode *N, SelectionDAG &DAG, |
78 | bool Negate = false) { |
79 | assert(N->getOpcode() == ISD::BUILD_VECTOR && N->getNumOperands() == 2); |
80 | uint32_t LHSVal, RHSVal; |
81 | if (getConstantValue(N->getOperand(0), LHSVal) && |
82 | getConstantValue(N->getOperand(1), RHSVal)) { |
83 | SDLoc SL(N); |
84 | uint32_t K = Negate ? |
85 | (-LHSVal & 0xffff) | (-RHSVal << 16) : |
86 | (LHSVal & 0xffff) | (RHSVal << 16); |
87 | return DAG.getMachineNode(AMDGPU::S_MOV_B32, SL, N->getValueType(0), |
88 | DAG.getTargetConstant(K, SL, MVT::i32)); |
89 | } |
90 | |
91 | return nullptr; |
92 | } |
93 | |
94 | static SDNode *packNegConstantV2I16(const SDNode *N, SelectionDAG &DAG) { |
95 | return packConstantV2I16(N, DAG, true); |
96 | } |
97 | |
98 | /// AMDGPU specific code to select AMDGPU machine instructions for |
99 | /// SelectionDAG operations. |
100 | class AMDGPUDAGToDAGISel : public SelectionDAGISel { |
101 | // Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can |
102 | // make the right decision when generating code for different targets. |
103 | const GCNSubtarget *Subtarget; |
104 | |
105 | // Default FP mode for the current function. |
106 | AMDGPU::SIModeRegisterDefaults Mode; |
107 | |
108 | bool EnableLateStructurizeCFG; |
109 | |
110 | public: |
111 | explicit AMDGPUDAGToDAGISel(TargetMachine *TM = nullptr, |
112 | CodeGenOpt::Level OptLevel = CodeGenOpt::Default) |
113 | : SelectionDAGISel(*TM, OptLevel) { |
114 | EnableLateStructurizeCFG = AMDGPUTargetMachine::EnableLateStructurizeCFG; |
115 | } |
116 | ~AMDGPUDAGToDAGISel() override = default; |
117 | |
118 | void getAnalysisUsage(AnalysisUsage &AU) const override { |
119 | AU.addRequired<AMDGPUArgumentUsageInfo>(); |
120 | AU.addRequired<LegacyDivergenceAnalysis>(); |
121 | #ifdef EXPENSIVE_CHECKS |
122 | AU.addRequired<DominatorTreeWrapperPass>(); |
123 | AU.addRequired<LoopInfoWrapperPass>(); |
124 | #endif |
125 | SelectionDAGISel::getAnalysisUsage(AU); |
126 | } |
127 | |
128 | bool matchLoadD16FromBuildVector(SDNode *N) const; |
129 | |
130 | bool runOnMachineFunction(MachineFunction &MF) override; |
131 | void PreprocessISelDAG() override; |
132 | void Select(SDNode *N) override; |
133 | StringRef getPassName() const override; |
134 | void PostprocessISelDAG() override; |
135 | |
136 | protected: |
137 | void SelectBuildVector(SDNode *N, unsigned RegClassID); |
138 | |
139 | private: |
140 | std::pair<SDValue, SDValue> foldFrameIndex(SDValue N) const; |
141 | bool isNoNanSrc(SDValue N) const; |
142 | bool isInlineImmediate(const SDNode *N, bool Negated = false) const; |
143 | bool isNegInlineImmediate(const SDNode *N) const { |
144 | return isInlineImmediate(N, true); |
145 | } |
146 | |
147 | bool isInlineImmediate16(int64_t Imm) const { |
148 | return AMDGPU::isInlinableLiteral16(Imm, Subtarget->hasInv2PiInlineImm()); |
149 | } |
150 | |
151 | bool isInlineImmediate32(int64_t Imm) const { |
152 | return AMDGPU::isInlinableLiteral32(Imm, Subtarget->hasInv2PiInlineImm()); |
153 | } |
154 | |
155 | bool isInlineImmediate64(int64_t Imm) const { |
156 | return AMDGPU::isInlinableLiteral64(Imm, Subtarget->hasInv2PiInlineImm()); |
157 | } |
158 | |
159 | bool isInlineImmediate(const APFloat &Imm) const { |
160 | return Subtarget->getInstrInfo()->isInlineConstant(Imm); |
161 | } |
162 | |
163 | bool isVGPRImm(const SDNode *N) const; |
164 | bool isUniformLoad(const SDNode *N) const; |
165 | bool isUniformBr(const SDNode *N) const; |
166 | |
167 | bool isBaseWithConstantOffset64(SDValue Addr, SDValue &LHS, |
168 | SDValue &RHS) const; |
169 | |
170 | MachineSDNode *buildSMovImm64(SDLoc &DL, uint64_t Val, EVT VT) const; |
171 | |
172 | SDNode *glueCopyToOp(SDNode *N, SDValue NewChain, SDValue Glue) const; |
173 | SDNode *glueCopyToM0(SDNode *N, SDValue Val) const; |
174 | SDNode *glueCopyToM0LDSInit(SDNode *N) const; |
175 | |
176 | const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const; |
177 | virtual bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset); |
178 | virtual bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset); |
179 | bool isDSOffsetLegal(SDValue Base, unsigned Offset) const; |
180 | bool isDSOffset2Legal(SDValue Base, unsigned Offset0, unsigned Offset1, |
181 | unsigned Size) const; |
182 | bool SelectDS1Addr1Offset(SDValue Ptr, SDValue &Base, SDValue &Offset) const; |
183 | bool SelectDS64Bit4ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0, |
184 | SDValue &Offset1) const; |
185 | bool SelectDS128Bit8ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0, |
186 | SDValue &Offset1) const; |
187 | bool SelectDSReadWrite2(SDValue Ptr, SDValue &Base, SDValue &Offset0, |
188 | SDValue &Offset1, unsigned Size) const; |
189 | bool SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr, |
190 | SDValue &SOffset, SDValue &Offset, SDValue &Offen, |
191 | SDValue &Idxen, SDValue &Addr64, SDValue &GLC, SDValue &SLC, |
192 | SDValue &TFE, SDValue &DLC, SDValue &SWZ, |
193 | SDValue &SCCB) const; |
194 | bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr, |
195 | SDValue &SOffset, SDValue &Offset, SDValue &GLC, |
196 | SDValue &SLC, SDValue &TFE, SDValue &DLC, |
197 | SDValue &SWZ, SDValue &SCCB) const; |
198 | bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, |
199 | SDValue &VAddr, SDValue &SOffset, SDValue &Offset, |
200 | SDValue &SLC) const; |
201 | bool SelectMUBUFScratchOffen(SDNode *Parent, |
202 | SDValue Addr, SDValue &RSrc, SDValue &VAddr, |
203 | SDValue &SOffset, SDValue &ImmOffset) const; |
204 | bool SelectMUBUFScratchOffset(SDNode *Parent, |
205 | SDValue Addr, SDValue &SRsrc, SDValue &Soffset, |
206 | SDValue &Offset) const; |
207 | |
208 | bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &SOffset, |
209 | SDValue &Offset, SDValue &GLC, SDValue &SLC, |
210 | SDValue &TFE, SDValue &DLC, SDValue &SWZ, |
211 | SDValue &SCCB) const; |
212 | bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset, |
213 | SDValue &Offset, SDValue &SLC) const; |
214 | bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset, |
215 | SDValue &Offset) const; |
216 | |
217 | template <bool IsSigned> |
218 | bool SelectFlatOffset(SDNode *N, SDValue Addr, SDValue &VAddr, |
219 | SDValue &Offset) const; |
220 | bool SelectGlobalSAddr(SDNode *N, SDValue Addr, SDValue &SAddr, |
221 | SDValue &VOffset, SDValue &Offset) const; |
222 | bool SelectScratchSAddr(SDNode *N, SDValue Addr, SDValue &SAddr, |
223 | SDValue &Offset) const; |
224 | |
225 | bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset, |
226 | bool &Imm) const; |
227 | SDValue Expand32BitAddress(SDValue Addr) const; |
228 | bool SelectSMRD(SDValue Addr, SDValue &SBase, SDValue &Offset, |
229 | bool &Imm) const; |
230 | bool SelectSMRDImm(SDValue Addr, SDValue &SBase, SDValue &Offset) const; |
231 | bool SelectSMRDImm32(SDValue Addr, SDValue &SBase, SDValue &Offset) const; |
232 | bool SelectSMRDSgpr(SDValue Addr, SDValue &SBase, SDValue &Offset) const; |
233 | bool SelectSMRDBufferImm(SDValue Addr, SDValue &Offset) const; |
234 | bool SelectSMRDBufferImm32(SDValue Addr, SDValue &Offset) const; |
235 | bool SelectMOVRELOffset(SDValue Index, SDValue &Base, SDValue &Offset) const; |
236 | |
237 | bool SelectVOP3Mods_NNaN(SDValue In, SDValue &Src, SDValue &SrcMods) const; |
238 | bool SelectVOP3ModsImpl(SDValue In, SDValue &Src, unsigned &SrcMods, |
239 | bool AllowAbs = true) const; |
240 | bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const; |
241 | bool SelectVOP3BMods(SDValue In, SDValue &Src, SDValue &SrcMods) const; |
242 | bool SelectVOP3NoMods(SDValue In, SDValue &Src) const; |
243 | bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods, |
244 | SDValue &Clamp, SDValue &Omod) const; |
245 | bool SelectVOP3BMods0(SDValue In, SDValue &Src, SDValue &SrcMods, |
246 | SDValue &Clamp, SDValue &Omod) const; |
247 | bool SelectVOP3NoMods0(SDValue In, SDValue &Src, SDValue &SrcMods, |
248 | SDValue &Clamp, SDValue &Omod) const; |
249 | |
250 | bool SelectVOP3OMods(SDValue In, SDValue &Src, |
251 | SDValue &Clamp, SDValue &Omod) const; |
252 | |
253 | bool SelectVOP3PMods(SDValue In, SDValue &Src, SDValue &SrcMods) const; |
254 | |
255 | bool SelectVOP3OpSel(SDValue In, SDValue &Src, SDValue &SrcMods) const; |
256 | |
257 | bool SelectVOP3OpSelMods(SDValue In, SDValue &Src, SDValue &SrcMods) const; |
258 | bool SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src, unsigned &Mods) const; |
259 | bool SelectVOP3PMadMixMods(SDValue In, SDValue &Src, SDValue &SrcMods) const; |
260 | |
261 | SDValue getHi16Elt(SDValue In) const; |
262 | |
263 | SDValue getMaterializedScalarImm32(int64_t Val, const SDLoc &DL) const; |
264 | |
265 | void SelectADD_SUB_I64(SDNode *N); |
266 | void SelectAddcSubb(SDNode *N); |
267 | void SelectUADDO_USUBO(SDNode *N); |
268 | void SelectDIV_SCALE(SDNode *N); |
269 | void SelectMAD_64_32(SDNode *N); |
270 | void SelectFMA_W_CHAIN(SDNode *N); |
271 | void SelectFMUL_W_CHAIN(SDNode *N); |
272 | |
273 | SDNode *getS_BFE(unsigned Opcode, const SDLoc &DL, SDValue Val, |
274 | uint32_t Offset, uint32_t Width); |
275 | void SelectS_BFEFromShifts(SDNode *N); |
276 | void SelectS_BFE(SDNode *N); |
277 | bool isCBranchSCC(const SDNode *N) const; |
278 | void SelectBRCOND(SDNode *N); |
279 | void SelectFMAD_FMA(SDNode *N); |
280 | void SelectATOMIC_CMP_SWAP(SDNode *N); |
281 | void SelectDSAppendConsume(SDNode *N, unsigned IntrID); |
282 | void SelectDS_GWS(SDNode *N, unsigned IntrID); |
283 | void SelectInterpP1F16(SDNode *N); |
284 | void SelectINTRINSIC_W_CHAIN(SDNode *N); |
285 | void SelectINTRINSIC_WO_CHAIN(SDNode *N); |
286 | void SelectINTRINSIC_VOID(SDNode *N); |
287 | |
288 | protected: |
289 | // Include the pieces autogenerated from the target description. |
290 | #include "AMDGPUGenDAGISel.inc" |
291 | }; |
292 | |
293 | class R600DAGToDAGISel : public AMDGPUDAGToDAGISel { |
294 | const R600Subtarget *Subtarget; |
295 | |
296 | bool isConstantLoad(const MemSDNode *N, int cbID) const; |
297 | bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue& IntPtr); |
298 | bool SelectGlobalValueVariableOffset(SDValue Addr, SDValue &BaseReg, |
299 | SDValue& Offset); |
300 | public: |
301 | explicit R600DAGToDAGISel(TargetMachine *TM, CodeGenOpt::Level OptLevel) : |
302 | AMDGPUDAGToDAGISel(TM, OptLevel) {} |
303 | |
304 | void Select(SDNode *N) override; |
305 | |
306 | bool SelectADDRIndirect(SDValue Addr, SDValue &Base, |
307 | SDValue &Offset) override; |
308 | bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, |
309 | SDValue &Offset) override; |
310 | |
311 | bool runOnMachineFunction(MachineFunction &MF) override; |
312 | |
313 | void PreprocessISelDAG() override {} |
314 | |
315 | protected: |
316 | // Include the pieces autogenerated from the target description. |
317 | #include "R600GenDAGISel.inc" |
318 | }; |
319 | |
320 | static SDValue stripBitcast(SDValue Val) { |
321 | return Val.getOpcode() == ISD::BITCAST ? Val.getOperand(0) : Val; |
322 | } |
323 | |
324 | // Figure out if this is really an extract of the high 16-bits of a dword. |
325 | static bool isExtractHiElt(SDValue In, SDValue &Out) { |
326 | In = stripBitcast(In); |
327 | |
328 | if (In.getOpcode() == ISD::EXTRACT_VECTOR_ELT) { |
329 | if (ConstantSDNode *Idx = dyn_cast<ConstantSDNode>(In.getOperand(1))) { |
330 | if (!Idx->isOne()) |
331 | return false; |
332 | Out = In.getOperand(0); |
333 | return true; |
334 | } |
335 | } |
336 | |
337 | if (In.getOpcode() != ISD::TRUNCATE) |
338 | return false; |
339 | |
340 | SDValue Srl = In.getOperand(0); |
341 | if (Srl.getOpcode() == ISD::SRL) { |
342 | if (ConstantSDNode *ShiftAmt = dyn_cast<ConstantSDNode>(Srl.getOperand(1))) { |
343 | if (ShiftAmt->getZExtValue() == 16) { |
344 | Out = stripBitcast(Srl.getOperand(0)); |
345 | return true; |
346 | } |
347 | } |
348 | } |
349 | |
350 | return false; |
351 | } |
352 | |
353 | // Look through operations that obscure just looking at the low 16-bits of the |
354 | // same register. |
355 | static SDValue stripExtractLoElt(SDValue In) { |
356 | if (In.getOpcode() == ISD::EXTRACT_VECTOR_ELT) { |
357 | if (ConstantSDNode *Idx = dyn_cast<ConstantSDNode>(In.getOperand(1))) { |
358 | if (Idx->isNullValue() && In.getValueSizeInBits() <= 32) |
359 | return In.getOperand(0); |
360 | } |
361 | } |
362 | |
363 | if (In.getOpcode() == ISD::TRUNCATE) { |
364 | SDValue Src = In.getOperand(0); |
365 | if (Src.getValueType().getSizeInBits() == 32) |
366 | return stripBitcast(Src); |
367 | } |
368 | |
369 | return In; |
370 | } |
371 | |
372 | } // end anonymous namespace |
373 | |
374 | INITIALIZE_PASS_BEGIN(AMDGPUDAGToDAGISel, "amdgpu-isel", |
375 | "AMDGPU DAG->DAG Pattern Instruction Selection", false, false) |
376 | INITIALIZE_PASS_DEPENDENCY(AMDGPUArgumentUsageInfo) |
377 | INITIALIZE_PASS_DEPENDENCY(AMDGPUPerfHintAnalysis) |
378 | INITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis) |
379 | #ifdef EXPENSIVE_CHECKS |
380 | INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) |
381 | INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) |
382 | #endif |
383 | INITIALIZE_PASS_END(AMDGPUDAGToDAGISel, "amdgpu-isel", |
384 | "AMDGPU DAG->DAG Pattern Instruction Selection", false, false) |
385 | |
386 | /// This pass converts a legalized DAG into an AMDGPU-specific |
387 | /// DAG, ready for instruction scheduling. |
388 | FunctionPass *llvm::createAMDGPUISelDag(TargetMachine *TM, |
389 | CodeGenOpt::Level OptLevel) { |
390 | return new AMDGPUDAGToDAGISel(TM, OptLevel); |
391 | } |
392 | |
393 | /// This pass converts a legalized DAG into an R600-specific |
394 | /// DAG, ready for instruction scheduling. |
395 | FunctionPass *llvm::createR600ISelDag(TargetMachine *TM, |
396 | CodeGenOpt::Level OptLevel) { |
397 | return new R600DAGToDAGISel(TM, OptLevel); |
398 | } |
399 | |
400 | bool AMDGPUDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) { |
401 | #ifdef EXPENSIVE_CHECKS |
402 | DominatorTree & DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); |
403 | LoopInfo * LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); |
404 | for (auto &L : LI->getLoopsInPreorder()) { |
405 | assert(L->isLCSSAForm(DT)); |
406 | } |
407 | #endif |
408 | Subtarget = &MF.getSubtarget<GCNSubtarget>(); |
409 | Mode = AMDGPU::SIModeRegisterDefaults(MF.getFunction()); |
410 | return SelectionDAGISel::runOnMachineFunction(MF); |
411 | } |
412 | |
413 | bool AMDGPUDAGToDAGISel::matchLoadD16FromBuildVector(SDNode *N) const { |
414 | assert(Subtarget->d16PreservesUnusedBits()); |
415 | MVT VT = N->getValueType(0).getSimpleVT(); |
416 | if (VT != MVT::v2i16 && VT != MVT::v2f16) |
417 | return false; |
418 | |
419 | SDValue Lo = N->getOperand(0); |
420 | SDValue Hi = N->getOperand(1); |
421 | |
422 | LoadSDNode *LdHi = dyn_cast<LoadSDNode>(stripBitcast(Hi)); |
423 | |
424 | // build_vector lo, (load ptr) -> load_d16_hi ptr, lo |
425 | // build_vector lo, (zextload ptr from i8) -> load_d16_hi_u8 ptr, lo |
426 | // build_vector lo, (sextload ptr from i8) -> load_d16_hi_i8 ptr, lo |
427 | |
428 | // Need to check for possible indirect dependencies on the other half of the |
429 | // vector to avoid introducing a cycle. |
430 | if (LdHi && Hi.hasOneUse() && !LdHi->isPredecessorOf(Lo.getNode())) { |
431 | SDVTList VTList = CurDAG->getVTList(VT, MVT::Other); |
432 | |
433 | SDValue TiedIn = CurDAG->getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, Lo); |
434 | SDValue Ops[] = { |
435 | LdHi->getChain(), LdHi->getBasePtr(), TiedIn |
436 | }; |
437 | |
438 | unsigned LoadOp = AMDGPUISD::LOAD_D16_HI; |
439 | if (LdHi->getMemoryVT() == MVT::i8) { |
440 | LoadOp = LdHi->getExtensionType() == ISD::SEXTLOAD ? |
441 | AMDGPUISD::LOAD_D16_HI_I8 : AMDGPUISD::LOAD_D16_HI_U8; |
442 | } else { |
443 | assert(LdHi->getMemoryVT() == MVT::i16); |
444 | } |
445 | |
446 | SDValue NewLoadHi = |
447 | CurDAG->getMemIntrinsicNode(LoadOp, SDLoc(LdHi), VTList, |
448 | Ops, LdHi->getMemoryVT(), |
449 | LdHi->getMemOperand()); |
450 | |
451 | CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), NewLoadHi); |
452 | CurDAG->ReplaceAllUsesOfValueWith(SDValue(LdHi, 1), NewLoadHi.getValue(1)); |
453 | return true; |
454 | } |
455 | |
456 | // build_vector (load ptr), hi -> load_d16_lo ptr, hi |
457 | // build_vector (zextload ptr from i8), hi -> load_d16_lo_u8 ptr, hi |
458 | // build_vector (sextload ptr from i8), hi -> load_d16_lo_i8 ptr, hi |
459 | LoadSDNode *LdLo = dyn_cast<LoadSDNode>(stripBitcast(Lo)); |
460 | if (LdLo && Lo.hasOneUse()) { |
461 | SDValue TiedIn = getHi16Elt(Hi); |
462 | if (!TiedIn || LdLo->isPredecessorOf(TiedIn.getNode())) |
463 | return false; |
464 | |
465 | SDVTList VTList = CurDAG->getVTList(VT, MVT::Other); |
466 | unsigned LoadOp = AMDGPUISD::LOAD_D16_LO; |
467 | if (LdLo->getMemoryVT() == MVT::i8) { |
468 | LoadOp = LdLo->getExtensionType() == ISD::SEXTLOAD ? |
469 | AMDGPUISD::LOAD_D16_LO_I8 : AMDGPUISD::LOAD_D16_LO_U8; |
470 | } else { |
471 | assert(LdLo->getMemoryVT() == MVT::i16); |
472 | } |
473 | |
474 | TiedIn = CurDAG->getNode(ISD::BITCAST, SDLoc(N), VT, TiedIn); |
475 | |
476 | SDValue Ops[] = { |
477 | LdLo->getChain(), LdLo->getBasePtr(), TiedIn |
478 | }; |
479 | |
480 | SDValue NewLoadLo = |
481 | CurDAG->getMemIntrinsicNode(LoadOp, SDLoc(LdLo), VTList, |
482 | Ops, LdLo->getMemoryVT(), |
483 | LdLo->getMemOperand()); |
484 | |
485 | CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), NewLoadLo); |
486 | CurDAG->ReplaceAllUsesOfValueWith(SDValue(LdLo, 1), NewLoadLo.getValue(1)); |
487 | return true; |
488 | } |
489 | |
490 | return false; |
491 | } |
492 | |
493 | void AMDGPUDAGToDAGISel::PreprocessISelDAG() { |
494 | if (!Subtarget->d16PreservesUnusedBits()) |
495 | return; |
496 | |
497 | SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end(); |
498 | |
499 | bool MadeChange = false; |
500 | while (Position != CurDAG->allnodes_begin()) { |
501 | SDNode *N = &*--Position; |
502 | if (N->use_empty()) |
503 | continue; |
504 | |
505 | switch (N->getOpcode()) { |
506 | case ISD::BUILD_VECTOR: |
507 | MadeChange |= matchLoadD16FromBuildVector(N); |
508 | break; |
509 | default: |
510 | break; |
511 | } |
512 | } |
513 | |
514 | if (MadeChange) { |
515 | CurDAG->RemoveDeadNodes(); |
516 | LLVM_DEBUG(dbgs() << "After PreProcess:\n"; |
517 | CurDAG->dump();); |
518 | } |
519 | } |
520 | |
521 | bool AMDGPUDAGToDAGISel::isNoNanSrc(SDValue N) const { |
522 | if (TM.Options.NoNaNsFPMath) |
523 | return true; |
524 | |
525 | // TODO: Move into isKnownNeverNaN |
526 | if (N->getFlags().hasNoNaNs()) |
527 | return true; |
528 | |
529 | return CurDAG->isKnownNeverNaN(N); |
530 | } |
531 | |
532 | bool AMDGPUDAGToDAGISel::isInlineImmediate(const SDNode *N, |
533 | bool Negated) const { |
534 | if (N->isUndef()) |
535 | return true; |
536 | |
537 | const SIInstrInfo *TII = Subtarget->getInstrInfo(); |
538 | if (Negated) { |
539 | if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N)) |
540 | return TII->isInlineConstant(-C->getAPIntValue()); |
541 | |
542 | if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N)) |
543 | return TII->isInlineConstant(-C->getValueAPF().bitcastToAPInt()); |
544 | |
545 | } else { |
546 | if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N)) |
547 | return TII->isInlineConstant(C->getAPIntValue()); |
548 | |
549 | if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N)) |
550 | return TII->isInlineConstant(C->getValueAPF().bitcastToAPInt()); |
551 | } |
552 | |
553 | return false; |
554 | } |
555 | |
556 | /// Determine the register class for \p OpNo |
557 | /// \returns The register class of the virtual register that will be used for |
558 | /// the given operand number \p OpNo or NULL if the register class cannot be |
559 | /// determined. |
560 | const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N, |
561 | unsigned OpNo) const { |
562 | if (!N->isMachineOpcode()) { |
563 | if (N->getOpcode() == ISD::CopyToReg) { |
564 | Register Reg = cast<RegisterSDNode>(N->getOperand(1))->getReg(); |
565 | if (Reg.isVirtual()) { |
566 | MachineRegisterInfo &MRI = CurDAG->getMachineFunction().getRegInfo(); |
567 | return MRI.getRegClass(Reg); |
568 | } |
569 | |
570 | const SIRegisterInfo *TRI |
571 | = static_cast<const GCNSubtarget *>(Subtarget)->getRegisterInfo(); |
572 | return TRI->getPhysRegClass(Reg); |
573 | } |
574 | |
575 | return nullptr; |
576 | } |
577 | |
578 | switch (N->getMachineOpcode()) { |
579 | default: { |
580 | const MCInstrDesc &Desc = |
581 | Subtarget->getInstrInfo()->get(N->getMachineOpcode()); |
582 | unsigned OpIdx = Desc.getNumDefs() + OpNo; |
583 | if (OpIdx >= Desc.getNumOperands()) |
584 | return nullptr; |
585 | int RegClass = Desc.OpInfo[OpIdx].RegClass; |
586 | if (RegClass == -1) |
587 | return nullptr; |
588 | |
589 | return Subtarget->getRegisterInfo()->getRegClass(RegClass); |
590 | } |
591 | case AMDGPU::REG_SEQUENCE: { |
592 | unsigned RCID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue(); |
593 | const TargetRegisterClass *SuperRC = |
594 | Subtarget->getRegisterInfo()->getRegClass(RCID); |
595 | |
596 | SDValue SubRegOp = N->getOperand(OpNo + 1); |
597 | unsigned SubRegIdx = cast<ConstantSDNode>(SubRegOp)->getZExtValue(); |
598 | return Subtarget->getRegisterInfo()->getSubClassWithSubReg(SuperRC, |
599 | SubRegIdx); |
600 | } |
601 | } |
602 | } |
603 | |
604 | SDNode *AMDGPUDAGToDAGISel::glueCopyToOp(SDNode *N, SDValue NewChain, |
605 | SDValue Glue) const { |
606 | SmallVector <SDValue, 8> Ops; |
607 | Ops.push_back(NewChain); // Replace the chain. |
608 | for (unsigned i = 1, e = N->getNumOperands(); i != e; ++i) |
609 | Ops.push_back(N->getOperand(i)); |
610 | |
611 | Ops.push_back(Glue); |
612 | return CurDAG->MorphNodeTo(N, N->getOpcode(), N->getVTList(), Ops); |
613 | } |
614 | |
615 | SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N, SDValue Val) const { |
616 | const SITargetLowering& Lowering = |
617 | *static_cast<const SITargetLowering*>(getTargetLowering()); |
618 | |
619 | assert(N->getOperand(0).getValueType() == MVT::Other && "Expected chain"); |
620 | |
621 | SDValue M0 = Lowering.copyToM0(*CurDAG, N->getOperand(0), SDLoc(N), Val); |
622 | return glueCopyToOp(N, M0, M0.getValue(1)); |
623 | } |
624 | |
625 | SDNode *AMDGPUDAGToDAGISel::glueCopyToM0LDSInit(SDNode *N) const { |
626 | unsigned AS = cast<MemSDNode>(N)->getAddressSpace(); |
627 | if (AS == AMDGPUAS::LOCAL_ADDRESS) { |
628 | if (Subtarget->ldsRequiresM0Init()) |
629 | return glueCopyToM0(N, CurDAG->getTargetConstant(-1, SDLoc(N), MVT::i32)); |
630 | } else if (AS == AMDGPUAS::REGION_ADDRESS) { |
631 | MachineFunction &MF = CurDAG->getMachineFunction(); |
632 | unsigned Value = MF.getInfo<SIMachineFunctionInfo>()->getGDSSize(); |
633 | return |
634 | glueCopyToM0(N, CurDAG->getTargetConstant(Value, SDLoc(N), MVT::i32)); |
635 | } |
636 | return N; |
637 | } |
638 | |
639 | MachineSDNode *AMDGPUDAGToDAGISel::buildSMovImm64(SDLoc &DL, uint64_t Imm, |
640 | EVT VT) const { |
641 | SDNode *Lo = CurDAG->getMachineNode( |
642 | AMDGPU::S_MOV_B32, DL, MVT::i32, |
643 | CurDAG->getTargetConstant(Imm & 0xFFFFFFFF, DL, MVT::i32)); |
644 | SDNode *Hi = |
645 | CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, |
646 | CurDAG->getTargetConstant(Imm >> 32, DL, MVT::i32)); |
647 | const SDValue Ops[] = { |
648 | CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32), |
649 | SDValue(Lo, 0), CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32), |
650 | SDValue(Hi, 0), CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32)}; |
651 | |
652 | return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, VT, Ops); |
653 | } |
654 | |
655 | void AMDGPUDAGToDAGISel::SelectBuildVector(SDNode *N, unsigned RegClassID) { |
656 | EVT VT = N->getValueType(0); |
657 | unsigned NumVectorElts = VT.getVectorNumElements(); |
658 | EVT EltVT = VT.getVectorElementType(); |
659 | SDLoc DL(N); |
660 | SDValue RegClass = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32); |
661 | |
662 | if (NumVectorElts == 1) { |
663 | CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, EltVT, N->getOperand(0), |
664 | RegClass); |
665 | return; |
666 | } |
667 | |
668 | assert(NumVectorElts <= 32 && "Vectors with more than 32 elements not " |
669 | "supported yet"); |
670 | // 32 = Max Num Vector Elements |
671 | // 2 = 2 REG_SEQUENCE operands per element (value, subreg index) |
672 | // 1 = Vector Register Class |
673 | SmallVector<SDValue, 32 * 2 + 1> RegSeqArgs(NumVectorElts * 2 + 1); |
674 | |
675 | bool IsGCN = CurDAG->getSubtarget().getTargetTriple().getArch() == |
676 | Triple::amdgcn; |
677 | RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32); |
678 | bool IsRegSeq = true; |
679 | unsigned NOps = N->getNumOperands(); |
680 | for (unsigned i = 0; i < NOps; i++) { |
681 | // XXX: Why is this here? |
682 | if (isa<RegisterSDNode>(N->getOperand(i))) { |
683 | IsRegSeq = false; |
684 | break; |
685 | } |
686 | unsigned Sub = IsGCN ? SIRegisterInfo::getSubRegFromChannel(i) |
687 | : R600RegisterInfo::getSubRegFromChannel(i); |
688 | RegSeqArgs[1 + (2 * i)] = N->getOperand(i); |
689 | RegSeqArgs[1 + (2 * i) + 1] = CurDAG->getTargetConstant(Sub, DL, MVT::i32); |
690 | } |
691 | if (NOps != NumVectorElts) { |
692 | // Fill in the missing undef elements if this was a scalar_to_vector. |
693 | assert(N->getOpcode() == ISD::SCALAR_TO_VECTOR && NOps < NumVectorElts); |
694 | MachineSDNode *ImpDef = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, |
695 | DL, EltVT); |
696 | for (unsigned i = NOps; i < NumVectorElts; ++i) { |
697 | unsigned Sub = IsGCN ? SIRegisterInfo::getSubRegFromChannel(i) |
698 | : R600RegisterInfo::getSubRegFromChannel(i); |
699 | RegSeqArgs[1 + (2 * i)] = SDValue(ImpDef, 0); |
700 | RegSeqArgs[1 + (2 * i) + 1] = |
701 | CurDAG->getTargetConstant(Sub, DL, MVT::i32); |
702 | } |
703 | } |
704 | |
705 | if (!IsRegSeq) |
706 | SelectCode(N); |
707 | CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(), RegSeqArgs); |
708 | } |
709 | |
710 | void AMDGPUDAGToDAGISel::Select(SDNode *N) { |
711 | unsigned int Opc = N->getOpcode(); |
712 | if (N->isMachineOpcode()) { |
713 | N->setNodeId(-1); |
714 | return; // Already selected. |
715 | } |
716 | |
717 | // isa<MemSDNode> almost works but is slightly too permissive for some DS |
718 | // intrinsics. |
719 | if (Opc == ISD::LOAD || Opc == ISD::STORE || isa<AtomicSDNode>(N) || |
720 | (Opc == AMDGPUISD::ATOMIC_INC || Opc == AMDGPUISD::ATOMIC_DEC || |
721 | Opc == ISD::ATOMIC_LOAD_FADD || |
722 | Opc == AMDGPUISD::ATOMIC_LOAD_FMIN || |
723 | Opc == AMDGPUISD::ATOMIC_LOAD_FMAX)) { |
724 | N = glueCopyToM0LDSInit(N); |
725 | SelectCode(N); |
726 | return; |
727 | } |
728 | |
729 | switch (Opc) { |
730 | default: |
731 | break; |
732 | // We are selecting i64 ADD here instead of custom lowering it during |
733 | // DAG legalization, so we can fold some i64 ADDs used for address |
734 | // calculation into the LOAD and STORE instructions. |
735 | case ISD::ADDC: |
736 | case ISD::ADDE: |
737 | case ISD::SUBC: |
738 | case ISD::SUBE: { |
739 | if (N->getValueType(0) != MVT::i64) |
740 | break; |
741 | |
742 | SelectADD_SUB_I64(N); |
743 | return; |
744 | } |
745 | case ISD::ADDCARRY: |
746 | case ISD::SUBCARRY: |
747 | if (N->getValueType(0) != MVT::i32) |
748 | break; |
749 | |
750 | SelectAddcSubb(N); |
751 | return; |
752 | case ISD::UADDO: |
753 | case ISD::USUBO: { |
754 | SelectUADDO_USUBO(N); |
755 | return; |
756 | } |
757 | case AMDGPUISD::FMUL_W_CHAIN: { |
758 | SelectFMUL_W_CHAIN(N); |
759 | return; |
760 | } |
761 | case AMDGPUISD::FMA_W_CHAIN: { |
762 | SelectFMA_W_CHAIN(N); |
763 | return; |
764 | } |
765 | |
766 | case ISD::SCALAR_TO_VECTOR: |
767 | case ISD::BUILD_VECTOR: { |
768 | EVT VT = N->getValueType(0); |
769 | unsigned NumVectorElts = VT.getVectorNumElements(); |
770 | if (VT.getScalarSizeInBits() == 16) { |
771 | if (Opc == ISD::BUILD_VECTOR && NumVectorElts == 2) { |
772 | if (SDNode *Packed = packConstantV2I16(N, *CurDAG)) { |
773 | ReplaceNode(N, Packed); |
774 | return; |
775 | } |
776 | } |
777 | |
778 | break; |
779 | } |
780 | |
781 | assert(VT.getVectorElementType().bitsEq(MVT::i32)); |
782 | unsigned RegClassID = |
783 | SIRegisterInfo::getSGPRClassForBitWidth(NumVectorElts * 32)->getID(); |
784 | SelectBuildVector(N, RegClassID); |
785 | return; |
786 | } |
787 | case ISD::BUILD_PAIR: { |
788 | SDValue RC, SubReg0, SubReg1; |
789 | SDLoc DL(N); |
790 | if (N->getValueType(0) == MVT::i128) { |
791 | RC = CurDAG->getTargetConstant(AMDGPU::SGPR_128RegClassID, DL, MVT::i32); |
792 | SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, DL, MVT::i32); |
793 | SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, DL, MVT::i32); |
794 | } else if (N->getValueType(0) == MVT::i64) { |
795 | RC = CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32); |
796 | SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32); |
797 | SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32); |
798 | } else { |
799 | llvm_unreachable("Unhandled value type for BUILD_PAIR"); |
800 | } |
801 | const SDValue Ops[] = { RC, N->getOperand(0), SubReg0, |
802 | N->getOperand(1), SubReg1 }; |
803 | ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, |
804 | N->getValueType(0), Ops)); |
805 | return; |
806 | } |
807 | |
808 | case ISD::Constant: |
809 | case ISD::ConstantFP: { |
810 | if (N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N)) |
811 | break; |
812 | |
813 | uint64_t Imm; |
814 | if (ConstantFPSDNode *FP = dyn_cast<ConstantFPSDNode>(N)) |
815 | Imm = FP->getValueAPF().bitcastToAPInt().getZExtValue(); |
816 | else { |
817 | ConstantSDNode *C = cast<ConstantSDNode>(N); |
818 | Imm = C->getZExtValue(); |
819 | } |
820 | |
821 | SDLoc DL(N); |
822 | ReplaceNode(N, buildSMovImm64(DL, Imm, N->getValueType(0))); |
823 | return; |
824 | } |
825 | case AMDGPUISD::BFE_I32: |
826 | case AMDGPUISD::BFE_U32: { |
827 | // There is a scalar version available, but unlike the vector version which |
828 | // has separate operands for the offset and width, the scalar version packs |
829 | // the width and offset into a single operand. Try to move to the scalar |
830 | // version if the offsets are constant, so that we can try to keep extended |
831 | // loads of kernel arguments in SGPRs. |
832 | |
833 | // TODO: Technically we could try to pattern match scalar bitshifts of |
834 | // dynamic values, but it's probably not useful. |
835 | ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1)); |
836 | if (!Offset) |
837 | break; |
838 | |
839 | ConstantSDNode *Width = dyn_cast<ConstantSDNode>(N->getOperand(2)); |
840 | if (!Width) |
841 | break; |
842 | |
843 | bool Signed = Opc == AMDGPUISD::BFE_I32; |
844 | |
845 | uint32_t OffsetVal = Offset->getZExtValue(); |
846 | uint32_t WidthVal = Width->getZExtValue(); |
847 | |
848 | ReplaceNode(N, getS_BFE(Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32, |
849 | SDLoc(N), N->getOperand(0), OffsetVal, WidthVal)); |
850 | return; |
851 | } |
852 | case AMDGPUISD::DIV_SCALE: { |
853 | SelectDIV_SCALE(N); |
854 | return; |
855 | } |
856 | case AMDGPUISD::MAD_I64_I32: |
857 | case AMDGPUISD::MAD_U64_U32: { |
858 | SelectMAD_64_32(N); |
859 | return; |
860 | } |
861 | case ISD::CopyToReg: { |
862 | const SITargetLowering& Lowering = |
863 | *static_cast<const SITargetLowering*>(getTargetLowering()); |
864 | N = Lowering.legalizeTargetIndependentNode(N, *CurDAG); |
865 | break; |
866 | } |
867 | case ISD::AND: |
868 | case ISD::SRL: |
869 | case ISD::SRA: |
870 | case ISD::SIGN_EXTEND_INREG: |
871 | if (N->getValueType(0) != MVT::i32) |
872 | break; |
873 | |
874 | SelectS_BFE(N); |
875 | return; |
876 | case ISD::BRCOND: |
877 | SelectBRCOND(N); |
878 | return; |
879 | case ISD::FMAD: |
880 | case ISD::FMA: |
881 | SelectFMAD_FMA(N); |
882 | return; |
883 | case AMDGPUISD::ATOMIC_CMP_SWAP: |
884 | SelectATOMIC_CMP_SWAP(N); |
885 | return; |
886 | case AMDGPUISD::CVT_PKRTZ_F16_F32: |
887 | case AMDGPUISD::CVT_PKNORM_I16_F32: |
888 | case AMDGPUISD::CVT_PKNORM_U16_F32: |
889 | case AMDGPUISD::CVT_PK_U16_U32: |
890 | case AMDGPUISD::CVT_PK_I16_I32: { |
891 | // Hack around using a legal type if f16 is illegal. |
892 | if (N->getValueType(0) == MVT::i32) { |
893 | MVT NewVT = Opc == AMDGPUISD::CVT_PKRTZ_F16_F32 ? MVT::v2f16 : MVT::v2i16; |
894 | N = CurDAG->MorphNodeTo(N, N->getOpcode(), CurDAG->getVTList(NewVT), |
895 | { N->getOperand(0), N->getOperand(1) }); |
896 | SelectCode(N); |
897 | return; |
898 | } |
899 | |
900 | break; |
901 | } |
902 | case ISD::INTRINSIC_W_CHAIN: { |
903 | SelectINTRINSIC_W_CHAIN(N); |
904 | return; |
905 | } |
906 | case ISD::INTRINSIC_WO_CHAIN: { |
907 | SelectINTRINSIC_WO_CHAIN(N); |
908 | return; |
909 | } |
910 | case ISD::INTRINSIC_VOID: { |
911 | SelectINTRINSIC_VOID(N); |
912 | return; |
913 | } |
914 | } |
915 | |
916 | SelectCode(N); |
917 | } |
918 | |
919 | bool AMDGPUDAGToDAGISel::isUniformBr(const SDNode *N) const { |
920 | const BasicBlock *BB = FuncInfo->MBB->getBasicBlock(); |
921 | const Instruction *Term = BB->getTerminator(); |
922 | return Term->getMetadata("amdgpu.uniform") || |
923 | Term->getMetadata("structurizecfg.uniform"); |
924 | } |
925 | |
926 | static bool getBaseWithOffsetUsingSplitOR(SelectionDAG &DAG, SDValue Addr, |
927 | SDValue &N0, SDValue &N1) { |
928 | if (Addr.getValueType() == MVT::i64 && Addr.getOpcode() == ISD::BITCAST && |
929 | Addr.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) { |
930 | // As we split 64-bit `or` earlier, it's a complicated pattern to match, i.e. |
931 | // (i64 (bitcast (v2i32 (build_vector |
932 | // (or (extract_vector_elt V, 0), OFFSET), |
933 | // (extract_vector_elt V, 1))))) |
934 | SDValue Lo = Addr.getOperand(0).getOperand(0); |
935 | if (Lo.getOpcode() == ISD::OR && DAG.isBaseWithConstantOffset(Lo)) { |
936 | SDValue BaseLo = Lo.getOperand(0); |
937 | SDValue BaseHi = Addr.getOperand(0).getOperand(1); |
938 | // Check that the split base halves (Lo and Hi) are extracted from the same vector. |
939 | if (BaseLo.getOpcode() == ISD::EXTRACT_VECTOR_ELT && |
940 | BaseHi.getOpcode() == ISD::EXTRACT_VECTOR_ELT && |
941 | BaseLo.getOperand(0) == BaseHi.getOperand(0) && |
942 | // Lo is statically extracted from index 0. |
943 | isa<ConstantSDNode>(BaseLo.getOperand(1)) && |
944 | BaseLo.getConstantOperandVal(1) == 0 && |
945 | // Hi is statically extracted from index 1. |
946 | isa<ConstantSDNode>(BaseHi.getOperand(1)) && |
947 | BaseHi.getConstantOperandVal(1) == 1) { |
948 | N0 = BaseLo.getOperand(0).getOperand(0); |
949 | N1 = Lo.getOperand(1); |
950 | return true; |
951 | } |
952 | } |
953 | } |
954 | return false; |
955 | } |
956 | |
957 | bool AMDGPUDAGToDAGISel::isBaseWithConstantOffset64(SDValue Addr, SDValue &LHS, |
958 | SDValue &RHS) const { |
959 | if (CurDAG->isBaseWithConstantOffset(Addr)) { |
960 | LHS = Addr.getOperand(0); |
961 | RHS = Addr.getOperand(1); |
962 | return true; |
963 | } |
964 | |
965 | if (getBaseWithOffsetUsingSplitOR(*CurDAG, Addr, LHS, RHS)) { |
966 | assert(LHS && RHS && isa<ConstantSDNode>(RHS)); |
967 | return true; |
968 | } |
969 | |
970 | return false; |
971 | } |
972 | |
973 | StringRef AMDGPUDAGToDAGISel::getPassName() const { |
974 | return "AMDGPU DAG->DAG Pattern Instruction Selection"; |
975 | } |
976 | |
977 | //===----------------------------------------------------------------------===// |
978 | // Complex Patterns |
979 | //===----------------------------------------------------------------------===// |
980 | |
981 | bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base, |
982 | SDValue &Offset) { |
983 | return false; |
984 | } |
985 | |
986 | bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base, |
987 | SDValue &Offset) { |
988 | ConstantSDNode *C; |
989 | SDLoc DL(Addr); |
990 | |
991 | if ((C = dyn_cast<ConstantSDNode>(Addr))) { |
992 | Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32); |
993 | Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32); |
994 | } else if ((Addr.getOpcode() == AMDGPUISD::DWORDADDR) && |
995 | (C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) { |
996 | Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32); |
997 | Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32); |
998 | } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) && |
999 | (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) { |
1000 | Base = Addr.getOperand(0); |
1001 | Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32); |
1002 | } else { |
1003 | Base = Addr; |
1004 | Offset = CurDAG->getTargetConstant(0, DL, MVT::i32); |
1005 | } |
1006 | |
1007 | return true; |
1008 | } |
1009 | |
1010 | SDValue AMDGPUDAGToDAGISel::getMaterializedScalarImm32(int64_t Val, |
1011 | const SDLoc &DL) const { |
1012 | SDNode *Mov = CurDAG->getMachineNode( |
1013 | AMDGPU::S_MOV_B32, DL, MVT::i32, |
1014 | CurDAG->getTargetConstant(Val, DL, MVT::i32)); |
1015 | return SDValue(Mov, 0); |
1016 | } |
1017 | |
1018 | // FIXME: Should only handle addcarry/subcarry |
1019 | void AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) { |
1020 | SDLoc DL(N); |
1021 | SDValue LHS = N->getOperand(0); |
1022 | SDValue RHS = N->getOperand(1); |
1023 | |
1024 | unsigned Opcode = N->getOpcode(); |
1025 | bool ConsumeCarry = (Opcode == ISD::ADDE || Opcode == ISD::SUBE); |
1026 | bool ProduceCarry = |
1027 | ConsumeCarry || Opcode == ISD::ADDC || Opcode == ISD::SUBC; |
1028 | bool IsAdd = Opcode == ISD::ADD || Opcode == ISD::ADDC || Opcode == ISD::ADDE; |
1029 | |
1030 | SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32); |
1031 | SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32); |
1032 | |
1033 | SDNode *Lo0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, |
1034 | DL, MVT::i32, LHS, Sub0); |
1035 | SDNode *Hi0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, |
1036 | DL, MVT::i32, LHS, Sub1); |
1037 | |
1038 | SDNode *Lo1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, |
1039 | DL, MVT::i32, RHS, Sub0); |
1040 | SDNode *Hi1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, |
1041 | DL, MVT::i32, RHS, Sub1); |
1042 | |
1043 | SDVTList VTList = CurDAG->getVTList(MVT::i32, MVT::Glue); |
1044 | |
1045 | static const unsigned OpcMap[2][2][2] = { |
1046 | {{AMDGPU::S_SUB_U32, AMDGPU::S_ADD_U32}, |
1047 | {AMDGPU::V_SUB_CO_U32_e32, AMDGPU::V_ADD_CO_U32_e32}}, |
1048 | {{AMDGPU::S_SUBB_U32, AMDGPU::S_ADDC_U32}, |
1049 | {AMDGPU::V_SUBB_U32_e32, AMDGPU::V_ADDC_U32_e32}}}; |
1050 | |
1051 | unsigned Opc = OpcMap[0][N->isDivergent()][IsAdd]; |
1052 | unsigned CarryOpc = OpcMap[1][N->isDivergent()][IsAdd]; |
1053 | |
1054 | SDNode *AddLo; |
1055 | if (!ConsumeCarry) { |
1056 | SDValue Args[] = { SDValue(Lo0, 0), SDValue(Lo1, 0) }; |
1057 | AddLo = CurDAG->getMachineNode(Opc, DL, VTList, Args); |
1058 | } else { |
1059 | SDValue Args[] = { SDValue(Lo0, 0), SDValue(Lo1, 0), N->getOperand(2) }; |
1060 | AddLo = CurDAG->getMachineNode(CarryOpc, DL, VTList, Args); |
1061 | } |
1062 | SDValue AddHiArgs[] = { |
1063 | SDValue(Hi0, 0), |
1064 | SDValue(Hi1, 0), |
1065 | SDValue(AddLo, 1) |
1066 | }; |
1067 | SDNode *AddHi = CurDAG->getMachineNode(CarryOpc, DL, VTList, AddHiArgs); |
1068 | |
1069 | SDValue RegSequenceArgs[] = { |
1070 | CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32), |
1071 | SDValue(AddLo,0), |
1072 | Sub0, |
1073 | SDValue(AddHi,0), |
1074 | Sub1, |
1075 | }; |
1076 | SDNode *RegSequence = CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, DL, |
1077 | MVT::i64, RegSequenceArgs); |
1078 | |
1079 | if (ProduceCarry) { |
1080 | // Replace the carry-use |
1081 | ReplaceUses(SDValue(N, 1), SDValue(AddHi, 1)); |
1082 | } |
1083 | |
1084 | // Replace the remaining uses. |
1085 | ReplaceNode(N, RegSequence); |
1086 | } |
1087 | |
1088 | void AMDGPUDAGToDAGISel::SelectAddcSubb(SDNode *N) { |
1089 | SDLoc DL(N); |
1090 | SDValue LHS = N->getOperand(0); |
1091 | SDValue RHS = N->getOperand(1); |
1092 | SDValue CI = N->getOperand(2); |
1093 | |
1094 | if (N->isDivergent()) { |
1095 | unsigned Opc = N->getOpcode() == ISD::ADDCARRY ? AMDGPU::V_ADDC_U32_e64 |
1096 | : AMDGPU::V_SUBB_U32_e64; |
1097 | CurDAG->SelectNodeTo( |
1098 | N, Opc, N->getVTList(), |
1099 | {LHS, RHS, CI, |
1100 | CurDAG->getTargetConstant(0, {}, MVT::i1) /*clamp bit*/}); |
1101 | } else { |
1102 | unsigned Opc = N->getOpcode() == ISD::ADDCARRY ? AMDGPU::S_ADD_CO_PSEUDO |
1103 | : AMDGPU::S_SUB_CO_PSEUDO; |
1104 | CurDAG->SelectNodeTo(N, Opc, N->getVTList(), {LHS, RHS, CI}); |
1105 | } |
1106 | } |
1107 | |
1108 | void AMDGPUDAGToDAGISel::SelectUADDO_USUBO(SDNode *N) { |
1109 | // The names of the opcodes are misleading. v_add_i32/v_sub_i32 have unsigned |
1110 | // carry out despite the _i32 name. These were renamed in VI to _U32. |
1111 | // FIXME: We should probably rename the opcodes here. |
1112 | bool IsAdd = N->getOpcode() == ISD::UADDO; |
1113 | bool IsVALU = N->isDivergent(); |
1114 | |
1115 | for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end(); UI != E; |
1116 | ++UI) |
1117 | if (UI.getUse().getResNo() == 1) { |
1118 | if ((IsAdd && (UI->getOpcode() != ISD::ADDCARRY)) || |
1119 | (!IsAdd && (UI->getOpcode() != ISD::SUBCARRY))) { |
1120 | IsVALU = true; |
1121 | break; |
1122 | } |
1123 | } |
1124 | |
1125 | if (IsVALU) { |
1126 | unsigned Opc = IsAdd ? AMDGPU::V_ADD_CO_U32_e64 : AMDGPU::V_SUB_CO_U32_e64; |
1127 | |
1128 | CurDAG->SelectNodeTo( |
1129 | N, Opc, N->getVTList(), |
1130 | {N->getOperand(0), N->getOperand(1), |
1131 | CurDAG->getTargetConstant(0, {}, MVT::i1) /*clamp bit*/}); |
1132 | } else { |
1133 | unsigned Opc = N->getOpcode() == ISD::UADDO ? AMDGPU::S_UADDO_PSEUDO |
1134 | : AMDGPU::S_USUBO_PSEUDO; |
1135 | |
1136 | CurDAG->SelectNodeTo(N, Opc, N->getVTList(), |
1137 | {N->getOperand(0), N->getOperand(1)}); |
1138 | } |
1139 | } |
1140 | |
1141 | void AMDGPUDAGToDAGISel::SelectFMA_W_CHAIN(SDNode *N) { |
1142 | SDLoc SL(N); |
1143 | // src0_modifiers, src0, src1_modifiers, src1, src2_modifiers, src2, clamp, omod |
1144 | SDValue Ops[10]; |
1145 | |
1146 | SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[6], Ops[7]); |
1147 | SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]); |
1148 | SelectVOP3Mods(N->getOperand(3), Ops[5], Ops[4]); |
1149 | Ops[8] = N->getOperand(0); |
1150 | Ops[9] = N->getOperand(4); |
1151 | |
1152 | CurDAG->SelectNodeTo(N, AMDGPU::V_FMA_F32_e64, N->getVTList(), Ops); |
1153 | } |
1154 | |
1155 | void AMDGPUDAGToDAGISel::SelectFMUL_W_CHAIN(SDNode *N) { |
1156 | SDLoc SL(N); |
1157 | // src0_modifiers, src0, src1_modifiers, src1, clamp, omod |
1158 | SDValue Ops[8]; |
1159 | |
1160 | SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[4], Ops[5]); |
1161 | SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]); |
1162 | Ops[6] = N->getOperand(0); |
1163 | Ops[7] = N->getOperand(3); |
1164 | |
1165 | CurDAG->SelectNodeTo(N, AMDGPU::V_MUL_F32_e64, N->getVTList(), Ops); |
1166 | } |
1167 | |
1168 | // We need to handle this here because tablegen doesn't support matching |
1169 | // instructions with multiple outputs. |
1170 | void AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) { |
1171 | SDLoc SL(N); |
1172 | EVT VT = N->getValueType(0); |
1173 | |
1174 | assert(VT == MVT::f32 || VT == MVT::f64); |
1175 | |
1176 | unsigned Opc |
1177 | = (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64_e64 : AMDGPU::V_DIV_SCALE_F32_e64; |
1178 | |
1179 | // src0_modifiers, src0, src1_modifiers, src1, src2_modifiers, src2, clamp, |
1180 | // omod |
1181 | SDValue Ops[8]; |
1182 | SelectVOP3BMods0(N->getOperand(0), Ops[1], Ops[0], Ops[6], Ops[7]); |
1183 | SelectVOP3BMods(N->getOperand(1), Ops[3], Ops[2]); |
1184 | SelectVOP3BMods(N->getOperand(2), Ops[5], Ops[4]); |
1185 | CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops); |
1186 | } |
1187 | |
1188 | // We need to handle this here because tablegen doesn't support matching |
1189 | // instructions with multiple outputs. |
1190 | void AMDGPUDAGToDAGISel::SelectMAD_64_32(SDNode *N) { |
1191 | SDLoc SL(N); |
1192 | bool Signed = N->getOpcode() == AMDGPUISD::MAD_I64_I32; |
1193 | unsigned Opc = Signed ? AMDGPU::V_MAD_I64_I32_e64 : AMDGPU::V_MAD_U64_U32_e64; |
1194 | |
1195 | SDValue Clamp = CurDAG->getTargetConstant(0, SL, MVT::i1); |
1196 | SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2), |
1197 | Clamp }; |
1198 | CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops); |
1199 | } |
1200 | |
1201 | bool AMDGPUDAGToDAGISel::isDSOffsetLegal(SDValue Base, unsigned Offset) const { |
1202 | if (!isUInt<16>(Offset)) |
1203 | return false; |
1204 | |
1205 | if (!Base || Subtarget->hasUsableDSOffset() || |
1206 | Subtarget->unsafeDSOffsetFoldingEnabled()) |
1207 | return true; |
1208 | |
1209 | // On Southern Islands, instructions with a negative base value and an offset |
1210 | // don't seem to work. |
1211 | return CurDAG->SignBitIsZero(Base); |
1212 | } |
1213 | |
1214 | bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base, |
1215 | SDValue &Offset) const { |
1216 | SDLoc DL(Addr); |
1217 | if (CurDAG->isBaseWithConstantOffset(Addr)) { |
1218 | SDValue N0 = Addr.getOperand(0); |
1219 | SDValue N1 = Addr.getOperand(1); |
1220 | ConstantSDNode *C1 = cast<ConstantSDNode>(N1); |
1221 | if (isDSOffsetLegal(N0, C1->getSExtValue())) { |
1222 | // (add n0, c0) |
1223 | Base = N0; |
1224 | Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16); |
1225 | return true; |
1226 | } |
1227 | } else if (Addr.getOpcode() == ISD::SUB) { |
1228 | // sub C, x -> add (sub 0, x), C |
1229 | if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) { |
1230 | int64_t ByteOffset = C->getSExtValue(); |
1231 | if (isDSOffsetLegal(SDValue(), ByteOffset)) { |
1232 | SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32); |
1233 | |
1234 | // XXX - This is kind of hacky. Create a dummy sub node so we can check |
1235 | // the known bits in isDSOffsetLegal. We need to emit the selected node |
1236 | // here, so this is thrown away. |
1237 | SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32, |
1238 | Zero, Addr.getOperand(1)); |
1239 | |
1240 | if (isDSOffsetLegal(Sub, ByteOffset)) { |
1241 | SmallVector<SDValue, 3> Opnds; |
1242 | Opnds.push_back(Zero); |
1243 | Opnds.push_back(Addr.getOperand(1)); |
1244 | |
1245 | // FIXME: Select to VOP3 version for with-carry. |
1246 | unsigned SubOp = AMDGPU::V_SUB_CO_U32_e32; |
1247 | if (Subtarget->hasAddNoCarry()) { |
1248 | SubOp = AMDGPU::V_SUB_U32_e64; |
1249 | Opnds.push_back( |
1250 | CurDAG->getTargetConstant(0, {}, MVT::i1)); // clamp bit |
1251 | } |
1252 | |
1253 | MachineSDNode *MachineSub = |
1254 | CurDAG->getMachineNode(SubOp, DL, MVT::i32, Opnds); |
1255 | |
1256 | Base = SDValue(MachineSub, 0); |
1257 | Offset = CurDAG->getTargetConstant(ByteOffset, DL, MVT::i16); |
1258 | return true; |
1259 | } |
1260 | } |
1261 | } |
1262 | } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) { |
1263 | // If we have a constant address, prefer to put the constant into the |
1264 | // offset. This can save moves to load the constant address since multiple |
1265 | // operations can share the zero base address register, and enables merging |
1266 | // into read2 / write2 instructions. |
1267 | |
1268 | SDLoc DL(Addr); |
1269 | |
1270 | if (isDSOffsetLegal(SDValue(), CAddr->getZExtValue())) { |
1271 | SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32); |
1272 | MachineSDNode *MovZero = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32, |
1273 | DL, MVT::i32, Zero); |
1274 | Base = SDValue(MovZero, 0); |
1275 | Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16); |
1276 | return true; |
1277 | } |
1278 | } |
1279 | |
1280 | // default case |
1281 | Base = Addr; |
1282 | Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i16); |
1283 | return true; |
1284 | } |
1285 | |
1286 | bool AMDGPUDAGToDAGISel::isDSOffset2Legal(SDValue Base, unsigned Offset0, |
1287 | unsigned Offset1, |
1288 | unsigned Size) const { |
1289 | if (Offset0 % Size != 0 || Offset1 % Size != 0) |
1290 | return false; |
1291 | if (!isUInt<8>(Offset0 / Size) || !isUInt<8>(Offset1 / Size)) |
1292 | return false; |
1293 | |
1294 | if (!Base || Subtarget->hasUsableDSOffset() || |
1295 | Subtarget->unsafeDSOffsetFoldingEnabled()) |
1296 | return true; |
1297 | |
1298 | // On Southern Islands, instructions with a negative base value and an offset |
1299 | // don't seem to work. |
1300 | return CurDAG->SignBitIsZero(Base); |
1301 | } |
1302 | |
1303 | // TODO: If offset is too big, put low 16 bits into offset. |
1304 | bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base, |
1305 | SDValue &Offset0, |
1306 | SDValue &Offset1) const { |
1307 | return SelectDSReadWrite2(Addr, Base, Offset0, Offset1, 4); |
1308 | } |
1309 | |
1310 | bool AMDGPUDAGToDAGISel::SelectDS128Bit8ByteAligned(SDValue Addr, SDValue &Base, |
1311 | SDValue &Offset0, |
1312 | SDValue &Offset1) const { |
1313 | return SelectDSReadWrite2(Addr, Base, Offset0, Offset1, 8); |
1314 | } |
1315 | |
1316 | bool AMDGPUDAGToDAGISel::SelectDSReadWrite2(SDValue Addr, SDValue &Base, |
1317 | SDValue &Offset0, SDValue &Offset1, |
1318 | unsigned Size) const { |
1319 | SDLoc DL(Addr); |
1320 | |
1321 | if (CurDAG->isBaseWithConstantOffset(Addr)) { |
1322 | SDValue N0 = Addr.getOperand(0); |
1323 | SDValue N1 = Addr.getOperand(1); |
1324 | ConstantSDNode *C1 = cast<ConstantSDNode>(N1); |
1325 | unsigned OffsetValue0 = C1->getZExtValue(); |
1326 | unsigned OffsetValue1 = OffsetValue0 + Size; |
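// The 2 * Size byte access is split into two adjacent Size-byte accesses, so |
// the second offset is always one element past the first (e.g. byte offsets |
// 0 and 4 for a pair of 32-bit elements). |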
1327 | |
1328 | // (add n0, c0) |
1329 | if (isDSOffset2Legal(N0, OffsetValue0, OffsetValue1, Size)) { |
1330 | Base = N0; |
1331 | Offset0 = CurDAG->getTargetConstant(OffsetValue0 / Size, DL, MVT::i8); |
1332 | Offset1 = CurDAG->getTargetConstant(OffsetValue1 / Size, DL, MVT::i8); |
1333 | return true; |
1334 | } |
1335 | } else if (Addr.getOpcode() == ISD::SUB) { |
1336 | // sub C, x -> add (sub 0, x), C |
1337 | if (const ConstantSDNode *C = |
1338 | dyn_cast<ConstantSDNode>(Addr.getOperand(0))) { |
1339 | unsigned OffsetValue0 = C->getZExtValue(); |
1340 | unsigned OffsetValue1 = OffsetValue0 + Size; |
1341 | |
1342 | if (isDSOffset2Legal(SDValue(), OffsetValue0, OffsetValue1, Size)) { |
1343 | SDLoc DL(Addr); |
1344 | SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32); |
1345 | |
1346 | // XXX - This is kind of hacky. Create a dummy sub node so we can check |
1347 | // the known bits in isDSOffset2Legal. The real machine node is emitted |
1348 | // below, so this dummy node is thrown away. |
1349 | SDValue Sub = |
1350 | CurDAG->getNode(ISD::SUB, DL, MVT::i32, Zero, Addr.getOperand(1)); |
1351 | |
1352 | if (isDSOffset2Legal(Sub, OffsetValue0, OffsetValue1, Size)) { |
1353 | SmallVector<SDValue, 3> Opnds; |
1354 | Opnds.push_back(Zero); |
1355 | Opnds.push_back(Addr.getOperand(1)); |
1356 | unsigned SubOp = AMDGPU::V_SUB_CO_U32_e32; |
1357 | if (Subtarget->hasAddNoCarry()) { |
1358 | SubOp = AMDGPU::V_SUB_U32_e64; |
1359 | Opnds.push_back( |
1360 | CurDAG->getTargetConstant(0, {}, MVT::i1)); // clamp bit |
1361 | } |
1362 | |
1363 | MachineSDNode *MachineSub = CurDAG->getMachineNode( |
1364 | SubOp, DL, MVT::i32, Opnds); // The DS base address is always 32 bits. |
1365 | |
1366 | Base = SDValue(MachineSub, 0); |
1367 | Offset0 = CurDAG->getTargetConstant(OffsetValue0 / Size, DL, MVT::i8); |
1368 | Offset1 = CurDAG->getTargetConstant(OffsetValue1 / Size, DL, MVT::i8); |
1369 | return true; |
1370 | } |
1371 | } |
1372 | } |
1373 | } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) { |
1374 | unsigned OffsetValue0 = CAddr->getZExtValue(); |
1375 | unsigned OffsetValue1 = OffsetValue0 + Size; |
1376 | |
1377 | if (isDSOffset2Legal(SDValue(), OffsetValue0, OffsetValue1, Size)) { |
1378 | SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32); |
1379 | MachineSDNode *MovZero = |
1380 | CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32, DL, MVT::i32, Zero); |
1381 | Base = SDValue(MovZero, 0); |
1382 | Offset0 = CurDAG->getTargetConstant(OffsetValue0 / Size, DL, MVT::i8); |
1383 | Offset1 = CurDAG->getTargetConstant(OffsetValue1 / Size, DL, MVT::i8); |
1384 | return true; |
1385 | } |
1386 | } |
1387 | |
1388 | // default case |
1389 | |
1390 | Base = Addr; |
1391 | Offset0 = CurDAG->getTargetConstant(0, DL, MVT::i8); |
1392 | Offset1 = CurDAG->getTargetConstant(1, DL, MVT::i8); |
1393 | return true; |
1394 | } |
1395 | |
1396 | bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr, |
1397 | SDValue &VAddr, SDValue &SOffset, |
1398 | SDValue &Offset, SDValue &Offen, |
1399 | SDValue &Idxen, SDValue &Addr64, |
1400 | SDValue &GLC, SDValue &SLC, |
1401 | SDValue &TFE, SDValue &DLC, |
1402 | SDValue &SWZ, SDValue &SCCB) const { |
1403 | // Subtarget prefers to use flat instructions. |
1404 | // FIXME: This should be a pattern predicate and not reach here |
1405 | if (Subtarget->useFlatForGlobal()) |
1406 | return false; |
1407 | |
1408 | SDLoc DL(Addr); |
1409 | |
1410 | if (!GLC.getNode()) |
1411 | GLC = CurDAG->getTargetConstant(0, DL, MVT::i1); |
1412 | if (!SLC.getNode()) |
1413 | SLC = CurDAG->getTargetConstant(0, DL, MVT::i1); |
1414 | TFE = CurDAG->getTargetConstant(0, DL, MVT::i1); |
1415 | DLC = CurDAG->getTargetConstant(0, DL, MVT::i1); |
1416 | SWZ = CurDAG->getTargetConstant(0, DL, MVT::i1); |
1417 | SCCB = CurDAG->getTargetConstant(0, DL, MVT::i1); |
1418 | |
1419 | Idxen = CurDAG->getTargetConstant(0, DL, MVT::i1); |
1420 | Offen = CurDAG->getTargetConstant(0, DL, MVT::i1); |
1421 | Addr64 = CurDAG->getTargetConstant(0, DL, MVT::i1); |
1422 | SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32); |
1423 | |
1424 | ConstantSDNode *C1 = nullptr; |
1425 | SDValue N0 = Addr; |
1426 | if (CurDAG->isBaseWithConstantOffset(Addr)) { |
1427 | C1 = cast<ConstantSDNode>(Addr.getOperand(1)); |
1428 | if (isUInt<32>(C1->getZExtValue())) |
1429 | N0 = Addr.getOperand(0); |
1430 | else |
1431 | C1 = nullptr; |
1432 | } |
1433 | |
1434 | if (N0.getOpcode() == ISD::ADD) { |
1435 | // (add N2, N3) -> addr64, or |
1436 | // (add (add N2, N3), C1) -> addr64 |
1437 | SDValue N2 = N0.getOperand(0); |
1438 | SDValue N3 = N0.getOperand(1); |
1439 | Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1); |
1440 | |
1441 | if (N2->isDivergent()) { |
1442 | if (N3->isDivergent()) { |
1443 | // Both N2 and N3 are divergent. Use N0 (the result of the add) as the |
1444 | // addr64, and construct the resource from a 0 address. |
1445 | Ptr = SDValue(buildSMovImm64(DL, 0, MVT::v2i32), 0); |
1446 | VAddr = N0; |
1447 | } else { |
1448 | // N2 is divergent, N3 is not. |
1449 | Ptr = N3; |
1450 | VAddr = N2; |
1451 | } |
1452 | } else { |
1453 | // N2 is not divergent. |
1454 | Ptr = N2; |
1455 | VAddr = N3; |
1456 | } |
1457 | Offset = CurDAG->getTargetConstant(0, DL, MVT::i16); |
1458 | } else if (N0->isDivergent()) { |
1459 | // N0 is divergent. Use it as the addr64, and construct the resource from a |
1460 | // 0 address. |
1461 | Ptr = SDValue(buildSMovImm64(DL, 0, MVT::v2i32), 0); |
1462 | VAddr = N0; |
1463 | Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1); |
1464 | } else { |
1465 | // N0 -> offset, or |
1466 | // (N0 + C1) -> offset |
1467 | VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32); |
1468 | Ptr = N0; |
1469 | } |
1470 | |
1471 | if (!C1) { |
1472 | // No offset. |
1473 | Offset = CurDAG->getTargetConstant(0, DL, MVT::i16); |
1474 | return true; |
1475 | } |
1476 | |
1477 | if (SIInstrInfo::isLegalMUBUFImmOffset(C1->getZExtValue())) { |
1478 | // Legal offset for instruction. |
1479 | Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16); |
1480 | return true; |
1481 | } |
1482 | |
1483 | // Illegal offset, store it in soffset. |
1484 | Offset = CurDAG->getTargetConstant(0, DL, MVT::i16); |
1485 | SOffset = |
1486 | SDValue(CurDAG->getMachineNode( |
1487 | AMDGPU::S_MOV_B32, DL, MVT::i32, |
1488 | CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32)), |
1489 | 0); |
1490 | return true; |
1491 | } |
1492 | |
1493 | bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, |
1494 | SDValue &VAddr, SDValue &SOffset, |
1495 | SDValue &Offset, SDValue &GLC, |
1496 | SDValue &SLC, SDValue &TFE, |
1497 | SDValue &DLC, SDValue &SWZ, |
1498 | SDValue &SCCB) const { |
1499 | SDValue Ptr, Offen, Idxen, Addr64; |
1500 | |
1501 | // The addr64 bit was removed for Volcanic Islands. |
1502 | // FIXME: This should be a pattern predicate and not reach here |
1503 | if (!Subtarget->hasAddr64()) |
1504 | return false; |
1505 | |
1506 | if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64, |
1507 | GLC, SLC, TFE, DLC, SWZ, SCCB)) |
1508 | return false; |
1509 | |
1510 | ConstantSDNode *C = cast<ConstantSDNode>(Addr64); |
1511 | if (C->getSExtValue()) { |
1512 | SDLoc DL(Addr); |
1513 | |
1514 | const SITargetLowering& Lowering = |
1515 | *static_cast<const SITargetLowering*>(getTargetLowering()); |
1516 | |
1517 | SRsrc = SDValue(Lowering.wrapAddr64Rsrc(*CurDAG, DL, Ptr), 0); |
1518 | return true; |
1519 | } |
1520 | |
1521 | return false; |
1522 | } |
1523 | |
1524 | bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, |
1525 | SDValue &VAddr, SDValue &SOffset, |
1526 | SDValue &Offset, |
1527 | SDValue &SLC) const { |
1528 | SLC = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i1); |
1529 | SDValue GLC, TFE, DLC, SWZ, SCCB; |
1530 | |
1531 | return SelectMUBUFAddr64(Addr, SRsrc, VAddr, SOffset, Offset, GLC, SLC, TFE, DLC, SWZ, SCCB); |
1532 | } |
1533 | |
1534 | static bool isStackPtrRelative(const MachinePointerInfo &PtrInfo) { |
1535 | auto PSV = PtrInfo.V.dyn_cast<const PseudoSourceValue *>(); |
1536 | return PSV && PSV->isStack(); |
1537 | } |
1538 | |
1539 | std::pair<SDValue, SDValue> AMDGPUDAGToDAGISel::foldFrameIndex(SDValue N) const { |
1540 | SDLoc DL(N); |
1541 | |
1542 | auto *FI = dyn_cast<FrameIndexSDNode>(N); |
1543 | SDValue TFI = |
1544 | FI ? CurDAG->getTargetFrameIndex(FI->getIndex(), FI->getValueType(0)) : N; |
1545 | |
1546 | // We rebase the base address into an absolute stack address and hence |
1547 | // use constant 0 for soffset. This value must be retained until frame |
1548 | // elimination, at which point eliminateFrameIndex will choose the |
1549 | // appropriate frame register if need be. |
1550 | return std::make_pair(TFI, CurDAG->getTargetConstant(0, DL, MVT::i32)); |
1551 | } |
1552 | |
1553 | bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(SDNode *Parent, |
1554 | SDValue Addr, SDValue &Rsrc, |
1555 | SDValue &VAddr, SDValue &SOffset, |
1556 | SDValue &ImmOffset) const { |
1557 | |
1558 | SDLoc DL(Addr); |
1559 | MachineFunction &MF = CurDAG->getMachineFunction(); |
1560 | const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>(); |
1561 | |
1562 | Rsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32); |
1563 | |
1564 | if (ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) { |
1565 | int64_t Imm = CAddr->getSExtValue(); |
1566 | const int64_t NullPtr = |
1567 | AMDGPUTargetMachine::getNullPointerValue(AMDGPUAS::PRIVATE_ADDRESS); |
1568 | // Don't fold null pointer. |
1569 | if (Imm != NullPtr) { |
1570 | SDValue HighBits = CurDAG->getTargetConstant(Imm & ~4095, DL, MVT::i32); |
1571 | MachineSDNode *MovHighBits = CurDAG->getMachineNode( |
1572 | AMDGPU::V_MOV_B32_e32, DL, MVT::i32, HighBits); |
1573 | VAddr = SDValue(MovHighBits, 0); |
1574 | |
1575 | // In a call sequence, stores to the argument stack area are relative to the |
1576 | // stack pointer. |
1577 | const MachinePointerInfo &PtrInfo |
1578 | = cast<MemSDNode>(Parent)->getPointerInfo(); |
1579 | SOffset = isStackPtrRelative(PtrInfo) |
1580 | ? CurDAG->getRegister(Info->getStackPtrOffsetReg(), MVT::i32) |
1581 | : CurDAG->getTargetConstant(0, DL, MVT::i32); |
1582 | ImmOffset = CurDAG->getTargetConstant(Imm & 4095, DL, MVT::i16); |
1583 | return true; |
1584 | } |
1585 | } |
1586 | |
1587 | if (CurDAG->isBaseWithConstantOffset(Addr)) { |
1588 | // (add n0, c1) |
1589 | |
1590 | SDValue N0 = Addr.getOperand(0); |
1591 | SDValue N1 = Addr.getOperand(1); |
1592 | |
1593 | // Offsets in vaddr must be positive if range checking is enabled. |
1594 | // |
1595 | // The total computation of vaddr + soffset + offset must not overflow. If |
1596 | // vaddr is negative, even if offset is 0 the sgpr offset add will end up |
1597 | // overflowing. |
1598 | // |
1599 | // Prior to gfx9, MUBUF instructions with the vaddr offset enabled would |
1600 | // always perform a range check. If a negative vaddr base index was used, |
1601 | // it would fail the range check even though the overall address |
1602 | // computation produces a valid address. For out-of-bounds MUBUF loads, |
1603 | // a 0 is returned. |
1604 | // |
1605 | // Therefore it should be safe to fold any VGPR offset on gfx9 into the |
1606 | // MUBUF vaddr, but not on older subtargets which can only do this if the |
1607 | // sign bit is known 0. |
1608 | ConstantSDNode *C1 = cast<ConstantSDNode>(N1); |
1609 | if (SIInstrInfo::isLegalMUBUFImmOffset(C1->getZExtValue()) && |
1610 | (!Subtarget->privateMemoryResourceIsRangeChecked() || |
1611 | CurDAG->SignBitIsZero(N0))) { |
1612 | std::tie(VAddr, SOffset) = foldFrameIndex(N0); |
1613 | ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16); |
1614 | return true; |
1615 | } |
1616 | } |
1617 | |
1618 | // (node) |
1619 | std::tie(VAddr, SOffset) = foldFrameIndex(Addr); |
1620 | ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16); |
1621 | return true; |
1622 | } |
1623 | |
1624 | bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffset(SDNode *Parent, |
1625 | SDValue Addr, |
1626 | SDValue &SRsrc, |
1627 | SDValue &SOffset, |
1628 | SDValue &Offset) const { |
1629 | ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr); |
1630 | if (!CAddr || !SIInstrInfo::isLegalMUBUFImmOffset(CAddr->getZExtValue())) |
1631 | return false; |
1632 | |
1633 | SDLoc DL(Addr); |
1634 | MachineFunction &MF = CurDAG->getMachineFunction(); |
1635 | const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>(); |
1636 | |
1637 | SRsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32); |
1638 | |
1639 | const MachinePointerInfo &PtrInfo = cast<MemSDNode>(Parent)->getPointerInfo(); |
1640 | |
1641 | // FIXME: Get from MachinePointerInfo? We should only be using the frame |
1642 | // offset if we know this is in a call sequence. |
1643 | SOffset = isStackPtrRelative(PtrInfo) |
1644 | ? CurDAG->getRegister(Info->getStackPtrOffsetReg(), MVT::i32) |
1645 | : CurDAG->getTargetConstant(0, DL, MVT::i32); |
1646 | |
1647 | Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16); |
1648 | return true; |
1649 | } |
1650 | |
1651 | bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, |
1652 | SDValue &SOffset, SDValue &Offset, |
1653 | SDValue &GLC, SDValue &SLC, |
1654 | SDValue &TFE, SDValue &DLC, |
1655 | SDValue &SWZ, SDValue &SCCB) const { |
1656 | SDValue Ptr, VAddr, Offen, Idxen, Addr64; |
1657 | const SIInstrInfo *TII = |
1658 | static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo()); |
1659 | |
1660 | if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64, |
1661 | GLC, SLC, TFE, DLC, SWZ, SCCB)) |
1662 | return false; |
1663 | |
1664 | if (!cast<ConstantSDNode>(Offen)->getSExtValue() && |
1665 | !cast<ConstantSDNode>(Idxen)->getSExtValue() && |
1666 | !cast<ConstantSDNode>(Addr64)->getSExtValue()) { |
1667 | uint64_t Rsrc = TII->getDefaultRsrcDataFormat() | |
1668 | APInt::getAllOnesValue(32).getZExtValue(); // Size |
1669 | SDLoc DL(Addr); |
1670 | |
1671 | const SITargetLowering& Lowering = |
1672 | *static_cast<const SITargetLowering*>(getTargetLowering()); |
1673 | |
1674 | SRsrc = SDValue(Lowering.buildRSRC(*CurDAG, DL, Ptr, 0, Rsrc), 0); |
1675 | return true; |
1676 | } |
1677 | return false; |
1678 | } |
1679 | |
1680 | bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, |
1681 | SDValue &Soffset, |
1682 | SDValue &Offset) const { |
1683 | SDValue GLC, SLC, TFE, DLC, SWZ, SCCB; |
1684 | |
1685 | return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE, DLC, SWZ, SCCB); |
1686 | } |
1687 | bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, |
1688 | SDValue &Soffset, SDValue &Offset, |
1689 | SDValue &SLC) const { |
1690 | SDValue GLC, TFE, DLC, SWZ, SCCB; |
1691 | |
1692 | return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE, DLC, SWZ, SCCB); |
1693 | } |
1694 | |
1695 | // Find a load or store from the corresponding pattern root. |
1696 | // The root may be a build_vector, a bitconvert, or a combination of the two. |
1697 | static MemSDNode* findMemSDNode(SDNode *N) { |
1698 | N = AMDGPUTargetLowering::stripBitcast(SDValue(N,0)).getNode(); |
1699 | if (MemSDNode *MN = dyn_cast<MemSDNode>(N)) |
1700 | return MN; |
1701 | assert(isa<BuildVectorSDNode>(N)); |
1702 | for (SDValue V : N->op_values()) |
1703 | if (MemSDNode *MN = |
1704 | dyn_cast<MemSDNode>(AMDGPUTargetLowering::stripBitcast(V))) |
1705 | return MN; |
1706 | llvm_unreachable("cannot find MemSDNode in the pattern!"); |
1707 | } |
1708 | |
1709 | template <bool IsSigned> |
1710 | bool AMDGPUDAGToDAGISel::SelectFlatOffset(SDNode *N, |
1711 | SDValue Addr, |
1712 | SDValue &VAddr, |
1713 | SDValue &Offset) const { |
1714 | int64_t OffsetVal = 0; |
1715 | |
1716 | unsigned AS = findMemSDNode(N)->getAddressSpace(); |
1717 | |
1718 | if (Subtarget->hasFlatInstOffsets() && |
1719 | (!Subtarget->hasFlatSegmentOffsetBug() || |
1720 | AS != AMDGPUAS::FLAT_ADDRESS)) { |
1721 | SDValue N0, N1; |
1722 | if (isBaseWithConstantOffset64(Addr, N0, N1)) { |
1723 | uint64_t COffsetVal = cast<ConstantSDNode>(N1)->getSExtValue(); |
1724 | |
1725 | const SIInstrInfo *TII = Subtarget->getInstrInfo(); |
1726 | if (TII->isLegalFLATOffset(COffsetVal, AS, IsSigned)) { |
1727 | Addr = N0; |
1728 | OffsetVal = COffsetVal; |
1729 | } else { |
1730 | // If the offset doesn't fit, put the low bits into the offset field and |
1731 | // add the rest. |
1732 | // |
1733 | // For a FLAT instruction the hardware decides whether to access |
1734 | // global/scratch/shared memory based on the high bits of vaddr, |
1735 | // ignoring the offset field, so we have to ensure that when we add |
1736 | // remainder to vaddr it still points into the same underlying object. |
1737 | // The easiest way to do that is to make sure that we split the offset |
1738 | // into two pieces that are both >= 0 or both <= 0. |
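// For example, with a hypothetical 11-bit signed immediate field and a |
// truncating division by D = 1 << 10 (as in the scratch path below), an |
// offset of 3000 splits into 2048 + 952 and -3000 into -2048 + -952, so |
// the two pieces always share a sign. |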
1739 | |
1740 | SDLoc DL(N); |
1741 | uint64_t RemainderOffset; |
1742 | |
1743 | std::tie(OffsetVal, RemainderOffset) |
1744 | = TII->splitFlatOffset(COffsetVal, AS, IsSigned); |
1745 | |
1746 | SDValue AddOffsetLo = |
1747 | getMaterializedScalarImm32(Lo_32(RemainderOffset), DL); |
1748 | SDValue Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1); |
1749 | |
1750 | if (Addr.getValueType().getSizeInBits() == 32) { |
1751 | SmallVector<SDValue, 3> Opnds; |
1752 | Opnds.push_back(N0); |
1753 | Opnds.push_back(AddOffsetLo); |
1754 | unsigned AddOp = AMDGPU::V_ADD_CO_U32_e32; |
1755 | if (Subtarget->hasAddNoCarry()) { |
1756 | AddOp = AMDGPU::V_ADD_U32_e64; |
1757 | Opnds.push_back(Clamp); |
1758 | } |
1759 | Addr = SDValue(CurDAG->getMachineNode(AddOp, DL, MVT::i32, Opnds), 0); |
1760 | } else { |
1761 | // TODO: Should this try to use a scalar add pseudo if the base address |
1762 | // is uniform and saddr is usable? |
1763 | SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32); |
1764 | SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32); |
1765 | |
1766 | SDNode *N0Lo = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, |
1767 | DL, MVT::i32, N0, Sub0); |
1768 | SDNode *N0Hi = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, |
1769 | DL, MVT::i32, N0, Sub1); |
1770 | |
1771 | SDValue AddOffsetHi = |
1772 | getMaterializedScalarImm32(Hi_32(RemainderOffset), DL); |
1773 | |
1774 | SDVTList VTs = CurDAG->getVTList(MVT::i32, MVT::i1); |
1775 | |
1776 | SDNode *Add = |
1777 | CurDAG->getMachineNode(AMDGPU::V_ADD_CO_U32_e64, DL, VTs, |
1778 | {AddOffsetLo, SDValue(N0Lo, 0), Clamp}); |
1779 | |
1780 | SDNode *Addc = CurDAG->getMachineNode( |
1781 | AMDGPU::V_ADDC_U32_e64, DL, VTs, |
1782 | {AddOffsetHi, SDValue(N0Hi, 0), SDValue(Add, 1), Clamp}); |
1783 | |
1784 | SDValue RegSequenceArgs[] = { |
1785 | CurDAG->getTargetConstant(AMDGPU::VReg_64RegClassID, DL, MVT::i32), |
1786 | SDValue(Add, 0), Sub0, SDValue(Addc, 0), Sub1}; |
1787 | |
1788 | Addr = SDValue(CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, DL, |
1789 | MVT::i64, RegSequenceArgs), |
1790 | 0); |
1791 | } |
1792 | } |
1793 | } |
1794 | } |
1795 | |
1796 | VAddr = Addr; |
1797 | Offset = CurDAG->getTargetConstant(OffsetVal, SDLoc(), MVT::i16); |
1798 | return true; |
1799 | } |
1800 | |
1801 | // If this matches zero_extend i32:x, return x |
1802 | static SDValue matchZExtFromI32(SDValue Op) { |
1803 | if (Op.getOpcode() != ISD::ZERO_EXTEND) |
1804 | return SDValue(); |
1805 | |
1806 | SDValue ExtSrc = Op.getOperand(0); |
1807 | return (ExtSrc.getValueType() == MVT::i32) ? ExtSrc : SDValue(); |
1808 | } |
1809 | |
1810 | // Match (64-bit SGPR base) + (zext vgpr offset) + sext(imm offset) |
1811 | bool AMDGPUDAGToDAGISel::SelectGlobalSAddr(SDNode *N, |
1812 | SDValue Addr, |
1813 | SDValue &SAddr, |
1814 | SDValue &VOffset, |
1815 | SDValue &Offset) const { |
1816 | int64_t ImmOffset = 0; |
1817 | |
1818 | // Match the immediate offset first, which canonically is moved as low as |
1819 | // possible. |
1820 | |
1821 | SDValue LHS, RHS; |
1822 | if (isBaseWithConstantOffset64(Addr, LHS, RHS)) { |
1823 | int64_t COffsetVal = cast<ConstantSDNode>(RHS)->getSExtValue(); |
1824 | const SIInstrInfo *TII = Subtarget->getInstrInfo(); |
1825 | |
1826 | if (TII->isLegalFLATOffset(COffsetVal, AMDGPUAS::GLOBAL_ADDRESS, true)) { |
1827 | Addr = LHS; |
1828 | ImmOffset = COffsetVal; |
1829 | } else if (!LHS->isDivergent() && COffsetVal > 0) { |
1830 | SDLoc SL(N); |
1831 | // saddr + large_offset -> saddr + (voffset = large_offset & ~MaxOffset) + |
1832 | // (large_offset & MaxOffset); |
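// E.g. with a hypothetical MaxOffset of 4095, a large offset of 5000 |
// becomes voffset = 4096 plus an immediate offset of 904. |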
1833 | int64_t SplitImmOffset, RemainderOffset; |
1834 | std::tie(SplitImmOffset, RemainderOffset) |
1835 | = TII->splitFlatOffset(COffsetVal, AMDGPUAS::GLOBAL_ADDRESS, true); |
1836 | |
1837 | if (isUInt<32>(RemainderOffset)) { |
1838 | SDNode *VMov = CurDAG->getMachineNode( |
1839 | AMDGPU::V_MOV_B32_e32, SL, MVT::i32, |
1840 | CurDAG->getTargetConstant(RemainderOffset, SDLoc(), MVT::i32)); |
1841 | VOffset = SDValue(VMov, 0); |
1842 | SAddr = LHS; |
1843 | Offset = CurDAG->getTargetConstant(SplitImmOffset, SDLoc(), MVT::i16); |
1844 | return true; |
1845 | } |
1846 | } |
1847 | } |
1848 | |
1849 | // Match the variable offset. |
1850 | if (Addr.getOpcode() != ISD::ADD) { |
1851 | if (Addr->isDivergent() || Addr.getOpcode() == ISD::UNDEF || |
1852 | isa<ConstantSDNode>(Addr)) |
1853 | return false; |
1854 | |
1855 | // It's cheaper to materialize a single 32-bit zero for vaddr than the two |
1856 | // moves required to copy a 64-bit SGPR to VGPR. |
1857 | SAddr = Addr; |
1858 | SDNode *VMov = CurDAG->getMachineNode( |
1859 | AMDGPU::V_MOV_B32_e32, SDLoc(Addr), MVT::i32, |
1860 | CurDAG->getTargetConstant(0, SDLoc(), MVT::i32)); |
1861 | VOffset = SDValue(VMov, 0); |
1862 | Offset = CurDAG->getTargetConstant(ImmOffset, SDLoc(), MVT::i16); |
1863 | return true; |
1864 | } |
1865 | |
1866 | LHS = Addr.getOperand(0); |
1867 | RHS = Addr.getOperand(1); |
1868 | |
1869 | if (!LHS->isDivergent()) { |
1870 | // add (i64 sgpr), (zero_extend (i32 vgpr)) |
1871 | if (SDValue ZextRHS = matchZExtFromI32(RHS)) { |
1872 | SAddr = LHS; |
1873 | VOffset = ZextRHS; |
1874 | } |
1875 | } |
1876 | |
1877 | if (!SAddr && !RHS->isDivergent()) { |
1878 | // add (zero_extend (i32 vgpr)), (i64 sgpr) |
1879 | if (SDValue ZextLHS = matchZExtFromI32(LHS)) { |
1880 | SAddr = RHS; |
1881 | VOffset = ZextLHS; |
1882 | } |
1883 | } |
1884 | |
1885 | if (!SAddr) |
1886 | return false; |
1887 | |
1888 | Offset = CurDAG->getTargetConstant(ImmOffset, SDLoc(), MVT::i16); |
1889 | return true; |
1890 | } |
1891 | |
1892 | // Match (32-bit SGPR base) + sext(imm offset) |
1893 | bool AMDGPUDAGToDAGISel::SelectScratchSAddr(SDNode *N, |
1894 | SDValue Addr, |
1895 | SDValue &SAddr, |
1896 | SDValue &Offset) const { |
1897 | if (Addr->isDivergent()) |
1898 | return false; |
1899 | |
1900 | SAddr = Addr; |
1901 | int64_t COffsetVal = 0; |
1902 | |
1903 | if (CurDAG->isBaseWithConstantOffset(Addr)) { |
1904 | COffsetVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue(); |
1905 | SAddr = Addr.getOperand(0); |
1906 | } |
1907 | |
1908 | if (auto FI = dyn_cast<FrameIndexSDNode>(SAddr)) { |
1909 | SAddr = CurDAG->getTargetFrameIndex(FI->getIndex(), FI->getValueType(0)); |
1910 | } else if (SAddr.getOpcode() == ISD::ADD && |
1911 | isa<FrameIndexSDNode>(SAddr.getOperand(0))) { |
1912 | // Materialize this into a scalar move for the scalar address to avoid |
1913 | // a readfirstlane. |
1914 | auto FI = cast<FrameIndexSDNode>(SAddr.getOperand(0)); |
1915 | SDValue TFI = CurDAG->getTargetFrameIndex(FI->getIndex(), |
1916 | FI->getValueType(0)); |
1917 | SAddr = SDValue(CurDAG->getMachineNode(AMDGPU::S_ADD_U32, SDLoc(SAddr), |
1918 | MVT::i32, TFI, SAddr.getOperand(1)), |
1919 | 0); |
1920 | } |
1921 | |
1922 | const SIInstrInfo *TII = Subtarget->getInstrInfo(); |
1923 | |
1924 | if (!TII->isLegalFLATOffset(COffsetVal, AMDGPUAS::PRIVATE_ADDRESS, true)) { |
1925 | int64_t RemainderOffset; |
1926 | int64_t ImmField; |
1927 | const unsigned NumBits = AMDGPU::getNumFlatOffsetBits(*Subtarget, true); |
1928 | // Use signed division by a power of two to truncate towards 0. |
1929 | int64_t D = 1LL << (NumBits - 1); |
1930 | RemainderOffset = (COffsetVal / D) * D; |
1931 | ImmField = COffsetVal - RemainderOffset; |
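// E.g. assuming a hypothetical 13-bit signed field (D == 1 << 12), a |
// COffsetVal of 5000 yields RemainderOffset = 4096 and ImmField = 904, and |
// -5000 yields -4096 and -904. |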
1932 | |
1933 | assert(TII->isLegalFLATOffset(ImmField, AMDGPUAS::PRIVATE_ADDRESS, true)); |
1934 | assert(RemainderOffset + ImmField == COffsetVal); |
1935 | |
1936 | COffsetVal = ImmField; |
1937 | |
1938 | SDLoc DL(N); |
1939 | SDValue AddOffset = |
1940 | getMaterializedScalarImm32(Lo_32(RemainderOffset), DL); |
1941 | SAddr = SDValue(CurDAG->getMachineNode(AMDGPU::S_ADD_U32, DL, MVT::i32, |
1942 | SAddr, AddOffset), 0); |
1943 | } |
1944 | |
1945 | Offset = CurDAG->getTargetConstant(COffsetVal, SDLoc(), MVT::i16); |
1946 | |
1947 | return true; |
1948 | } |
1949 | |
1950 | bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode, |
1951 | SDValue &Offset, bool &Imm) const { |
1952 | ConstantSDNode *C = dyn_cast<ConstantSDNode>(ByteOffsetNode); |
1953 | if (!C) { |
1954 | if (ByteOffsetNode.getValueType().isScalarInteger() && |
1955 | ByteOffsetNode.getValueType().getSizeInBits() == 32) { |
1956 | Offset = ByteOffsetNode; |
1957 | Imm = false; |
1958 | return true; |
1959 | } |
1960 | if (ByteOffsetNode.getOpcode() == ISD::ZERO_EXTEND) { |
1961 | if (ByteOffsetNode.getOperand(0).getValueType().getSizeInBits() == 32) { |
1962 | Offset = ByteOffsetNode.getOperand(0); |
1963 | Imm = false; |
1964 | return true; |
1965 | } |
1966 | } |
1967 | return false; |
1968 | } |
1969 | |
1970 | SDLoc SL(ByteOffsetNode); |
1971 | // GFX9 and GFX10 have signed byte immediate offsets. |
1972 | int64_t ByteOffset = C->getSExtValue(); |
1973 | Optional<int64_t> EncodedOffset = |
1974 | AMDGPU::getSMRDEncodedOffset(*Subtarget, ByteOffset, false); |
1975 | if (EncodedOffset) { |
1976 | Offset = CurDAG->getTargetConstant(*EncodedOffset, SL, MVT::i32); |
1977 | Imm = true; |
1978 | return true; |
1979 | } |
1980 | |
1981 | // SGPR and literal offsets are unsigned. |
1982 | if (ByteOffset < 0) |
1983 | return false; |
1984 | |
1985 | EncodedOffset = AMDGPU::getSMRDEncodedLiteralOffset32(*Subtarget, ByteOffset); |
1986 | if (EncodedOffset) { |
1987 | Offset = CurDAG->getTargetConstant(*EncodedOffset, SL, MVT::i32); |
1988 | return true; |
1989 | } |
1990 | |
1991 | if (!isUInt<32>(ByteOffset) && !isInt<32>(ByteOffset)) |
1992 | return false; |
1993 | |
1994 | SDValue C32Bit = CurDAG->getTargetConstant(ByteOffset, SL, MVT::i32); |
1995 | Offset = SDValue( |
1996 | CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32, C32Bit), 0); |
1997 | |
1998 | return true; |
1999 | } |
2000 | |
2001 | SDValue AMDGPUDAGToDAGISel::Expand32BitAddress(SDValue Addr) const { |
2002 | if (Addr.getValueType() != MVT::i32) |
2003 | return Addr; |
2004 | |
2005 | // Zero-extend a 32-bit address. |
2006 | SDLoc SL(Addr); |
2007 | |
2008 | const MachineFunction &MF = CurDAG->getMachineFunction(); |
2009 | const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>(); |
2010 | unsigned AddrHiVal = Info->get32BitAddressHighBits(); |
2011 | SDValue AddrHi = CurDAG->getTargetConstant(AddrHiVal, SL, MVT::i32); |
2012 | |
2013 | const SDValue Ops[] = { |
2014 | CurDAG->getTargetConstant(AMDGPU::SReg_64_XEXECRegClassID, SL, MVT::i32), |
2015 | Addr, |
2016 | CurDAG->getTargetConstant(AMDGPU::sub0, SL, MVT::i32), |
2017 | SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32, AddrHi), |
2018 | 0), |
2019 | CurDAG->getTargetConstant(AMDGPU::sub1, SL, MVT::i32), |
2020 | }; |
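// The resulting 64-bit address is (AddrHiVal << 32) | Addr. For example, a |
// 32-bit address of 0x1000 with hypothetical high bits 0xff expands to |
// 0x000000ff00001000. |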
2021 | |
2022 | return SDValue(CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, SL, MVT::i64, |
2023 | Ops), 0); |
2024 | } |
2025 | |
2026 | bool AMDGPUDAGToDAGISel::SelectSMRD(SDValue Addr, SDValue &SBase, |
2027 | SDValue &Offset, bool &Imm) const { |
2028 | SDLoc SL(Addr); |
2029 | |
2030 | // A 32-bit (address + offset) should not cause unsigned 32-bit integer |
2031 | // wraparound, because s_load instructions perform the addition in 64 bits. |
2032 | if (Addr.getValueType() != MVT::i32 || |
2033 |     Addr->getFlags().hasNoUnsignedWrap()) { |
2034 | SDValue N0, N1; |
2035 | // Extract the base and offset if possible. |
2036 | if (CurDAG->isBaseWithConstantOffset(Addr) || |
2037 | Addr.getOpcode() == ISD::ADD) { |
2038 | N0 = Addr.getOperand(0); |
2039 | N1 = Addr.getOperand(1); |
2040 | } else if (getBaseWithOffsetUsingSplitOR(*CurDAG, Addr, N0, N1)) { |
2041 | assert(N0 && N1 && isa<ConstantSDNode>(N1)); |
2042 | } |
2043 | if (N0 && N1) { |
2044 | if (SelectSMRDOffset(N1, Offset, Imm)) { |
2045 | SBase = Expand32BitAddress(N0); |
2046 | return true; |
2047 | } |
2048 | } |
2049 | } |
2050 | SBase = Expand32BitAddress(Addr); |
2051 | Offset = CurDAG->getTargetConstant(0, SL, MVT::i32); |
2052 | Imm = true; |
2053 | return true; |
2054 | } |
2055 | |
2056 | bool AMDGPUDAGToDAGISel::SelectSMRDImm(SDValue Addr, SDValue &SBase, |
2057 | SDValue &Offset) const { |
2058 | bool Imm = false; |
2059 | return SelectSMRD(Addr, SBase, Offset, Imm) && Imm; |
2060 | } |
2061 | |
2062 | bool AMDGPUDAGToDAGISel::SelectSMRDImm32(SDValue Addr, SDValue &SBase, |
2063 | SDValue &Offset) const { |
2064 | |
2065 | assert(Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS); |
2066 | |
2067 | bool Imm = false; |
2068 | if (!SelectSMRD(Addr, SBase, Offset, Imm)) |
2069 | return false; |
2070 | |
2071 | return !Imm && isa<ConstantSDNode>(Offset); |
2072 | } |
2073 | |
2074 | bool AMDGPUDAGToDAGISel::SelectSMRDSgpr(SDValue Addr, SDValue &SBase, |
2075 | SDValue &Offset) const { |
2076 | bool Imm = false; |
2077 | return SelectSMRD(Addr, SBase, Offset, Imm) && !Imm && |
2078 | !isa<ConstantSDNode>(Offset); |
2079 | } |
2080 | |
2081 | bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm(SDValue Addr, |
2082 | SDValue &Offset) const { |
2083 | if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr)) { |
2084 | // The immediate offset for S_BUFFER instructions is unsigned. |
2085 | if (auto Imm = |
2086 | AMDGPU::getSMRDEncodedOffset(*Subtarget, C->getZExtValue(), true)) { |
2087 | Offset = CurDAG->getTargetConstant(*Imm, SDLoc(Addr), MVT::i32); |
2088 | return true; |
2089 | } |
2090 | } |
2091 | |
2092 | return false; |
2093 | } |
2094 | |
2095 | bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm32(SDValue Addr, |
2096 | SDValue &Offset) const { |
2097 | assert(Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS); |
2098 | |
2099 | if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr)) { |
2100 | if (auto Imm = AMDGPU::getSMRDEncodedLiteralOffset32(*Subtarget, |
2101 | C->getZExtValue())) { |
2102 | Offset = CurDAG->getTargetConstant(*Imm, SDLoc(Addr), MVT::i32); |
2103 | return true; |
2104 | } |
2105 | } |
2106 | |
2107 | return false; |
2108 | } |
2109 | |
2110 | bool AMDGPUDAGToDAGISel::SelectMOVRELOffset(SDValue Index, |
2111 | SDValue &Base, |
2112 | SDValue &Offset) const { |
2113 | SDLoc DL(Index); |
2114 | |
2115 | if (CurDAG->isBaseWithConstantOffset(Index)) { |
2116 | SDValue N0 = Index.getOperand(0); |
2117 | SDValue N1 = Index.getOperand(1); |
2118 | ConstantSDNode *C1 = cast<ConstantSDNode>(N1); |
2119 | |
2120 | // (add n0, c0) |
2121 | // Don't peel off the offset (c0) if doing so could possibly lead |
2122 | // the base (n0) to be negative. |
2123 | // (or n0, |c0|) can never change a sign given isBaseWithConstantOffset. |
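// For example, (add n0, -16) can always be peeled: n0 == index + 16, so a |
// non-negative index implies a non-negative base. For a positive c0 we must |
// instead prove that n0's sign bit is zero. |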
2124 | if (C1->getSExtValue() <= 0 || CurDAG->SignBitIsZero(N0) || |
2125 | (Index->getOpcode() == ISD::OR && C1->getSExtValue() >= 0)) { |
2126 | Base = N0; |
2127 | Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32); |
2128 | return true; |
2129 | } |
2130 | } |
2131 | |
2132 | if (isa<ConstantSDNode>(Index)) |
2133 | return false; |
2134 | |
2135 | Base = Index; |
2136 | Offset = CurDAG->getTargetConstant(0, DL, MVT::i32); |
2137 | return true; |
2138 | } |
2139 | |
2140 | SDNode *AMDGPUDAGToDAGISel::getS_BFE(unsigned Opcode, const SDLoc &DL, |
2141 | SDValue Val, uint32_t Offset, |
2142 | uint32_t Width) { |
2143 | // Transformation function: pack the offset and width of a BFE into |
2144 | // the format expected by S_BFE_I32 / S_BFE_U32. In the second |
2145 | // source, bits [5:0] contain the offset and bits [22:16] the width. |
2146 | uint32_t PackedVal = Offset | (Width << 16); |
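// E.g. Offset = 16 and Width = 8 pack as 16 | (8 << 16) == 0x80010. |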
2147 | SDValue PackedConst = CurDAG->getTargetConstant(PackedVal, DL, MVT::i32); |
2148 | |
2149 | return CurDAG->getMachineNode(Opcode, DL, MVT::i32, Val, PackedConst); |
2150 | } |
2151 | |
2152 | void AMDGPUDAGToDAGISel::SelectS_BFEFromShifts(SDNode *N) { |
2153 | // "(a << b) srl c)" ---> "BFE_U32 a, (c-b), (32-c) |
2154 | // "(a << b) sra c)" ---> "BFE_I32 a, (c-b), (32-c) |
2155 | // Predicate: 0 < b <= c < 32 |
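// For example, "((x << 8) srl 16)" becomes "BFE_U32 x, 8, 16": the field |
// starts at bit c - b == 8 and is 32 - c == 16 bits wide. |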
2156 | |
2157 | const SDValue &Shl = N->getOperand(0); |
2158 | ConstantSDNode *B = dyn_cast<ConstantSDNode>(Shl->getOperand(1)); |
2159 | ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1)); |
2160 | |
2161 | if (B && C) { |
2162 | uint32_t BVal = B->getZExtValue(); |
2163 | uint32_t CVal = C->getZExtValue(); |
2164 | |
2165 | if (0 < BVal && BVal <= CVal && CVal < 32) { |
2166 | bool Signed = N->getOpcode() == ISD::SRA; |
2167 | unsigned Opcode = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32; |
2168 | |
2169 | ReplaceNode(N, getS_BFE(Opcode, SDLoc(N), Shl.getOperand(0), CVal - BVal, |
2170 | 32 - CVal)); |
2171 | return; |
2172 | } |
2173 | } |
2174 | SelectCode(N); |
2175 | } |
2176 | |
2177 | void AMDGPUDAGToDAGISel::SelectS_BFE(SDNode *N) { |
2178 | switch (N->getOpcode()) { |
2179 | case ISD::AND: |
2180 | if (N->getOperand(0).getOpcode() == ISD::SRL) { |
2181 | // "(a srl b) & mask" ---> "BFE_U32 a, b, popcount(mask)" |
2182 | // Predicate: isMask(mask) |
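// For example, "(a srl 8) & 0xff" becomes "BFE_U32 a, 8, 8", since |
// popcount(0xff) == 8. |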
2183 | const SDValue &Srl = N->getOperand(0); |
2184 | ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(Srl.getOperand(1)); |
2185 | ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1)); |
2186 | |
2187 | if (Shift && Mask) { |
2188 | uint32_t ShiftVal = Shift->getZExtValue(); |
2189 | uint32_t MaskVal = Mask->getZExtValue(); |
2190 | |
2191 | if (isMask_32(MaskVal)) { |
2192 | uint32_t WidthVal = countPopulation(MaskVal); |
2193 | |
2194 | ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N), |
2195 | Srl.getOperand(0), ShiftVal, WidthVal)); |
2196 | return; |
2197 | } |
2198 | } |
2199 | } |
2200 | break; |
2201 | case ISD::SRL: |
2202 | if (N->getOperand(0).getOpcode() == ISD::AND) { |
2203 | // "(a & mask) srl b)" ---> "BFE_U32 a, b, popcount(mask >> b)" |
2204 | // Predicate: isMask(mask >> b) |
2205 | const SDValue &And = N->getOperand(0); |
2206 | ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(N->getOperand(1)); |
2207 | ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(And->getOperand(1)); |
2208 | |
2209 | if (Shift && Mask) { |
2210 | uint32_t ShiftVal = Shift->getZExtValue(); |
2211 | uint32_t MaskVal = Mask->getZExtValue() >> ShiftVal; |
2212 | |
2213 | if (isMask_32(MaskVal)) { |
2214 | uint32_t WidthVal = countPopulation(MaskVal); |
2215 | |
2216 | ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N), |
2217 | And.getOperand(0), ShiftVal, WidthVal)); |
2218 | return; |
2219 | } |
2220 | } |
2221 | } else if (N->getOperand(0).getOpcode() == ISD::SHL) { |
2222 | SelectS_BFEFromShifts(N); |
2223 | return; |
2224 | } |
2225 | break; |
2226 | case ISD::SRA: |
2227 | if (N->getOperand(0).getOpcode() == ISD::SHL) { |
2228 | SelectS_BFEFromShifts(N); |
2229 | return; |
2230 | } |
2231 | break; |
2232 | |
2233 | case ISD::SIGN_EXTEND_INREG: { |
2234 | // sext_inreg (srl x, 16), i8 -> bfe_i32 x, 16, 8 |
2235 | SDValue Src = N->getOperand(0); |
2236 | if (Src.getOpcode() != ISD::SRL) |
2237 | break; |
2238 | |
2239 | const ConstantSDNode *Amt = dyn_cast<ConstantSDNode>(Src.getOperand(1)); |
2240 | if (!Amt) |
2241 | break; |
2242 | |
2243 | unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits(); |
2244 | ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_I32, SDLoc(N), Src.getOperand(0), |
2245 | Amt->getZExtValue(), Width)); |
2246 | return; |
2247 | } |
2248 | } |
2249 | |
2250 | SelectCode(N); |
2251 | } |
2252 | |
2253 | bool AMDGPUDAGToDAGISel::isCBranchSCC(const SDNode *N) const { |
2254 | assert(N->getOpcode() == ISD::BRCOND); |
2255 | if (!N->hasOneUse()) |
2256 | return false; |
2257 | |
2258 | SDValue Cond = N->getOperand(1); |
2259 | if (Cond.getOpcode() == ISD::CopyToReg) |
2260 | Cond = Cond.getOperand(2); |
2261 | |
2262 | if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse()) |
2263 | return false; |
2264 | |
2265 | MVT VT = Cond.getOperand(0).getSimpleValueType(); |
2266 | if (VT == MVT::i32) |
2267 | return true; |
2268 | |
2269 | if (VT == MVT::i64) { |
2270 | auto ST = static_cast<const GCNSubtarget *>(Subtarget); |
2271 | |
2272 | ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get(); |
2273 | return (CC == ISD::SETEQ || CC == ISD::SETNE) && ST->hasScalarCompareEq64(); |
2274 | } |
2275 | |
2276 | return false; |
2277 | } |
2278 | |
2279 | void AMDGPUDAGToDAGISel::SelectBRCOND(SDNode *N) { |
2280 | SDValue Cond = N->getOperand(1); |
2281 | |
2282 | if (Cond.isUndef()) { |
2283 | CurDAG->SelectNodeTo(N, AMDGPU::SI_BR_UNDEF, MVT::Other, |
2284 | N->getOperand(2), N->getOperand(0)); |
2285 | return; |
2286 | } |
2287 | |
2288 | const GCNSubtarget *ST = static_cast<const GCNSubtarget *>(Subtarget); |
2289 | const SIRegisterInfo *TRI = ST->getRegisterInfo(); |
2290 | |
2291 | bool UseSCCBr = isCBranchSCC(N) && isUniformBr(N); |
2292 | unsigned BrOp = UseSCCBr ? AMDGPU::S_CBRANCH_SCC1 : AMDGPU::S_CBRANCH_VCCNZ; |
2293 | Register CondReg = UseSCCBr ? AMDGPU::SCC : TRI->getVCC(); |
2294 | SDLoc SL(N); |
2295 | |
2296 | if (!UseSCCBr) { |
2297 | // This is the case that we are selecting to S_CBRANCH_VCCNZ. We have not |
2298 | // analyzed what generates the vcc value, so we do not know whether vcc |
2299 | // bits for disabled lanes are 0. Thus we need to mask out bits for |
2300 | // disabled lanes. |
2301 | // |
2302 | // For the case that we select S_CBRANCH_SCC1 and it gets |
2303 | // changed to S_CBRANCH_VCCNZ in SIFixSGPRCopies, SIFixSGPRCopies calls |
2304 | // SIInstrInfo::moveToVALU which inserts the S_AND. |
2305 | // |
2306 | // We could add an analysis of what generates the vcc value here and omit |
2307 | // the S_AND when it is unnecessary. But it would be better to add a separate |
2308 | // pass after SIFixSGPRCopies to do the unnecessary S_AND removal, so it |
2309 | // catches both cases. |
2310 | Cond = SDValue(CurDAG->getMachineNode(ST->isWave32() ? AMDGPU::S_AND_B32 |
2311 | : AMDGPU::S_AND_B64, |
2312 | SL, MVT::i1, |
2313 | CurDAG->getRegister(ST->isWave32() ? AMDGPU::EXEC_LO |
2314 | : AMDGPU::EXEC, |
2315 | MVT::i1), |
2316 | Cond), |
2317 | 0); |
2318 | } |
2319 | |
2320 | SDValue VCC = CurDAG->getCopyToReg(N->getOperand(0), SL, CondReg, Cond); |
2321 | CurDAG->SelectNodeTo(N, BrOp, MVT::Other, |
2322 | N->getOperand(2), // Basic Block |
2323 | VCC.getValue(0)); |
2324 | } |
2325 | |
2326 | void AMDGPUDAGToDAGISel::SelectFMAD_FMA(SDNode *N) { |
2327 | MVT VT = N->getSimpleValueType(0); |
2328 | bool IsFMA = N->getOpcode() == ISD::FMA; |
2329 | if (VT != MVT::f32 || (!Subtarget->hasMadMixInsts() && |
2330 | !Subtarget->hasFmaMixInsts()) || |
2331 | ((IsFMA && Subtarget->hasMadMixInsts()) || |
2332 | (!IsFMA && Subtarget->hasFmaMixInsts()))) { |
2333 | SelectCode(N); |
2334 | return; |
2335 | } |
2336 | |
2337 | SDValue Src0 = N->getOperand(0); |
2338 | SDValue Src1 = N->getOperand(1); |
2339 | SDValue Src2 = N->getOperand(2); |
2340 | unsigned Src0Mods, Src1Mods, Src2Mods; |
2341 | |
2342 | // Avoid using v_mad_mix_f32/v_fma_mix_f32 unless there is actually an operand |
2343 | // using the conversion from f16. |
2344 | bool Sel0 = SelectVOP3PMadMixModsImpl(Src0, Src0, Src0Mods); |
2345 | bool Sel1 = SelectVOP3PMadMixModsImpl(Src1, Src1, Src1Mods); |
2346 | bool Sel2 = SelectVOP3PMadMixModsImpl(Src2, Src2, Src2Mods); |
2347 | |
2348 | assert((IsFMA || !Mode.allFP32Denormals()) && |
2349 |        "fmad selected with denormals enabled"); |
2350 | // TODO: We can select this with f32 denormals enabled if all the sources are |
2351 | // converted from f16 (in which case fmad isn't legal). |
2352 | |
2353 | if (Sel0 || Sel1 || Sel2) { |
2354 | // For dummy operands. |
2355 | SDValue Zero = CurDAG->getTargetConstant(0, SDLoc(), MVT::i32); |
2356 | SDValue Ops[] = { |
2357 | CurDAG->getTargetConstant(Src0Mods, SDLoc(), MVT::i32), Src0, |
2358 | CurDAG->getTargetConstant(Src1Mods, SDLoc(), MVT::i32), Src1, |
2359 | CurDAG->getTargetConstant(Src2Mods, SDLoc(), MVT::i32), Src2, |
2360 | CurDAG->getTargetConstant(0, SDLoc(), MVT::i1), |
2361 | Zero, Zero |
2362 | }; |
2363 | |
2364 | CurDAG->SelectNodeTo(N, |
2365 | IsFMA ? AMDGPU::V_FMA_MIX_F32 : AMDGPU::V_MAD_MIX_F32, |
2366 | MVT::f32, Ops); |
2367 | } else { |
2368 | SelectCode(N); |
2369 | } |
2370 | } |
2371 | |
2372 | // This is here because there isn't a way to use the generated sub0_sub1 as the |
2373 | // subreg index to EXTRACT_SUBREG in tablegen. |
2374 | void AMDGPUDAGToDAGISel::SelectATOMIC_CMP_SWAP(SDNode *N) { |
2375 | MemSDNode *Mem = cast<MemSDNode>(N); |
2376 | unsigned AS = Mem->getAddressSpace(); |
2377 | if (AS == AMDGPUAS::FLAT_ADDRESS) { |
2378 | SelectCode(N); |
2379 | return; |
2380 | } |
2381 | |
2382 | MVT VT = N->getSimpleValueType(0); |
2383 | bool Is32 = (VT == MVT::i32); |
2384 | SDLoc SL(N); |
2385 | |
2386 | MachineSDNode *CmpSwap = nullptr; |
2387 | if (Subtarget->hasAddr64()) { |
2388 | SDValue SRsrc, VAddr, SOffset, Offset, SLC; |
2389 | |
2390 | if (SelectMUBUFAddr64(Mem->getBasePtr(), SRsrc, VAddr, SOffset, Offset, SLC)) { |
2391 | unsigned Opcode = Is32 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN : |
2392 | AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN; |
2393 | SDValue CmpVal = Mem->getOperand(2); |
2394 | SDValue GLC = CurDAG->getTargetConstant(1, SL, MVT::i1); |
2395 | |
2396 | // XXX - Do we care about glue operands? |
2397 | |
2398 | SDValue Ops[] = { |
2399 | CmpVal, VAddr, SRsrc, SOffset, Offset, GLC, SLC, Mem->getChain() |
2400 | }; |
2401 | |
2402 | CmpSwap = CurDAG->getMachineNode(Opcode, SL, Mem->getVTList(), Ops); |
2403 | } |
2404 | } |
2405 | |
2406 | if (!CmpSwap) { |
2407 | SDValue SRsrc, SOffset, Offset, SLC; |
2408 | if (SelectMUBUFOffset(Mem->getBasePtr(), SRsrc, SOffset, Offset, SLC)) { |
2409 | unsigned Opcode = Is32 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN : |
2410 | AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_OFFSET_RTN; |
2411 | |
2412 | SDValue CmpVal = Mem->getOperand(2); |
2413 | SDValue GLC = CurDAG->getTargetConstant(1, SL, MVT::i1); |
2414 | SDValue Ops[] = { |
2415 | CmpVal, SRsrc, SOffset, Offset, GLC, SLC, Mem->getChain() |
2416 | }; |
2417 | |
2418 | CmpSwap = CurDAG->getMachineNode(Opcode, SL, Mem->getVTList(), Ops); |
2419 | } |
2420 | } |
2421 | |
2422 | if (!CmpSwap) { |
2423 | SelectCode(N); |
2424 | return; |
2425 | } |
2426 | |
2427 | MachineMemOperand *MMO = Mem->getMemOperand(); |
2428 | CurDAG->setNodeMemRefs(CmpSwap, {MMO}); |
2429 | |
2430 | unsigned SubReg = Is32 ? AMDGPU::sub0 : AMDGPU::sub0_sub1; |
2431 | SDValue Extract |
2432 | = CurDAG->getTargetExtractSubreg(SubReg, SL, VT, SDValue(CmpSwap, 0)); |
2433 | |
2434 | ReplaceUses(SDValue(N, 0), Extract); |
2435 | ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 1)); |
2436 | CurDAG->RemoveDeadNode(N); |
2437 | } |
2438 | |
2439 | void AMDGPUDAGToDAGISel::SelectDSAppendConsume(SDNode *N, unsigned IntrID) { |
2440 | // The address is assumed to be uniform, so if it ends up in a VGPR, it will |
2441 | // be copied to an SGPR with readfirstlane. |
2442 | unsigned Opc = IntrID == Intrinsic::amdgcn_ds_append ? |
2443 | AMDGPU::DS_APPEND : AMDGPU::DS_CONSUME; |
2444 | |
2445 | SDValue Chain = N->getOperand(0); |
2446 | SDValue Ptr = N->getOperand(2); |
2447 | MemIntrinsicSDNode *M = cast<MemIntrinsicSDNode>(N); |
2448 | MachineMemOperand *MMO = M->getMemOperand(); |
2449 | bool IsGDS = M->getAddressSpace() == AMDGPUAS::REGION_ADDRESS; |
2450 | |
2451 | SDValue Offset; |
2452 | if (CurDAG->isBaseWithConstantOffset(Ptr)) { |
2453 | SDValue PtrBase = Ptr.getOperand(0); |
2454 | SDValue PtrOffset = Ptr.getOperand(1); |
2455 | |
2456 | const APInt &OffsetVal = cast<ConstantSDNode>(PtrOffset)->getAPIntValue(); |
2457 | if (isDSOffsetLegal(PtrBase, OffsetVal.getZExtValue())) { |
2458 | N = glueCopyToM0(N, PtrBase); |
2459 | Offset = CurDAG->getTargetConstant(OffsetVal, SDLoc(), MVT::i32); |
2460 | } |
2461 | } |
2462 | |
2463 | if (!Offset) { |
2464 | N = glueCopyToM0(N, Ptr); |
2465 | Offset = CurDAG->getTargetConstant(0, SDLoc(), MVT::i32); |
2466 | } |
2467 | |
2468 | SDValue Ops[] = { |
2469 | Offset, |
2470 | CurDAG->getTargetConstant(IsGDS, SDLoc(), MVT::i32), |
2471 | Chain, |
2472 | N->getOperand(N->getNumOperands() - 1) // New glue |
2473 | }; |
2474 | |
2475 | SDNode *Selected = CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops); |
2476 | CurDAG->setNodeMemRefs(cast<MachineSDNode>(Selected), {MMO}); |
2477 | } |
2478 | |
2479 | static unsigned gwsIntrinToOpcode(unsigned IntrID) { |
2480 | switch (IntrID) { |
2481 | case Intrinsic::amdgcn_ds_gws_init: |
2482 | return AMDGPU::DS_GWS_INIT; |
2483 | case Intrinsic::amdgcn_ds_gws_barrier: |
2484 | return AMDGPU::DS_GWS_BARRIER; |
2485 | case Intrinsic::amdgcn_ds_gws_sema_v: |
2486 | return AMDGPU::DS_GWS_SEMA_V; |
2487 | case Intrinsic::amdgcn_ds_gws_sema_br: |
2488 | return AMDGPU::DS_GWS_SEMA_BR; |
2489 | case Intrinsic::amdgcn_ds_gws_sema_p: |
2490 | return AMDGPU::DS_GWS_SEMA_P; |
2491 | case Intrinsic::amdgcn_ds_gws_sema_release_all: |
2492 | return AMDGPU::DS_GWS_SEMA_RELEASE_ALL; |
2493 | default: |
2494 | llvm_unreachable("not a gws intrinsic"); |
2495 | } |
2496 | } |
2497 | |
2498 | void AMDGPUDAGToDAGISel::SelectDS_GWS(SDNode *N, unsigned IntrID) { |
2499 | if (IntrID == Intrinsic::amdgcn_ds_gws_sema_release_all && |
2500 | !Subtarget->hasGWSSemaReleaseAll()) { |
2501 | // Let this error. |
2502 | SelectCode(N); |
2503 | return; |
2504 | } |
2505 | |
2506 | // Chain, intrinsic ID, vsrc, offset |
2507 | const bool HasVSrc = N->getNumOperands() == 4; |
2508 | assert(HasVSrc || N->getNumOperands() == 3); |
2509 | |
2510 | SDLoc SL(N); |
2511 | SDValue BaseOffset = N->getOperand(HasVSrc ? 3 : 2); |
2512 | int ImmOffset = 0; |
2513 | MemIntrinsicSDNode *M = cast<MemIntrinsicSDNode>(N); |
2514 | MachineMemOperand *MMO = M->getMemOperand(); |
2515 | |
2516 | // Don't worry if the offset ends up in a VGPR. Only one lane will have an |
2517 | // effect, so SIFixSGPRCopies will validly insert readfirstlane. |
2518 | |
2519 | // The resource id offset is computed as (<isa opaque base> + M0[21:16] + |
2520 | // offset field) % 64. Some versions of the programming guide omit the m0 |
2521 | // part, or claim it's from offset 0. |
2522 | if (ConstantSDNode *ConstOffset = dyn_cast<ConstantSDNode>(BaseOffset)) { |
2523 | // If we have a constant offset, try to use the 0 in m0 as the base. |
2524 | // TODO: Look into changing the default m0 initialization value. If the |
2525 | // default -1 only set the low 16-bits, we could leave it as-is and add 1 to |
2526 | // the immediate offset. |
2527 | glueCopyToM0(N, CurDAG->getTargetConstant(0, SL, MVT::i32)); |
2528 | ImmOffset = ConstOffset->getZExtValue(); |
2529 | } else { |
2530 | if (CurDAG->isBaseWithConstantOffset(BaseOffset)) { |
2531 | ImmOffset = BaseOffset.getConstantOperandVal(1); |
2532 | BaseOffset = BaseOffset.getOperand(0); |
2533 | } |
2534 | |
2535 | // Prefer to do the shift in an SGPR since it should be possible to use m0 |
2536 | // as the result directly. If it's already an SGPR, it will be eliminated |
2537 | // later. |
2538 | SDNode *SGPROffset |
2539 | = CurDAG->getMachineNode(AMDGPU::V_READFIRSTLANE_B32, SL, MVT::i32, |
2540 | BaseOffset); |
2541 | // Shift to offset in m0 |
2542 | SDNode *M0Base |
2543 | = CurDAG->getMachineNode(AMDGPU::S_LSHL_B32, SL, MVT::i32, |
2544 | SDValue(SGPROffset, 0), |
2545 | CurDAG->getTargetConstant(16, SL, MVT::i32)); |
2546 | glueCopyToM0(N, SDValue(M0Base, 0)); |
2547 | } |
2548 | |
2549 | SDValue Chain = N->getOperand(0); |
2550 | SDValue OffsetField = CurDAG->getTargetConstant(ImmOffset, SL, MVT::i32); |
2551 | |
2552 | const unsigned Opc = gwsIntrinToOpcode(IntrID); |
2553 | SmallVector<SDValue, 5> Ops; |
2554 | if (HasVSrc) |
2555 | Ops.push_back(N->getOperand(2)); |
2556 | Ops.push_back(OffsetField); |
2557 | Ops.push_back(Chain); |
2558 | |
2559 | SDNode *Selected = CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops); |
2560 | CurDAG->setNodeMemRefs(cast<MachineSDNode>(Selected), {MMO}); |
2561 | } |
2562 | |
2563 | void AMDGPUDAGToDAGISel::SelectInterpP1F16(SDNode *N) { |
2564 | if (Subtarget->getLDSBankCount() != 16) { |
2565 | // This is a single instruction with a pattern. |
2566 | SelectCode(N); |
2567 | return; |
2568 | } |
2569 | |
2570 | SDLoc DL(N); |
2571 | |
2572 | // This requires 2 instructions. It is possible to write a pattern to support |
2573 | // this, but the generated isel emitter doesn't correctly deal with multiple |
2574 | // output instructions using the same physical register input. The copy to m0 |
2575 | // is incorrectly placed before the second instruction. |
2576 | // |
2577 | // TODO: Match source modifiers. |
2578 | // |
2579 | // def : Pat < |
2580 | // (int_amdgcn_interp_p1_f16 |
2581 | // (VOP3Mods f32:$src0, i32:$src0_modifiers), |
2582 | // (i32 timm:$attrchan), (i32 timm:$attr), |
2583 | // (i1 timm:$high), M0), |
2584 | // (V_INTERP_P1LV_F16 $src0_modifiers, VGPR_32:$src0, timm:$attr, |
2585 | // timm:$attrchan, 0, |
2586 | // (V_INTERP_MOV_F32 2, timm:$attr, timm:$attrchan), timm:$high)> { |
2587 | // let Predicates = [has16BankLDS]; |
2588 | // } |
2589 | |
2590 | // 16 bank LDS |
2591 | SDValue ToM0 = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL, AMDGPU::M0, |
2592 | N->getOperand(5), SDValue()); |
2593 | |
2594 | SDVTList VTs = CurDAG->getVTList(MVT::f32, MVT::Other); |
2595 | |
2596 | SDNode *InterpMov = |
2597 | CurDAG->getMachineNode(AMDGPU::V_INTERP_MOV_F32, DL, VTs, { |
2598 | CurDAG->getTargetConstant(2, DL, MVT::i32), // P0 |
2599 | N->getOperand(3), // Attr |
2600 | N->getOperand(2), // Attrchan |
2601 | ToM0.getValue(1) // In glue |
2602 | }); |
2603 | |
2604 | SDNode *InterpP1LV = |
2605 | CurDAG->getMachineNode(AMDGPU::V_INTERP_P1LV_F16, DL, MVT::f32, { |
2606 | CurDAG->getTargetConstant(0, DL, MVT::i32), // $src0_modifiers |
2607 | N->getOperand(1), // Src0 |
2608 | N->getOperand(3), // Attr |
2609 | N->getOperand(2), // Attrchan |
2610 | CurDAG->getTargetConstant(0, DL, MVT::i32), // $src2_modifiers |
2611 | SDValue(InterpMov, 0), // Src2 - holds two f16 values selected by high |
2612 | N->getOperand(4), // high |
2613 | CurDAG->getTargetConstant(0, DL, MVT::i1), // $clamp |
2614 | CurDAG->getTargetConstant(0, DL, MVT::i32), // $omod |
2615 | SDValue(InterpMov, 1) |
2616 | }); |
2617 | |
2618 | CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), SDValue(InterpP1LV, 0)); |
2619 | } |
2620 | |
2621 | void AMDGPUDAGToDAGISel::SelectINTRINSIC_W_CHAIN(SDNode *N) { |
2622 | unsigned IntrID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); |
2623 | switch (IntrID) { |
2624 | case Intrinsic::amdgcn_ds_append: |
2625 | case Intrinsic::amdgcn_ds_consume: { |
2626 | if (N->getValueType(0) != MVT::i32) |
2627 | break; |
2628 | SelectDSAppendConsume(N, IntrID); |
2629 | return; |
2630 | } |
2631 | } |
2632 | |
2633 | SelectCode(N); |
2634 | } |
2635 | |
2636 | void AMDGPUDAGToDAGISel::SelectINTRINSIC_WO_CHAIN(SDNode *N) { |
2637 | unsigned IntrID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue(); |
2638 | unsigned Opcode; |
2639 | switch (IntrID) { |
2640 | case Intrinsic::amdgcn_wqm: |
2641 | Opcode = AMDGPU::WQM; |
2642 | break; |
2643 | case Intrinsic::amdgcn_softwqm: |
2644 | Opcode = AMDGPU::SOFT_WQM; |
2645 | break; |
2646 | case Intrinsic::amdgcn_wwm: |
2647 | Opcode = AMDGPU::WWM; |
2648 | break; |
2649 | case Intrinsic::amdgcn_interp_p1_f16: |
2650 | SelectInterpP1F16(N); |
2651 | return; |
2652 | default: |
2653 | SelectCode(N); |
2654 | return; |
2655 | } |
2656 | |
2657 | SDValue Src = N->getOperand(1); |
2658 | CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), {Src}); |
2659 | } |
2660 | |
2661 | void AMDGPUDAGToDAGISel::SelectINTRINSIC_VOID(SDNode *N) { |
2662 | unsigned IntrID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); |
2663 | switch (IntrID) { |
2664 | case Intrinsic::amdgcn_ds_gws_init: |
2665 | case Intrinsic::amdgcn_ds_gws_barrier: |
2666 | case Intrinsic::amdgcn_ds_gws_sema_v: |
2667 | case Intrinsic::amdgcn_ds_gws_sema_br: |
2668 | case Intrinsic::amdgcn_ds_gws_sema_p: |
2669 | case Intrinsic::amdgcn_ds_gws_sema_release_all: |
2670 | SelectDS_GWS(N, IntrID); |
2671 | return; |
2672 | default: |
2673 | break; |
2674 | } |
2675 | |
2676 | SelectCode(N); |
2677 | } |
2678 | |
2679 | bool AMDGPUDAGToDAGISel::SelectVOP3ModsImpl(SDValue In, SDValue &Src, |
2680 | unsigned &Mods, |
2681 | bool AllowAbs) const { |
2682 | Mods = 0; |
2683 | Src = In; |
2684 | |
2685 | if (Src.getOpcode() == ISD::FNEG) { |
2686 | Mods |= SISrcMods::NEG; |
2687 | Src = Src.getOperand(0); |
2688 | } |
2689 | |
2690 | if (AllowAbs && Src.getOpcode() == ISD::FABS) { |
2691 | Mods |= SISrcMods::ABS; |
2692 | Src = Src.getOperand(0); |
2693 | } |
2694 | |
2695 | return true; |
2696 | } |
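// An illustrative sketch of the folding above (hypothetical DAG values,
// for exposition only): given an input shaped like (fneg (fabs %x)),
//
//   SDValue Src;
//   unsigned Mods;
//   SelectVOP3ModsImpl(In, Src, Mods); // Src == %x, Mods == NEG | ABS
//
// both wrapper nodes are stripped and recorded as source modifiers. With
// AllowAbs == false only the outer fneg is folded, leaving
// Src == (fabs %x) and Mods == NEG.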
2697 | |
2698 | bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src, |
2699 | SDValue &SrcMods) const { |
2700 | unsigned Mods; |
2701 | if (SelectVOP3ModsImpl(In, Src, Mods)) { |
2702 | SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32); |
2703 | return true; |
2704 | } |
2705 | |
2706 | return false; |
2707 | } |
2708 | |
2709 | bool AMDGPUDAGToDAGISel::SelectVOP3BMods(SDValue In, SDValue &Src, |
2710 | SDValue &SrcMods) const { |
2711 | unsigned Mods; |
2712 | if (SelectVOP3ModsImpl(In, Src, Mods, /* AllowAbs */ false)) { |
2713 | SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32); |
2714 | return true; |
2715 | } |
2716 | |
2717 | return false; |
2718 | } |
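// VOP3B opcodes (the carry-out forms such as V_ADD_CO_U32) have no abs
// bits in their encoding, which is presumably why this variant passes
// AllowAbs == false and folds only fneg.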
2719 | |
2720 | bool AMDGPUDAGToDAGISel::SelectVOP3Mods_NNaN(SDValue In, SDValue &Src, |
2721 | SDValue &SrcMods) const { |
2722 | SelectVOP3Mods(In, Src, SrcMods); |
2723 | return isNoNanSrc(Src); |
2724 | } |
2725 | |
2726 | bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(SDValue In, SDValue &Src) const { |
2727 | if (In.getOpcode() == ISD::FABS || In.getOpcode() == ISD::FNEG) |
2728 | return false; |
2729 | |
2730 | Src = In; |
2731 | return true; |
2732 | } |
2733 | |
2734 | bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(SDValue In, SDValue &Src, |
2735 | SDValue &SrcMods, SDValue &Clamp, |
2736 | SDValue &Omod) const { |
2737 | SDLoc DL(In); |
2738 | Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1); |
2739 | Omod = CurDAG->getTargetConstant(0, DL, MVT::i1); |
2740 | |
2741 | return SelectVOP3Mods(In, Src, SrcMods); |
2742 | } |
2743 | |
2744 | bool AMDGPUDAGToDAGISel::SelectVOP3BMods0(SDValue In, SDValue &Src, |
2745 | SDValue &SrcMods, SDValue &Clamp, |
2746 | SDValue &Omod) const { |
2747 | SDLoc DL(In); |
2748 | Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1); |
2749 | Omod = CurDAG->getTargetConstant(0, DL, MVT::i1); |
2750 | |
2751 | return SelectVOP3BMods(In, Src, SrcMods); |
2752 | } |
2753 | |
2754 | bool AMDGPUDAGToDAGISel::SelectVOP3OMods(SDValue In, SDValue &Src, |
2755 | SDValue &Clamp, SDValue &Omod) const { |
2756 | Src = In; |
2757 | |
2758 | SDLoc DL(In); |
2759 | Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1); |
2760 | Omod = CurDAG->getTargetConstant(0, DL, MVT::i1); |
2761 | |
2762 | return true; |
2763 | } |
2764 | |
2765 | bool AMDGPUDAGToDAGISel::SelectVOP3PMods(SDValue In, SDValue &Src, |
2766 | SDValue &SrcMods) const { |
2767 | unsigned Mods = 0; |
2768 | Src = In; |
2769 | |
2770 | if (Src.getOpcode() == ISD::FNEG) { |
2771 | Mods ^= (SISrcMods::NEG | SISrcMods::NEG_HI); |
2772 | Src = Src.getOperand(0); |
2773 | } |
2774 | |
2775 | if (Src.getOpcode() == ISD::BUILD_VECTOR) { |
2776 | unsigned VecMods = Mods; |
2777 | |
2778 | SDValue Lo = stripBitcast(Src.getOperand(0)); |
2779 | SDValue Hi = stripBitcast(Src.getOperand(1)); |
2780 | |
2781 | if (Lo.getOpcode() == ISD::FNEG) { |
2782 | Lo = stripBitcast(Lo.getOperand(0)); |
2783 | Mods ^= SISrcMods::NEG; |
2784 | } |
2785 | |
2786 | if (Hi.getOpcode() == ISD::FNEG) { |
2787 | Hi = stripBitcast(Hi.getOperand(0)); |
2788 | Mods ^= SISrcMods::NEG_HI; |
2789 | } |
2790 | |
2791 | if (isExtractHiElt(Lo, Lo)) |
2792 | Mods |= SISrcMods::OP_SEL_0; |
2793 | |
2794 | if (isExtractHiElt(Hi, Hi)) |
2795 | Mods |= SISrcMods::OP_SEL_1; |
2796 | |
2797 | unsigned VecSize = Src.getValueSizeInBits(); |
2798 | Lo = stripExtractLoElt(Lo); |
2799 | Hi = stripExtractLoElt(Hi); |
2800 | |
2801 | if (Lo.getValueSizeInBits() > VecSize) { |
2802 | Lo = CurDAG->getTargetExtractSubreg( |
2803 | (VecSize > 32) ? AMDGPU::sub0_sub1 : AMDGPU::sub0, SDLoc(In), |
2804 | MVT::getIntegerVT(VecSize), Lo); |
2805 | } |
2806 | |
2807 | if (Hi.getValueSizeInBits() > VecSize) { |
2808 | Hi = CurDAG->getTargetExtractSubreg( |
2809 | (VecSize > 32) ? AMDGPU::sub0_sub1 : AMDGPU::sub0, SDLoc(In), |
2810 | MVT::getIntegerVT(VecSize), Hi); |
2811 | } |
2812 | |
2813 | assert(Lo.getValueSizeInBits() <= VecSize &&
2814 |        Hi.getValueSizeInBits() <= VecSize);
2815 | |
2816 | if (Lo == Hi && !isInlineImmediate(Lo.getNode())) { |
2817 | // Really a scalar input. Just select from the low half of the register to |
2818 | // avoid packing. |
2819 | |
2820 | if (VecSize == 32 || VecSize == Lo.getValueSizeInBits()) { |
2821 | Src = Lo; |
2822 | } else { |
2823 | assert(Lo.getValueSizeInBits() == 32 && VecSize == 64);
2824 | |
2825 | SDLoc SL(In); |
2826 | SDValue Undef = SDValue( |
2827 | CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, SL, |
2828 | Lo.getValueType()), 0); |
2829 | auto RC = Lo->isDivergent() ? AMDGPU::VReg_64RegClassID |
2830 | : AMDGPU::SReg_64RegClassID; |
2831 | const SDValue Ops[] = { |
2832 | CurDAG->getTargetConstant(RC, SL, MVT::i32), |
2833 | Lo, CurDAG->getTargetConstant(AMDGPU::sub0, SL, MVT::i32), |
2834 | Undef, CurDAG->getTargetConstant(AMDGPU::sub1, SL, MVT::i32) }; |
2835 | |
2836 | Src = SDValue(CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, SL, |
2837 | Src.getValueType(), Ops), 0); |
2838 | } |
2839 | SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32); |
2840 | return true; |
2841 | } |
2842 | |
2843 | if (VecSize == 64 && Lo == Hi && isa<ConstantFPSDNode>(Lo)) { |
2844 | uint64_t Lit = cast<ConstantFPSDNode>(Lo)->getValueAPF() |
2845 | .bitcastToAPInt().getZExtValue(); |
2846 | if (AMDGPU::isInlinableLiteral32(Lit, Subtarget->hasInv2PiInlineImm())) { |
2847 | Src = CurDAG->getTargetConstant(Lit, SDLoc(In), MVT::i64);
2848 | SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32); |
2849 | return true; |
2850 | } |
2851 | } |
2852 | |
2853 | Mods = VecMods; |
2854 | } |
2855 | |
2856 | // Packed instructions do not have abs modifiers. |
2857 | Mods |= SISrcMods::OP_SEL_1; |
2858 | |
2859 | SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32); |
2860 | return true; |
2861 | } |
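// Worked cases for the packed-modifier folding above (hypothetical v2f16
// value %v, shown for exposition):
//
//   In == (fneg %v) -> Src == %v, Mods == NEG | NEG_HI | OP_SEL_1
//   In == %v        -> Src == %v, Mods == OP_SEL_1
//
// OP_SEL_1 is set on the fall-through path because a VOP3P operand takes
// its high half from the source's high half by default, and packed
// instructions have no abs modifier to fold.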
2862 | |
2863 | bool AMDGPUDAGToDAGISel::SelectVOP3OpSel(SDValue In, SDValue &Src, |
2864 | SDValue &SrcMods) const { |
2865 | Src = In; |
2866 | // FIXME: Handle op_sel |
2867 | SrcMods = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32); |
2868 | return true; |
2869 | } |
2870 | |
2871 | bool AMDGPUDAGToDAGISel::SelectVOP3OpSelMods(SDValue In, SDValue &Src, |
2872 | SDValue &SrcMods) const { |
2873 | // FIXME: Handle op_sel |
2874 | return SelectVOP3Mods(In, Src, SrcMods); |
2875 | } |
2876 | |
2877 | // The return value is not whether the match is possible (which it always is), |
2878 | // but whether or not a conversion is really used.
2879 | bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src, |
2880 | unsigned &Mods) const { |
2881 | Mods = 0; |
2882 | SelectVOP3ModsImpl(In, Src, Mods); |
2883 | |
2884 | if (Src.getOpcode() == ISD::FP_EXTEND) { |
2885 | Src = Src.getOperand(0); |
2886 | assert(Src.getValueType() == MVT::f16);
2887 | Src = stripBitcast(Src); |
2888 | |
2889 | // Be careful about folding modifiers if we already have an abs. fneg is |
2890 | // applied last, so we don't want to apply an earlier fneg. |
2891 | if ((Mods & SISrcMods::ABS) == 0) { |
2892 | unsigned ModsTmp; |
2893 | SelectVOP3ModsImpl(Src, Src, ModsTmp); |
2894 | |
2895 | if ((ModsTmp & SISrcMods::NEG) != 0) |
2896 | Mods ^= SISrcMods::NEG; |
2897 | |
2898 | if ((ModsTmp & SISrcMods::ABS) != 0) |
2899 | Mods |= SISrcMods::ABS; |
2900 | } |
2901 | |
2902 | // op_sel/op_sel_hi decide the source type and which half of the source
2903 | // register is used. If the source's op_sel_hi bit is set, the source is
2904 | // an f16 value that is converted (extended) to f32. If the source's
2905 | // op_sel bit is set, the high half of the source register is picked.
2906 | |
2907 | Mods |= SISrcMods::OP_SEL_1; |
2908 | if (isExtractHiElt(Src, Src)) { |
2909 | Mods |= SISrcMods::OP_SEL_0; |
2910 | |
2911 | // TODO: Should we try to look for neg/abs here? |
2912 | } |
2913 | |
2914 | return true; |
2915 | } |
2916 | |
2917 | return false; |
2918 | } |
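// Illustrative cases for the mad-mix matching above (hypothetical f16
// value %h, v2f16 value %v, f32 value %f):
//
//   In == (fp_extend %h)              -> Src == %h,
//                                        Mods == OP_SEL_1, returns true
//   In == (fp_extend (extract_hi %v)) -> Src == %v,
//                                        Mods == OP_SEL_1 | OP_SEL_0,
//                                        returns true
//   In == %f                          -> Src == %f, Mods == 0,
//                                        returns false (no conversion used)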
2919 | |
2920 | bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixMods(SDValue In, SDValue &Src, |
2921 | SDValue &SrcMods) const { |
2922 | unsigned Mods = 0; |
2923 | SelectVOP3PMadMixModsImpl(In, Src, Mods); |
2924 | SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32); |
2925 | return true; |
2926 | } |
2927 | |
2928 | SDValue AMDGPUDAGToDAGISel::getHi16Elt(SDValue In) const { |
2929 | if (In.isUndef()) |
2930 | return CurDAG->getUNDEF(MVT::i32); |
2931 | |
2932 | if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(In)) { |
2933 | SDLoc SL(In); |
2934 | return CurDAG->getConstant(C->getZExtValue() << 16, SL, MVT::i32); |
2935 | } |
2936 | |
2937 | if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(In)) { |
2938 | SDLoc SL(In); |
2939 | return CurDAG->getConstant( |
2940 | C->getValueAPF().bitcastToAPInt().getZExtValue() << 16, SL, MVT::i32); |
2941 | } |
2942 | |
2943 | SDValue Src; |
2944 | if (isExtractHiElt(In, Src)) |
2945 | return Src; |
2946 | |
2947 | return SDValue(); |
2948 | } |
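// For exposition, two hypothetical inputs to getHi16Elt: a constant
// In == 0x1234 is returned as the i32 constant 0x12340000 (the value
// placed in the high half), and an extract-high pattern returns the
// 32-bit source that already holds the value in its high 16 bits.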
2949 | |
2950 | bool AMDGPUDAGToDAGISel::isVGPRImm(const SDNode *N) const {
2951 | assert(CurDAG->getTarget().getTargetTriple().getArch() == Triple::amdgcn);
2952 |
2953 | const SIRegisterInfo *SIRI =
2954 | static_cast<const SIRegisterInfo *>(Subtarget->getRegisterInfo());
2955 | const SIInstrInfo *SII =
2956 | static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo()); |
2957 | |
2958 | unsigned Limit = 0; |
2959 | bool AllUsesAcceptSReg = true; |
2960 | for (SDNode::use_iterator U = N->use_begin(), E = SDNode::use_end(); |
2961 | Limit < 10 && U != E; ++U, ++Limit) { |
2962 | const TargetRegisterClass *RC = getOperandRegClass(*U, U.getOperandNo()); |
2963 | |
2964 | // If the register class is unknown, it could be a class that is
2965 | // required to be an SGPR, e.g. one coming from an inline asm
2966 | // constraint, so conservatively treat it as an SGPR use.
2967 | if (!RC || SIRI->isSGPRClass(RC)) |
2968 | return false; |
2969 | |
2970 | if (RC != &AMDGPU::VS_32RegClass) { |
2971 | AllUsesAcceptSReg = false; |
2972 | SDNode *User = *U;
2973 | if (User->isMachineOpcode()) { |
2974 | unsigned Opc = User->getMachineOpcode(); |
2975 | const MCInstrDesc &Desc = SII->get(Opc);
2976 | if (Desc.isCommutable()) { |
2977 | unsigned OpIdx = Desc.getNumDefs() + U.getOperandNo(); |
2978 | unsigned CommuteIdx1 = TargetInstrInfo::CommuteAnyOperandIndex; |
2979 | if (SII->findCommutedOpIndices(Desc, OpIdx, CommuteIdx1)) { |
2980 | unsigned CommutedOpNo = CommuteIdx1 - Desc.getNumDefs(); |
2981 | const TargetRegisterClass *CommutedRC = getOperandRegClass(*U, CommutedOpNo); |
2982 | if (CommutedRC == &AMDGPU::VS_32RegClass) |
2983 | AllUsesAcceptSReg = true; |
2984 | } |
2985 | } |
2986 | } |
2987 | // If "AllUsesAcceptSReg == false" so far we haven't suceeded |
2988 | // commuting current user. This means have at least one use |
2989 | // that strictly require VGPR. Thus, we will not attempt to commute |
2990 | // other user instructions. |
2991 | if (!AllUsesAcceptSReg) |
2992 | break; |
2993 | } |
2994 | } |
2995 | return !AllUsesAcceptSReg && (Limit < 10); |
2996 | } |
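// Summarizing the logic above: an immediate is worth materializing in a
// VGPR only if at least one of the first ten scanned uses cannot accept an
// SGPR operand even after commuting; if ten uses are scanned without
// finding such a use, the query conservatively returns false.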
2997 | |
2998 | bool AMDGPUDAGToDAGISel::isUniformLoad(const SDNode *N) const {
2999 | auto Ld = cast<LoadSDNode>(N); |
3000 | |
3001 | return Ld->getAlignment() >= 4 && |
3002 | ( |
3003 | ( |
3004 | ( |
3005 | Ld->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS || |
3006 | Ld->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT |
3007 | ) |
3008 | && |
3009 | !N->isDivergent() |
3010 | ) |
3011 | || |
3012 | ( |
3013 | Subtarget->getScalarizeGlobalBehavior() && |
3014 | Ld->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS && |
3015 | Ld->isSimple() && |
3016 | !N->isDivergent() && |
3017 | static_cast<const SITargetLowering *>( |
3018 | getTargetLowering())->isMemOpHasNoClobberedMemOperand(N) |
3019 | ) |
3020 | ); |
3021 | } |
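// A sketch of a load the predicate above accepts (assumed IR, using AMDGPU
// address-space numbering where 4 is constant, 6 is constant-32bit, and
// 1 is global):
//
//   %v = load i32, i32 addrspace(4)* %p, align 4   ; uniform when %p is
//                                                  ; not divergent
//
// A global (addrspace 1) load qualifies only with the scalarize-global
// subtarget behavior enabled, a simple (non-volatile, non-atomic) access,
// and a memory operand proven not to be clobbered by any store.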
3022 | |
3023 | void AMDGPUDAGToDAGISel::PostprocessISelDAG() { |
3024 | const AMDGPUTargetLowering& Lowering = |
3025 | *static_cast<const AMDGPUTargetLowering*>(getTargetLowering()); |
3026 | bool IsModified = false; |
3027 | do { |
3028 | IsModified = false; |
3029 | |
3030 | // Go over all selected nodes and try to fold them a bit more |
3031 | SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_begin(); |
3032 | while (Position != CurDAG->allnodes_end()) { |
3033 | SDNode *Node = &*Position++; |
3034 | MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(Node); |
3035 | if (!MachineNode) |
3036 | continue; |
3037 | |
3038 | SDNode *ResNode = Lowering.PostISelFolding(MachineNode, *CurDAG); |
3039 | if (ResNode != Node) { |
3040 | if (ResNode) |
3041 | ReplaceUses(Node, ResNode); |
3042 | IsModified = true; |
3043 | } |
3044 | } |
3045 | CurDAG->RemoveDeadNodes(); |
3046 | } while (IsModified); |
3047 | } |
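// The loop above is a straightforward fixed-point iteration: each sweep
// offers every machine node to PostISelFolding, and the sweep repeats
// until one full pass over the DAG makes no further change.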
3048 | |
3049 | bool R600DAGToDAGISel::runOnMachineFunction(MachineFunction &MF) { |
3050 | Subtarget = &MF.getSubtarget<R600Subtarget>(); |
3051 | return SelectionDAGISel::runOnMachineFunction(MF); |
3052 | } |
3053 | |
3054 | bool R600DAGToDAGISel::isConstantLoad(const MemSDNode *N, int CbId) const { |
3055 | if (!N->readMem()) |
3056 | return false; |
3057 | if (CbId == -1) |
3058 | return N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS || |
3059 | N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT; |
3060 | |
3061 | return N->getAddressSpace() == AMDGPUAS::CONSTANT_BUFFER_0 + CbId; |
3062 | } |
3063 | |
3064 | bool R600DAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr, |
3065 | SDValue& IntPtr) { |
3066 | if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Addr)) { |
3067 | IntPtr = CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, SDLoc(Addr), |
3068 | true); |
3069 | return true; |
3070 | } |
3071 | return false; |
3072 | } |
3073 | |
3074 | bool R600DAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr, |
3075 | SDValue& BaseReg, SDValue &Offset) { |
3076 | if (!isa<ConstantSDNode>(Addr)) { |
3077 | BaseReg = Addr; |
3078 | Offset = CurDAG->getIntPtrConstant(0, SDLoc(Addr), true); |
3079 | return true; |
3080 | } |
3081 | return false; |
3082 | } |
3083 | |
3084 | void R600DAGToDAGISel::Select(SDNode *N) { |
3085 | unsigned int Opc = N->getOpcode(); |
3086 | if (N->isMachineOpcode()) { |
3087 | N->setNodeId(-1); |
3088 | return; // Already selected. |
3089 | } |
3090 | |
3091 | switch (Opc) { |
3092 | default: break; |
3093 | case AMDGPUISD::BUILD_VERTICAL_VECTOR: |
3094 | case ISD::SCALAR_TO_VECTOR: |
3095 | case ISD::BUILD_VECTOR: { |
3096 | EVT VT = N->getValueType(0); |
3097 | unsigned NumVectorElts = VT.getVectorNumElements(); |
3098 | unsigned RegClassID; |
3099 | // BUILD_VECTOR was lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG
3100 | // sequence, which adds a 128-bit register copy when going through the
3101 | // TwoAddressInstructions pass. We want to avoid 128-bit copies as much
3102 | // as possible because they can't be bundled by our scheduler.
3103 | switch(NumVectorElts) { |
3104 | case 2: RegClassID = R600::R600_Reg64RegClassID; break; |
3105 | case 4: |
3106 | if (Opc == AMDGPUISD::BUILD_VERTICAL_VECTOR) |
3107 | RegClassID = R600::R600_Reg128VerticalRegClassID; |
3108 | else |
3109 | RegClassID = R600::R600_Reg128RegClassID; |
3110 | break; |
3111 | default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR")::llvm::llvm_unreachable_internal("Do not know how to lower this BUILD_VECTOR" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp" , 3111); |
3112 | } |
3113 | SelectBuildVector(N, RegClassID); |
3114 | return; |
3115 | } |
3116 | } |
3117 | |
3118 | SelectCode(N); |
3119 | } |
3120 | |
3121 | bool R600DAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base, |
3122 | SDValue &Offset) { |
3123 | ConstantSDNode *C; |
3124 | SDLoc DL(Addr); |
3125 | |
3126 | if ((C = dyn_cast<ConstantSDNode>(Addr))) { |
3127 | Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32); |
3128 | Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32); |
3129 | } else if ((Addr.getOpcode() == AMDGPUISD::DWORDADDR) && |
3130 | (C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) { |
3131 | Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32); |
3132 | Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32); |
3133 | } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) && |
3134 | (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) { |
3135 | Base = Addr.getOperand(0); |
3136 | Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32); |
3137 | } else { |
3138 | Base = Addr; |
3139 | Offset = CurDAG->getTargetConstant(0, DL, MVT::i32); |
3140 | } |
3141 | |
3142 | return true; |
3143 | } |
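// Hypothetical matches for SelectADDRIndirect above (illustration only):
//
//   Addr == (add %r, 16) -> Base == %r,                 Offset == 16
//   Addr == 400          -> Base == INDIRECT_BASE_ADDR, Offset == 400
//   Addr == %r           -> Base == %r,                 Offset == 0
//
// The selector always succeeds; the cases only decide how the address is
// split into a base register and a constant offset.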
3144 | |
3145 | bool R600DAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base, |
3146 | SDValue &Offset) { |
3147 | ConstantSDNode *IMMOffset; |
3148 | |
3149 | if (Addr.getOpcode() == ISD::ADD |
3150 | && (IMMOffset = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) |
3151 | && isInt<16>(IMMOffset->getZExtValue())) { |
3152 | |
3153 | Base = Addr.getOperand(0); |
3154 | Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr), |
3155 | MVT::i32); |
3156 | return true; |
3157 | // If the pointer address is constant, we can move it to the offset field. |
3158 | } else if ((IMMOffset = dyn_cast<ConstantSDNode>(Addr)) |
3159 | && isInt<16>(IMMOffset->getZExtValue())) { |
3160 | Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), |
3161 | SDLoc(CurDAG->getEntryNode()), |
3162 | R600::ZERO, MVT::i32); |
3163 | Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr), |
3164 | MVT::i32); |
3165 | return true; |
3166 | } |
3167 | |
3168 | // Default case, no offset |
3169 | Base = Addr; |
3170 | Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32); |
3171 | return true; |
3172 | } |
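// A matching sketch for SelectADDRVTX_READ above, which folds only offsets
// that fit a signed 16-bit field (hypothetical values):
//
//   Addr == (add %r, 32)    -> Base == %r,         Offset == 32
//   Addr == 1024            -> Base == R600::ZERO, Offset == 1024
//   Addr == (add %r, 70000) -> Base == Addr,       Offset == 0 (too wide)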