/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Bug Summary

File:	llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
Warning:	line 17270, column 11 Value stored to 'StartAddress' during its initialization is never read

Annotated Source Code

Press '?' to see keyboard shortcuts

Show analyzer invocation

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name DAGCombiner.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -fhalf-no-semantic-interposition -mframe-pointer=none -fmath-errno -fno-rounding-math -mconstructor-aliases -munwind-tables -target-cpu x86-64 -tune-cpu generic -fno-split-dwarf-inlining -debugger-tuning=gdb -ffunction-sections -fdata-sections -fcoverage-compilation-dir=/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/build-llvm/lib/CodeGen/SelectionDAG -resource-dir /usr/lib/llvm-13/lib/clang/13.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I /build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/build-llvm/lib/CodeGen/SelectionDAG -I /build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG -I /build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/build-llvm/include -I /build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/include -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem 
/usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0/backward -internal-isystem /usr/local/include -internal-isystem /usr/lib/llvm-13/lib/clang/13.0.0/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir=/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/build-llvm/lib/CodeGen/SelectionDAG -fdebug-prefix-map=/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd=. -ferror-limit 19 -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -o /tmp/scan-build-2021-03-02-022427-27315-1 -x c++ /build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

1	//===- DAGCombiner.cpp - Implement a DAG node combiner --------------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// This pass combines dag nodes to form fewer, simpler DAG nodes. It can be run
10	// both before and after the DAG is legalized.
11	//
12	// This pass is not a substitute for the LLVM IR instcombine pass. This pass is
13	// primarily intended to handle simplification opportunities that are implicit
14	// in the LLVM IR and exposed by the various codegen lowering phases.
15	//
16	//===----------------------------------------------------------------------===//
17
18	#include "llvm/ADT/APFloat.h"
19	#include "llvm/ADT/APInt.h"
20	#include "llvm/ADT/ArrayRef.h"
21	#include "llvm/ADT/DenseMap.h"
22	#include "llvm/ADT/IntervalMap.h"
23	#include "llvm/ADT/None.h"
24	#include "llvm/ADT/Optional.h"
25	#include "llvm/ADT/STLExtras.h"
26	#include "llvm/ADT/SetVector.h"
27	#include "llvm/ADT/SmallBitVector.h"
28	#include "llvm/ADT/SmallPtrSet.h"
29	#include "llvm/ADT/SmallSet.h"
30	#include "llvm/ADT/SmallVector.h"
31	#include "llvm/ADT/Statistic.h"
32	#include "llvm/Analysis/AliasAnalysis.h"
33	#include "llvm/Analysis/MemoryLocation.h"
34	#include "llvm/Analysis/TargetLibraryInfo.h"
35	#include "llvm/Analysis/VectorUtils.h"
36	#include "llvm/CodeGen/DAGCombine.h"
37	#include "llvm/CodeGen/ISDOpcodes.h"
38	#include "llvm/CodeGen/MachineFrameInfo.h"
39	#include "llvm/CodeGen/MachineFunction.h"
40	#include "llvm/CodeGen/MachineMemOperand.h"
41	#include "llvm/CodeGen/RuntimeLibcalls.h"
42	#include "llvm/CodeGen/SelectionDAG.h"
43	#include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
44	#include "llvm/CodeGen/SelectionDAGNodes.h"
45	#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
46	#include "llvm/CodeGen/TargetLowering.h"
47	#include "llvm/CodeGen/TargetRegisterInfo.h"
48	#include "llvm/CodeGen/TargetSubtargetInfo.h"
49	#include "llvm/CodeGen/ValueTypes.h"
50	#include "llvm/IR/Attributes.h"
51	#include "llvm/IR/Constant.h"
52	#include "llvm/IR/DataLayout.h"
53	#include "llvm/IR/DerivedTypes.h"
54	#include "llvm/IR/Function.h"
55	#include "llvm/IR/LLVMContext.h"
56	#include "llvm/IR/Metadata.h"
57	#include "llvm/Support/Casting.h"
58	#include "llvm/Support/CodeGen.h"
59	#include "llvm/Support/CommandLine.h"
60	#include "llvm/Support/Compiler.h"
61	#include "llvm/Support/Debug.h"
62	#include "llvm/Support/ErrorHandling.h"
63	#include "llvm/Support/KnownBits.h"
64	#include "llvm/Support/MachineValueType.h"
65	#include "llvm/Support/MathExtras.h"
66	#include "llvm/Support/raw_ostream.h"
67	#include "llvm/Target/TargetMachine.h"
68	#include "llvm/Target/TargetOptions.h"
69	#include <algorithm>
70	#include <cassert>
71	#include <cstdint>
72	#include <functional>
73	#include <iterator>
74	#include <string>
75	#include <tuple>
76	#include <utility>
77
78	using namespace llvm;
79
80	#define DEBUG_TYPE"dagcombine" "dagcombine"
81
82	STATISTIC(NodesCombined , "Number of dag nodes combined")static llvm::Statistic NodesCombined = {"dagcombine", "NodesCombined" , "Number of dag nodes combined"};
83	STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created")static llvm::Statistic PreIndexedNodes = {"dagcombine", "PreIndexedNodes" , "Number of pre-indexed nodes created"};
84	STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created")static llvm::Statistic PostIndexedNodes = {"dagcombine", "PostIndexedNodes" , "Number of post-indexed nodes created"};
85	STATISTIC(OpsNarrowed , "Number of load/op/store narrowed")static llvm::Statistic OpsNarrowed = {"dagcombine", "OpsNarrowed" , "Number of load/op/store narrowed"};
86	STATISTIC(LdStFP2Int , "Number of fp load/store pairs transformed to int")static llvm::Statistic LdStFP2Int = {"dagcombine", "LdStFP2Int" , "Number of fp load/store pairs transformed to int"};
87	STATISTIC(SlicedLoads, "Number of load sliced")static llvm::Statistic SlicedLoads = {"dagcombine", "SlicedLoads" , "Number of load sliced"};
88	STATISTIC(NumFPLogicOpsConv, "Number of logic ops converted to fp ops")static llvm::Statistic NumFPLogicOpsConv = {"dagcombine", "NumFPLogicOpsConv" , "Number of logic ops converted to fp ops"};
89
90	static cl::opt<bool>
91	CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
92	cl::desc("Enable DAG combiner's use of IR alias analysis"));
93
94	static cl::opt<bool>
95	UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true),
96	cl::desc("Enable DAG combiner's use of TBAA"));
97
98	#ifndef NDEBUG
99	static cl::opt<std::string>
100	CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden,
101	cl::desc("Only use DAG-combiner alias analysis in this"
102	" function"));
103	#endif
104
105	/// Hidden option to stress test load slicing, i.e., when this option
106	/// is enabled, load slicing bypasses most of its profitability guards.
107	static cl::opt<bool>
108	StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
109	cl::desc("Bypass the profitability model of load slicing"),
110	cl::init(false));
111
112	static cl::opt<bool>
113	MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
114	cl::desc("DAG combiner may split indexing from loads"));
115
116	static cl::opt<bool>
117	EnableStoreMerging("combiner-store-merging", cl::Hidden, cl::init(true),
118	cl::desc("DAG combiner enable merging multiple stores "
119	"into a wider store"));
120
121	static cl::opt<unsigned> TokenFactorInlineLimit(
122	"combiner-tokenfactor-inline-limit", cl::Hidden, cl::init(2048),
123	cl::desc("Limit the number of operands to inline for Token Factors"));
124
125	static cl::opt<unsigned> StoreMergeDependenceLimit(
126	"combiner-store-merge-dependence-limit", cl::Hidden, cl::init(10),
127	cl::desc("Limit the number of times for the same StoreNode and RootNode "
128	"to bail out in store merging dependence check"));
129
130	static cl::opt<bool> EnableReduceLoadOpStoreWidth(
131	"combiner-reduce-load-op-store-width", cl::Hidden, cl::init(true),
132	cl::desc("DAG cominber enable reducing the width of load/op/store "
133	"sequence"));
134
135	static cl::opt<bool> EnableShrinkLoadReplaceStoreWithStore(
136	"combiner-shrink-load-replace-store-with-store", cl::Hidden, cl::init(true),
137	cl::desc("DAG cominber enable load/<replace bytes>/store with "
138	"a narrower store"));
139
140	namespace {
141
142	class DAGCombiner {
143	SelectionDAG &DAG;
144	const TargetLowering &TLI;
145	const SelectionDAGTargetInfo *STI;
146	CombineLevel Level;
147	CodeGenOpt::Level OptLevel;
148	bool LegalDAG = false;
149	bool LegalOperations = false;
150	bool LegalTypes = false;
151	bool ForCodeSize;
152	bool DisableGenericCombines;
153
154	/// Worklist of all of the nodes that need to be simplified.
155	///
156	/// This must behave as a stack -- new nodes to process are pushed onto the
157	/// back and when processing we pop off of the back.
158	///
159	/// The worklist will not contain duplicates but may contain null entries
160	/// due to nodes being deleted from the underlying DAG.
161	SmallVector<SDNode *, 64> Worklist;
162
163	/// Mapping from an SDNode to its position on the worklist.
164	///
165	/// This is used to find and remove nodes from the worklist (by nulling
166	/// them) when they are deleted from the underlying DAG. It relies on
167	/// stable indices of nodes within the worklist.
168	DenseMap<SDNode *, unsigned> WorklistMap;
169	/// This records all nodes that we attempted to add to the worklist since
170	/// we last considered a new worklist entry. As we do not add duplicate
171	/// nodes to the worklist, this is different from the tail of the worklist.
172	SmallSetVector<SDNode *, 32> PruningList;
173
174	/// Set of nodes which have been combined (at least once).
175	///
176	/// This is used to allow us to reliably add any operands of a DAG node
177	/// which have not yet been combined to the worklist.
178	SmallPtrSet<SDNode *, 32> CombinedNodes;
179
180	/// Map from candidate StoreNode to the pair of RootNode and count.
181	/// The count is used to track how many times we have seen the StoreNode
182	/// with the same RootNode bail out in dependence check. If we have seen
183	/// the bail out for the same pair many times over a limit, we won't
184	/// consider the StoreNode with the same RootNode as store merging
185	/// candidate again.
186	DenseMap<SDNode , std::pair<SDNode , unsigned>> StoreRootCountMap;
187
188	// AA - Used for DAG load/store alias analysis.
189	AliasAnalysis *AA;
190
191	/// When an instruction is simplified, add all users of the instruction to
192	/// the work lists because they might get more simplified now.
193	void AddUsersToWorklist(SDNode *N) {
194	for (SDNode *Node : N->uses())
195	AddToWorklist(Node);
196	}
197
198	/// Convenient shorthand to add a node and all of its user to the worklist.
199	void AddToWorklistWithUsers(SDNode *N) {
200	AddUsersToWorklist(N);
201	AddToWorklist(N);
202	}
203
204	// Prune potentially dangling nodes. This is called after
205	// any visit to a node, but should also be called during a visit after any
206	// failed combine which may have created a DAG node.
207	void clearAddedDanglingWorklistEntries() {
208	// Check any nodes added to the worklist to see if they are prunable.
209	while (!PruningList.empty()) {
210	auto *N = PruningList.pop_back_val();
211	if (N->use_empty())
212	recursivelyDeleteUnusedNodes(N);
213	}
214	}
215
216	SDNode *getNextWorklistEntry() {
217	// Before we do any work, remove nodes that are not in use.
218	clearAddedDanglingWorklistEntries();
219	SDNode *N = nullptr;
220	// The Worklist holds the SDNodes in order, but it may contain null
221	// entries.
222	while (!N && !Worklist.empty()) {
223	N = Worklist.pop_back_val();
224	}
225
226	if (N) {
227	bool GoodWorklistEntry = WorklistMap.erase(N);
228	(void)GoodWorklistEntry;
229	assert(GoodWorklistEntry &&((GoodWorklistEntry && "Found a worklist entry without a corresponding map entry!" ) ? static_cast<void> (0) : __assert_fail ("GoodWorklistEntry && \"Found a worklist entry without a corresponding map entry!\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 230, __PRETTY_FUNCTION__))
230	"Found a worklist entry without a corresponding map entry!")((GoodWorklistEntry && "Found a worklist entry without a corresponding map entry!" ) ? static_cast<void> (0) : __assert_fail ("GoodWorklistEntry && \"Found a worklist entry without a corresponding map entry!\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 230, __PRETTY_FUNCTION__));
231	}
232	return N;
233	}
234
235	/// Call the node-specific routine that folds each particular type of node.
236	SDValue visit(SDNode *N);
237
238	public:
239	DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOpt::Level OL)
240	: DAG(D), TLI(D.getTargetLoweringInfo()),
241	STI(D.getSubtarget().getSelectionDAGInfo()),
242	Level(BeforeLegalizeTypes), OptLevel(OL), AA(AA) {
243	ForCodeSize = DAG.shouldOptForSize();
244	DisableGenericCombines = STI && STI->disableGenericCombines(OptLevel);
245
246	MaximumLegalStoreInBits = 0;
247	// We use the minimum store size here, since that's all we can guarantee
248	// for the scalable vector types.
249	for (MVT VT : MVT::all_valuetypes())
250	if (EVT(VT).isSimple() && VT != MVT::Other &&
251	TLI.isTypeLegal(EVT(VT)) &&
252	VT.getSizeInBits().getKnownMinSize() >= MaximumLegalStoreInBits)
253	MaximumLegalStoreInBits = VT.getSizeInBits().getKnownMinSize();
254	}
255
256	void ConsiderForPruning(SDNode *N) {
257	// Mark this for potential pruning.
258	PruningList.insert(N);
259	}
260
261	/// Add to the worklist making sure its instance is at the back (next to be
262	/// processed.)
263	void AddToWorklist(SDNode *N) {
264	assert(N->getOpcode() != ISD::DELETED_NODE &&((N->getOpcode() != ISD::DELETED_NODE && "Deleted Node added to Worklist" ) ? static_cast<void> (0) : __assert_fail ("N->getOpcode() != ISD::DELETED_NODE && \"Deleted Node added to Worklist\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 265, __PRETTY_FUNCTION__))
265	"Deleted Node added to Worklist")((N->getOpcode() != ISD::DELETED_NODE && "Deleted Node added to Worklist" ) ? static_cast<void> (0) : __assert_fail ("N->getOpcode() != ISD::DELETED_NODE && \"Deleted Node added to Worklist\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 265, __PRETTY_FUNCTION__));
266
267	// Skip handle nodes as they can't usefully be combined and confuse the
268	// zero-use deletion strategy.
269	if (N->getOpcode() == ISD::HANDLENODE)
270	return;
271
272	ConsiderForPruning(N);
273
274	if (WorklistMap.insert(std::make_pair(N, Worklist.size())).second)
275	Worklist.push_back(N);
276	}
277
278	/// Remove all instances of N from the worklist.
279	void removeFromWorklist(SDNode *N) {
280	CombinedNodes.erase(N);
281	PruningList.remove(N);
282	StoreRootCountMap.erase(N);
283
284	auto It = WorklistMap.find(N);
285	if (It == WorklistMap.end())
286	return; // Not in the worklist.
287
288	// Null out the entry rather than erasing it to avoid a linear operation.
289	Worklist[It->second] = nullptr;
290	WorklistMap.erase(It);
291	}
292
293	void deleteAndRecombine(SDNode *N);
294	bool recursivelyDeleteUnusedNodes(SDNode *N);
295
296	/// Replaces all uses of the results of one DAG node with new values.
297	SDValue CombineTo(SDNode N, const SDValue To, unsigned NumTo,
298	bool AddTo = true);
299
300	/// Replaces all uses of the results of one DAG node with new values.
301	SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
302	return CombineTo(N, &Res, 1, AddTo);
303	}
304
305	/// Replaces all uses of the results of one DAG node with new values.
306	SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
307	bool AddTo = true) {
308	SDValue To[] = { Res0, Res1 };
309	return CombineTo(N, To, 2, AddTo);
310	}
311
312	void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);
313
314	private:
315	unsigned MaximumLegalStoreInBits;
316
317	/// Check the specified integer node value to see if it can be simplified or
318	/// if things it uses can be simplified by bit propagation.
319	/// If so, return true.
320	bool SimplifyDemandedBits(SDValue Op) {
321	unsigned BitWidth = Op.getScalarValueSizeInBits();
322	APInt DemandedBits = APInt::getAllOnesValue(BitWidth);
323	return SimplifyDemandedBits(Op, DemandedBits);
324	}
325
326	bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits) {
327	TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
328	KnownBits Known;
329	if (!TLI.SimplifyDemandedBits(Op, DemandedBits, Known, TLO, 0, false))
330	return false;
331
332	// Revisit the node.
333	AddToWorklist(Op.getNode());
334
335	CommitTargetLoweringOpt(TLO);
336	return true;
337	}
338
339	/// Check the specified vector node value to see if it can be simplified or
340	/// if things it uses can be simplified as it only uses some of the
341	/// elements. If so, return true.
342	bool SimplifyDemandedVectorElts(SDValue Op) {
343	// TODO: For now just pretend it cannot be simplified.
344	if (Op.getValueType().isScalableVector())
345	return false;
346
347	unsigned NumElts = Op.getValueType().getVectorNumElements();
348	APInt DemandedElts = APInt::getAllOnesValue(NumElts);
349	return SimplifyDemandedVectorElts(Op, DemandedElts);
350	}
351
352	bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
353	const APInt &DemandedElts,
354	bool AssumeSingleUse = false);
355	bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedElts,
356	bool AssumeSingleUse = false);
357
358	bool CombineToPreIndexedLoadStore(SDNode *N);
359	bool CombineToPostIndexedLoadStore(SDNode *N);
360	SDValue SplitIndexingFromLoad(LoadSDNode *LD);
361	bool SliceUpLoad(SDNode *N);
362
363	// Scalars have size 0 to distinguish from singleton vectors.
364	SDValue ForwardStoreValueToDirectLoad(LoadSDNode *LD);
365	bool getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val);
366	bool extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val);
367
368	/// Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed
369	/// load.
370	///
371	/// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced.
372	/// \param InVecVT type of the input vector to EVE with bitcasts resolved.
373	/// \param EltNo index of the vector element to load.
374	/// \param OriginalLoad load that EVE came from to be replaced.
375	/// \returns EVE on success SDValue() on failure.
376	SDValue scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
377	SDValue EltNo,
378	LoadSDNode *OriginalLoad);
379	void ReplaceLoadWithPromotedLoad(SDNode Load, SDNode ExtLoad);
380	SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
381	SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
382	SDValue ZExtPromoteOperand(SDValue Op, EVT PVT);
383	SDValue PromoteIntBinOp(SDValue Op);
384	SDValue PromoteIntShiftOp(SDValue Op);
385	SDValue PromoteExtend(SDValue Op);
386	bool PromoteLoad(SDValue Op);
387
388	/// Call the node-specific routine that knows how to fold each
389	/// particular type of node. If that doesn't do anything, try the
390	/// target-specific DAG combines.
391	SDValue combine(SDNode *N);
392
393	// Visitation implementation - Implement dag node combining for different
394	// node types. The semantics are as follows:
395	// Return Value:
396	// SDValue.getNode() == 0 - No change was made
397	// SDValue.getNode() == N - N was replaced, is dead and has been handled.
398	// otherwise - N should be replaced by the returned Operand.
399	//
400	SDValue visitTokenFactor(SDNode *N);
401	SDValue visitMERGE_VALUES(SDNode *N);
402	SDValue visitADD(SDNode *N);
403	SDValue visitADDLike(SDNode *N);
404	SDValue visitADDLikeCommutative(SDValue N0, SDValue N1, SDNode *LocReference);
405	SDValue visitSUB(SDNode *N);
406	SDValue visitADDSAT(SDNode *N);
407	SDValue visitSUBSAT(SDNode *N);
408	SDValue visitADDC(SDNode *N);
409	SDValue visitADDO(SDNode *N);
410	SDValue visitUADDOLike(SDValue N0, SDValue N1, SDNode *N);
411	SDValue visitSUBC(SDNode *N);
412	SDValue visitSUBO(SDNode *N);
413	SDValue visitADDE(SDNode *N);
414	SDValue visitADDCARRY(SDNode *N);
415	SDValue visitSADDO_CARRY(SDNode *N);
416	SDValue visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn, SDNode *N);
417	SDValue visitSUBE(SDNode *N);
418	SDValue visitSUBCARRY(SDNode *N);
419	SDValue visitSSUBO_CARRY(SDNode *N);
420	SDValue visitMUL(SDNode *N);
421	SDValue visitMULFIX(SDNode *N);
422	SDValue useDivRem(SDNode *N);
423	SDValue visitSDIV(SDNode *N);
424	SDValue visitSDIVLike(SDValue N0, SDValue N1, SDNode *N);
425	SDValue visitUDIV(SDNode *N);
426	SDValue visitUDIVLike(SDValue N0, SDValue N1, SDNode *N);
427	SDValue visitREM(SDNode *N);
428	SDValue visitMULHU(SDNode *N);
429	SDValue visitMULHS(SDNode *N);
430	SDValue visitSMUL_LOHI(SDNode *N);
431	SDValue visitUMUL_LOHI(SDNode *N);
432	SDValue visitMULO(SDNode *N);
433	SDValue visitIMINMAX(SDNode *N);
434	SDValue visitAND(SDNode *N);
435	SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *N);
436	SDValue visitOR(SDNode *N);
437	SDValue visitORLike(SDValue N0, SDValue N1, SDNode *N);
438	SDValue visitXOR(SDNode *N);
439	SDValue SimplifyVBinOp(SDNode *N);
440	SDValue visitSHL(SDNode *N);
441	SDValue visitSRA(SDNode *N);
442	SDValue visitSRL(SDNode *N);
443	SDValue visitFunnelShift(SDNode *N);
444	SDValue visitRotate(SDNode *N);
445	SDValue visitABS(SDNode *N);
446	SDValue visitBSWAP(SDNode *N);
447	SDValue visitBITREVERSE(SDNode *N);
448	SDValue visitCTLZ(SDNode *N);
449	SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
450	SDValue visitCTTZ(SDNode *N);
451	SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
452	SDValue visitCTPOP(SDNode *N);
453	SDValue visitSELECT(SDNode *N);
454	SDValue visitVSELECT(SDNode *N);
455	SDValue visitSELECT_CC(SDNode *N);
456	SDValue visitSETCC(SDNode *N);
457	SDValue visitSETCCCARRY(SDNode *N);
458	SDValue visitSIGN_EXTEND(SDNode *N);
459	SDValue visitZERO_EXTEND(SDNode *N);
460	SDValue visitANY_EXTEND(SDNode *N);
461	SDValue visitAssertExt(SDNode *N);
462	SDValue visitAssertAlign(SDNode *N);
463	SDValue visitSIGN_EXTEND_INREG(SDNode *N);
464	SDValue visitSIGN_EXTEND_VECTOR_INREG(SDNode *N);
465	SDValue visitZERO_EXTEND_VECTOR_INREG(SDNode *N);
466	SDValue visitTRUNCATE(SDNode *N);
467	SDValue visitBITCAST(SDNode *N);
468	SDValue visitFREEZE(SDNode *N);
469	SDValue visitBUILD_PAIR(SDNode *N);
470	SDValue visitFADD(SDNode *N);
471	SDValue visitSTRICT_FADD(SDNode *N);
472	SDValue visitFSUB(SDNode *N);
473	SDValue visitFMUL(SDNode *N);
474	SDValue visitFMA(SDNode *N);
475	SDValue visitFDIV(SDNode *N);
476	SDValue visitFREM(SDNode *N);
477	SDValue visitFSQRT(SDNode *N);
478	SDValue visitFCOPYSIGN(SDNode *N);
479	SDValue visitFPOW(SDNode *N);
480	SDValue visitSINT_TO_FP(SDNode *N);
481	SDValue visitUINT_TO_FP(SDNode *N);
482	SDValue visitFP_TO_SINT(SDNode *N);
483	SDValue visitFP_TO_UINT(SDNode *N);
484	SDValue visitFP_ROUND(SDNode *N);
485	SDValue visitFP_EXTEND(SDNode *N);
486	SDValue visitFNEG(SDNode *N);
487	SDValue visitFABS(SDNode *N);
488	SDValue visitFCEIL(SDNode *N);
489	SDValue visitFTRUNC(SDNode *N);
490	SDValue visitFFLOOR(SDNode *N);
491	SDValue visitFMINNUM(SDNode *N);
492	SDValue visitFMAXNUM(SDNode *N);
493	SDValue visitFMINIMUM(SDNode *N);
494	SDValue visitFMAXIMUM(SDNode *N);
495	SDValue visitBRCOND(SDNode *N);
496	SDValue visitBR_CC(SDNode *N);
497	SDValue visitLOAD(SDNode *N);
498
499	SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain);
500	SDValue replaceStoreOfFPConstant(StoreSDNode *ST);
501
502	SDValue visitSTORE(SDNode *N);
503	SDValue visitLIFETIME_END(SDNode *N);
504	SDValue visitINSERT_VECTOR_ELT(SDNode *N);
505	SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
506	SDValue visitBUILD_VECTOR(SDNode *N);
507	SDValue visitCONCAT_VECTORS(SDNode *N);
508	SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
509	SDValue visitVECTOR_SHUFFLE(SDNode *N);
510	SDValue visitSCALAR_TO_VECTOR(SDNode *N);
511	SDValue visitINSERT_SUBVECTOR(SDNode *N);
512	SDValue visitMLOAD(SDNode *N);
513	SDValue visitMSTORE(SDNode *N);
514	SDValue visitMGATHER(SDNode *N);
515	SDValue visitMSCATTER(SDNode *N);
516	SDValue visitFP_TO_FP16(SDNode *N);
517	SDValue visitFP16_TO_FP(SDNode *N);
518	SDValue visitVECREDUCE(SDNode *N);
519
520	SDValue visitFADDForFMACombine(SDNode *N);
521	SDValue visitFSUBForFMACombine(SDNode *N);
522	SDValue visitFMULForFMADistributiveCombine(SDNode *N);
523
524	SDValue XformToShuffleWithZero(SDNode *N);
525	bool reassociationCanBreakAddressingModePattern(unsigned Opc,
526	const SDLoc &DL, SDValue N0,
527	SDValue N1);
528	SDValue reassociateOpsCommutative(unsigned Opc, const SDLoc &DL, SDValue N0,
529	SDValue N1);
530	SDValue reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
531	SDValue N1, SDNodeFlags Flags);
532
533	SDValue visitShiftByConstant(SDNode *N);
534
535	SDValue foldSelectOfConstants(SDNode *N);
536	SDValue foldVSelectOfConstants(SDNode *N);
537	SDValue foldBinOpIntoSelect(SDNode *BO);
538	bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
539	SDValue hoistLogicOpWithSameOpcodeHands(SDNode *N);
540	SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2);
541	SDValue SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
542	SDValue N2, SDValue N3, ISD::CondCode CC,
543	bool NotExtCompare = false);
544	SDValue convertSelectOfFPConstantsToLoadOffset(
545	const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
546	ISD::CondCode CC);
547	SDValue foldSignChangeInBitcast(SDNode *N);
548	SDValue foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1,
549	SDValue N2, SDValue N3, ISD::CondCode CC);
550	SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
551	const SDLoc &DL);
552	SDValue foldSubToUSubSat(EVT DstVT, SDNode *N);
553	SDValue unfoldMaskedMerge(SDNode *N);
554	SDValue unfoldExtremeBitClearingToShifts(SDNode *N);
555	SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
556	const SDLoc &DL, bool foldBooleans);
557	SDValue rebuildSetCC(SDValue N);
558
559	bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
560	SDValue &CC, bool MatchStrict = false) const;
561	bool isOneUseSetCC(SDValue N) const;
562
563	SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
564	unsigned HiOp);
565	SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
566	SDValue CombineExtLoad(SDNode *N);
567	SDValue CombineZExtLogicopShiftLoad(SDNode *N);
568	SDValue combineRepeatedFPDivisors(SDNode *N);
569	SDValue combineInsertEltToShuffle(SDNode *N, unsigned InsIndex);
570	SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
571	SDValue BuildSDIV(SDNode *N);
572	SDValue BuildSDIVPow2(SDNode *N);
573	SDValue BuildUDIV(SDNode *N);
574	SDValue BuildLogBase2(SDValue V, const SDLoc &DL);
575	SDValue BuildDivEstimate(SDValue N, SDValue Op, SDNodeFlags Flags);
576	SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags);
577	SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags);
578	SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Recip);
579	SDValue buildSqrtNROneConst(SDValue Arg, SDValue Est, unsigned Iterations,
580	SDNodeFlags Flags, bool Reciprocal);
581	SDValue buildSqrtNRTwoConst(SDValue Arg, SDValue Est, unsigned Iterations,
582	SDNodeFlags Flags, bool Reciprocal);
583	SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
584	bool DemandHighBits = true);
585	SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
586	SDValue MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
587	SDValue InnerPos, SDValue InnerNeg,
588	unsigned PosOpcode, unsigned NegOpcode,
589	const SDLoc &DL);
590	SDValue MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos, SDValue Neg,
591	SDValue InnerPos, SDValue InnerNeg,
592	unsigned PosOpcode, unsigned NegOpcode,
593	const SDLoc &DL);
594	SDValue MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
595	SDValue MatchLoadCombine(SDNode *N);
596	SDValue mergeTruncStores(StoreSDNode *N);
597	SDValue ReduceLoadWidth(SDNode *N);
598	SDValue ReduceLoadOpStoreWidth(SDNode *N);
599	SDValue splitMergedValStore(StoreSDNode *ST);
600	SDValue TransformFPLoadStorePair(SDNode *N);
601	SDValue convertBuildVecZextToZext(SDNode *N);
602	SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
603	SDValue reduceBuildVecTruncToBitCast(SDNode *N);
604	SDValue reduceBuildVecToShuffle(SDNode *N);
605	SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N,
606	ArrayRef<int> VectorMask, SDValue VecIn1,
607	SDValue VecIn2, unsigned LeftIdx,
608	bool DidSplitVec);
609	SDValue matchVSelectOpSizesWithSetCC(SDNode *Cast);
610
611	/// Walk up chain skipping non-aliasing memory nodes,
612	/// looking for aliasing nodes and adding them to the Aliases vector.
613	void GatherAllAliases(SDNode *N, SDValue OriginalChain,
614	SmallVectorImpl<SDValue> &Aliases);
615
616	/// Return true if there is any possibility that the two addresses overlap.
617	bool isAlias(SDNode Op0, SDNode Op1) const;
618
619	/// Walk up chain skipping non-aliasing memory nodes, looking for a better
620	/// chain (aliasing node.)
621	SDValue FindBetterChain(SDNode *N, SDValue Chain);
622
623	/// Try to replace a store and any possibly adjacent stores on
624	/// consecutive chains with better chains. Return true only if St is
625	/// replaced.
626	///
627	/// Notice that other chains may still be replaced even if the function
628	/// returns false.
629	bool findBetterNeighborChains(StoreSDNode *St);
630
631	// Helper for findBetterNeighborChains. Walk up store chain add additional
632	// chained stores that do not overlap and can be parallelized.
633	bool parallelizeChainedStores(StoreSDNode *St);
634
635	/// Holds a pointer to an LSBaseSDNode as well as information on where it
636	/// is located in a sequence of memory operations connected by a chain.
637	struct MemOpLink {
638	// Ptr to the mem node.
639	LSBaseSDNode *MemNode;
640
641	// Offset from the base ptr.
642	int64_t OffsetFromBase;
643
644	MemOpLink(LSBaseSDNode *N, int64_t Offset)
645	: MemNode(N), OffsetFromBase(Offset) {}
646	};
647
648	// Classify the origin of a stored value.
649	enum class StoreSource { Unknown, Constant, Extract, Load };
650	StoreSource getStoreSource(SDValue StoreVal) {
651	switch (StoreVal.getOpcode()) {
652	case ISD::Constant:
653	case ISD::ConstantFP:
654	return StoreSource::Constant;
655	case ISD::EXTRACT_VECTOR_ELT:
656	case ISD::EXTRACT_SUBVECTOR:
657	return StoreSource::Extract;
658	case ISD::LOAD:
659	return StoreSource::Load;
660	default:
661	return StoreSource::Unknown;
662	}
663	}
664
665	/// This is a helper function for visitMUL to check the profitability
666	/// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
667	/// MulNode is the original multiply, AddNode is (add x, c1),
668	/// and ConstNode is c2.
669	bool isMulAddWithConstProfitable(SDNode *MulNode,
670	SDValue &AddNode,
671	SDValue &ConstNode);
672
673	/// This is a helper function for visitAND and visitZERO_EXTEND. Returns
674	/// true if the (and (load x) c) pattern matches an extload. ExtVT returns
675	/// the type of the loaded value to be extended.
676	bool isAndLoadExtLoad(ConstantSDNode AndC, LoadSDNode LoadN,
677	EVT LoadResultTy, EVT &ExtVT);
678
679	/// Helper function to calculate whether the given Load/Store can have its
680	/// width reduced to ExtVT.
681	bool isLegalNarrowLdSt(LSBaseSDNode *LDSTN, ISD::LoadExtType ExtType,
682	EVT &MemVT, unsigned ShAmt = 0);
683
684	/// Used by BackwardsPropagateMask to find suitable loads.
685	bool SearchForAndLoads(SDNode N, SmallVectorImpl<LoadSDNode> &Loads,
686	SmallPtrSetImpl<SDNode*> &NodesWithConsts,
687	ConstantSDNode Mask, SDNode &NodeToMask);
688	/// Attempt to propagate a given AND node back to load leaves so that they
689	/// can be combined into narrow loads.
690	bool BackwardsPropagateMask(SDNode *N);
691
692	/// Helper function for mergeConsecutiveStores which merges the component
693	/// store chains.
694	SDValue getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
695	unsigned NumStores);
696
697	/// This is a helper function for mergeConsecutiveStores. When the source
698	/// elements of the consecutive stores are all constants or all extracted
699	/// vector elements, try to merge them into one larger store introducing
700	/// bitcasts if necessary. \return True if a merged store was created.
701	bool mergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
702	EVT MemVT, unsigned NumStores,
703	bool IsConstantSrc, bool UseVector,
704	bool UseTrunc);
705
706	/// This is a helper function for mergeConsecutiveStores. Stores that
707	/// potentially may be merged with St are placed in StoreNodes. RootNode is
708	/// a chain predecessor to all store candidates.
709	void getStoreMergeCandidates(StoreSDNode *St,
710	SmallVectorImpl<MemOpLink> &StoreNodes,
711	SDNode *&Root);
712
713	/// Helper function for mergeConsecutiveStores. Checks if candidate stores
714	/// have indirect dependency through their operands. RootNode is the
715	/// predecessor to all stores calculated by getStoreMergeCandidates and is
716	/// used to prune the dependency check. \return True if safe to merge.
717	bool checkMergeStoreCandidatesForDependencies(
718	SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
719	SDNode *RootNode);
720
721	/// This is a helper function for mergeConsecutiveStores. Given a list of
722	/// store candidates, find the first N that are consecutive in memory.
723	/// Returns 0 if there are not at least 2 consecutive stores to try merging.
724	unsigned getConsecutiveStores(SmallVectorImpl<MemOpLink> &StoreNodes,
725	int64_t ElementSizeBytes) const;
726
727	/// This is a helper function for mergeConsecutiveStores. It is used for
728	/// store chains that are composed entirely of constant values.
729	bool tryStoreMergeOfConstants(SmallVectorImpl<MemOpLink> &StoreNodes,
730	unsigned NumConsecutiveStores,
731	EVT MemVT, SDNode *Root, bool AllowVectors);
732
733	/// This is a helper function for mergeConsecutiveStores. It is used for
734	/// store chains that are composed entirely of extracted vector elements.
735	/// When extracting multiple vector elements, try to store them in one
736	/// vector store rather than a sequence of scalar stores.
737	bool tryStoreMergeOfExtracts(SmallVectorImpl<MemOpLink> &StoreNodes,
738	unsigned NumConsecutiveStores, EVT MemVT,
739	SDNode *Root);
740
741	/// This is a helper function for mergeConsecutiveStores. It is used for
742	/// store chains that are composed entirely of loaded values.
743	bool tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
744	unsigned NumConsecutiveStores, EVT MemVT,
745	SDNode *Root, bool AllowVectors,
746	bool IsNonTemporalStore, bool IsNonTemporalLoad);
747
748	/// Merge consecutive store operations into a wide store.
749	/// This optimization uses wide integers or vectors when possible.
750	/// \return true if stores were merged.
751	bool mergeConsecutiveStores(StoreSDNode *St);
752
753	/// Try to transform a truncation where C is a constant:
754	/// (trunc (and X, C)) -> (and (trunc X), (trunc C))
755	///
756	/// \p N needs to be a truncation and its first operand an AND. Other
757	/// requirements are checked by the function (e.g. that trunc is
758	/// single-use) and if missed an empty SDValue is returned.
759	SDValue distributeTruncateThroughAnd(SDNode *N);
760
761	/// Helper function to determine whether the target supports operation
762	/// given by \p Opcode for type \p VT, that is, whether the operation
763	/// is legal or custom before legalizing operations, and whether is
764	/// legal (but not custom) after legalization.
765	bool hasOperation(unsigned Opcode, EVT VT) {
766	return TLI.isOperationLegalOrCustom(Opcode, VT, LegalOperations);
767	}
768
769	public:
770	/// Runs the dag combiner on all nodes in the work list
771	void Run(CombineLevel AtLevel);
772
773	SelectionDAG &getDAG() const { return DAG; }
774
775	/// Returns a type large enough to hold any valid shift amount - before type
776	/// legalization these can be huge.
777	EVT getShiftAmountTy(EVT LHSTy) {
778	assert(LHSTy.isInteger() && "Shift amount is not an integer type!")((LHSTy.isInteger() && "Shift amount is not an integer type!" ) ? static_cast<void> (0) : __assert_fail ("LHSTy.isInteger() && \"Shift amount is not an integer type!\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 778, __PRETTY_FUNCTION__));
779	return TLI.getShiftAmountTy(LHSTy, DAG.getDataLayout(), LegalTypes);
780	}
781
782	/// This method returns true if we are running before type legalization or
783	/// if the specified VT is legal.
784	bool isTypeLegal(const EVT &VT) {
785	if (!LegalTypes) return true;
786	return TLI.isTypeLegal(VT);
787	}
788
789	/// Convenience wrapper around TargetLowering::getSetCCResultType
790	EVT getSetCCResultType(EVT VT) const {
791	return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
792	}
793
794	void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
795	SDValue OrigLoad, SDValue ExtLoad,
796	ISD::NodeType ExtType);
797	};
798
799	/// This class is a DAGUpdateListener that removes any deleted
800	/// nodes from the worklist.
801	class WorklistRemover : public SelectionDAG::DAGUpdateListener {
802	DAGCombiner &DC;
803
804	public:
805	explicit WorklistRemover(DAGCombiner &dc)
806	: SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
807
808	void NodeDeleted(SDNode N, SDNode E) override {
809	DC.removeFromWorklist(N);
810	}
811	};
812
813	class WorklistInserter : public SelectionDAG::DAGUpdateListener {
814	DAGCombiner &DC;
815
816	public:
817	explicit WorklistInserter(DAGCombiner &dc)
818	: SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
819
820	// FIXME: Ideally we could add N to the worklist, but this causes exponential
821	// compile time costs in large DAGs, e.g. Halide.
822	void NodeInserted(SDNode *N) override { DC.ConsiderForPruning(N); }
823	};
824
825	} // end anonymous namespace
826
827	//===----------------------------------------------------------------------===//
828	// TargetLowering::DAGCombinerInfo implementation
829	//===----------------------------------------------------------------------===//
830
831	void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) {
832	((DAGCombiner*)DC)->AddToWorklist(N);
833	}
834
835	SDValue TargetLowering::DAGCombinerInfo::
836	CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) {
837	return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
838	}
839
840	SDValue TargetLowering::DAGCombinerInfo::
841	CombineTo(SDNode *N, SDValue Res, bool AddTo) {
842	return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
843	}
844
845	SDValue TargetLowering::DAGCombinerInfo::
846	CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
847	return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
848	}
849
850	bool TargetLowering::DAGCombinerInfo::
851	recursivelyDeleteUnusedNodes(SDNode *N) {
852	return ((DAGCombiner*)DC)->recursivelyDeleteUnusedNodes(N);
853	}
854
855	void TargetLowering::DAGCombinerInfo::
856	CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
857	return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
858	}
859
860	//===----------------------------------------------------------------------===//
861	// Helper Functions
862	//===----------------------------------------------------------------------===//
863
864	void DAGCombiner::deleteAndRecombine(SDNode *N) {
865	removeFromWorklist(N);
866
867	// If the operands of this node are only used by the node, they will now be
868	// dead. Make sure to re-visit them and recursively delete dead nodes.
869	for (const SDValue &Op : N->ops())
870	// For an operand generating multiple values, one of the values may
871	// become dead allowing further simplification (e.g. split index
872	// arithmetic from an indexed load).
873	if (Op->hasOneUse() \|\| Op->getNumValues() > 1)
874	AddToWorklist(Op.getNode());
875
876	DAG.DeleteNode(N);
877	}
878
879	// APInts must be the same size for most operations, this helper
880	// function zero extends the shorter of the pair so that they match.
881	// We provide an Offset so that we can create bitwidths that won't overflow.
882	static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) {
883	unsigned Bits = Offset + std::max(LHS.getBitWidth(), RHS.getBitWidth());
884	LHS = LHS.zextOrSelf(Bits);
885	RHS = RHS.zextOrSelf(Bits);
886	}
887
888	// Return true if this node is a setcc, or is a select_cc
889	// that selects between the target values used for true and false, making it
890	// equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to
891	// the appropriate nodes based on the type of node we are checking. This
892	// simplifies life a bit for the callers.
893	bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
894	SDValue &CC, bool MatchStrict) const {
895	if (N.getOpcode() == ISD::SETCC) {
896	LHS = N.getOperand(0);
897	RHS = N.getOperand(1);
898	CC = N.getOperand(2);
899	return true;
900	}
901
902	if (MatchStrict &&
903	(N.getOpcode() == ISD::STRICT_FSETCC \|\|
904	N.getOpcode() == ISD::STRICT_FSETCCS)) {
905	LHS = N.getOperand(1);
906	RHS = N.getOperand(2);
907	CC = N.getOperand(3);
908	return true;
909	}
910
911	if (N.getOpcode() != ISD::SELECT_CC \|\|
912	!TLI.isConstTrueVal(N.getOperand(2).getNode()) \|\|
913	!TLI.isConstFalseVal(N.getOperand(3).getNode()))
914	return false;
915
916	if (TLI.getBooleanContents(N.getValueType()) ==
917	TargetLowering::UndefinedBooleanContent)
918	return false;
919
920	LHS = N.getOperand(0);
921	RHS = N.getOperand(1);
922	CC = N.getOperand(4);
923	return true;
924	}
925
926	/// Return true if this is a SetCC-equivalent operation with only one use.
927	/// If this is true, it allows the users to invert the operation for free when
928	/// it is profitable to do so.
929	bool DAGCombiner::isOneUseSetCC(SDValue N) const {
930	SDValue N0, N1, N2;
931	if (isSetCCEquivalent(N, N0, N1, N2) && N.getNode()->hasOneUse())
932	return true;
933	return false;
934	}
935
936	static bool isConstantSplatVectorMaskForType(SDNode *N, EVT ScalarTy) {
937	if (!ScalarTy.isSimple())
938	return false;
939
940	uint64_t MaskForTy = 0ULL;
941	switch (ScalarTy.getSimpleVT().SimpleTy) {
942	case MVT::i8:
943	MaskForTy = 0xFFULL;
944	break;
945	case MVT::i16:
946	MaskForTy = 0xFFFFULL;
947	break;
948	case MVT::i32:
949	MaskForTy = 0xFFFFFFFFULL;
950	break;
951	default:
952	return false;
953	break;
954	}
955
956	APInt Val;
957	if (ISD::isConstantSplatVector(N, Val))
958	return Val.getLimitedValue() == MaskForTy;
959
960	return false;
961	}
962
963	// Determines if it is a constant integer or a splat/build vector of constant
964	// integers (and undefs).
965	// Do not permit build vector implicit truncation.
966	static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
967	if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N))
968	return !(Const->isOpaque() && NoOpaques);
969	if (N.getOpcode() != ISD::BUILD_VECTOR && N.getOpcode() != ISD::SPLAT_VECTOR)
970	return false;
971	unsigned BitWidth = N.getScalarValueSizeInBits();
972	for (const SDValue &Op : N->op_values()) {
973	if (Op.isUndef())
974	continue;
975	ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Op);
976	if (!Const \|\| Const->getAPIntValue().getBitWidth() != BitWidth \|\|
977	(Const->isOpaque() && NoOpaques))
978	return false;
979	}
980	return true;
981	}
982
983	// Determines if a BUILD_VECTOR is composed of all-constants possibly mixed with
984	// undef's.
985	static bool isAnyConstantBuildVector(SDValue V, bool NoOpaques = false) {
986	if (V.getOpcode() != ISD::BUILD_VECTOR)
987	return false;
988	return isConstantOrConstantVector(V, NoOpaques) \|\|
989	ISD::isBuildVectorOfConstantFPSDNodes(V.getNode());
990	}
991
992	// Determine if this an indexed load with an opaque target constant index.
993	static bool canSplitIdx(LoadSDNode *LD) {
994	return MaySplitLoadIndex &&
995	(LD->getOperand(2).getOpcode() != ISD::TargetConstant \|\|
996	!cast<ConstantSDNode>(LD->getOperand(2))->isOpaque());
997	}
998
999	bool DAGCombiner::reassociationCanBreakAddressingModePattern(unsigned Opc,
1000	const SDLoc &DL,
1001	SDValue N0,
1002	SDValue N1) {
1003	// Currently this only tries to ensure we don't undo the GEP splits done by
1004	// CodeGenPrepare when shouldConsiderGEPOffsetSplit is true. To ensure this,
1005	// we check if the following transformation would be problematic:
1006	// (load/store (add, (add, x, offset1), offset2)) ->
1007	// (load/store (add, x, offset1+offset2)).
1008
1009	if (Opc != ISD::ADD \|\| N0.getOpcode() != ISD::ADD)
1010	return false;
1011
1012	if (N0.hasOneUse())
1013	return false;
1014
1015	auto *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
1016	auto *C2 = dyn_cast<ConstantSDNode>(N1);
1017	if (!C1 \|\| !C2)
1018	return false;
1019
1020	const APInt &C1APIntVal = C1->getAPIntValue();
1021	const APInt &C2APIntVal = C2->getAPIntValue();
1022	if (C1APIntVal.getBitWidth() > 64 \|\| C2APIntVal.getBitWidth() > 64)
1023	return false;
1024
1025	const APInt CombinedValueIntVal = C1APIntVal + C2APIntVal;
1026	if (CombinedValueIntVal.getBitWidth() > 64)
1027	return false;
1028	const int64_t CombinedValue = CombinedValueIntVal.getSExtValue();
1029
1030	for (SDNode *Node : N0->uses()) {
1031	auto LoadStore = dyn_cast<MemSDNode>(Node);
1032	if (LoadStore) {
1033	// Is x[offset2] already not a legal addressing mode? If so then
1034	// reassociating the constants breaks nothing (we test offset2 because
1035	// that's the one we hope to fold into the load or store).
1036	TargetLoweringBase::AddrMode AM;
1037	AM.HasBaseReg = true;
1038	AM.BaseOffs = C2APIntVal.getSExtValue();
1039	EVT VT = LoadStore->getMemoryVT();
1040	unsigned AS = LoadStore->getAddressSpace();
1041	Type AccessTy = VT.getTypeForEVT(DAG.getContext());
1042	if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
1043	continue;
1044
1045	// Would x[offset1+offset2] still be a legal addressing mode?
1046	AM.BaseOffs = CombinedValue;
1047	if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
1048	return true;
1049	}
1050	}
1051
1052	return false;
1053	}
1054
1055	// Helper for DAGCombiner::reassociateOps. Try to reassociate an expression
1056	// such as (Opc N0, N1), if \p N0 is the same kind of operation as \p Opc.
1057	SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL,
1058	SDValue N0, SDValue N1) {
1059	EVT VT = N0.getValueType();
1060
1061	if (N0.getOpcode() != Opc)
1062	return SDValue();
1063
1064	if (DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) {
1065	if (DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
1066	// Reassociate: (op (op x, c1), c2) -> (op x, (op c1, c2))
1067	if (SDValue OpNode =
1068	DAG.FoldConstantArithmetic(Opc, DL, VT, {N0.getOperand(1), N1}))
1069	return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode);
1070	return SDValue();
1071	}
1072	if (N0.hasOneUse()) {
1073	// Reassociate: (op (op x, c1), y) -> (op (op x, y), c1)
1074	// iff (op x, c1) has one use
1075	SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1);
1076	if (!OpNode.getNode())
1077	return SDValue();
1078	return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1));
1079	}
1080	}
1081	return SDValue();
1082	}
1083
1084	// Try to reassociate commutative binops.
1085	SDValue DAGCombiner::reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
1086	SDValue N1, SDNodeFlags Flags) {
1087	assert(TLI.isCommutativeBinOp(Opc) && "Operation not commutative.")((TLI.isCommutativeBinOp(Opc) && "Operation not commutative." ) ? static_cast<void> (0) : __assert_fail ("TLI.isCommutativeBinOp(Opc) && \"Operation not commutative.\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 1087, __PRETTY_FUNCTION__));
1088
1089	// Floating-point reassociation is not allowed without loose FP math.
1090	if (N0.getValueType().isFloatingPoint() \|\|
1091	N1.getValueType().isFloatingPoint())
1092	if (!Flags.hasAllowReassociation() \|\| !Flags.hasNoSignedZeros())
1093	return SDValue();
1094
1095	if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N0, N1))
1096	return Combined;
1097	if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N1, N0))
1098	return Combined;
1099	return SDValue();
1100	}
1101
1102	SDValue DAGCombiner::CombineTo(SDNode N, const SDValue To, unsigned NumTo,
1103	bool AddTo) {
1104	assert(N->getNumValues() == NumTo && "Broken CombineTo call!")((N->getNumValues() == NumTo && "Broken CombineTo call!" ) ? static_cast<void> (0) : __assert_fail ("N->getNumValues() == NumTo && \"Broken CombineTo call!\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 1104, __PRETTY_FUNCTION__));
1105	++NodesCombined;
1106	LLVM_DEBUG(dbgs() << "\nReplacing.1 "; N->dump(&DAG); dbgs() << "\nWith: ";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("dagcombine")) { dbgs() << "\nReplacing.1 "; N->dump (&DAG); dbgs() << "\nWith: "; To[0].getNode()->dump (&DAG); dbgs() << " and " << NumTo - 1 << " other values\n"; } } while (false)
1107	To[0].getNode()->dump(&DAG);do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("dagcombine")) { dbgs() << "\nReplacing.1 "; N->dump (&DAG); dbgs() << "\nWith: "; To[0].getNode()->dump (&DAG); dbgs() << " and " << NumTo - 1 << " other values\n"; } } while (false)
1108	dbgs() << " and " << NumTo - 1 << " other values\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("dagcombine")) { dbgs() << "\nReplacing.1 "; N->dump (&DAG); dbgs() << "\nWith: "; To[0].getNode()->dump (&DAG); dbgs() << " and " << NumTo - 1 << " other values\n"; } } while (false);
1109	for (unsigned i = 0, e = NumTo; i != e; ++i)
1110	assert((!To[i].getNode() \|\|(((!To[i].getNode() \|\| N->getValueType(i) == To[i].getValueType ()) && "Cannot combine value to value of different type!" ) ? static_cast<void> (0) : __assert_fail ("(!To[i].getNode() \|\| N->getValueType(i) == To[i].getValueType()) && \"Cannot combine value to value of different type!\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 1112, __PRETTY_FUNCTION__))
1111	N->getValueType(i) == To[i].getValueType()) &&(((!To[i].getNode() \|\| N->getValueType(i) == To[i].getValueType ()) && "Cannot combine value to value of different type!" ) ? static_cast<void> (0) : __assert_fail ("(!To[i].getNode() \|\| N->getValueType(i) == To[i].getValueType()) && \"Cannot combine value to value of different type!\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 1112, __PRETTY_FUNCTION__))
1112	"Cannot combine value to value of different type!")(((!To[i].getNode() \|\| N->getValueType(i) == To[i].getValueType ()) && "Cannot combine value to value of different type!" ) ? static_cast<void> (0) : __assert_fail ("(!To[i].getNode() \|\| N->getValueType(i) == To[i].getValueType()) && \"Cannot combine value to value of different type!\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 1112, __PRETTY_FUNCTION__));
1113
1114	WorklistRemover DeadNodes(*this);
1115	DAG.ReplaceAllUsesWith(N, To);
1116	if (AddTo) {
1117	// Push the new nodes and any users onto the worklist
1118	for (unsigned i = 0, e = NumTo; i != e; ++i) {
1119	if (To[i].getNode()) {
1120	AddToWorklist(To[i].getNode());
1121	AddUsersToWorklist(To[i].getNode());
1122	}
1123	}
1124	}
1125
1126	// Finally, if the node is now dead, remove it from the graph. The node
1127	// may not be dead if the replacement process recursively simplified to
1128	// something else needing this node.
1129	if (N->use_empty())
1130	deleteAndRecombine(N);
1131	return SDValue(N, 0);
1132	}
1133
1134	void DAGCombiner::
1135	CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
1136	// Replace the old value with the new one.
1137	++NodesCombined;
1138	LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.getNode()->dump(&DAG);do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("dagcombine")) { dbgs() << "\nReplacing.2 "; TLO.Old.getNode ()->dump(&DAG); dbgs() << "\nWith: "; TLO.New.getNode ()->dump(&DAG); dbgs() << '\n'; } } while (false )
1139	dbgs() << "\nWith: "; TLO.New.getNode()->dump(&DAG);do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("dagcombine")) { dbgs() << "\nReplacing.2 "; TLO.Old.getNode ()->dump(&DAG); dbgs() << "\nWith: "; TLO.New.getNode ()->dump(&DAG); dbgs() << '\n'; } } while (false )
1140	dbgs() << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("dagcombine")) { dbgs() << "\nReplacing.2 "; TLO.Old.getNode ()->dump(&DAG); dbgs() << "\nWith: "; TLO.New.getNode ()->dump(&DAG); dbgs() << '\n'; } } while (false );
1141
1142	// Replace all uses. If any nodes become isomorphic to other nodes and
1143	// are deleted, make sure to remove them from our worklist.
1144	WorklistRemover DeadNodes(*this);
1145	DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);
1146
1147	// Push the new node and any (possibly new) users onto the worklist.
1148	AddToWorklistWithUsers(TLO.New.getNode());
1149
1150	// Finally, if the node is now dead, remove it from the graph. The node
1151	// may not be dead if the replacement process recursively simplified to
1152	// something else needing this node.
1153	if (TLO.Old.getNode()->use_empty())
1154	deleteAndRecombine(TLO.Old.getNode());
1155	}
1156
1157	/// Check the specified integer node value to see if it can be simplified or if
1158	/// things it uses can be simplified by bit propagation. If so, return true.
1159	bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
1160	const APInt &DemandedElts,
1161	bool AssumeSingleUse) {
1162	TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1163	KnownBits Known;
1164	if (!TLI.SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, 0,
1165	AssumeSingleUse))
1166	return false;
1167
1168	// Revisit the node.
1169	AddToWorklist(Op.getNode());
1170
1171	CommitTargetLoweringOpt(TLO);
1172	return true;
1173	}
1174
1175	/// Check the specified vector node value to see if it can be simplified or
1176	/// if things it uses can be simplified as it only uses some of the elements.
1177	/// If so, return true.
1178	bool DAGCombiner::SimplifyDemandedVectorElts(SDValue Op,
1179	const APInt &DemandedElts,
1180	bool AssumeSingleUse) {
1181	TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1182	APInt KnownUndef, KnownZero;
1183	if (!TLI.SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero,
1184	TLO, 0, AssumeSingleUse))
1185	return false;
1186
1187	// Revisit the node.
1188	AddToWorklist(Op.getNode());
1189
1190	CommitTargetLoweringOpt(TLO);
1191	return true;
1192	}
1193
1194	void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode Load, SDNode ExtLoad) {
1195	SDLoc DL(Load);
1196	EVT VT = Load->getValueType(0);
1197	SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, SDValue(ExtLoad, 0));
1198
1199	LLVM_DEBUG(dbgs() << "\nReplacing.9 "; Load->dump(&DAG); dbgs() << "\nWith: ";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("dagcombine")) { dbgs() << "\nReplacing.9 "; Load-> dump(&DAG); dbgs() << "\nWith: "; Trunc.getNode()-> dump(&DAG); dbgs() << '\n'; } } while (false)
1200	Trunc.getNode()->dump(&DAG); dbgs() << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("dagcombine")) { dbgs() << "\nReplacing.9 "; Load-> dump(&DAG); dbgs() << "\nWith: "; Trunc.getNode()-> dump(&DAG); dbgs() << '\n'; } } while (false);
1201	WorklistRemover DeadNodes(*this);
1202	DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
1203	DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));
1204	deleteAndRecombine(Load);
1205	AddToWorklist(Trunc.getNode());
1206	}
1207
1208	SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
1209	Replace = false;
1210	SDLoc DL(Op);
1211	if (ISD::isUNINDEXEDLoad(Op.getNode())) {
1212	LoadSDNode *LD = cast<LoadSDNode>(Op);
1213	EVT MemVT = LD->getMemoryVT();
1214	ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
1215	: LD->getExtensionType();
1216	Replace = true;
1217	return DAG.getExtLoad(ExtType, DL, PVT,
1218	LD->getChain(), LD->getBasePtr(),
1219	MemVT, LD->getMemOperand());
1220	}
1221
1222	unsigned Opc = Op.getOpcode();
1223	switch (Opc) {
1224	default: break;
1225	case ISD::AssertSext:
1226	if (SDValue Op0 = SExtPromoteOperand(Op.getOperand(0), PVT))
1227	return DAG.getNode(ISD::AssertSext, DL, PVT, Op0, Op.getOperand(1));
1228	break;
1229	case ISD::AssertZext:
1230	if (SDValue Op0 = ZExtPromoteOperand(Op.getOperand(0), PVT))
1231	return DAG.getNode(ISD::AssertZext, DL, PVT, Op0, Op.getOperand(1));
1232	break;
1233	case ISD::Constant: {
1234	unsigned ExtOpc =
1235	Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
1236	return DAG.getNode(ExtOpc, DL, PVT, Op);
1237	}
1238	}
1239
1240	if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
1241	return SDValue();
1242	return DAG.getNode(ISD::ANY_EXTEND, DL, PVT, Op);
1243	}
1244
1245	SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
1246	if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT))
1247	return SDValue();
1248	EVT OldVT = Op.getValueType();
1249	SDLoc DL(Op);
1250	bool Replace = false;
1251	SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1252	if (!NewOp.getNode())
1253	return SDValue();
1254	AddToWorklist(NewOp.getNode());
1255
1256	if (Replace)
1257	ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1258	return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, NewOp.getValueType(), NewOp,
1259	DAG.getValueType(OldVT));
1260	}
1261
1262	SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
1263	EVT OldVT = Op.getValueType();
1264	SDLoc DL(Op);
1265	bool Replace = false;
1266	SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1267	if (!NewOp.getNode())
1268	return SDValue();
1269	AddToWorklist(NewOp.getNode());
1270
1271	if (Replace)
1272	ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1273	return DAG.getZeroExtendInReg(NewOp, DL, OldVT);
1274	}
1275
1276	/// Promote the specified integer binary operation if the target indicates it is
1277	/// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1278	/// i32 since i16 instructions are longer.
1279	SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
1280	if (!LegalOperations)
1281	return SDValue();
1282
1283	EVT VT = Op.getValueType();
1284	if (VT.isVector() \|\| !VT.isInteger())
1285	return SDValue();
1286
1287	// If operation type is 'undesirable', e.g. i16 on x86, consider
1288	// promoting it.
1289	unsigned Opc = Op.getOpcode();
1290	if (TLI.isTypeDesirableForOp(Opc, VT))
1291	return SDValue();
1292
1293	EVT PVT = VT;
1294	// Consult target whether it is a good idea to promote this operation and
1295	// what's the right type to promote it to.
1296	if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1297	assert(PVT != VT && "Don't know what type to promote to!")((PVT != VT && "Don't know what type to promote to!") ? static_cast<void> (0) : __assert_fail ("PVT != VT && \"Don't know what type to promote to!\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 1297, __PRETTY_FUNCTION__));
1298
1299	LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG))do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("dagcombine")) { dbgs() << "\nPromoting "; Op.getNode( )->dump(&DAG); } } while (false);
1300
1301	bool Replace0 = false;
1302	SDValue N0 = Op.getOperand(0);
1303	SDValue NN0 = PromoteOperand(N0, PVT, Replace0);
1304
1305	bool Replace1 = false;
1306	SDValue N1 = Op.getOperand(1);
1307	SDValue NN1 = PromoteOperand(N1, PVT, Replace1);
1308	SDLoc DL(Op);
1309
1310	SDValue RV =
1311	DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, NN0, NN1));
1312
1313	// We are always replacing N0/N1's use in N and only need additional
1314	// replacements if there are additional uses.
1315	// Note: We are checking uses of the nodes (SDNode) rather than values
1316	// (SDValue) here because the node may reference multiple values
1317	// (for example, the chain value of a load node).
1318	Replace0 &= !N0->hasOneUse();
1319	Replace1 &= (N0 != N1) && !N1->hasOneUse();
1320
1321	// Combine Op here so it is preserved past replacements.
1322	CombineTo(Op.getNode(), RV);
1323
1324	// If operands have a use ordering, make sure we deal with
1325	// predecessor first.
1326	if (Replace0 && Replace1 && N0.getNode()->isPredecessorOf(N1.getNode())) {
1327	std::swap(N0, N1);
1328	std::swap(NN0, NN1);
1329	}
1330
1331	if (Replace0) {
1332	AddToWorklist(NN0.getNode());
1333	ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
1334	}
1335	if (Replace1) {
1336	AddToWorklist(NN1.getNode());
1337	ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());
1338	}
1339	return Op;
1340	}
1341	return SDValue();
1342	}
1343
1344	/// Promote the specified integer shift operation if the target indicates it is
1345	/// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1346	/// i32 since i16 instructions are longer.
1347	SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
1348	if (!LegalOperations)
1349	return SDValue();
1350
1351	EVT VT = Op.getValueType();
1352	if (VT.isVector() \|\| !VT.isInteger())
1353	return SDValue();
1354
1355	// If operation type is 'undesirable', e.g. i16 on x86, consider
1356	// promoting it.
1357	unsigned Opc = Op.getOpcode();
1358	if (TLI.isTypeDesirableForOp(Opc, VT))
1359	return SDValue();
1360
1361	EVT PVT = VT;
1362	// Consult target whether it is a good idea to promote this operation and
1363	// what's the right type to promote it to.
1364	if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1365	assert(PVT != VT && "Don't know what type to promote to!")((PVT != VT && "Don't know what type to promote to!") ? static_cast<void> (0) : __assert_fail ("PVT != VT && \"Don't know what type to promote to!\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 1365, __PRETTY_FUNCTION__));
1366
1367	LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG))do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("dagcombine")) { dbgs() << "\nPromoting "; Op.getNode( )->dump(&DAG); } } while (false);
1368
1369	bool Replace = false;
1370	SDValue N0 = Op.getOperand(0);
1371	SDValue N1 = Op.getOperand(1);
1372	if (Opc == ISD::SRA)
1373	N0 = SExtPromoteOperand(N0, PVT);
1374	else if (Opc == ISD::SRL)
1375	N0 = ZExtPromoteOperand(N0, PVT);
1376	else
1377	N0 = PromoteOperand(N0, PVT, Replace);
1378
1379	if (!N0.getNode())
1380	return SDValue();
1381
1382	SDLoc DL(Op);
1383	SDValue RV =
1384	DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, N0, N1));
1385
1386	if (Replace)
1387	ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());
1388
1389	// Deal with Op being deleted.
1390	if (Op && Op.getOpcode() != ISD::DELETED_NODE)
1391	return RV;
1392	}
1393	return SDValue();
1394	}
1395
1396	SDValue DAGCombiner::PromoteExtend(SDValue Op) {
1397	if (!LegalOperations)
1398	return SDValue();
1399
1400	EVT VT = Op.getValueType();
1401	if (VT.isVector() \|\| !VT.isInteger())
1402	return SDValue();
1403
1404	// If operation type is 'undesirable', e.g. i16 on x86, consider
1405	// promoting it.
1406	unsigned Opc = Op.getOpcode();
1407	if (TLI.isTypeDesirableForOp(Opc, VT))
1408	return SDValue();
1409
1410	EVT PVT = VT;
1411	// Consult target whether it is a good idea to promote this operation and
1412	// what's the right type to promote it to.
1413	if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1414	assert(PVT != VT && "Don't know what type to promote to!")((PVT != VT && "Don't know what type to promote to!") ? static_cast<void> (0) : __assert_fail ("PVT != VT && \"Don't know what type to promote to!\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 1414, __PRETTY_FUNCTION__));
1415	// fold (aext (aext x)) -> (aext x)
1416	// fold (aext (zext x)) -> (zext x)
1417	// fold (aext (sext x)) -> (sext x)
1418	LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG))do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("dagcombine")) { dbgs() << "\nPromoting "; Op.getNode( )->dump(&DAG); } } while (false);
1419	return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0));
1420	}
1421	return SDValue();
1422	}
1423
1424	bool DAGCombiner::PromoteLoad(SDValue Op) {
1425	if (!LegalOperations)
1426	return false;
1427
1428	if (!ISD::isUNINDEXEDLoad(Op.getNode()))
1429	return false;
1430
1431	EVT VT = Op.getValueType();
1432	if (VT.isVector() \|\| !VT.isInteger())
1433	return false;
1434
1435	// If operation type is 'undesirable', e.g. i16 on x86, consider
1436	// promoting it.
1437	unsigned Opc = Op.getOpcode();
1438	if (TLI.isTypeDesirableForOp(Opc, VT))
1439	return false;
1440
1441	EVT PVT = VT;
1442	// Consult target whether it is a good idea to promote this operation and
1443	// what's the right type to promote it to.
1444	if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1445	assert(PVT != VT && "Don't know what type to promote to!")((PVT != VT && "Don't know what type to promote to!") ? static_cast<void> (0) : __assert_fail ("PVT != VT && \"Don't know what type to promote to!\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 1445, __PRETTY_FUNCTION__));
1446
1447	SDLoc DL(Op);
1448	SDNode *N = Op.getNode();
1449	LoadSDNode *LD = cast<LoadSDNode>(N);
1450	EVT MemVT = LD->getMemoryVT();
1451	ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
1452	: LD->getExtensionType();
1453	SDValue NewLD = DAG.getExtLoad(ExtType, DL, PVT,
1454	LD->getChain(), LD->getBasePtr(),
1455	MemVT, LD->getMemOperand());
1456	SDValue Result = DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD);
1457
1458	LLVM_DEBUG(dbgs() << "\nPromoting "; N->dump(&DAG); dbgs() << "\nTo: ";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("dagcombine")) { dbgs() << "\nPromoting "; N->dump( &DAG); dbgs() << "\nTo: "; Result.getNode()->dump (&DAG); dbgs() << '\n'; } } while (false)
1459	Result.getNode()->dump(&DAG); dbgs() << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("dagcombine")) { dbgs() << "\nPromoting "; N->dump( &DAG); dbgs() << "\nTo: "; Result.getNode()->dump (&DAG); dbgs() << '\n'; } } while (false);
1460	WorklistRemover DeadNodes(*this);
1461	DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
1462	DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
1463	deleteAndRecombine(N);
1464	AddToWorklist(Result.getNode());
1465	return true;
1466	}
1467	return false;
1468	}
1469
1470	/// Recursively delete a node which has no uses and any operands for
1471	/// which it is the only use.
1472	///
1473	/// Note that this both deletes the nodes and removes them from the worklist.
1474	/// It also adds any nodes who have had a user deleted to the worklist as they
1475	/// may now have only one use and subject to other combines.
1476	bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) {
1477	if (!N->use_empty())
1478	return false;
1479
1480	SmallSetVector<SDNode *, 16> Nodes;
1481	Nodes.insert(N);
1482	do {
1483	N = Nodes.pop_back_val();
1484	if (!N)
1485	continue;
1486
1487	if (N->use_empty()) {
1488	for (const SDValue &ChildN : N->op_values())
1489	Nodes.insert(ChildN.getNode());
1490
1491	removeFromWorklist(N);
1492	DAG.DeleteNode(N);
1493	} else {
1494	AddToWorklist(N);
1495	}
1496	} while (!Nodes.empty());
1497	return true;
1498	}
1499
1500	//===----------------------------------------------------------------------===//
1501	// Main DAG Combiner implementation
1502	//===----------------------------------------------------------------------===//
1503
1504	void DAGCombiner::Run(CombineLevel AtLevel) {
1505	// set the instance variables, so that the various visit routines may use it.
1506	Level = AtLevel;
1507	LegalDAG = Level >= AfterLegalizeDAG;
1508	LegalOperations = Level >= AfterLegalizeVectorOps;
1509	LegalTypes = Level >= AfterLegalizeTypes;
1510
1511	WorklistInserter AddNodes(*this);
1512
1513	// Add all the dag nodes to the worklist.
1514	for (SDNode &Node : DAG.allnodes())
1515	AddToWorklist(&Node);
1516
1517	// Create a dummy node (which is not added to allnodes), that adds a reference
1518	// to the root node, preventing it from being deleted, and tracking any
1519	// changes of the root.
1520	HandleSDNode Dummy(DAG.getRoot());
1521
1522	// While we have a valid worklist entry node, try to combine it.
1523	while (SDNode *N = getNextWorklistEntry()) {
1524	// If N has no uses, it is dead. Make sure to revisit all N's operands once
1525	// N is deleted from the DAG, since they too may now be dead or may have a
1526	// reduced number of uses, allowing other xforms.
1527	if (recursivelyDeleteUnusedNodes(N))
1528	continue;
1529
1530	WorklistRemover DeadNodes(*this);
1531
1532	// If this combine is running after legalizing the DAG, re-legalize any
1533	// nodes pulled off the worklist.
1534	if (LegalDAG) {
1535	SmallSetVector<SDNode *, 16> UpdatedNodes;
1536	bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes);
1537
1538	for (SDNode *LN : UpdatedNodes)
1539	AddToWorklistWithUsers(LN);
1540
1541	if (!NIsValid)
1542	continue;
1543	}
1544
1545	LLVM_DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG))do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("dagcombine")) { dbgs() << "\nCombining: "; N->dump (&DAG); } } while (false);
1546
1547	// Add any operands of the new node which have not yet been combined to the
1548	// worklist as well. Because the worklist uniques things already, this
1549	// won't repeatedly process the same operand.
1550	CombinedNodes.insert(N);
1551	for (const SDValue &ChildN : N->op_values())
1552	if (!CombinedNodes.count(ChildN.getNode()))
1553	AddToWorklist(ChildN.getNode());
1554
1555	SDValue RV = combine(N);
1556
1557	if (!RV.getNode())
1558	continue;
1559
1560	++NodesCombined;
1561
1562	// If we get back the same node we passed in, rather than a new node or
1563	// zero, we know that the node must have defined multiple values and
1564	// CombineTo was used. Since CombineTo takes care of the worklist
1565	// mechanics for us, we have no work to do in this case.
1566	if (RV.getNode() == N)
1567	continue;
1568
1569	assert(N->getOpcode() != ISD::DELETED_NODE &&((N->getOpcode() != ISD::DELETED_NODE && RV.getOpcode () != ISD::DELETED_NODE && "Node was deleted but visit returned new node!" ) ? static_cast<void> (0) : __assert_fail ("N->getOpcode() != ISD::DELETED_NODE && RV.getOpcode() != ISD::DELETED_NODE && \"Node was deleted but visit returned new node!\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 1571, __PRETTY_FUNCTION__))
1570	RV.getOpcode() != ISD::DELETED_NODE &&((N->getOpcode() != ISD::DELETED_NODE && RV.getOpcode () != ISD::DELETED_NODE && "Node was deleted but visit returned new node!" ) ? static_cast<void> (0) : __assert_fail ("N->getOpcode() != ISD::DELETED_NODE && RV.getOpcode() != ISD::DELETED_NODE && \"Node was deleted but visit returned new node!\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 1571, __PRETTY_FUNCTION__))
1571	"Node was deleted but visit returned new node!")((N->getOpcode() != ISD::DELETED_NODE && RV.getOpcode () != ISD::DELETED_NODE && "Node was deleted but visit returned new node!" ) ? static_cast<void> (0) : __assert_fail ("N->getOpcode() != ISD::DELETED_NODE && RV.getOpcode() != ISD::DELETED_NODE && \"Node was deleted but visit returned new node!\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 1571, __PRETTY_FUNCTION__));
1572
1573	LLVM_DEBUG(dbgs() << " ... into: "; RV.getNode()->dump(&DAG))do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("dagcombine")) { dbgs() << " ... into: "; RV.getNode() ->dump(&DAG); } } while (false);
1574
1575	if (N->getNumValues() == RV.getNode()->getNumValues())
1576	DAG.ReplaceAllUsesWith(N, RV.getNode());
1577	else {
1578	assert(N->getValueType(0) == RV.getValueType() &&((N->getValueType(0) == RV.getValueType() && N-> getNumValues() == 1 && "Type mismatch") ? static_cast <void> (0) : __assert_fail ("N->getValueType(0) == RV.getValueType() && N->getNumValues() == 1 && \"Type mismatch\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 1579, __PRETTY_FUNCTION__))
1579	N->getNumValues() == 1 && "Type mismatch")((N->getValueType(0) == RV.getValueType() && N-> getNumValues() == 1 && "Type mismatch") ? static_cast <void> (0) : __assert_fail ("N->getValueType(0) == RV.getValueType() && N->getNumValues() == 1 && \"Type mismatch\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 1579, __PRETTY_FUNCTION__));
1580	DAG.ReplaceAllUsesWith(N, &RV);
1581	}
1582
1583	// Push the new node and any users onto the worklist. Omit this if the
1584	// new node is the EntryToken (e.g. if a store managed to get optimized
1585	// out), because re-visiting the EntryToken and its users will not uncover
1586	// any additional opportunities, but there may be a large number of such
1587	// users, potentially causing compile time explosion.
1588	if (RV.getOpcode() != ISD::EntryToken) {
1589	AddToWorklist(RV.getNode());
1590	AddUsersToWorklist(RV.getNode());
1591	}
1592
1593	// Finally, if the node is now dead, remove it from the graph. The node
1594	// may not be dead if the replacement process recursively simplified to
1595	// something else needing this node. This will also take care of adding any
1596	// operands which have lost a user to the worklist.
1597	recursivelyDeleteUnusedNodes(N);
1598	}
1599
1600	// If the root changed (e.g. it was a dead load, update the root).
1601	DAG.setRoot(Dummy.getValue());
1602	DAG.RemoveDeadNodes();
1603	}
1604
1605	SDValue DAGCombiner::visit(SDNode *N) {
1606	switch (N->getOpcode()) {
1607	default: break;
1608	case ISD::TokenFactor: return visitTokenFactor(N);
1609	case ISD::MERGE_VALUES: return visitMERGE_VALUES(N);
1610	case ISD::ADD: return visitADD(N);
1611	case ISD::SUB: return visitSUB(N);
1612	case ISD::SADDSAT:
1613	case ISD::UADDSAT: return visitADDSAT(N);
1614	case ISD::SSUBSAT:
1615	case ISD::USUBSAT: return visitSUBSAT(N);
1616	case ISD::ADDC: return visitADDC(N);
1617	case ISD::SADDO:
1618	case ISD::UADDO: return visitADDO(N);
1619	case ISD::SUBC: return visitSUBC(N);
1620	case ISD::SSUBO:
1621	case ISD::USUBO: return visitSUBO(N);
1622	case ISD::ADDE: return visitADDE(N);
1623	case ISD::ADDCARRY: return visitADDCARRY(N);
1624	case ISD::SADDO_CARRY: return visitSADDO_CARRY(N);
1625	case ISD::SUBE: return visitSUBE(N);
1626	case ISD::SUBCARRY: return visitSUBCARRY(N);
1627	case ISD::SSUBO_CARRY: return visitSSUBO_CARRY(N);
1628	case ISD::SMULFIX:
1629	case ISD::SMULFIXSAT:
1630	case ISD::UMULFIX:
1631	case ISD::UMULFIXSAT: return visitMULFIX(N);
1632	case ISD::MUL: return visitMUL(N);
1633	case ISD::SDIV: return visitSDIV(N);
1634	case ISD::UDIV: return visitUDIV(N);
1635	case ISD::SREM:
1636	case ISD::UREM: return visitREM(N);
1637	case ISD::MULHU: return visitMULHU(N);
1638	case ISD::MULHS: return visitMULHS(N);
1639	case ISD::SMUL_LOHI: return visitSMUL_LOHI(N);
1640	case ISD::UMUL_LOHI: return visitUMUL_LOHI(N);
1641	case ISD::SMULO:
1642	case ISD::UMULO: return visitMULO(N);
1643	case ISD::SMIN:
1644	case ISD::SMAX:
1645	case ISD::UMIN:
1646	case ISD::UMAX: return visitIMINMAX(N);
1647	case ISD::AND: return visitAND(N);
1648	case ISD::OR: return visitOR(N);
1649	case ISD::XOR: return visitXOR(N);
1650	case ISD::SHL: return visitSHL(N);
1651	case ISD::SRA: return visitSRA(N);
1652	case ISD::SRL: return visitSRL(N);
1653	case ISD::ROTR:
1654	case ISD::ROTL: return visitRotate(N);
1655	case ISD::FSHL:
1656	case ISD::FSHR: return visitFunnelShift(N);
1657	case ISD::ABS: return visitABS(N);
1658	case ISD::BSWAP: return visitBSWAP(N);
1659	case ISD::BITREVERSE: return visitBITREVERSE(N);
1660	case ISD::CTLZ: return visitCTLZ(N);
1661	case ISD::CTLZ_ZERO_UNDEF: return visitCTLZ_ZERO_UNDEF(N);
1662	case ISD::CTTZ: return visitCTTZ(N);
1663	case ISD::CTTZ_ZERO_UNDEF: return visitCTTZ_ZERO_UNDEF(N);
1664	case ISD::CTPOP: return visitCTPOP(N);
1665	case ISD::SELECT: return visitSELECT(N);
1666	case ISD::VSELECT: return visitVSELECT(N);
1667	case ISD::SELECT_CC: return visitSELECT_CC(N);
1668	case ISD::SETCC: return visitSETCC(N);
1669	case ISD::SETCCCARRY: return visitSETCCCARRY(N);
1670	case ISD::SIGN_EXTEND: return visitSIGN_EXTEND(N);
1671	case ISD::ZERO_EXTEND: return visitZERO_EXTEND(N);
1672	case ISD::ANY_EXTEND: return visitANY_EXTEND(N);
1673	case ISD::AssertSext:
1674	case ISD::AssertZext: return visitAssertExt(N);
1675	case ISD::AssertAlign: return visitAssertAlign(N);
1676	case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N);
1677	case ISD::SIGN_EXTEND_VECTOR_INREG: return visitSIGN_EXTEND_VECTOR_INREG(N);
1678	case ISD::ZERO_EXTEND_VECTOR_INREG: return visitZERO_EXTEND_VECTOR_INREG(N);
1679	case ISD::TRUNCATE: return visitTRUNCATE(N);
1680	case ISD::BITCAST: return visitBITCAST(N);
1681	case ISD::BUILD_PAIR: return visitBUILD_PAIR(N);
1682	case ISD::FADD: return visitFADD(N);
1683	case ISD::STRICT_FADD: return visitSTRICT_FADD(N);
1684	case ISD::FSUB: return visitFSUB(N);
1685	case ISD::FMUL: return visitFMUL(N);
1686	case ISD::FMA: return visitFMA(N);
1687	case ISD::FDIV: return visitFDIV(N);
1688	case ISD::FREM: return visitFREM(N);
1689	case ISD::FSQRT: return visitFSQRT(N);
1690	case ISD::FCOPYSIGN: return visitFCOPYSIGN(N);
1691	case ISD::FPOW: return visitFPOW(N);
1692	case ISD::SINT_TO_FP: return visitSINT_TO_FP(N);
1693	case ISD::UINT_TO_FP: return visitUINT_TO_FP(N);
1694	case ISD::FP_TO_SINT: return visitFP_TO_SINT(N);
1695	case ISD::FP_TO_UINT: return visitFP_TO_UINT(N);
1696	case ISD::FP_ROUND: return visitFP_ROUND(N);
1697	case ISD::FP_EXTEND: return visitFP_EXTEND(N);
1698	case ISD::FNEG: return visitFNEG(N);
1699	case ISD::FABS: return visitFABS(N);
1700	case ISD::FFLOOR: return visitFFLOOR(N);
1701	case ISD::FMINNUM: return visitFMINNUM(N);
1702	case ISD::FMAXNUM: return visitFMAXNUM(N);
1703	case ISD::FMINIMUM: return visitFMINIMUM(N);
1704	case ISD::FMAXIMUM: return visitFMAXIMUM(N);
1705	case ISD::FCEIL: return visitFCEIL(N);
1706	case ISD::FTRUNC: return visitFTRUNC(N);
1707	case ISD::BRCOND: return visitBRCOND(N);
1708	case ISD::BR_CC: return visitBR_CC(N);
1709	case ISD::LOAD: return visitLOAD(N);
1710	case ISD::STORE: return visitSTORE(N);
1711	case ISD::INSERT_VECTOR_ELT: return visitINSERT_VECTOR_ELT(N);
1712	case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
1713	case ISD::BUILD_VECTOR: return visitBUILD_VECTOR(N);
1714	case ISD::CONCAT_VECTORS: return visitCONCAT_VECTORS(N);
1715	case ISD::EXTRACT_SUBVECTOR: return visitEXTRACT_SUBVECTOR(N);
1716	case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N);
1717	case ISD::SCALAR_TO_VECTOR: return visitSCALAR_TO_VECTOR(N);
1718	case ISD::INSERT_SUBVECTOR: return visitINSERT_SUBVECTOR(N);
1719	case ISD::MGATHER: return visitMGATHER(N);
1720	case ISD::MLOAD: return visitMLOAD(N);
1721	case ISD::MSCATTER: return visitMSCATTER(N);
1722	case ISD::MSTORE: return visitMSTORE(N);
1723	case ISD::LIFETIME_END: return visitLIFETIME_END(N);
1724	case ISD::FP_TO_FP16: return visitFP_TO_FP16(N);
1725	case ISD::FP16_TO_FP: return visitFP16_TO_FP(N);
1726	case ISD::FREEZE: return visitFREEZE(N);
1727	case ISD::VECREDUCE_FADD:
1728	case ISD::VECREDUCE_FMUL:
1729	case ISD::VECREDUCE_ADD:
1730	case ISD::VECREDUCE_MUL:
1731	case ISD::VECREDUCE_AND:
1732	case ISD::VECREDUCE_OR:
1733	case ISD::VECREDUCE_XOR:
1734	case ISD::VECREDUCE_SMAX:
1735	case ISD::VECREDUCE_SMIN:
1736	case ISD::VECREDUCE_UMAX:
1737	case ISD::VECREDUCE_UMIN:
1738	case ISD::VECREDUCE_FMAX:
1739	case ISD::VECREDUCE_FMIN: return visitVECREDUCE(N);
1740	}
1741	return SDValue();
1742	}
1743
1744	SDValue DAGCombiner::combine(SDNode *N) {
1745	SDValue RV;
1746	if (!DisableGenericCombines)
1747	RV = visit(N);
1748
1749	// If nothing happened, try a target-specific DAG combine.
1750	if (!RV.getNode()) {
1751	assert(N->getOpcode() != ISD::DELETED_NODE &&((N->getOpcode() != ISD::DELETED_NODE && "Node was deleted but visit returned NULL!" ) ? static_cast<void> (0) : __assert_fail ("N->getOpcode() != ISD::DELETED_NODE && \"Node was deleted but visit returned NULL!\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 1752, __PRETTY_FUNCTION__))
1752	"Node was deleted but visit returned NULL!")((N->getOpcode() != ISD::DELETED_NODE && "Node was deleted but visit returned NULL!" ) ? static_cast<void> (0) : __assert_fail ("N->getOpcode() != ISD::DELETED_NODE && \"Node was deleted but visit returned NULL!\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 1752, __PRETTY_FUNCTION__));
1753
1754	if (N->getOpcode() >= ISD::BUILTIN_OP_END \|\|
1755	TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {
1756
1757	// Expose the DAG combiner to the target combiner impls.
1758	TargetLowering::DAGCombinerInfo
1759	DagCombineInfo(DAG, Level, false, this);
1760
1761	RV = TLI.PerformDAGCombine(N, DagCombineInfo);
1762	}
1763	}
1764
1765	// If nothing happened still, try promoting the operation.
1766	if (!RV.getNode()) {
1767	switch (N->getOpcode()) {
1768	default: break;
1769	case ISD::ADD:
1770	case ISD::SUB:
1771	case ISD::MUL:
1772	case ISD::AND:
1773	case ISD::OR:
1774	case ISD::XOR:
1775	RV = PromoteIntBinOp(SDValue(N, 0));
1776	break;
1777	case ISD::SHL:
1778	case ISD::SRA:
1779	case ISD::SRL:
1780	RV = PromoteIntShiftOp(SDValue(N, 0));
1781	break;
1782	case ISD::SIGN_EXTEND:
1783	case ISD::ZERO_EXTEND:
1784	case ISD::ANY_EXTEND:
1785	RV = PromoteExtend(SDValue(N, 0));
1786	break;
1787	case ISD::LOAD:
1788	if (PromoteLoad(SDValue(N, 0)))
1789	RV = SDValue(N, 0);
1790	break;
1791	}
1792	}
1793
1794	// If N is a commutative binary node, try to eliminate it if the commuted
1795	// version is already present in the DAG.
1796	if (!RV.getNode() && TLI.isCommutativeBinOp(N->getOpcode()) &&
1797	N->getNumValues() == 1) {
1798	SDValue N0 = N->getOperand(0);
1799	SDValue N1 = N->getOperand(1);
1800
1801	// Constant operands are canonicalized to RHS.
1802	if (N0 != N1 && (isa<ConstantSDNode>(N0) \|\| !isa<ConstantSDNode>(N1))) {
1803	SDValue Ops[] = {N1, N0};
1804	SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
1805	N->getFlags());
1806	if (CSENode)
1807	return SDValue(CSENode, 0);
1808	}
1809	}
1810
1811	return RV;
1812	}
1813
1814	/// Given a node, return its input chain if it has one, otherwise return a null
1815	/// sd operand.
1816	static SDValue getInputChainForNode(SDNode *N) {
1817	if (unsigned NumOps = N->getNumOperands()) {
1818	if (N->getOperand(0).getValueType() == MVT::Other)
1819	return N->getOperand(0);
1820	if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
1821	return N->getOperand(NumOps-1);
1822	for (unsigned i = 1; i < NumOps-1; ++i)
1823	if (N->getOperand(i).getValueType() == MVT::Other)
1824	return N->getOperand(i);
1825	}
1826	return SDValue();
1827	}
1828
1829	SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
1830	// If N has two operands, where one has an input chain equal to the other,
1831	// the 'other' chain is redundant.
1832	if (N->getNumOperands() == 2) {
1833	if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
1834	return N->getOperand(0);
1835	if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
1836	return N->getOperand(1);
1837	}
1838
1839	// Don't simplify token factors if optnone.
1840	if (OptLevel == CodeGenOpt::None)
1841	return SDValue();
1842
1843	// Don't simplify the token factor if the node itself has too many operands.
1844	if (N->getNumOperands() > TokenFactorInlineLimit)
1845	return SDValue();
1846
1847	// If the sole user is a token factor, we should make sure we have a
1848	// chance to merge them together. This prevents TF chains from inhibiting
1849	// optimizations.
1850	if (N->hasOneUse() && N->use_begin()->getOpcode() == ISD::TokenFactor)
1851	AddToWorklist(*(N->use_begin()));
1852
1853	SmallVector<SDNode *, 8> TFs; // List of token factors to visit.
1854	SmallVector<SDValue, 8> Ops; // Ops for replacing token factor.
1855	SmallPtrSet<SDNode*, 16> SeenOps;
1856	bool Changed = false; // If we should replace this token factor.
1857
1858	// Start out with this token factor.
1859	TFs.push_back(N);
1860
1861	// Iterate through token factors. The TFs grows when new token factors are
1862	// encountered.
1863	for (unsigned i = 0; i < TFs.size(); ++i) {
1864	// Limit number of nodes to inline, to avoid quadratic compile times.
1865	// We have to add the outstanding Token Factors to Ops, otherwise we might
1866	// drop Ops from the resulting Token Factors.
1867	if (Ops.size() > TokenFactorInlineLimit) {
1868	for (unsigned j = i; j < TFs.size(); j++)
1869	Ops.emplace_back(TFs[j], 0);
1870	// Drop unprocessed Token Factors from TFs, so we do not add them to the
1871	// combiner worklist later.
1872	TFs.resize(i);
1873	break;
1874	}
1875
1876	SDNode *TF = TFs[i];
1877	// Check each of the operands.
1878	for (const SDValue &Op : TF->op_values()) {
1879	switch (Op.getOpcode()) {
1880	case ISD::EntryToken:
1881	// Entry tokens don't need to be added to the list. They are
1882	// redundant.
1883	Changed = true;
1884	break;
1885
1886	case ISD::TokenFactor:
1887	if (Op.hasOneUse() && !is_contained(TFs, Op.getNode())) {
1888	// Queue up for processing.
1889	TFs.push_back(Op.getNode());
1890	Changed = true;
1891	break;
1892	}
1893	LLVM_FALLTHROUGH[[gnu::fallthrough]];
1894
1895	default:
1896	// Only add if it isn't already in the list.
1897	if (SeenOps.insert(Op.getNode()).second)
1898	Ops.push_back(Op);
1899	else
1900	Changed = true;
1901	break;
1902	}
1903	}
1904	}
1905
1906	// Re-visit inlined Token Factors, to clean them up in case they have been
1907	// removed. Skip the first Token Factor, as this is the current node.
1908	for (unsigned i = 1, e = TFs.size(); i < e; i++)
1909	AddToWorklist(TFs[i]);
1910
1911	// Remove Nodes that are chained to another node in the list. Do so
1912	// by walking up chains breath-first stopping when we've seen
1913	// another operand. In general we must climb to the EntryNode, but we can exit
1914	// early if we find all remaining work is associated with just one operand as
1915	// no further pruning is possible.
1916
1917	// List of nodes to search through and original Ops from which they originate.
1918	SmallVector<std::pair<SDNode *, unsigned>, 8> Worklist;
1919	SmallVector<unsigned, 8> OpWorkCount; // Count of work for each Op.
1920	SmallPtrSet<SDNode *, 16> SeenChains;
1921	bool DidPruneOps = false;
1922
1923	unsigned NumLeftToConsider = 0;
1924	for (const SDValue &Op : Ops) {
1925	Worklist.push_back(std::make_pair(Op.getNode(), NumLeftToConsider++));
1926	OpWorkCount.push_back(1);
1927	}
1928
1929	auto AddToWorklist = [&](unsigned CurIdx, SDNode *Op, unsigned OpNumber) {
1930	// If this is an Op, we can remove the op from the list. Remark any
1931	// search associated with it as from the current OpNumber.
1932	if (SeenOps.contains(Op)) {
1933	Changed = true;
1934	DidPruneOps = true;
1935	unsigned OrigOpNumber = 0;
1936	while (OrigOpNumber < Ops.size() && Ops[OrigOpNumber].getNode() != Op)
1937	OrigOpNumber++;
1938	assert((OrigOpNumber != Ops.size()) &&(((OrigOpNumber != Ops.size()) && "expected to find TokenFactor Operand" ) ? static_cast<void> (0) : __assert_fail ("(OrigOpNumber != Ops.size()) && \"expected to find TokenFactor Operand\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 1939, __PRETTY_FUNCTION__))
1939	"expected to find TokenFactor Operand")(((OrigOpNumber != Ops.size()) && "expected to find TokenFactor Operand" ) ? static_cast<void> (0) : __assert_fail ("(OrigOpNumber != Ops.size()) && \"expected to find TokenFactor Operand\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 1939, __PRETTY_FUNCTION__));
1940	// Re-mark worklist from OrigOpNumber to OpNumber
1941	for (unsigned i = CurIdx + 1; i < Worklist.size(); ++i) {
1942	if (Worklist[i].second == OrigOpNumber) {
1943	Worklist[i].second = OpNumber;
1944	}
1945	}
1946	OpWorkCount[OpNumber] += OpWorkCount[OrigOpNumber];
1947	OpWorkCount[OrigOpNumber] = 0;
1948	NumLeftToConsider--;
1949	}
1950	// Add if it's a new chain
1951	if (SeenChains.insert(Op).second) {
1952	OpWorkCount[OpNumber]++;
1953	Worklist.push_back(std::make_pair(Op, OpNumber));
1954	}
1955	};
1956
1957	for (unsigned i = 0; i < Worklist.size() && i < 1024; ++i) {
1958	// We need at least be consider at least 2 Ops to prune.
1959	if (NumLeftToConsider <= 1)
1960	break;
1961	auto CurNode = Worklist[i].first;
1962	auto CurOpNumber = Worklist[i].second;
1963	assert((OpWorkCount[CurOpNumber] > 0) &&(((OpWorkCount[CurOpNumber] > 0) && "Node should not appear in worklist" ) ? static_cast<void> (0) : __assert_fail ("(OpWorkCount[CurOpNumber] > 0) && \"Node should not appear in worklist\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 1964, __PRETTY_FUNCTION__))
1964	"Node should not appear in worklist")(((OpWorkCount[CurOpNumber] > 0) && "Node should not appear in worklist" ) ? static_cast<void> (0) : __assert_fail ("(OpWorkCount[CurOpNumber] > 0) && \"Node should not appear in worklist\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 1964, __PRETTY_FUNCTION__));
1965	switch (CurNode->getOpcode()) {
1966	case ISD::EntryToken:
1967	// Hitting EntryToken is the only way for the search to terminate without
1968	// hitting
1969	// another operand's search. Prevent us from marking this operand
1970	// considered.
1971	NumLeftToConsider++;
1972	break;
1973	case ISD::TokenFactor:
1974	for (const SDValue &Op : CurNode->op_values())
1975	AddToWorklist(i, Op.getNode(), CurOpNumber);
1976	break;
1977	case ISD::LIFETIME_START:
1978	case ISD::LIFETIME_END:
1979	case ISD::CopyFromReg:
1980	case ISD::CopyToReg:
1981	AddToWorklist(i, CurNode->getOperand(0).getNode(), CurOpNumber);
1982	break;
1983	default:
1984	if (auto *MemNode = dyn_cast<MemSDNode>(CurNode))
1985	AddToWorklist(i, MemNode->getChain().getNode(), CurOpNumber);
1986	break;
1987	}
1988	OpWorkCount[CurOpNumber]--;
1989	if (OpWorkCount[CurOpNumber] == 0)
1990	NumLeftToConsider--;
1991	}
1992
1993	// If we've changed things around then replace token factor.
1994	if (Changed) {
1995	SDValue Result;
1996	if (Ops.empty()) {
1997	// The entry token is the only possible outcome.
1998	Result = DAG.getEntryNode();
1999	} else {
2000	if (DidPruneOps) {
2001	SmallVector<SDValue, 8> PrunedOps;
2002	//
2003	for (const SDValue &Op : Ops) {
2004	if (SeenChains.count(Op.getNode()) == 0)
2005	PrunedOps.push_back(Op);
2006	}
2007	Result = DAG.getTokenFactor(SDLoc(N), PrunedOps);
2008	} else {
2009	Result = DAG.getTokenFactor(SDLoc(N), Ops);
2010	}
2011	}
2012	return Result;
2013	}
2014	return SDValue();
2015	}
2016
2017	/// MERGE_VALUES can always be eliminated.
2018	SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
2019	WorklistRemover DeadNodes(*this);
2020	// Replacing results may cause a different MERGE_VALUES to suddenly
2021	// be CSE'd with N, and carry its uses with it. Iterate until no
2022	// uses remain, to ensure that the node can be safely deleted.
2023	// First add the users of this node to the work list so that they
2024	// can be tried again once they have new operands.
2025	AddUsersToWorklist(N);
2026	do {
2027	// Do as a single replacement to avoid rewalking use lists.
2028	SmallVector<SDValue, 8> Ops;
2029	for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
2030	Ops.push_back(N->getOperand(i));
2031	DAG.ReplaceAllUsesWith(N, Ops.data());
2032	} while (!N->use_empty());
2033	deleteAndRecombine(N);
2034	return SDValue(N, 0); // Return N so it doesn't get rechecked!
2035	}
2036
2037	/// If \p N is a ConstantSDNode with isOpaque() == false return it casted to a
2038	/// ConstantSDNode pointer else nullptr.
2039	static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) {
2040	ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N);
2041	return Const != nullptr && !Const->isOpaque() ? Const : nullptr;
2042	}
2043
2044	/// Return true if 'Use' is a load or a store that uses N as its base pointer
2045	/// and that N may be folded in the load / store addressing mode.
2046	static bool canFoldInAddressingMode(SDNode N, SDNode Use, SelectionDAG &DAG,
2047	const TargetLowering &TLI) {
2048	EVT VT;
2049	unsigned AS;
2050
2051	if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Use)) {
2052	if (LD->isIndexed() \|\| LD->getBasePtr().getNode() != N)
2053	return false;
2054	VT = LD->getMemoryVT();
2055	AS = LD->getAddressSpace();
2056	} else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Use)) {
2057	if (ST->isIndexed() \|\| ST->getBasePtr().getNode() != N)
2058	return false;
2059	VT = ST->getMemoryVT();
2060	AS = ST->getAddressSpace();
2061	} else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(Use)) {
2062	if (LD->isIndexed() \|\| LD->getBasePtr().getNode() != N)
2063	return false;
2064	VT = LD->getMemoryVT();
2065	AS = LD->getAddressSpace();
2066	} else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(Use)) {
2067	if (ST->isIndexed() \|\| ST->getBasePtr().getNode() != N)
2068	return false;
2069	VT = ST->getMemoryVT();
2070	AS = ST->getAddressSpace();
2071	} else
2072	return false;
2073
2074	TargetLowering::AddrMode AM;
2075	if (N->getOpcode() == ISD::ADD) {
2076	AM.HasBaseReg = true;
2077	ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
2078	if (Offset)
2079	// [reg +/- imm]
2080	AM.BaseOffs = Offset->getSExtValue();
2081	else
2082	// [reg +/- reg]
2083	AM.Scale = 1;
2084	} else if (N->getOpcode() == ISD::SUB) {
2085	AM.HasBaseReg = true;
2086	ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
2087	if (Offset)
2088	// [reg +/- imm]
2089	AM.BaseOffs = -Offset->getSExtValue();
2090	else
2091	// [reg +/- reg]
2092	AM.Scale = 1;
2093	} else
2094	return false;
2095
2096	return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM,
2097	VT.getTypeForEVT(*DAG.getContext()), AS);
2098	}
2099
2100	SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
2101	assert(TLI.isBinOp(BO->getOpcode()) && BO->getNumValues() == 1 &&((TLI.isBinOp(BO->getOpcode()) && BO->getNumValues () == 1 && "Unexpected binary operator") ? static_cast <void> (0) : __assert_fail ("TLI.isBinOp(BO->getOpcode()) && BO->getNumValues() == 1 && \"Unexpected binary operator\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 2102, __PRETTY_FUNCTION__))
2102	"Unexpected binary operator")((TLI.isBinOp(BO->getOpcode()) && BO->getNumValues () == 1 && "Unexpected binary operator") ? static_cast <void> (0) : __assert_fail ("TLI.isBinOp(BO->getOpcode()) && BO->getNumValues() == 1 && \"Unexpected binary operator\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 2102, __PRETTY_FUNCTION__));
2103
2104	// Don't do this unless the old select is going away. We want to eliminate the
2105	// binary operator, not replace a binop with a select.
2106	// TODO: Handle ISD::SELECT_CC.
2107	unsigned SelOpNo = 0;
2108	SDValue Sel = BO->getOperand(0);
2109	if (Sel.getOpcode() != ISD::SELECT \|\| !Sel.hasOneUse()) {
2110	SelOpNo = 1;
2111	Sel = BO->getOperand(1);
2112	}
2113
2114	if (Sel.getOpcode() != ISD::SELECT \|\| !Sel.hasOneUse())
2115	return SDValue();
2116
2117	SDValue CT = Sel.getOperand(1);
2118	if (!isConstantOrConstantVector(CT, true) &&
2119	!DAG.isConstantFPBuildVectorOrConstantFP(CT))
2120	return SDValue();
2121
2122	SDValue CF = Sel.getOperand(2);
2123	if (!isConstantOrConstantVector(CF, true) &&
2124	!DAG.isConstantFPBuildVectorOrConstantFP(CF))
2125	return SDValue();
2126
2127	// Bail out if any constants are opaque because we can't constant fold those.
2128	// The exception is "and" and "or" with either 0 or -1 in which case we can
2129	// propagate non constant operands into select. I.e.:
2130	// and (select Cond, 0, -1), X --> select Cond, 0, X
2131	// or X, (select Cond, -1, 0) --> select Cond, -1, X
2132	auto BinOpcode = BO->getOpcode();
2133	bool CanFoldNonConst =
2134	(BinOpcode == ISD::AND \|\| BinOpcode == ISD::OR) &&
2135	(isNullOrNullSplat(CT) \|\| isAllOnesOrAllOnesSplat(CT)) &&
2136	(isNullOrNullSplat(CF) \|\| isAllOnesOrAllOnesSplat(CF));
2137
2138	SDValue CBO = BO->getOperand(SelOpNo ^ 1);
2139	if (!CanFoldNonConst &&
2140	!isConstantOrConstantVector(CBO, true) &&
2141	!DAG.isConstantFPBuildVectorOrConstantFP(CBO))
2142	return SDValue();
2143
2144	EVT VT = BO->getValueType(0);
2145
2146	// We have a select-of-constants followed by a binary operator with a
2147	// constant. Eliminate the binop by pulling the constant math into the select.
2148	// Example: add (select Cond, CT, CF), CBO --> select Cond, CT + CBO, CF + CBO
2149	SDLoc DL(Sel);
2150	SDValue NewCT = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CT)
2151	: DAG.getNode(BinOpcode, DL, VT, CT, CBO);
2152	if (!CanFoldNonConst && !NewCT.isUndef() &&
2153	!isConstantOrConstantVector(NewCT, true) &&
2154	!DAG.isConstantFPBuildVectorOrConstantFP(NewCT))
2155	return SDValue();
2156
2157	SDValue NewCF = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CF)
2158	: DAG.getNode(BinOpcode, DL, VT, CF, CBO);
2159	if (!CanFoldNonConst && !NewCF.isUndef() &&
2160	!isConstantOrConstantVector(NewCF, true) &&
2161	!DAG.isConstantFPBuildVectorOrConstantFP(NewCF))
2162	return SDValue();
2163
2164	SDValue SelectOp = DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF);
2165	SelectOp->setFlags(BO->getFlags());
2166	return SelectOp;
2167	}
2168
2169	static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, SelectionDAG &DAG) {
2170	assert((N->getOpcode() == ISD::ADD \|\| N->getOpcode() == ISD::SUB) &&(((N->getOpcode() == ISD::ADD \|\| N->getOpcode() == ISD:: SUB) && "Expecting add or sub") ? static_cast<void > (0) : __assert_fail ("(N->getOpcode() == ISD::ADD \|\| N->getOpcode() == ISD::SUB) && \"Expecting add or sub\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 2171, __PRETTY_FUNCTION__))
2171	"Expecting add or sub")(((N->getOpcode() == ISD::ADD \|\| N->getOpcode() == ISD:: SUB) && "Expecting add or sub") ? static_cast<void > (0) : __assert_fail ("(N->getOpcode() == ISD::ADD \|\| N->getOpcode() == ISD::SUB) && \"Expecting add or sub\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 2171, __PRETTY_FUNCTION__));
2172
2173	// Match a constant operand and a zext operand for the math instruction:
2174	// add Z, C
2175	// sub C, Z
2176	bool IsAdd = N->getOpcode() == ISD::ADD;
2177	SDValue C = IsAdd ? N->getOperand(1) : N->getOperand(0);
2178	SDValue Z = IsAdd ? N->getOperand(0) : N->getOperand(1);
2179	auto *CN = dyn_cast<ConstantSDNode>(C);
2180	if (!CN \|\| Z.getOpcode() != ISD::ZERO_EXTEND)
2181	return SDValue();
2182
2183	// Match the zext operand as a setcc of a boolean.
2184	if (Z.getOperand(0).getOpcode() != ISD::SETCC \|\|
2185	Z.getOperand(0).getValueType() != MVT::i1)
2186	return SDValue();
2187
2188	// Match the compare as: setcc (X & 1), 0, eq.
2189	SDValue SetCC = Z.getOperand(0);
2190	ISD::CondCode CC = cast<CondCodeSDNode>(SetCC->getOperand(2))->get();
2191	if (CC != ISD::SETEQ \|\| !isNullConstant(SetCC.getOperand(1)) \|\|
2192	SetCC.getOperand(0).getOpcode() != ISD::AND \|\|
2193	!isOneConstant(SetCC.getOperand(0).getOperand(1)))
2194	return SDValue();
2195
2196	// We are adding/subtracting a constant and an inverted low bit. Turn that
2197	// into a subtract/add of the low bit with incremented/decremented constant:
2198	// add (zext i1 (seteq (X & 1), 0)), C --> sub C+1, (zext (X & 1))
2199	// sub C, (zext i1 (seteq (X & 1), 0)) --> add C-1, (zext (X & 1))
2200	EVT VT = C.getValueType();
2201	SDLoc DL(N);
2202	SDValue LowBit = DAG.getZExtOrTrunc(SetCC.getOperand(0), DL, VT);
2203	SDValue C1 = IsAdd ? DAG.getConstant(CN->getAPIntValue() + 1, DL, VT) :
2204	DAG.getConstant(CN->getAPIntValue() - 1, DL, VT);
2205	return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, C1, LowBit);
2206	}
2207
2208	/// Try to fold a 'not' shifted sign-bit with add/sub with constant operand into
2209	/// a shift and add with a different constant.
2210	static SDValue foldAddSubOfSignBit(SDNode *N, SelectionDAG &DAG) {
2211	assert((N->getOpcode() == ISD::ADD \|\| N->getOpcode() == ISD::SUB) &&(((N->getOpcode() == ISD::ADD \|\| N->getOpcode() == ISD:: SUB) && "Expecting add or sub") ? static_cast<void > (0) : __assert_fail ("(N->getOpcode() == ISD::ADD \|\| N->getOpcode() == ISD::SUB) && \"Expecting add or sub\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 2212, __PRETTY_FUNCTION__))
2212	"Expecting add or sub")(((N->getOpcode() == ISD::ADD \|\| N->getOpcode() == ISD:: SUB) && "Expecting add or sub") ? static_cast<void > (0) : __assert_fail ("(N->getOpcode() == ISD::ADD \|\| N->getOpcode() == ISD::SUB) && \"Expecting add or sub\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 2212, __PRETTY_FUNCTION__));
2213
2214	// We need a constant operand for the add/sub, and the other operand is a
2215	// logical shift right: add (srl), C or sub C, (srl).
2216	bool IsAdd = N->getOpcode() == ISD::ADD;
2217	SDValue ConstantOp = IsAdd ? N->getOperand(1) : N->getOperand(0);
2218	SDValue ShiftOp = IsAdd ? N->getOperand(0) : N->getOperand(1);
2219	if (!DAG.isConstantIntBuildVectorOrConstantInt(ConstantOp) \|\|
2220	ShiftOp.getOpcode() != ISD::SRL)
2221	return SDValue();
2222
2223	// The shift must be of a 'not' value.
2224	SDValue Not = ShiftOp.getOperand(0);
2225	if (!Not.hasOneUse() \|\| !isBitwiseNot(Not))
2226	return SDValue();
2227
2228	// The shift must be moving the sign bit to the least-significant-bit.
2229	EVT VT = ShiftOp.getValueType();
2230	SDValue ShAmt = ShiftOp.getOperand(1);
2231	ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
2232	if (!ShAmtC \|\| ShAmtC->getAPIntValue() != (VT.getScalarSizeInBits() - 1))
2233	return SDValue();
2234
2235	// Eliminate the 'not' by adjusting the shift and add/sub constant:
2236	// add (srl (not X), 31), C --> add (sra X, 31), (C + 1)
2237	// sub C, (srl (not X), 31) --> add (srl X, 31), (C - 1)
2238	SDLoc DL(N);
2239	auto ShOpcode = IsAdd ? ISD::SRA : ISD::SRL;
2240	SDValue NewShift = DAG.getNode(ShOpcode, DL, VT, Not.getOperand(0), ShAmt);
2241	if (SDValue NewC =
2242	DAG.FoldConstantArithmetic(IsAdd ? ISD::ADD : ISD::SUB, DL, VT,
2243	{ConstantOp, DAG.getConstant(1, DL, VT)}))
2244	return DAG.getNode(ISD::ADD, DL, VT, NewShift, NewC);
2245	return SDValue();
2246	}
2247
2248	/// Try to fold a node that behaves like an ADD (note that N isn't necessarily
2249	/// an ISD::ADD here, it could for example be an ISD::OR if we know that there
2250	/// are no common bits set in the operands).
2251	SDValue DAGCombiner::visitADDLike(SDNode *N) {
2252	SDValue N0 = N->getOperand(0);
2253	SDValue N1 = N->getOperand(1);
2254	EVT VT = N0.getValueType();
2255	SDLoc DL(N);
2256
2257	// fold vector ops
2258	if (VT.isVector()) {
2259	if (SDValue FoldedVOp = SimplifyVBinOp(N))
2260	return FoldedVOp;
2261
2262	// fold (add x, 0) -> x, vector edition
2263	if (ISD::isBuildVectorAllZeros(N1.getNode()))
2264	return N0;
2265	if (ISD::isBuildVectorAllZeros(N0.getNode()))
2266	return N1;
2267	}
2268
2269	// fold (add x, undef) -> undef
2270	if (N0.isUndef())
2271	return N0;
2272
2273	if (N1.isUndef())
2274	return N1;
2275
2276	if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
2277	// canonicalize constant to RHS
2278	if (!DAG.isConstantIntBuildVectorOrConstantInt(N1))
2279	return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
2280	// fold (add c1, c2) -> c1+c2
2281	return DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N0, N1});
2282	}
2283
2284	// fold (add x, 0) -> x
2285	if (isNullConstant(N1))
2286	return N0;
2287
2288	if (isConstantOrConstantVector(N1, /* NoOpaque */ true)) {
2289	// fold ((A-c1)+c2) -> (A+(c2-c1))
2290	if (N0.getOpcode() == ISD::SUB &&
2291	isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true)) {
2292	SDValue Sub =
2293	DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N1, N0.getOperand(1)});
2294	assert(Sub && "Constant folding failed")((Sub && "Constant folding failed") ? static_cast< void> (0) : __assert_fail ("Sub && \"Constant folding failed\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 2294, __PRETTY_FUNCTION__));
2295	return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Sub);
2296	}
2297
2298	// fold ((c1-A)+c2) -> (c1+c2)-A
2299	if (N0.getOpcode() == ISD::SUB &&
2300	isConstantOrConstantVector(N0.getOperand(0), /* NoOpaque */ true)) {
2301	SDValue Add =
2302	DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N1, N0.getOperand(0)});
2303	assert(Add && "Constant folding failed")((Add && "Constant folding failed") ? static_cast< void> (0) : __assert_fail ("Add && \"Constant folding failed\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 2303, __PRETTY_FUNCTION__));
2304	return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
2305	}
2306
2307	// add (sext i1 X), 1 -> zext (not i1 X)
2308	// We don't transform this pattern:
2309	// add (zext i1 X), -1 -> sext (not i1 X)
2310	// because most (?) targets generate better code for the zext form.
2311	if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
2312	isOneOrOneSplat(N1)) {
2313	SDValue X = N0.getOperand(0);
2314	if ((!LegalOperations \|\|
2315	(TLI.isOperationLegal(ISD::XOR, X.getValueType()) &&
2316	TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) &&
2317	X.getScalarValueSizeInBits() == 1) {
2318	SDValue Not = DAG.getNOT(DL, X, X.getValueType());
2319	return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Not);
2320	}
2321	}
2322
2323	// Fold (add (or x, c0), c1) -> (add x, (c0 + c1)) if (or x, c0) is
2324	// equivalent to (add x, c0).
2325	if (N0.getOpcode() == ISD::OR &&
2326	isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true) &&
2327	DAG.haveNoCommonBitsSet(N0.getOperand(0), N0.getOperand(1))) {
2328	if (SDValue Add0 = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT,
2329	{N1, N0.getOperand(1)}))
2330	return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Add0);
2331	}
2332	}
2333
2334	if (SDValue NewSel = foldBinOpIntoSelect(N))
2335	return NewSel;
2336
2337	// reassociate add
2338	if (!reassociationCanBreakAddressingModePattern(ISD::ADD, DL, N0, N1)) {
2339	if (SDValue RADD = reassociateOps(ISD::ADD, DL, N0, N1, N->getFlags()))
2340	return RADD;
2341	}
2342	// fold ((0-A) + B) -> B-A
2343	if (N0.getOpcode() == ISD::SUB && isNullOrNullSplat(N0.getOperand(0)))
2344	return DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
2345
2346	// fold (A + (0-B)) -> A-B
2347	if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
2348	return DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(1));
2349
2350	// fold (A+(B-A)) -> B
2351	if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1))
2352	return N1.getOperand(0);
2353
2354	// fold ((B-A)+A) -> B
2355	if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1))
2356	return N0.getOperand(0);
2357
2358	// fold ((A-B)+(C-A)) -> (C-B)
2359	if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
2360	N0.getOperand(0) == N1.getOperand(1))
2361	return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2362	N0.getOperand(1));
2363
2364	// fold ((A-B)+(B-C)) -> (A-C)
2365	if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
2366	N0.getOperand(1) == N1.getOperand(0))
2367	return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
2368	N1.getOperand(1));
2369
2370	// fold (A+(B-(A+C))) to (B-C)
2371	if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
2372	N0 == N1.getOperand(1).getOperand(0))
2373	return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2374	N1.getOperand(1).getOperand(1));
2375
2376	// fold (A+(B-(C+A))) to (B-C)
2377	if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
2378	N0 == N1.getOperand(1).getOperand(1))
2379	return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2380	N1.getOperand(1).getOperand(0));
2381
2382	// fold (A+((B-A)+or-C)) to (B+or-C)
2383	if ((N1.getOpcode() == ISD::SUB \|\| N1.getOpcode() == ISD::ADD) &&
2384	N1.getOperand(0).getOpcode() == ISD::SUB &&
2385	N0 == N1.getOperand(0).getOperand(1))
2386	return DAG.getNode(N1.getOpcode(), DL, VT, N1.getOperand(0).getOperand(0),
2387	N1.getOperand(1));
2388
2389	// fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
2390	if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB) {
2391	SDValue N00 = N0.getOperand(0);
2392	SDValue N01 = N0.getOperand(1);
2393	SDValue N10 = N1.getOperand(0);
2394	SDValue N11 = N1.getOperand(1);
2395
2396	if (isConstantOrConstantVector(N00) \|\| isConstantOrConstantVector(N10))
2397	return DAG.getNode(ISD::SUB, DL, VT,
2398	DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10),
2399	DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11));
2400	}
2401
2402	// fold (add (umax X, C), -C) --> (usubsat X, C)
2403	if (N0.getOpcode() == ISD::UMAX && hasOperation(ISD::USUBSAT, VT)) {
2404	auto MatchUSUBSAT = [](ConstantSDNode Max, ConstantSDNode Op) {
2405	return (!Max && !Op) \|\|
2406	(Max && Op && Max->getAPIntValue() == (-Op->getAPIntValue()));
2407	};
2408	if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchUSUBSAT,
2409	/AllowUndefs/ true))
2410	return DAG.getNode(ISD::USUBSAT, DL, VT, N0.getOperand(0),
2411	N0.getOperand(1));
2412	}
2413
2414	if (SimplifyDemandedBits(SDValue(N, 0)))
2415	return SDValue(N, 0);
2416
2417	if (isOneOrOneSplat(N1)) {
2418	// fold (add (xor a, -1), 1) -> (sub 0, a)
2419	if (isBitwiseNot(N0))
2420	return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
2421	N0.getOperand(0));
2422
2423	// fold (add (add (xor a, -1), b), 1) -> (sub b, a)
2424	if (N0.getOpcode() == ISD::ADD \|\|
2425	N0.getOpcode() == ISD::UADDO \|\|
2426	N0.getOpcode() == ISD::SADDO) {
2427	SDValue A, Xor;
2428
2429	if (isBitwiseNot(N0.getOperand(0))) {
2430	A = N0.getOperand(1);
2431	Xor = N0.getOperand(0);
2432	} else if (isBitwiseNot(N0.getOperand(1))) {
2433	A = N0.getOperand(0);
2434	Xor = N0.getOperand(1);
2435	}
2436
2437	if (Xor)
2438	return DAG.getNode(ISD::SUB, DL, VT, A, Xor.getOperand(0));
2439	}
2440
2441	// Look for:
2442	// add (add x, y), 1
2443	// And if the target does not like this form then turn into:
2444	// sub y, (xor x, -1)
2445	if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.hasOneUse() &&
2446	N0.getOpcode() == ISD::ADD) {
2447	SDValue Not = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
2448	DAG.getAllOnesConstant(DL, VT));
2449	return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(1), Not);
2450	}
2451	}
2452
2453	// (x - y) + -1 -> add (xor y, -1), x
2454	if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
2455	isAllOnesOrAllOnesSplat(N1)) {
2456	SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1), N1);
2457	return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0));
2458	}
2459
2460	if (SDValue Combined = visitADDLikeCommutative(N0, N1, N))
2461	return Combined;
2462
2463	if (SDValue Combined = visitADDLikeCommutative(N1, N0, N))
2464	return Combined;
2465
2466	return SDValue();
2467	}
2468
2469	SDValue DAGCombiner::visitADD(SDNode *N) {
2470	SDValue N0 = N->getOperand(0);
2471	SDValue N1 = N->getOperand(1);
2472	EVT VT = N0.getValueType();
2473	SDLoc DL(N);
2474
2475	if (SDValue Combined = visitADDLike(N))
2476	return Combined;
2477
2478	if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
2479	return V;
2480
2481	if (SDValue V = foldAddSubOfSignBit(N, DAG))
2482	return V;
2483
2484	// fold (a+b) -> (a\|b) iff a and b share no bits.
2485	if ((!LegalOperations \|\| TLI.isOperationLegal(ISD::OR, VT)) &&
2486	DAG.haveNoCommonBitsSet(N0, N1))
2487	return DAG.getNode(ISD::OR, DL, VT, N0, N1);
2488
2489	// Fold (add (vscale * C0), (vscale * C1)) to (vscale * (C0 + C1)).
2490	if (N0.getOpcode() == ISD::VSCALE && N1.getOpcode() == ISD::VSCALE) {
2491	const APInt &C0 = N0->getConstantOperandAPInt(0);
2492	const APInt &C1 = N1->getConstantOperandAPInt(0);
2493	return DAG.getVScale(DL, VT, C0 + C1);
2494	}
2495
2496	// fold a+vscale(c1)+vscale(c2) -> a+vscale(c1+c2)
2497	if ((N0.getOpcode() == ISD::ADD) &&
2498	(N0.getOperand(1).getOpcode() == ISD::VSCALE) &&
2499	(N1.getOpcode() == ISD::VSCALE)) {
2500	const APInt &VS0 = N0.getOperand(1)->getConstantOperandAPInt(0);
2501	const APInt &VS1 = N1->getConstantOperandAPInt(0);
2502	SDValue VS = DAG.getVScale(DL, VT, VS0 + VS1);
2503	return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), VS);
2504	}
2505
2506	return SDValue();
2507	}
2508
2509	SDValue DAGCombiner::visitADDSAT(SDNode *N) {
2510	unsigned Opcode = N->getOpcode();
2511	SDValue N0 = N->getOperand(0);
2512	SDValue N1 = N->getOperand(1);
2513	EVT VT = N0.getValueType();
2514	SDLoc DL(N);
2515
2516	// fold vector ops
2517	if (VT.isVector()) {
2518	// TODO SimplifyVBinOp
2519
2520	// fold (add_sat x, 0) -> x, vector edition
2521	if (ISD::isBuildVectorAllZeros(N1.getNode()))
2522	return N0;
2523	if (ISD::isBuildVectorAllZeros(N0.getNode()))
2524	return N1;
2525	}
2526
2527	// fold (add_sat x, undef) -> -1
2528	if (N0.isUndef() \|\| N1.isUndef())
2529	return DAG.getAllOnesConstant(DL, VT);
2530
2531	if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
2532	// canonicalize constant to RHS
2533	if (!DAG.isConstantIntBuildVectorOrConstantInt(N1))
2534	return DAG.getNode(Opcode, DL, VT, N1, N0);
2535	// fold (add_sat c1, c2) -> c3
2536	return DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1});
2537	}
2538
2539	// fold (add_sat x, 0) -> x
2540	if (isNullConstant(N1))
2541	return N0;
2542
2543	// If it cannot overflow, transform into an add.
2544	if (Opcode == ISD::UADDSAT)
2545	if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2546	return DAG.getNode(ISD::ADD, DL, VT, N0, N1);
2547
2548	return SDValue();
2549	}
2550
2551	static SDValue getAsCarry(const TargetLowering &TLI, SDValue V) {
2552	bool Masked = false;
2553
2554	// First, peel away TRUNCATE/ZERO_EXTEND/AND nodes due to legalization.
2555	while (true) {
2556	if (V.getOpcode() == ISD::TRUNCATE \|\| V.getOpcode() == ISD::ZERO_EXTEND) {
2557	V = V.getOperand(0);
2558	continue;
2559	}
2560
2561	if (V.getOpcode() == ISD::AND && isOneConstant(V.getOperand(1))) {
2562	Masked = true;
2563	V = V.getOperand(0);
2564	continue;
2565	}
2566
2567	break;
2568	}
2569
2570	// If this is not a carry, return.
2571	if (V.getResNo() != 1)
2572	return SDValue();
2573
2574	if (V.getOpcode() != ISD::ADDCARRY && V.getOpcode() != ISD::SUBCARRY &&
2575	V.getOpcode() != ISD::UADDO && V.getOpcode() != ISD::USUBO)
2576	return SDValue();
2577
2578	EVT VT = V.getNode()->getValueType(0);
2579	if (!TLI.isOperationLegalOrCustom(V.getOpcode(), VT))
2580	return SDValue();
2581
2582	// If the result is masked, then no matter what kind of bool it is we can
2583	// return. If it isn't, then we need to make sure the bool type is either 0 or
2584	// 1 and not other values.
2585	if (Masked \|\|
2586	TLI.getBooleanContents(V.getValueType()) ==
2587	TargetLoweringBase::ZeroOrOneBooleanContent)
2588	return V;
2589
2590	return SDValue();
2591	}
2592
2593	/// Given the operands of an add/sub operation, see if the 2nd operand is a
2594	/// masked 0/1 whose source operand is actually known to be 0/-1. If so, invert
2595	/// the opcode and bypass the mask operation.
2596	static SDValue foldAddSubMasked1(bool IsAdd, SDValue N0, SDValue N1,
2597	SelectionDAG &DAG, const SDLoc &DL) {
2598	if (N1.getOpcode() != ISD::AND \|\| !isOneOrOneSplat(N1->getOperand(1)))
2599	return SDValue();
2600
2601	EVT VT = N0.getValueType();
2602	if (DAG.ComputeNumSignBits(N1.getOperand(0)) != VT.getScalarSizeInBits())
2603	return SDValue();
2604
2605	// add N0, (and (AssertSext X, i1), 1) --> sub N0, X
2606	// sub N0, (and (AssertSext X, i1), 1) --> add N0, X
2607	return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, N0, N1.getOperand(0));
2608	}
2609
2610	/// Helper for doing combines based on N0 and N1 being added to each other.
2611	SDValue DAGCombiner::visitADDLikeCommutative(SDValue N0, SDValue N1,
2612	SDNode *LocReference) {
2613	EVT VT = N0.getValueType();
2614	SDLoc DL(LocReference);
2615
2616	// fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
2617	if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB &&
2618	isNullOrNullSplat(N1.getOperand(0).getOperand(0)))
2619	return DAG.getNode(ISD::SUB, DL, VT, N0,
2620	DAG.getNode(ISD::SHL, DL, VT,
2621	N1.getOperand(0).getOperand(1),
2622	N1.getOperand(1)));
2623
2624	if (SDValue V = foldAddSubMasked1(true, N0, N1, DAG, DL))
2625	return V;
2626
2627	// Look for:
2628	// add (add x, 1), y
2629	// And if the target does not like this form then turn into:
2630	// sub y, (xor x, -1)
2631	if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.hasOneUse() &&
2632	N0.getOpcode() == ISD::ADD && isOneOrOneSplat(N0.getOperand(1))) {
2633	SDValue Not = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
2634	DAG.getAllOnesConstant(DL, VT));
2635	return DAG.getNode(ISD::SUB, DL, VT, N1, Not);
2636	}
2637
2638	// Hoist one-use subtraction by non-opaque constant:
2639	// (x - C) + y -> (x + y) - C
2640	// This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
2641	if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
2642	isConstantOrConstantVector(N0.getOperand(1), /NoOpaques=/true)) {
2643	SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), N1);
2644	return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
2645	}
2646	// Hoist one-use subtraction from non-opaque constant:
2647	// (C - x) + y -> (y - x) + C
2648	if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
2649	isConstantOrConstantVector(N0.getOperand(0), /NoOpaques=/true)) {
2650	SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
2651	return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(0));
2652	}
2653
2654	// If the target's bool is represented as 0/1, prefer to make this 'sub 0/1'
2655	// rather than 'add 0/-1' (the zext should get folded).
2656	// add (sext i1 Y), X --> sub X, (zext i1 Y)
2657	if (N0.getOpcode() == ISD::SIGN_EXTEND &&
2658	N0.getOperand(0).getScalarValueSizeInBits() == 1 &&
2659	TLI.getBooleanContents(VT) == TargetLowering::ZeroOrOneBooleanContent) {
2660	SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
2661	return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
2662	}
2663
2664	// add X, (sextinreg Y i1) -> sub X, (and Y 1)
2665	if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
2666	VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
2667	if (TN->getVT() == MVT::i1) {
2668	SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
2669	DAG.getConstant(1, DL, VT));
2670	return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt);
2671	}
2672	}
2673
2674	// (add X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
2675	if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1)) &&
2676	N1.getResNo() == 0)
2677	return DAG.getNode(ISD::ADDCARRY, DL, N1->getVTList(),
2678	N0, N1.getOperand(0), N1.getOperand(2));
2679
2680	// (add X, Carry) -> (addcarry X, 0, Carry)
2681	if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
2682	if (SDValue Carry = getAsCarry(TLI, N1))
2683	return DAG.getNode(ISD::ADDCARRY, DL,
2684	DAG.getVTList(VT, Carry.getValueType()), N0,
2685	DAG.getConstant(0, DL, VT), Carry);
2686
2687	return SDValue();
2688	}
2689
2690	SDValue DAGCombiner::visitADDC(SDNode *N) {
2691	SDValue N0 = N->getOperand(0);
2692	SDValue N1 = N->getOperand(1);
2693	EVT VT = N0.getValueType();
2694	SDLoc DL(N);
2695
2696	// If the flag result is dead, turn this into an ADD.
2697	if (!N->hasAnyUseOfValue(1))
2698	return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2699	DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2700
2701	// canonicalize constant to RHS.
2702	ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2703	ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2704	if (N0C && !N1C)
2705	return DAG.getNode(ISD::ADDC, DL, N->getVTList(), N1, N0);
2706
2707	// fold (addc x, 0) -> x + no carry out
2708	if (isNullConstant(N1))
2709	return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
2710	DL, MVT::Glue));
2711
2712	// If it cannot overflow, transform into an add.
2713	if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2714	return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2715	DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2716
2717	return SDValue();
2718	}
2719
2720	/**
2721	* Flips a boolean if it is cheaper to compute. If the Force parameters is set,
2722	* then the flip also occurs if computing the inverse is the same cost.
2723	* This function returns an empty SDValue in case it cannot flip the boolean
2724	* without increasing the cost of the computation. If you want to flip a boolean
2725	* no matter what, use DAG.getLogicalNOT.
2726	*/
2727	static SDValue extractBooleanFlip(SDValue V, SelectionDAG &DAG,
2728	const TargetLowering &TLI,
2729	bool Force) {
2730	if (Force && isa<ConstantSDNode>(V))
2731	return DAG.getLogicalNOT(SDLoc(V), V, V.getValueType());
2732
2733	if (V.getOpcode() != ISD::XOR)
2734	return SDValue();
2735
2736	ConstantSDNode *Const = isConstOrConstSplat(V.getOperand(1), false);
2737	if (!Const)
2738	return SDValue();
2739
2740	EVT VT = V.getValueType();
2741
2742	bool IsFlip = false;
2743	switch(TLI.getBooleanContents(VT)) {
2744	case TargetLowering::ZeroOrOneBooleanContent:
2745	IsFlip = Const->isOne();
2746	break;
2747	case TargetLowering::ZeroOrNegativeOneBooleanContent:
2748	IsFlip = Const->isAllOnesValue();
2749	break;
2750	case TargetLowering::UndefinedBooleanContent:
2751	IsFlip = (Const->getAPIntValue() & 0x01) == 1;
2752	break;
2753	}
2754
2755	if (IsFlip)
2756	return V.getOperand(0);
2757	if (Force)
2758	return DAG.getLogicalNOT(SDLoc(V), V, V.getValueType());
2759	return SDValue();
2760	}
2761
2762	SDValue DAGCombiner::visitADDO(SDNode *N) {
2763	SDValue N0 = N->getOperand(0);
2764	SDValue N1 = N->getOperand(1);
2765	EVT VT = N0.getValueType();
2766	bool IsSigned = (ISD::SADDO == N->getOpcode());
2767
2768	EVT CarryVT = N->getValueType(1);
2769	SDLoc DL(N);
2770
2771	// If the flag result is dead, turn this into an ADD.
2772	if (!N->hasAnyUseOfValue(1))
2773	return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2774	DAG.getUNDEF(CarryVT));
2775
2776	// canonicalize constant to RHS.
2777	if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
2778	!DAG.isConstantIntBuildVectorOrConstantInt(N1))
2779	return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
2780
2781	// fold (addo x, 0) -> x + no carry out
2782	if (isNullOrNullSplat(N1))
2783	return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
2784
2785	if (!IsSigned) {
2786	// If it cannot overflow, transform into an add.
2787	if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2788	return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2789	DAG.getConstant(0, DL, CarryVT));
2790
2791	// fold (uaddo (xor a, -1), 1) -> (usub 0, a) and flip carry.
2792	if (isBitwiseNot(N0) && isOneOrOneSplat(N1)) {
2793	SDValue Sub = DAG.getNode(ISD::USUBO, DL, N->getVTList(),
2794	DAG.getConstant(0, DL, VT), N0.getOperand(0));
2795	return CombineTo(
2796	N, Sub, DAG.getLogicalNOT(DL, Sub.getValue(1), Sub->getValueType(1)));
2797	}
2798
2799	if (SDValue Combined = visitUADDOLike(N0, N1, N))
2800	return Combined;
2801
2802	if (SDValue Combined = visitUADDOLike(N1, N0, N))
2803	return Combined;
2804	}
2805
2806	return SDValue();
2807	}
2808
2809	SDValue DAGCombiner::visitUADDOLike(SDValue N0, SDValue N1, SDNode *N) {
2810	EVT VT = N0.getValueType();
2811	if (VT.isVector())
2812	return SDValue();
2813
2814	// (uaddo X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
2815	// If Y + 1 cannot overflow.
2816	if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1))) {
2817	SDValue Y = N1.getOperand(0);
2818	SDValue One = DAG.getConstant(1, SDLoc(N), Y.getValueType());
2819	if (DAG.computeOverflowKind(Y, One) == SelectionDAG::OFK_Never)
2820	return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0, Y,
2821	N1.getOperand(2));
2822	}
2823
2824	// (uaddo X, Carry) -> (addcarry X, 0, Carry)
2825	if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
2826	if (SDValue Carry = getAsCarry(TLI, N1))
2827	return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0,
2828	DAG.getConstant(0, SDLoc(N), VT), Carry);
2829
2830	return SDValue();
2831	}
2832
2833	SDValue DAGCombiner::visitADDE(SDNode *N) {
2834	SDValue N0 = N->getOperand(0);
2835	SDValue N1 = N->getOperand(1);
2836	SDValue CarryIn = N->getOperand(2);
2837
2838	// canonicalize constant to RHS
2839	ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2840	ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2841	if (N0C && !N1C)
2842	return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
2843	N1, N0, CarryIn);
2844
2845	// fold (adde x, y, false) -> (addc x, y)
2846	if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
2847	return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1);
2848
2849	return SDValue();
2850	}
2851
2852	SDValue DAGCombiner::visitADDCARRY(SDNode *N) {
2853	SDValue N0 = N->getOperand(0);
2854	SDValue N1 = N->getOperand(1);
2855	SDValue CarryIn = N->getOperand(2);
2856	SDLoc DL(N);
2857
2858	// canonicalize constant to RHS
2859	ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2860	ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2861	if (N0C && !N1C)
2862	return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), N1, N0, CarryIn);
2863
2864	// fold (addcarry x, y, false) -> (uaddo x, y)
2865	if (isNullConstant(CarryIn)) {
2866	if (!LegalOperations \|\|
2867	TLI.isOperationLegalOrCustom(ISD::UADDO, N->getValueType(0)))
2868	return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N0, N1);
2869	}
2870
2871	// fold (addcarry 0, 0, X) -> (and (ext/trunc X), 1) and no carry.
2872	if (isNullConstant(N0) && isNullConstant(N1)) {
2873	EVT VT = N0.getValueType();
2874	EVT CarryVT = CarryIn.getValueType();
2875	SDValue CarryExt = DAG.getBoolExtOrTrunc(CarryIn, DL, VT, CarryVT);
2876	AddToWorklist(CarryExt.getNode());
2877	return CombineTo(N, DAG.getNode(ISD::AND, DL, VT, CarryExt,
2878	DAG.getConstant(1, DL, VT)),
2879	DAG.getConstant(0, DL, CarryVT));
2880	}
2881
2882	if (SDValue Combined = visitADDCARRYLike(N0, N1, CarryIn, N))
2883	return Combined;
2884
2885	if (SDValue Combined = visitADDCARRYLike(N1, N0, CarryIn, N))
2886	return Combined;
2887
2888	return SDValue();
2889	}
2890
2891	SDValue DAGCombiner::visitSADDO_CARRY(SDNode *N) {
2892	SDValue N0 = N->getOperand(0);
2893	SDValue N1 = N->getOperand(1);
2894	SDValue CarryIn = N->getOperand(2);
2895	SDLoc DL(N);
2896
2897	// canonicalize constant to RHS
2898	ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2899	ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2900	if (N0C && !N1C)
2901	return DAG.getNode(ISD::SADDO_CARRY, DL, N->getVTList(), N1, N0, CarryIn);
2902
2903	// fold (saddo_carry x, y, false) -> (saddo x, y)
2904	if (isNullConstant(CarryIn)) {
2905	if (!LegalOperations \|\|
2906	TLI.isOperationLegalOrCustom(ISD::SADDO, N->getValueType(0)))
2907	return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0, N1);
2908	}
2909
2910	return SDValue();
2911	}
2912
2913	/**
2914	* If we are facing some sort of diamond carry propapagtion pattern try to
2915	* break it up to generate something like:
2916	* (addcarry X, 0, (addcarry A, B, Z):Carry)
2917	*
2918	* The end result is usually an increase in operation required, but because the
2919	* carry is now linearized, other tranforms can kick in and optimize the DAG.
2920	*
2921	* Patterns typically look something like
2922	* (uaddo A, B)
2923	* / \
2924	* Carry Sum
2925	* \| \
2926	* \| (addcarry *, 0, Z)
2927	* \| /
2928	* \ Carry
2929	* \| /
2930	* (addcarry X, , )
2931	*
2932	* But numerous variation exist. Our goal is to identify A, B, X and Z and
2933	* produce a combine with a single path for carry propagation.
2934	*/
2935	static SDValue combineADDCARRYDiamond(DAGCombiner &Combiner, SelectionDAG &DAG,
2936	SDValue X, SDValue Carry0, SDValue Carry1,
2937	SDNode *N) {
2938	if (Carry1.getResNo() != 1 \|\| Carry0.getResNo() != 1)
2939	return SDValue();
2940	if (Carry1.getOpcode() != ISD::UADDO)
2941	return SDValue();
2942
2943	SDValue Z;
2944
2945	/**
2946	* First look for a suitable Z. It will present itself in the form of
2947	* (addcarry Y, 0, Z) or its equivalent (uaddo Y, 1) for Z=true
2948	*/
2949	if (Carry0.getOpcode() == ISD::ADDCARRY &&
2950	isNullConstant(Carry0.getOperand(1))) {
2951	Z = Carry0.getOperand(2);
2952	} else if (Carry0.getOpcode() == ISD::UADDO &&
2953	isOneConstant(Carry0.getOperand(1))) {
2954	EVT VT = Combiner.getSetCCResultType(Carry0.getValueType());
2955	Z = DAG.getConstant(1, SDLoc(Carry0.getOperand(1)), VT);
2956	} else {
2957	// We couldn't find a suitable Z.
2958	return SDValue();
2959	}
2960
2961
2962	auto cancelDiamond = [&](SDValue A,SDValue B) {
2963	SDLoc DL(N);
2964	SDValue NewY = DAG.getNode(ISD::ADDCARRY, DL, Carry0->getVTList(), A, B, Z);
2965	Combiner.AddToWorklist(NewY.getNode());
2966	return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), X,
2967	DAG.getConstant(0, DL, X.getValueType()),
2968	NewY.getValue(1));
2969	};
2970
2971	/**
2972	* (uaddo A, B)
2973	* \|
2974	* Sum
2975	* \|
2976	* (addcarry *, 0, Z)
2977	*/
2978	if (Carry0.getOperand(0) == Carry1.getValue(0)) {
2979	return cancelDiamond(Carry1.getOperand(0), Carry1.getOperand(1));
2980	}
2981
2982	/**
2983	* (addcarry A, 0, Z)
2984	* \|
2985	* Sum
2986	* \|
2987	* (uaddo *, B)
2988	*/
2989	if (Carry1.getOperand(0) == Carry0.getValue(0)) {
2990	return cancelDiamond(Carry0.getOperand(0), Carry1.getOperand(1));
2991	}
2992
2993	if (Carry1.getOperand(1) == Carry0.getValue(0)) {
2994	return cancelDiamond(Carry1.getOperand(0), Carry0.getOperand(0));
2995	}
2996
2997	return SDValue();
2998	}
2999
3000	// If we are facing some sort of diamond carry/borrow in/out pattern try to
3001	// match patterns like:
3002	//
3003	// (uaddo A, B) CarryIn
3004	// \| \ \|
3005	// \| \ \|
3006	// PartialSum PartialCarryOutX /
3007	// \| \| /
3008	// \| ____\|____________/
3009	// \| / \|
3010	// (uaddo , ) \________
3011	// \| \ \
3012	// \| \ \|
3013	// \| PartialCarryOutY \|
3014	// \| \ \|
3015	// \| \ /
3016	// AddCarrySum \| ______/
3017	// \| /
3018	// CarryOut = (or , )
3019	//
3020	// And generate ADDCARRY (or SUBCARRY) with two result values:
3021	//
3022	// {AddCarrySum, CarryOut} = (addcarry A, B, CarryIn)
3023	//
3024	// Our goal is to identify A, B, and CarryIn and produce ADDCARRY/SUBCARRY with
3025	// a single path for carry/borrow out propagation:
3026	static SDValue combineCarryDiamond(DAGCombiner &Combiner, SelectionDAG &DAG,
3027	const TargetLowering &TLI, SDValue Carry0,
3028	SDValue Carry1, SDNode *N) {
3029	if (Carry0.getResNo() != 1 \|\| Carry1.getResNo() != 1)
3030	return SDValue();
3031	unsigned Opcode = Carry0.getOpcode();
3032	if (Opcode != Carry1.getOpcode())
3033	return SDValue();
3034	if (Opcode != ISD::UADDO && Opcode != ISD::USUBO)
3035	return SDValue();
3036
3037	// Canonicalize the add/sub of A and B as Carry0 and the add/sub of the
3038	// carry/borrow in as Carry1. (The top and middle uaddo nodes respectively in
3039	// the above ASCII art.)
3040	if (Carry1.getOperand(0) != Carry0.getValue(0) &&
3041	Carry1.getOperand(1) != Carry0.getValue(0))
3042	std::swap(Carry0, Carry1);
3043	if (Carry1.getOperand(0) != Carry0.getValue(0) &&
3044	Carry1.getOperand(1) != Carry0.getValue(0))
3045	return SDValue();
3046
3047	// The carry in value must be on the righthand side for subtraction.
3048	unsigned CarryInOperandNum =
3049	Carry1.getOperand(0) == Carry0.getValue(0) ? 1 : 0;
3050	if (Opcode == ISD::USUBO && CarryInOperandNum != 1)
3051	return SDValue();
3052	SDValue CarryIn = Carry1.getOperand(CarryInOperandNum);
3053
3054	unsigned NewOp = Opcode == ISD::UADDO ? ISD::ADDCARRY : ISD::SUBCARRY;
3055	if (!TLI.isOperationLegalOrCustom(NewOp, Carry0.getValue(0).getValueType()))
3056	return SDValue();
3057
3058	// Verify that the carry/borrow in is plausibly a carry/borrow bit.
3059	// TODO: make getAsCarry() aware of how partial carries are merged.
3060	if (CarryIn.getOpcode() != ISD::ZERO_EXTEND)
3061	return SDValue();
3062	CarryIn = CarryIn.getOperand(0);
3063	if (CarryIn.getValueType() != MVT::i1)
3064	return SDValue();
3065
3066	SDLoc DL(N);
3067	SDValue Merged =
3068	DAG.getNode(NewOp, DL, Carry1->getVTList(), Carry0.getOperand(0),
3069	Carry0.getOperand(1), CarryIn);
3070
3071	// Please note that because we have proven that the result of the UADDO/USUBO
3072	// of A and B feeds into the UADDO/USUBO that does the carry/borrow in, we can
3073	// therefore prove that if the first UADDO/USUBO overflows, the second
3074	// UADDO/USUBO cannot. For example consider 8-bit numbers where 0xFF is the
3075	// maximum value.
3076	//
3077	// 0xFF + 0xFF == 0xFE with carry but 0xFE + 1 does not carry
3078	// 0x00 - 0xFF == 1 with a carry/borrow but 1 - 1 == 0 (no carry/borrow)
3079	//
3080	// This is important because it means that OR and XOR can be used to merge
3081	// carry flags; and that AND can return a constant zero.
3082	//
3083	// TODO: match other operations that can merge flags (ADD, etc)
3084	DAG.ReplaceAllUsesOfValueWith(Carry1.getValue(0), Merged.getValue(0));
3085	if (N->getOpcode() == ISD::AND)
3086	return DAG.getConstant(0, DL, MVT::i1);
3087	return Merged.getValue(1);
3088	}
3089
3090	SDValue DAGCombiner::visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
3091	SDNode *N) {
3092	// fold (addcarry (xor a, -1), b, c) -> (subcarry b, a, !c) and flip carry.
3093	if (isBitwiseNot(N0))
3094	if (SDValue NotC = extractBooleanFlip(CarryIn, DAG, TLI, true)) {
3095	SDLoc DL(N);
3096	SDValue Sub = DAG.getNode(ISD::SUBCARRY, DL, N->getVTList(), N1,
3097	N0.getOperand(0), NotC);
3098	return CombineTo(
3099	N, Sub, DAG.getLogicalNOT(DL, Sub.getValue(1), Sub->getValueType(1)));
3100	}
3101
3102	// Iff the flag result is dead:
3103	// (addcarry (add\|uaddo X, Y), 0, Carry) -> (addcarry X, Y, Carry)
3104	// Don't do this if the Carry comes from the uaddo. It won't remove the uaddo
3105	// or the dependency between the instructions.
3106	if ((N0.getOpcode() == ISD::ADD \|\|
3107	(N0.getOpcode() == ISD::UADDO && N0.getResNo() == 0 &&
3108	N0.getValue(1) != CarryIn)) &&
3109	isNullConstant(N1) && !N->hasAnyUseOfValue(1))
3110	return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(),
3111	N0.getOperand(0), N0.getOperand(1), CarryIn);
3112
3113	/**
3114	* When one of the addcarry argument is itself a carry, we may be facing
3115	* a diamond carry propagation. In which case we try to transform the DAG
3116	* to ensure linear carry propagation if that is possible.
3117	*/
3118	if (auto Y = getAsCarry(TLI, N1)) {
3119	// Because both are carries, Y and Z can be swapped.
3120	if (auto R = combineADDCARRYDiamond(*this, DAG, N0, Y, CarryIn, N))
3121	return R;
3122	if (auto R = combineADDCARRYDiamond(*this, DAG, N0, CarryIn, Y, N))
3123	return R;
3124	}
3125
3126	return SDValue();
3127	}
3128
3129	// Attempt to create a USUBSAT(LHS, RHS) node with DstVT, performing a
3130	// clamp/truncation if necessary.
3131	static SDValue getTruncatedUSUBSAT(EVT DstVT, EVT SrcVT, SDValue LHS,
3132	SDValue RHS, SelectionDAG &DAG,
3133	const SDLoc &DL) {
3134	assert(DstVT.getScalarSizeInBits() <= SrcVT.getScalarSizeInBits() &&((DstVT.getScalarSizeInBits() <= SrcVT.getScalarSizeInBits () && "Illegal truncation") ? static_cast<void> (0) : __assert_fail ("DstVT.getScalarSizeInBits() <= SrcVT.getScalarSizeInBits() && \"Illegal truncation\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 3135, __PRETTY_FUNCTION__))
3135	"Illegal truncation")((DstVT.getScalarSizeInBits() <= SrcVT.getScalarSizeInBits () && "Illegal truncation") ? static_cast<void> (0) : __assert_fail ("DstVT.getScalarSizeInBits() <= SrcVT.getScalarSizeInBits() && \"Illegal truncation\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 3135, __PRETTY_FUNCTION__));
3136
3137	if (DstVT == SrcVT)
3138	return DAG.getNode(ISD::USUBSAT, DL, DstVT, LHS, RHS);
3139
3140	// If the LHS is zero-extended then we can perform the USUBSAT as DstVT by
3141	// clamping RHS.
3142	APInt UpperBits = APInt::getBitsSetFrom(SrcVT.getScalarSizeInBits(),
3143	DstVT.getScalarSizeInBits());
3144	if (!DAG.MaskedValueIsZero(LHS, UpperBits))
3145	return SDValue();
3146
3147	SDValue SatLimit =
3148	DAG.getConstant(APInt::getLowBitsSet(SrcVT.getScalarSizeInBits(),
3149	DstVT.getScalarSizeInBits()),
3150	DL, SrcVT);
3151	RHS = DAG.getNode(ISD::UMIN, DL, SrcVT, RHS, SatLimit);
3152	RHS = DAG.getNode(ISD::TRUNCATE, DL, DstVT, RHS);
3153	LHS = DAG.getNode(ISD::TRUNCATE, DL, DstVT, LHS);
3154	return DAG.getNode(ISD::USUBSAT, DL, DstVT, LHS, RHS);
3155	}
3156
3157	// Try to find umax(a,b) - b or a - umin(a,b) patterns that may be converted to
3158	// usubsat(a,b), optionally as a truncated type.
3159	SDValue DAGCombiner::foldSubToUSubSat(EVT DstVT, SDNode *N) {
3160	if (N->getOpcode() != ISD::SUB \|\|
3161	!(!LegalOperations \|\| hasOperation(ISD::USUBSAT, DstVT)))
3162	return SDValue();
3163
3164	EVT SubVT = N->getValueType(0);
3165	SDValue Op0 = N->getOperand(0);
3166	SDValue Op1 = N->getOperand(1);
3167
3168	// Try to find umax(a,b) - b or a - umin(a,b) patterns
3169	// they may be converted to usubsat(a,b).
3170	if (Op0.getOpcode() == ISD::UMAX) {
3171	SDValue MaxLHS = Op0.getOperand(0);
3172	SDValue MaxRHS = Op0.getOperand(1);
3173	if (MaxLHS == Op1)
3174	return getTruncatedUSUBSAT(DstVT, SubVT, MaxRHS, Op1, DAG, SDLoc(N));
3175	if (MaxRHS == Op1)
3176	return getTruncatedUSUBSAT(DstVT, SubVT, MaxLHS, Op1, DAG, SDLoc(N));
3177	}
3178
3179	if (Op1.getOpcode() == ISD::UMIN) {
3180	SDValue MinLHS = Op1.getOperand(0);
3181	SDValue MinRHS = Op1.getOperand(1);
3182	if (MinLHS == Op0)
3183	return getTruncatedUSUBSAT(DstVT, SubVT, Op0, MinRHS, DAG, SDLoc(N));
3184	if (MinRHS == Op0)
3185	return getTruncatedUSUBSAT(DstVT, SubVT, Op0, MinLHS, DAG, SDLoc(N));
3186	}
3187
3188	// sub(a,trunc(umin(zext(a),b))) -> usubsat(a,trunc(umin(b,SatLimit)))
3189	if (Op1.getOpcode() == ISD::TRUNCATE &&
3190	Op1.getOperand(0).getOpcode() == ISD::UMIN) {
3191	SDValue MinLHS = Op1.getOperand(0).getOperand(0);
3192	SDValue MinRHS = Op1.getOperand(0).getOperand(1);
3193	if (MinLHS.getOpcode() == ISD::ZERO_EXTEND && MinLHS.getOperand(0) == Op0)
3194	return getTruncatedUSUBSAT(DstVT, MinLHS.getValueType(), MinLHS, MinRHS,
3195	DAG, SDLoc(N));
3196	if (MinRHS.getOpcode() == ISD::ZERO_EXTEND && MinRHS.getOperand(0) == Op0)
3197	return getTruncatedUSUBSAT(DstVT, MinLHS.getValueType(), MinRHS, MinLHS,
3198	DAG, SDLoc(N));
3199	}
3200
3201	return SDValue();
3202	}
3203
3204	// Since it may not be valid to emit a fold to zero for vector initializers
3205	// check if we can before folding.
3206	static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT,
3207	SelectionDAG &DAG, bool LegalOperations) {
3208	if (!VT.isVector())
3209	return DAG.getConstant(0, DL, VT);
3210	if (!LegalOperations \|\| TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
3211	return DAG.getConstant(0, DL, VT);
3212	return SDValue();
3213	}
3214
3215	SDValue DAGCombiner::visitSUB(SDNode *N) {
3216	SDValue N0 = N->getOperand(0);
3217	SDValue N1 = N->getOperand(1);
3218	EVT VT = N0.getValueType();
3219	SDLoc DL(N);
3220
3221	// fold vector ops
3222	if (VT.isVector()) {
3223	if (SDValue FoldedVOp = SimplifyVBinOp(N))
3224	return FoldedVOp;
3225
3226	// fold (sub x, 0) -> x, vector edition
3227	if (ISD::isBuildVectorAllZeros(N1.getNode()))
3228	return N0;
3229	}
3230
3231	// fold (sub x, x) -> 0
3232	// FIXME: Refactor this and xor and other similar operations together.
3233	if (N0 == N1)
3234	return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
3235
3236	// fold (sub c1, c2) -> c3
3237	if (SDValue C = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N1}))
3238	return C;
3239
3240	if (SDValue NewSel = foldBinOpIntoSelect(N))
3241	return NewSel;
3242
3243	ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
3244
3245	// fold (sub x, c) -> (add x, -c)
3246	if (N1C) {
3247	return DAG.getNode(ISD::ADD, DL, VT, N0,
3248	DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
3249	}
3250
3251	if (isNullOrNullSplat(N0)) {
3252	unsigned BitWidth = VT.getScalarSizeInBits();
3253	// Right-shifting everything out but the sign bit followed by negation is
3254	// the same as flipping arithmetic/logical shift type without the negation:
3255	// -(X >>u 31) -> (X >>s 31)
3256	// -(X >>s 31) -> (X >>u 31)
3257	if (N1->getOpcode() == ISD::SRA \|\| N1->getOpcode() == ISD::SRL) {
3258	ConstantSDNode *ShiftAmt = isConstOrConstSplat(N1.getOperand(1));
3259	if (ShiftAmt && ShiftAmt->getAPIntValue() == (BitWidth - 1)) {
3260	auto NewSh = N1->getOpcode() == ISD::SRA ? ISD::SRL : ISD::SRA;
3261	if (!LegalOperations \|\| TLI.isOperationLegal(NewSh, VT))
3262	return DAG.getNode(NewSh, DL, VT, N1.getOperand(0), N1.getOperand(1));
3263	}
3264	}
3265
3266	// 0 - X --> 0 if the sub is NUW.
3267	if (N->getFlags().hasNoUnsignedWrap())
3268	return N0;
3269
3270	if (DAG.MaskedValueIsZero(N1, ~APInt::getSignMask(BitWidth))) {
3271	// N1 is either 0 or the minimum signed value. If the sub is NSW, then
3272	// N1 must be 0 because negating the minimum signed value is undefined.
3273	if (N->getFlags().hasNoSignedWrap())
3274	return N0;
3275
3276	// 0 - X --> X if X is 0 or the minimum signed value.
3277	return N1;
3278	}
3279
3280	// Convert 0 - abs(x).
3281	SDValue Result;
3282	if (N1->getOpcode() == ISD::ABS &&
3283	!TLI.isOperationLegalOrCustom(ISD::ABS, VT) &&
3284	TLI.expandABS(N1.getNode(), Result, DAG, true))
3285	return Result;
3286	}
3287
3288	// Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
3289	if (isAllOnesOrAllOnesSplat(N0))
3290	return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
3291
3292	// fold (A - (0-B)) -> A+B
3293	if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
3294	return DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(1));
3295
3296	// fold A-(A-B) -> B
3297	if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
3298	return N1.getOperand(1);
3299
3300	// fold (A+B)-A -> B
3301	if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
3302	return N0.getOperand(1);
3303
3304	// fold (A+B)-B -> A
3305	if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
3306	return N0.getOperand(0);
3307
3308	// fold (A+C1)-C2 -> A+(C1-C2)
3309	if (N0.getOpcode() == ISD::ADD &&
3310	isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
3311	isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
3312	SDValue NewC =
3313	DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0.getOperand(1), N1});
3314	assert(NewC && "Constant folding failed")((NewC && "Constant folding failed") ? static_cast< void> (0) : __assert_fail ("NewC && \"Constant folding failed\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 3314, __PRETTY_FUNCTION__));
3315	return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), NewC);
3316	}
3317
3318	// fold C2-(A+C1) -> (C2-C1)-A
3319	if (N1.getOpcode() == ISD::ADD) {
3320	SDValue N11 = N1.getOperand(1);
3321	if (isConstantOrConstantVector(N0, /* NoOpaques */ true) &&
3322	isConstantOrConstantVector(N11, /* NoOpaques */ true)) {
3323	SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N11});
3324	assert(NewC && "Constant folding failed")((NewC && "Constant folding failed") ? static_cast< void> (0) : __assert_fail ("NewC && \"Constant folding failed\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 3324, __PRETTY_FUNCTION__));
3325	return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0));
3326	}
3327	}
3328
3329	// fold (A-C1)-C2 -> A-(C1+C2)
3330	if (N0.getOpcode() == ISD::SUB &&
3331	isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
3332	isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
3333	SDValue NewC =
3334	DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N0.getOperand(1), N1});
3335	assert(NewC && "Constant folding failed")((NewC && "Constant folding failed") ? static_cast< void> (0) : __assert_fail ("NewC && \"Constant folding failed\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 3335, __PRETTY_FUNCTION__));
3336	return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), NewC);
3337	}
3338
3339	// fold (c1-A)-c2 -> (c1-c2)-A
3340	if (N0.getOpcode() == ISD::SUB &&
3341	isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
3342	isConstantOrConstantVector(N0.getOperand(0), /* NoOpaques */ true)) {
3343	SDValue NewC =
3344	DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0.getOperand(0), N1});
3345	assert(NewC && "Constant folding failed")((NewC && "Constant folding failed") ? static_cast< void> (0) : __assert_fail ("NewC && \"Constant folding failed\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 3345, __PRETTY_FUNCTION__));
3346	return DAG.getNode(ISD::SUB, DL, VT, NewC, N0.getOperand(1));
3347	}
3348
3349	// fold ((A+(B+or-C))-B) -> A+or-C
3350	if (N0.getOpcode() == ISD::ADD &&
3351	(N0.getOperand(1).getOpcode() == ISD::SUB \|\|
3352	N0.getOperand(1).getOpcode() == ISD::ADD) &&
3353	N0.getOperand(1).getOperand(0) == N1)
3354	return DAG.getNode(N0.getOperand(1).getOpcode(), DL, VT, N0.getOperand(0),
3355	N0.getOperand(1).getOperand(1));
3356
3357	// fold ((A+(C+B))-B) -> A+C
3358	if (N0.getOpcode() == ISD::ADD && N0.getOperand(1).getOpcode() == ISD::ADD &&
3359	N0.getOperand(1).getOperand(1) == N1)
3360	return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0),
3361	N0.getOperand(1).getOperand(0));
3362
3363	// fold ((A-(B-C))-C) -> A-B
3364	if (N0.getOpcode() == ISD::SUB && N0.getOperand(1).getOpcode() == ISD::SUB &&
3365	N0.getOperand(1).getOperand(1) == N1)
3366	return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
3367	N0.getOperand(1).getOperand(0));
3368
3369	// fold (A-(B-C)) -> A+(C-B)
3370	if (N1.getOpcode() == ISD::SUB && N1.hasOneUse())
3371	return DAG.getNode(ISD::ADD, DL, VT, N0,
3372	DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(1),
3373	N1.getOperand(0)));
3374
3375	// A - (A & B) -> A & (~B)
3376	if (N1.getOpcode() == ISD::AND) {
3377	SDValue A = N1.getOperand(0);
3378	SDValue B = N1.getOperand(1);
3379	if (A != N0)
3380	std::swap(A, B);
3381	if (A == N0 &&
3382	(N1.hasOneUse() \|\| isConstantOrConstantVector(B, /NoOpaques=/true))) {
3383	SDValue InvB =
3384	DAG.getNode(ISD::XOR, DL, VT, B, DAG.getAllOnesConstant(DL, VT));
3385	return DAG.getNode(ISD::AND, DL, VT, A, InvB);
3386	}
3387	}
3388
3389	// fold (X - (-Y * Z)) -> (X + (Y * Z))
3390	if (N1.getOpcode() == ISD::MUL && N1.hasOneUse()) {
3391	if (N1.getOperand(0).getOpcode() == ISD::SUB &&
3392	isNullOrNullSplat(N1.getOperand(0).getOperand(0))) {
3393	SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
3394	N1.getOperand(0).getOperand(1),
3395	N1.getOperand(1));
3396	return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
3397	}
3398	if (N1.getOperand(1).getOpcode() == ISD::SUB &&
3399	isNullOrNullSplat(N1.getOperand(1).getOperand(0))) {
3400	SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
3401	N1.getOperand(0),
3402	N1.getOperand(1).getOperand(1));
3403	return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
3404	}
3405	}
3406
3407	// If either operand of a sub is undef, the result is undef
3408	if (N0.isUndef())
3409	return N0;
3410	if (N1.isUndef())
3411	return N1;
3412
3413	if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
3414	return V;
3415
3416	if (SDValue V = foldAddSubOfSignBit(N, DAG))
3417	return V;
3418
3419	if (SDValue V = foldAddSubMasked1(false, N0, N1, DAG, SDLoc(N)))
3420	return V;
3421
3422	if (SDValue V = foldSubToUSubSat(VT, N))
3423	return V;
3424
3425	// (x - y) - 1 -> add (xor y, -1), x
3426	if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB && isOneOrOneSplat(N1)) {
3427	SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1),
3428	DAG.getAllOnesConstant(DL, VT));
3429	return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0));
3430	}
3431
3432	// Look for:
3433	// sub y, (xor x, -1)
3434	// And if the target does not like this form then turn into:
3435	// add (add x, y), 1
3436	if (TLI.preferIncOfAddToSubOfNot(VT) && N1.hasOneUse() && isBitwiseNot(N1)) {
3437	SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(0));
3438	return DAG.getNode(ISD::ADD, DL, VT, Add, DAG.getConstant(1, DL, VT));
3439	}
3440
3441	// Hoist one-use addition by non-opaque constant:
3442	// (x + C) - y -> (x - y) + C
3443	if (N0.hasOneUse() && N0.getOpcode() == ISD::ADD &&
3444	isConstantOrConstantVector(N0.getOperand(1), /NoOpaques=/true)) {
3445	SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
3446	return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(1));
3447	}
3448	// y - (x + C) -> (y - x) - C
3449	if (N1.hasOneUse() && N1.getOpcode() == ISD::ADD &&
3450	isConstantOrConstantVector(N1.getOperand(1), /NoOpaques=/true)) {
3451	SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(0));
3452	return DAG.getNode(ISD::SUB, DL, VT, Sub, N1.getOperand(1));
3453	}
3454	// (x - C) - y -> (x - y) - C
3455	// This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
3456	if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
3457	isConstantOrConstantVector(N0.getOperand(1), /NoOpaques=/true)) {
3458	SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
3459	return DAG.getNode(ISD::SUB, DL, VT, Sub, N0.getOperand(1));
3460	}
3461	// (C - x) - y -> C - (x + y)
3462	if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
3463	isConstantOrConstantVector(N0.getOperand(0), /NoOpaques=/true)) {
3464	SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1), N1);
3465	return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), Add);
3466	}
3467
3468	// If the target's bool is represented as 0/-1, prefer to make this 'add 0/-1'
3469	// rather than 'sub 0/1' (the sext should get folded).
3470	// sub X, (zext i1 Y) --> add X, (sext i1 Y)
3471	if (N1.getOpcode() == ISD::ZERO_EXTEND &&
3472	N1.getOperand(0).getScalarValueSizeInBits() == 1 &&
3473	TLI.getBooleanContents(VT) ==
3474	TargetLowering::ZeroOrNegativeOneBooleanContent) {
3475	SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N1.getOperand(0));
3476	return DAG.getNode(ISD::ADD, DL, VT, N0, SExt);
3477	}
3478
3479	// fold Y = sra (X, size(X)-1); sub (xor (X, Y), Y) -> (abs X)
3480	if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
3481	if (N0.getOpcode() == ISD::XOR && N1.getOpcode() == ISD::SRA) {
3482	SDValue X0 = N0.getOperand(0), X1 = N0.getOperand(1);
3483	SDValue S0 = N1.getOperand(0);
3484	if ((X0 == S0 && X1 == N1) \|\| (X0 == N1 && X1 == S0))
3485	if (ConstantSDNode *C = isConstOrConstSplat(N1.getOperand(1)))
3486	if (C->getAPIntValue() == (VT.getScalarSizeInBits() - 1))
3487	return DAG.getNode(ISD::ABS, SDLoc(N), VT, S0);
3488	}
3489	}
3490
3491	// If the relocation model supports it, consider symbol offsets.
3492	if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
3493	if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
3494	// fold (sub Sym, c) -> Sym-c
3495	if (N1C && GA->getOpcode() == ISD::GlobalAddress)
3496	return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT,
3497	GA->getOffset() -
3498	(uint64_t)N1C->getSExtValue());
3499	// fold (sub Sym+c1, Sym+c2) -> c1-c2
3500	if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
3501	if (GA->getGlobal() == GB->getGlobal())
3502	return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
3503	DL, VT);
3504	}
3505
3506	// sub X, (sextinreg Y i1) -> add X, (and Y 1)
3507	if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
3508	VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
3509	if (TN->getVT() == MVT::i1) {
3510	SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
3511	DAG.getConstant(1, DL, VT));
3512	return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt);
3513	}
3514	}
3515
3516	// canonicalize (sub X, (vscale * C)) to (add X, (vscale * -C))
3517	if (N1.getOpcode() == ISD::VSCALE) {
3518	const APInt &IntVal = N1.getConstantOperandAPInt(0);
3519	return DAG.getNode(ISD::ADD, DL, VT, N0, DAG.getVScale(DL, VT, -IntVal));
3520	}
3521
3522	// Prefer an add for more folding potential and possibly better codegen:
3523	// sub N0, (lshr N10, width-1) --> add N0, (ashr N10, width-1)
3524	if (!LegalOperations && N1.getOpcode() == ISD::SRL && N1.hasOneUse()) {
3525	SDValue ShAmt = N1.getOperand(1);
3526	ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
3527	if (ShAmtC &&
3528	ShAmtC->getAPIntValue() == (N1.getScalarValueSizeInBits() - 1)) {
3529	SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0), ShAmt);
3530	return DAG.getNode(ISD::ADD, DL, VT, N0, SRA);
3531	}
3532	}
3533
3534	if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT)) {
3535	// (sub Carry, X) -> (addcarry (sub 0, X), 0, Carry)
3536	if (SDValue Carry = getAsCarry(TLI, N0)) {
3537	SDValue X = N1;
3538	SDValue Zero = DAG.getConstant(0, DL, VT);
3539	SDValue NegX = DAG.getNode(ISD::SUB, DL, VT, Zero, X);
3540	return DAG.getNode(ISD::ADDCARRY, DL,
3541	DAG.getVTList(VT, Carry.getValueType()), NegX, Zero,
3542	Carry);
3543	}
3544	}
3545
3546	return SDValue();
3547	}
3548
3549	SDValue DAGCombiner::visitSUBSAT(SDNode *N) {
3550	SDValue N0 = N->getOperand(0);
3551	SDValue N1 = N->getOperand(1);
3552	EVT VT = N0.getValueType();
3553	SDLoc DL(N);
3554
3555	// fold vector ops
3556	if (VT.isVector()) {
3557	// TODO SimplifyVBinOp
3558
3559	// fold (sub_sat x, 0) -> x, vector edition
3560	if (ISD::isBuildVectorAllZeros(N1.getNode()))
3561	return N0;
3562	}
3563
3564	// fold (sub_sat x, undef) -> 0
3565	if (N0.isUndef() \|\| N1.isUndef())
3566	return DAG.getConstant(0, DL, VT);
3567
3568	// fold (sub_sat x, x) -> 0
3569	if (N0 == N1)
3570	return DAG.getConstant(0, DL, VT);
3571
3572	// fold (sub_sat c1, c2) -> c3
3573	if (SDValue C = DAG.FoldConstantArithmetic(N->getOpcode(), DL, VT, {N0, N1}))
3574	return C;
3575
3576	// fold (sub_sat x, 0) -> x
3577	if (isNullConstant(N1))
3578	return N0;
3579
3580	return SDValue();
3581	}
3582
3583	SDValue DAGCombiner::visitSUBC(SDNode *N) {
3584	SDValue N0 = N->getOperand(0);
3585	SDValue N1 = N->getOperand(1);
3586	EVT VT = N0.getValueType();
3587	SDLoc DL(N);
3588
3589	// If the flag result is dead, turn this into an SUB.
3590	if (!N->hasAnyUseOfValue(1))
3591	return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
3592	DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3593
3594	// fold (subc x, x) -> 0 + no borrow
3595	if (N0 == N1)
3596	return CombineTo(N, DAG.getConstant(0, DL, VT),
3597	DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3598
3599	// fold (subc x, 0) -> x + no borrow
3600	if (isNullConstant(N1))
3601	return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3602
3603	// Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
3604	if (isAllOnesConstant(N0))
3605	return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
3606	DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3607
3608	return SDValue();
3609	}
3610
3611	SDValue DAGCombiner::visitSUBO(SDNode *N) {
3612	SDValue N0 = N->getOperand(0);
3613	SDValue N1 = N->getOperand(1);
3614	EVT VT = N0.getValueType();
3615	bool IsSigned = (ISD::SSUBO == N->getOpcode());
3616
3617	EVT CarryVT = N->getValueType(1);
3618	SDLoc DL(N);
3619
3620	// If the flag result is dead, turn this into an SUB.
3621	if (!N->hasAnyUseOfValue(1))
3622	return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
3623	DAG.getUNDEF(CarryVT));
3624
3625	// fold (subo x, x) -> 0 + no borrow
3626	if (N0 == N1)
3627	return CombineTo(N, DAG.getConstant(0, DL, VT),
3628	DAG.getConstant(0, DL, CarryVT));
3629
3630	ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
3631
3632	// fold (subox, c) -> (addo x, -c)
3633	if (IsSigned && N1C && !N1C->getAPIntValue().isMinSignedValue()) {
3634	return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0,
3635	DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
3636	}
3637
3638	// fold (subo x, 0) -> x + no borrow
3639	if (isNullOrNullSplat(N1))
3640	return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
3641
3642	// Canonicalize (usubo -1, x) -> ~x, i.e. (xor x, -1) + no borrow
3643	if (!IsSigned && isAllOnesOrAllOnesSplat(N0))
3644	return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
3645	DAG.getConstant(0, DL, CarryVT));
3646
3647	return SDValue();
3648	}
3649
3650	SDValue DAGCombiner::visitSUBE(SDNode *N) {
3651	SDValue N0 = N->getOperand(0);
3652	SDValue N1 = N->getOperand(1);
3653	SDValue CarryIn = N->getOperand(2);
3654
3655	// fold (sube x, y, false) -> (subc x, y)
3656	if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
3657	return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1);
3658
3659	return SDValue();
3660	}
3661
3662	SDValue DAGCombiner::visitSUBCARRY(SDNode *N) {
3663	SDValue N0 = N->getOperand(0);
3664	SDValue N1 = N->getOperand(1);
3665	SDValue CarryIn = N->getOperand(2);
3666
3667	// fold (subcarry x, y, false) -> (usubo x, y)
3668	if (isNullConstant(CarryIn)) {
3669	if (!LegalOperations \|\|
3670	TLI.isOperationLegalOrCustom(ISD::USUBO, N->getValueType(0)))
3671	return DAG.getNode(ISD::USUBO, SDLoc(N), N->getVTList(), N0, N1);
3672	}
3673
3674	return SDValue();
3675	}
3676
3677	SDValue DAGCombiner::visitSSUBO_CARRY(SDNode *N) {
3678	SDValue N0 = N->getOperand(0);
3679	SDValue N1 = N->getOperand(1);
3680	SDValue CarryIn = N->getOperand(2);
3681
3682	// fold (ssubo_carry x, y, false) -> (ssubo x, y)
3683	if (isNullConstant(CarryIn)) {
3684	if (!LegalOperations \|\|
3685	TLI.isOperationLegalOrCustom(ISD::SSUBO, N->getValueType(0)))
3686	return DAG.getNode(ISD::SSUBO, SDLoc(N), N->getVTList(), N0, N1);
3687	}
3688
3689	return SDValue();
3690	}
3691
3692	// Notice that "mulfix" can be any of SMULFIX, SMULFIXSAT, UMULFIX and
3693	// UMULFIXSAT here.
3694	SDValue DAGCombiner::visitMULFIX(SDNode *N) {
3695	SDValue N0 = N->getOperand(0);
3696	SDValue N1 = N->getOperand(1);
3697	SDValue Scale = N->getOperand(2);
3698	EVT VT = N0.getValueType();
3699
3700	// fold (mulfix x, undef, scale) -> 0
3701	if (N0.isUndef() \|\| N1.isUndef())
3702	return DAG.getConstant(0, SDLoc(N), VT);
3703
3704	// Canonicalize constant to RHS (vector doesn't have to splat)
3705	if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3706	!DAG.isConstantIntBuildVectorOrConstantInt(N1))
3707	return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0, Scale);
3708
3709	// fold (mulfix x, 0, scale) -> 0
3710	if (isNullConstant(N1))
3711	return DAG.getConstant(0, SDLoc(N), VT);
3712
3713	return SDValue();
3714	}
3715
3716	SDValue DAGCombiner::visitMUL(SDNode *N) {
3717	SDValue N0 = N->getOperand(0);
3718	SDValue N1 = N->getOperand(1);
3719	EVT VT = N0.getValueType();
3720
3721	// fold (mul x, undef) -> 0
3722	if (N0.isUndef() \|\| N1.isUndef())
3723	return DAG.getConstant(0, SDLoc(N), VT);
3724
3725	bool N1IsConst = false;
3726	bool N1IsOpaqueConst = false;
3727	APInt ConstValue1;
3728
3729	// fold vector ops
3730	if (VT.isVector()) {
3731	if (SDValue FoldedVOp = SimplifyVBinOp(N))
3732	return FoldedVOp;
3733
3734	N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1);
3735	assert((!N1IsConst \|\|(((!N1IsConst \|\| ConstValue1.getBitWidth() == VT.getScalarSizeInBits ()) && "Splat APInt should be element width") ? static_cast <void> (0) : __assert_fail ("(!N1IsConst \|\| ConstValue1.getBitWidth() == VT.getScalarSizeInBits()) && \"Splat APInt should be element width\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 3737, __PRETTY_FUNCTION__))
3736	ConstValue1.getBitWidth() == VT.getScalarSizeInBits()) &&(((!N1IsConst \|\| ConstValue1.getBitWidth() == VT.getScalarSizeInBits ()) && "Splat APInt should be element width") ? static_cast <void> (0) : __assert_fail ("(!N1IsConst \|\| ConstValue1.getBitWidth() == VT.getScalarSizeInBits()) && \"Splat APInt should be element width\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 3737, __PRETTY_FUNCTION__))
3737	"Splat APInt should be element width")(((!N1IsConst \|\| ConstValue1.getBitWidth() == VT.getScalarSizeInBits ()) && "Splat APInt should be element width") ? static_cast <void> (0) : __assert_fail ("(!N1IsConst \|\| ConstValue1.getBitWidth() == VT.getScalarSizeInBits()) && \"Splat APInt should be element width\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 3737, __PRETTY_FUNCTION__));
3738	} else {
3739	N1IsConst = isa<ConstantSDNode>(N1);
3740	if (N1IsConst) {
3741	ConstValue1 = cast<ConstantSDNode>(N1)->getAPIntValue();
3742	N1IsOpaqueConst = cast<ConstantSDNode>(N1)->isOpaque();
3743	}
3744	}
3745
3746	// fold (mul c1, c2) -> c1*c2
3747	if (SDValue C = DAG.FoldConstantArithmetic(ISD::MUL, SDLoc(N), VT, {N0, N1}))
3748	return C;
3749
3750	// canonicalize constant to RHS (vector doesn't have to splat)
3751	if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3752	!DAG.isConstantIntBuildVectorOrConstantInt(N1))
3753	return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0);
3754
3755	// fold (mul x, 0) -> 0
3756	if (N1IsConst && ConstValue1.isNullValue())
3757	return N1;
3758
3759	// fold (mul x, 1) -> x
3760	if (N1IsConst && ConstValue1.isOneValue())
3761	return N0;
3762
3763	if (SDValue NewSel = foldBinOpIntoSelect(N))
3764	return NewSel;
3765
3766	// fold (mul x, -1) -> 0-x
3767	if (N1IsConst && ConstValue1.isAllOnesValue()) {
3768	SDLoc DL(N);
3769	return DAG.getNode(ISD::SUB, DL, VT,
3770	DAG.getConstant(0, DL, VT), N0);
3771	}
3772
3773	// fold (mul x, (1 << c)) -> x << c
3774	if (isConstantOrConstantVector(N1, /NoOpaques/ true) &&
3775	DAG.isKnownToBeAPowerOfTwo(N1) &&
3776	(!VT.isVector() \|\| Level <= AfterLegalizeVectorOps)) {
3777	SDLoc DL(N);
3778	SDValue LogBase2 = BuildLogBase2(N1, DL);
3779	EVT ShiftVT = getShiftAmountTy(N0.getValueType());
3780	SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
3781	return DAG.getNode(ISD::SHL, DL, VT, N0, Trunc);
3782	}
3783
3784	// fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
3785	if (N1IsConst && !N1IsOpaqueConst && (-ConstValue1).isPowerOf2()) {
3786	unsigned Log2Val = (-ConstValue1).logBase2();
3787	SDLoc DL(N);
3788	// FIXME: If the input is something that is easily negated (e.g. a
3789	// single-use add), we should put the negate there.
3790	return DAG.getNode(ISD::SUB, DL, VT,
3791	DAG.getConstant(0, DL, VT),
3792	DAG.getNode(ISD::SHL, DL, VT, N0,
3793	DAG.getConstant(Log2Val, DL,
3794	getShiftAmountTy(N0.getValueType()))));
3795	}
3796
3797	// Try to transform:
3798	// (1) multiply-by-(power-of-2 +/- 1) into shift and add/sub.
3799	// mul x, (2^N + 1) --> add (shl x, N), x
3800	// mul x, (2^N - 1) --> sub (shl x, N), x
3801	// Examples: x * 33 --> (x << 5) + x
3802	// x * 15 --> (x << 4) - x
3803	// x * -33 --> -((x << 5) + x)
3804	// x * -15 --> -((x << 4) - x) ; this reduces --> x - (x << 4)
3805	// (2) multiply-by-(power-of-2 +/- power-of-2) into shifts and add/sub.
3806	// mul x, (2^N + 2^M) --> (add (shl x, N), (shl x, M))
3807	// mul x, (2^N - 2^M) --> (sub (shl x, N), (shl x, M))
3808	// Examples: x * 0x8800 --> (x << 15) + (x << 11)
3809	// x * 0xf800 --> (x << 16) - (x << 11)
3810	// x * -0x8800 --> -((x << 15) + (x << 11))
3811	// x * -0xf800 --> -((x << 16) - (x << 11)) ; (x << 11) - (x << 16)
3812	if (N1IsConst && TLI.decomposeMulByConstant(*DAG.getContext(), VT, N1)) {
3813	// TODO: We could handle more general decomposition of any constant by
3814	// having the target set a limit on number of ops and making a
3815	// callback to determine that sequence (similar to sqrt expansion).
3816	unsigned MathOp = ISD::DELETED_NODE;
3817	APInt MulC = ConstValue1.abs();
3818	// The constant `2` should be treated as (2^0 + 1).
3819	unsigned TZeros = MulC == 2 ? 0 : MulC.countTrailingZeros();
3820	MulC.lshrInPlace(TZeros);
3821	if ((MulC - 1).isPowerOf2())
3822	MathOp = ISD::ADD;
3823	else if ((MulC + 1).isPowerOf2())
3824	MathOp = ISD::SUB;
3825
3826	if (MathOp != ISD::DELETED_NODE) {
3827	unsigned ShAmt =
3828	MathOp == ISD::ADD ? (MulC - 1).logBase2() : (MulC + 1).logBase2();
3829	ShAmt += TZeros;
3830	assert(ShAmt < VT.getScalarSizeInBits() &&((ShAmt < VT.getScalarSizeInBits() && "multiply-by-constant generated out of bounds shift" ) ? static_cast<void> (0) : __assert_fail ("ShAmt < VT.getScalarSizeInBits() && \"multiply-by-constant generated out of bounds shift\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 3831, __PRETTY_FUNCTION__))
3831	"multiply-by-constant generated out of bounds shift")((ShAmt < VT.getScalarSizeInBits() && "multiply-by-constant generated out of bounds shift" ) ? static_cast<void> (0) : __assert_fail ("ShAmt < VT.getScalarSizeInBits() && \"multiply-by-constant generated out of bounds shift\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 3831, __PRETTY_FUNCTION__));
3832	SDLoc DL(N);
3833	SDValue Shl =
3834	DAG.getNode(ISD::SHL, DL, VT, N0, DAG.getConstant(ShAmt, DL, VT));
3835	SDValue R =
3836	TZeros ? DAG.getNode(MathOp, DL, VT, Shl,
3837	DAG.getNode(ISD::SHL, DL, VT, N0,
3838	DAG.getConstant(TZeros, DL, VT)))
3839	: DAG.getNode(MathOp, DL, VT, Shl, N0);
3840	if (ConstValue1.isNegative())
3841	R = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), R);
3842	return R;
3843	}
3844	}
3845
3846	// (mul (shl X, c1), c2) -> (mul X, c2 << c1)
3847	if (N0.getOpcode() == ISD::SHL &&
3848	isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
3849	isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
3850	SDValue C3 = DAG.getNode(ISD::SHL, SDLoc(N), VT, N1, N0.getOperand(1));
3851	if (isConstantOrConstantVector(C3))
3852	return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), C3);
3853	}
3854
3855	// Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
3856	// use.
3857	{
3858	SDValue Sh(nullptr, 0), Y(nullptr, 0);
3859
3860	// Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)).
3861	if (N0.getOpcode() == ISD::SHL &&
3862	isConstantOrConstantVector(N0.getOperand(1)) &&
3863	N0.getNode()->hasOneUse()) {
3864	Sh = N0; Y = N1;
3865	} else if (N1.getOpcode() == ISD::SHL &&
3866	isConstantOrConstantVector(N1.getOperand(1)) &&
3867	N1.getNode()->hasOneUse()) {
3868	Sh = N1; Y = N0;
3869	}
3870
3871	if (Sh.getNode()) {
3872	SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT, Sh.getOperand(0), Y);
3873	return DAG.getNode(ISD::SHL, SDLoc(N), VT, Mul, Sh.getOperand(1));
3874	}
3875	}
3876
3877	// fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
3878	if (DAG.isConstantIntBuildVectorOrConstantInt(N1) &&
3879	N0.getOpcode() == ISD::ADD &&
3880	DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)) &&
3881	isMulAddWithConstProfitable(N, N0, N1))
3882	return DAG.getNode(ISD::ADD, SDLoc(N), VT,
3883	DAG.getNode(ISD::MUL, SDLoc(N0), VT,
3884	N0.getOperand(0), N1),
3885	DAG.getNode(ISD::MUL, SDLoc(N1), VT,
3886	N0.getOperand(1), N1));
3887
3888	// Fold (mul (vscale * C0), C1) to (vscale * (C0 * C1)).
3889	if (N0.getOpcode() == ISD::VSCALE)
3890	if (ConstantSDNode *NC1 = isConstOrConstSplat(N1)) {
3891	const APInt &C0 = N0.getConstantOperandAPInt(0);
3892	const APInt &C1 = NC1->getAPIntValue();
3893	return DAG.getVScale(SDLoc(N), VT, C0 * C1);
3894	}
3895
3896	// Fold ((mul x, 0/undef) -> 0,
3897	// (mul x, 1) -> x) -> x)
3898	// -> and(x, mask)
3899	// We can replace vectors with '0' and '1' factors with a clearing mask.
3900	if (VT.isFixedLengthVector()) {
3901	unsigned NumElts = VT.getVectorNumElements();
3902	SmallBitVector ClearMask;
3903	ClearMask.reserve(NumElts);
3904	auto IsClearMask = [&ClearMask](ConstantSDNode *V) {
3905	if (!V \|\| V->isNullValue()) {
3906	ClearMask.push_back(true);
3907	return true;
3908	}
3909	ClearMask.push_back(false);
3910	return V->isOne();
3911	};
3912	if ((!LegalOperations \|\| TLI.isOperationLegalOrCustom(ISD::AND, VT)) &&
3913	ISD::matchUnaryPredicate(N1, IsClearMask, /AllowUndefs/ true)) {
3914	assert(N1.getOpcode() == ISD::BUILD_VECTOR && "Unknown constant vector")((N1.getOpcode() == ISD::BUILD_VECTOR && "Unknown constant vector" ) ? static_cast<void> (0) : __assert_fail ("N1.getOpcode() == ISD::BUILD_VECTOR && \"Unknown constant vector\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 3914, __PRETTY_FUNCTION__));
3915	SDLoc DL(N);
3916	EVT LegalSVT = N1.getOperand(0).getValueType();
3917	SDValue Zero = DAG.getConstant(0, DL, LegalSVT);
3918	SDValue AllOnes = DAG.getAllOnesConstant(DL, LegalSVT);
3919	SmallVector<SDValue, 16> Mask(NumElts, AllOnes);
3920	for (unsigned I = 0; I != NumElts; ++I)
3921	if (ClearMask[I])
3922	Mask[I] = Zero;
3923	return DAG.getNode(ISD::AND, DL, VT, N0, DAG.getBuildVector(VT, DL, Mask));
3924	}
3925	}
3926
3927	// reassociate mul
3928	if (SDValue RMUL = reassociateOps(ISD::MUL, SDLoc(N), N0, N1, N->getFlags()))
3929	return RMUL;
3930
3931	return SDValue();
3932	}
3933
3934	/// Return true if divmod libcall is available.
3935	static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
3936	const TargetLowering &TLI) {
3937	RTLIB::Libcall LC;
3938	EVT NodeType = Node->getValueType(0);
3939	if (!NodeType.isSimple())
3940	return false;
3941	switch (NodeType.getSimpleVT().SimpleTy) {
3942	default: return false; // No libcall for vector types.
3943	case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
3944	case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
3945	case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
3946	case MVT::i64: LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
3947	case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break;
3948	}
3949
3950	return TLI.getLibcallName(LC) != nullptr;
3951	}
3952
3953	/// Issue divrem if both quotient and remainder are needed.
3954	SDValue DAGCombiner::useDivRem(SDNode *Node) {
3955	if (Node->use_empty())
3956	return SDValue(); // This is a dead node, leave it alone.
3957
3958	unsigned Opcode = Node->getOpcode();
3959	bool isSigned = (Opcode == ISD::SDIV) \|\| (Opcode == ISD::SREM);
3960	unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
3961
3962	// DivMod lib calls can still work on non-legal types if using lib-calls.
3963	EVT VT = Node->getValueType(0);
3964	if (VT.isVector() \|\| !VT.isInteger())
3965	return SDValue();
3966
3967	if (!TLI.isTypeLegal(VT) && !TLI.isOperationCustom(DivRemOpc, VT))
3968	return SDValue();
3969
3970	// If DIVREM is going to get expanded into a libcall,
3971	// but there is no libcall available, then don't combine.
3972	if (!TLI.isOperationLegalOrCustom(DivRemOpc, VT) &&
3973	!isDivRemLibcallAvailable(Node, isSigned, TLI))
3974	return SDValue();
3975
3976	// If div is legal, it's better to do the normal expansion
3977	unsigned OtherOpcode = 0;
3978	if ((Opcode == ISD::SDIV) \|\| (Opcode == ISD::UDIV)) {
3979	OtherOpcode = isSigned ? ISD::SREM : ISD::UREM;
3980	if (TLI.isOperationLegalOrCustom(Opcode, VT))
3981	return SDValue();
3982	} else {
3983	OtherOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
3984	if (TLI.isOperationLegalOrCustom(OtherOpcode, VT))
3985	return SDValue();
3986	}
3987
3988	SDValue Op0 = Node->getOperand(0);
3989	SDValue Op1 = Node->getOperand(1);
3990	SDValue combined;
3991	for (SDNode::use_iterator UI = Op0.getNode()->use_begin(),
3992	UE = Op0.getNode()->use_end(); UI != UE; ++UI) {
3993	SDNode User = UI;
3994	if (User == Node \|\| User->getOpcode() == ISD::DELETED_NODE \|\|
3995	User->use_empty())
3996	continue;
3997	// Convert the other matching node(s), too;
3998	// otherwise, the DIVREM may get target-legalized into something
3999	// target-specific that we won't be able to recognize.
4000	unsigned UserOpc = User->getOpcode();
4001	if ((UserOpc == Opcode \|\| UserOpc == OtherOpcode \|\| UserOpc == DivRemOpc) &&
4002	User->getOperand(0) == Op0 &&
4003	User->getOperand(1) == Op1) {
4004	if (!combined) {
4005	if (UserOpc == OtherOpcode) {
4006	SDVTList VTs = DAG.getVTList(VT, VT);
4007	combined = DAG.getNode(DivRemOpc, SDLoc(Node), VTs, Op0, Op1);
4008	} else if (UserOpc == DivRemOpc) {
4009	combined = SDValue(User, 0);
4010	} else {
4011	assert(UserOpc == Opcode)((UserOpc == Opcode) ? static_cast<void> (0) : __assert_fail ("UserOpc == Opcode", "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 4011, __PRETTY_FUNCTION__));
4012	continue;
4013	}
4014	}
4015	if (UserOpc == ISD::SDIV \|\| UserOpc == ISD::UDIV)
4016	CombineTo(User, combined);
4017	else if (UserOpc == ISD::SREM \|\| UserOpc == ISD::UREM)
4018	CombineTo(User, combined.getValue(1));
4019	}
4020	}
4021	return combined;
4022	}
4023
4024	static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) {
4025	SDValue N0 = N->getOperand(0);
4026	SDValue N1 = N->getOperand(1);
4027	EVT VT = N->getValueType(0);
4028	SDLoc DL(N);
4029
4030	unsigned Opc = N->getOpcode();
4031	bool IsDiv = (ISD::SDIV == Opc) \|\| (ISD::UDIV == Opc);
4032	ConstantSDNode *N1C = isConstOrConstSplat(N1);
4033
4034	// X / undef -> undef
4035	// X % undef -> undef
4036	// X / 0 -> undef
4037	// X % 0 -> undef
4038	// NOTE: This includes vectors where any divisor element is zero/undef.
4039	if (DAG.isUndef(Opc, {N0, N1}))
4040	return DAG.getUNDEF(VT);
4041
4042	// undef / X -> 0
4043	// undef % X -> 0
4044	if (N0.isUndef())
4045	return DAG.getConstant(0, DL, VT);
4046
4047	// 0 / X -> 0
4048	// 0 % X -> 0
4049	ConstantSDNode *N0C = isConstOrConstSplat(N0);
4050	if (N0C && N0C->isNullValue())
4051	return N0;
4052
4053	// X / X -> 1
4054	// X % X -> 0
4055	if (N0 == N1)
4056	return DAG.getConstant(IsDiv ? 1 : 0, DL, VT);
4057
4058	// X / 1 -> X
4059	// X % 1 -> 0
4060	// If this is a boolean op (single-bit element type), we can't have
4061	// division-by-zero or remainder-by-zero, so assume the divisor is 1.
4062	// TODO: Similarly, if we're zero-extending a boolean divisor, then assume
4063	// it's a 1.
4064	if ((N1C && N1C->isOne()) \|\| (VT.getScalarType() == MVT::i1))
4065	return IsDiv ? N0 : DAG.getConstant(0, DL, VT);
4066
4067	return SDValue();
4068	}
4069
4070	SDValue DAGCombiner::visitSDIV(SDNode *N) {
4071	SDValue N0 = N->getOperand(0);
4072	SDValue N1 = N->getOperand(1);
4073	EVT VT = N->getValueType(0);
4074	EVT CCVT = getSetCCResultType(VT);
4075
4076	// fold vector ops
4077	if (VT.isVector())
4078	if (SDValue FoldedVOp = SimplifyVBinOp(N))
4079	return FoldedVOp;
4080
4081	SDLoc DL(N);
4082
4083	// fold (sdiv c1, c2) -> c1/c2
4084	ConstantSDNode *N1C = isConstOrConstSplat(N1);
4085	if (SDValue C = DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, {N0, N1}))
4086	return C;
4087
4088	// fold (sdiv X, -1) -> 0-X
4089	if (N1C && N1C->isAllOnesValue())
4090	return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0);
4091
4092	// fold (sdiv X, MIN_SIGNED) -> select(X == MIN_SIGNED, 1, 0)
4093	if (N1C && N1C->getAPIntValue().isMinSignedValue())
4094	return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
4095	DAG.getConstant(1, DL, VT),
4096	DAG.getConstant(0, DL, VT));
4097
4098	if (SDValue V = simplifyDivRem(N, DAG))
4099	return V;
4100
4101	if (SDValue NewSel = foldBinOpIntoSelect(N))
4102	return NewSel;
4103
4104	// If we know the sign bits of both operands are zero, strength reduce to a
4105	// udiv instead. Handles (X&15) /s 4 -> X&15 >> 2
4106	if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
4107	return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1);
4108
4109	if (SDValue V = visitSDIVLike(N0, N1, N)) {
4110	// If the corresponding remainder node exists, update its users with
4111	// (Dividend - (Quotient * Divisor).
4112	if (SDNode *RemNode = DAG.getNodeIfExists(ISD::SREM, N->getVTList(),
4113	{ N0, N1 })) {
4114	SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
4115	SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
4116	AddToWorklist(Mul.getNode());
4117	AddToWorklist(Sub.getNode());
4118	CombineTo(RemNode, Sub);
4119	}
4120	return V;
4121	}
4122
4123	// sdiv, srem -> sdivrem
4124	// If the divisor is constant, then return DIVREM only if isIntDivCheap() is
4125	// true. Otherwise, we break the simplification logic in visitREM().
4126	AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4127	if (!N1C \|\| TLI.isIntDivCheap(N->getValueType(0), Attr))
4128	if (SDValue DivRem = useDivRem(N))
4129	return DivRem;
4130
4131	return SDValue();
4132	}
4133
4134	SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) {
4135	SDLoc DL(N);
4136	EVT VT = N->getValueType(0);
4137	EVT CCVT = getSetCCResultType(VT);
4138	unsigned BitWidth = VT.getScalarSizeInBits();
4139
4140	// Helper for determining whether a value is a power-2 constant scalar or a
4141	// vector of such elements.
4142	auto IsPowerOfTwo = [](ConstantSDNode *C) {
4143	if (C->isNullValue() \|\| C->isOpaque())
4144	return false;
4145	if (C->getAPIntValue().isPowerOf2())
4146	return true;
4147	if ((-C->getAPIntValue()).isPowerOf2())
4148	return true;
4149	return false;
4150	};
4151
4152	// fold (sdiv X, pow2) -> simple ops after legalize
4153	// FIXME: We check for the exact bit here because the generic lowering gives
4154	// better results in that case. The target-specific lowering should learn how
4155	// to handle exact sdivs efficiently.
4156	if (!N->getFlags().hasExact() && ISD::matchUnaryPredicate(N1, IsPowerOfTwo)) {
4157	// Target-specific implementation of sdiv x, pow2.
4158	if (SDValue Res = BuildSDIVPow2(N))
4159	return Res;
4160
4161	// Create constants that are functions of the shift amount value.
4162	EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
4163	SDValue Bits = DAG.getConstant(BitWidth, DL, ShiftAmtTy);
4164	SDValue C1 = DAG.getNode(ISD::CTTZ, DL, VT, N1);
4165	C1 = DAG.getZExtOrTrunc(C1, DL, ShiftAmtTy);
4166	SDValue Inexact = DAG.getNode(ISD::SUB, DL, ShiftAmtTy, Bits, C1);
4167	if (!isConstantOrConstantVector(Inexact))
4168	return SDValue();
4169
4170	// Splat the sign bit into the register
4171	SDValue Sign = DAG.getNode(ISD::SRA, DL, VT, N0,
4172	DAG.getConstant(BitWidth - 1, DL, ShiftAmtTy));
4173	AddToWorklist(Sign.getNode());
4174
4175	// Add (N0 < 0) ? abs2 - 1 : 0;
4176	SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, Sign, Inexact);
4177	AddToWorklist(Srl.getNode());
4178	SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Srl);
4179	AddToWorklist(Add.getNode());
4180	SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Add, C1);
4181	AddToWorklist(Sra.getNode());
4182
4183	// Special case: (sdiv X, 1) -> X
4184	// Special Case: (sdiv X, -1) -> 0-X
4185	SDValue One = DAG.getConstant(1, DL, VT);
4186	SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
4187	SDValue IsOne = DAG.getSetCC(DL, CCVT, N1, One, ISD::SETEQ);
4188	SDValue IsAllOnes = DAG.getSetCC(DL, CCVT, N1, AllOnes, ISD::SETEQ);
4189	SDValue IsOneOrAllOnes = DAG.getNode(ISD::OR, DL, CCVT, IsOne, IsAllOnes);
4190	Sra = DAG.getSelect(DL, VT, IsOneOrAllOnes, N0, Sra);
4191
4192	// If dividing by a positive value, we're done. Otherwise, the result must
4193	// be negated.
4194	SDValue Zero = DAG.getConstant(0, DL, VT);
4195	SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, Zero, Sra);
4196
4197	// FIXME: Use SELECT_CC once we improve SELECT_CC constant-folding.
4198	SDValue IsNeg = DAG.getSetCC(DL, CCVT, N1, Zero, ISD::SETLT);
4199	SDValue Res = DAG.getSelect(DL, VT, IsNeg, Sub, Sra);
4200	return Res;
4201	}
4202
4203	// If integer divide is expensive and we satisfy the requirements, emit an
4204	// alternate sequence. Targets may check function attributes for size/speed
4205	// trade-offs.
4206	AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4207	if (isConstantOrConstantVector(N1) &&
4208	!TLI.isIntDivCheap(N->getValueType(0), Attr))
4209	if (SDValue Op = BuildSDIV(N))
4210	return Op;
4211
4212	return SDValue();
4213	}
4214
4215	SDValue DAGCombiner::visitUDIV(SDNode *N) {
4216	SDValue N0 = N->getOperand(0);
4217	SDValue N1 = N->getOperand(1);
4218	EVT VT = N->getValueType(0);
4219	EVT CCVT = getSetCCResultType(VT);
4220
4221	// fold vector ops
4222	if (VT.isVector())
4223	if (SDValue FoldedVOp = SimplifyVBinOp(N))
4224	return FoldedVOp;
4225
4226	SDLoc DL(N);
4227
4228	// fold (udiv c1, c2) -> c1/c2
4229	ConstantSDNode *N1C = isConstOrConstSplat(N1);
4230	if (SDValue C = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT, {N0, N1}))
4231	return C;
4232
4233	// fold (udiv X, -1) -> select(X == -1, 1, 0)
4234	if (N1C && N1C->getAPIntValue().isAllOnesValue())
4235	return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
4236	DAG.getConstant(1, DL, VT),
4237	DAG.getConstant(0, DL, VT));
4238
4239	if (SDValue V = simplifyDivRem(N, DAG))
4240	return V;
4241
4242	if (SDValue NewSel = foldBinOpIntoSelect(N))
4243	return NewSel;
4244
4245	if (SDValue V = visitUDIVLike(N0, N1, N)) {
4246	// If the corresponding remainder node exists, update its users with
4247	// (Dividend - (Quotient * Divisor).
4248	if (SDNode *RemNode = DAG.getNodeIfExists(ISD::UREM, N->getVTList(),
4249	{ N0, N1 })) {
4250	SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
4251	SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
4252	AddToWorklist(Mul.getNode());
4253	AddToWorklist(Sub.getNode());
4254	CombineTo(RemNode, Sub);
4255	}
4256	return V;
4257	}
4258
4259	// sdiv, srem -> sdivrem
4260	// If the divisor is constant, then return DIVREM only if isIntDivCheap() is
4261	// true. Otherwise, we break the simplification logic in visitREM().
4262	AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4263	if (!N1C \|\| TLI.isIntDivCheap(N->getValueType(0), Attr))
4264	if (SDValue DivRem = useDivRem(N))
4265	return DivRem;
4266
4267	return SDValue();
4268	}
4269
4270	SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
4271	SDLoc DL(N);
4272	EVT VT = N->getValueType(0);
4273
4274	// fold (udiv x, (1 << c)) -> x >>u c
4275	if (isConstantOrConstantVector(N1, /NoOpaques/ true) &&
4276	DAG.isKnownToBeAPowerOfTwo(N1)) {
4277	SDValue LogBase2 = BuildLogBase2(N1, DL);
4278	AddToWorklist(LogBase2.getNode());
4279
4280	EVT ShiftVT = getShiftAmountTy(N0.getValueType());
4281	SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
4282	AddToWorklist(Trunc.getNode());
4283	return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
4284	}
4285
4286	// fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
4287	if (N1.getOpcode() == ISD::SHL) {
4288	SDValue N10 = N1.getOperand(0);
4289	if (isConstantOrConstantVector(N10, /NoOpaques/ true) &&
4290	DAG.isKnownToBeAPowerOfTwo(N10)) {
4291	SDValue LogBase2 = BuildLogBase2(N10, DL);
4292	AddToWorklist(LogBase2.getNode());
4293
4294	EVT ADDVT = N1.getOperand(1).getValueType();
4295	SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ADDVT);
4296	AddToWorklist(Trunc.getNode());
4297	SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT, N1.getOperand(1), Trunc);
4298	AddToWorklist(Add.getNode());
4299	return DAG.getNode(ISD::SRL, DL, VT, N0, Add);
4300	}
4301	}
4302
4303	// fold (udiv x, c) -> alternate
4304	AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4305	if (isConstantOrConstantVector(N1) &&
4306	!TLI.isIntDivCheap(N->getValueType(0), Attr))
4307	if (SDValue Op = BuildUDIV(N))
4308	return Op;
4309
4310	return SDValue();
4311	}
4312
4313	// handles ISD::SREM and ISD::UREM
4314	SDValue DAGCombiner::visitREM(SDNode *N) {
4315	unsigned Opcode = N->getOpcode();
4316	SDValue N0 = N->getOperand(0);
4317	SDValue N1 = N->getOperand(1);
4318	EVT VT = N->getValueType(0);
4319	EVT CCVT = getSetCCResultType(VT);
4320
4321	bool isSigned = (Opcode == ISD::SREM);
4322	SDLoc DL(N);
4323
4324	// fold (rem c1, c2) -> c1%c2
4325	ConstantSDNode *N1C = isConstOrConstSplat(N1);
4326	if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
4327	return C;
4328
4329	// fold (urem X, -1) -> select(X == -1, 0, x)
4330	if (!isSigned && N1C && N1C->getAPIntValue().isAllOnesValue())
4331	return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
4332	DAG.getConstant(0, DL, VT), N0);
4333
4334	if (SDValue V = simplifyDivRem(N, DAG))
4335	return V;
4336
4337	if (SDValue NewSel = foldBinOpIntoSelect(N))
4338	return NewSel;
4339
4340	if (isSigned) {
4341	// If we know the sign bits of both operands are zero, strength reduce to a
4342	// urem instead. Handles (X & 0x0FFFFFFF) %s 16 -> X&15
4343	if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
4344	return DAG.getNode(ISD::UREM, DL, VT, N0, N1);
4345	} else {
4346	if (DAG.isKnownToBeAPowerOfTwo(N1)) {
4347	// fold (urem x, pow2) -> (and x, pow2-1)
4348	SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
4349	SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
4350	AddToWorklist(Add.getNode());
4351	return DAG.getNode(ISD::AND, DL, VT, N0, Add);
4352	}
4353	if (N1.getOpcode() == ISD::SHL &&
4354	DAG.isKnownToBeAPowerOfTwo(N1.getOperand(0))) {
4355	// fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
4356	SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
4357	SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
4358	AddToWorklist(Add.getNode());
4359	return DAG.getNode(ISD::AND, DL, VT, N0, Add);
4360	}
4361	}
4362
4363	AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4364
4365	// If X/C can be simplified by the division-by-constant logic, lower
4366	// X%C to the equivalent of X-X/C*C.
4367	// Reuse the SDIVLike/UDIVLike combines - to avoid mangling nodes, the
4368	// speculative DIV must not cause a DIVREM conversion. We guard against this
4369	// by skipping the simplification if isIntDivCheap(). When div is not cheap,
4370	// combine will not return a DIVREM. Regardless, checking cheapness here
4371	// makes sense since the simplification results in fatter code.
4372	if (DAG.isKnownNeverZero(N1) && !TLI.isIntDivCheap(VT, Attr)) {
4373	SDValue OptimizedDiv =
4374	isSigned ? visitSDIVLike(N0, N1, N) : visitUDIVLike(N0, N1, N);
4375	if (OptimizedDiv.getNode()) {
4376	// If the equivalent Div node also exists, update its users.
4377	unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
4378	if (SDNode *DivNode = DAG.getNodeIfExists(DivOpcode, N->getVTList(),
4379	{ N0, N1 }))
4380	CombineTo(DivNode, OptimizedDiv);
4381	SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, OptimizedDiv, N1);
4382	SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
4383	AddToWorklist(OptimizedDiv.getNode());
4384	AddToWorklist(Mul.getNode());
4385	return Sub;
4386	}
4387	}
4388
4389	// sdiv, srem -> sdivrem
4390	if (SDValue DivRem = useDivRem(N))
4391	return DivRem.getValue(1);
4392
4393	return SDValue();
4394	}
4395
4396	SDValue DAGCombiner::visitMULHS(SDNode *N) {
4397	SDValue N0 = N->getOperand(0);
4398	SDValue N1 = N->getOperand(1);
4399	EVT VT = N->getValueType(0);
4400	SDLoc DL(N);
4401
4402	if (VT.isVector()) {
4403	// fold (mulhs x, 0) -> 0
4404	// do not return N0/N1, because undef node may exist.
4405	if (ISD::isBuildVectorAllZeros(N0.getNode()) \|\|
4406	ISD::isBuildVectorAllZeros(N1.getNode()))
4407	return DAG.getConstant(0, DL, VT);
4408	}
4409
4410	// fold (mulhs x, 0) -> 0
4411	if (isNullConstant(N1))
4412	return N1;
4413	// fold (mulhs x, 1) -> (sra x, size(x)-1)
4414	if (isOneConstant(N1))
4415	return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0,
4416	DAG.getConstant(N0.getScalarValueSizeInBits() - 1, DL,
4417	getShiftAmountTy(N0.getValueType())));
4418
4419	// fold (mulhs x, undef) -> 0
4420	if (N0.isUndef() \|\| N1.isUndef())
4421	return DAG.getConstant(0, DL, VT);
4422
4423	// If the type twice as wide is legal, transform the mulhs to a wider multiply
4424	// plus a shift.
4425	if (!TLI.isOperationLegalOrCustom(ISD::MULHS, VT) && VT.isSimple() &&
4426	!VT.isVector()) {
4427	MVT Simple = VT.getSimpleVT();
4428	unsigned SimpleSize = Simple.getSizeInBits();
4429	EVT NewVT = EVT::getIntegerVT(DAG.getContext(), SimpleSize2);
4430	if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
4431	N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
4432	N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
4433	N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
4434	N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
4435	DAG.getConstant(SimpleSize, DL,
4436	getShiftAmountTy(N1.getValueType())));
4437	return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
4438	}
4439	}
4440
4441	return SDValue();
4442	}
4443
4444	SDValue DAGCombiner::visitMULHU(SDNode *N) {
4445	SDValue N0 = N->getOperand(0);
4446	SDValue N1 = N->getOperand(1);
4447	EVT VT = N->getValueType(0);
4448	SDLoc DL(N);
4449
4450	if (VT.isVector()) {
4451	// fold (mulhu x, 0) -> 0
4452	// do not return N0/N1, because undef node may exist.
4453	if (ISD::isBuildVectorAllZeros(N0.getNode()) \|\|
4454	ISD::isBuildVectorAllZeros(N1.getNode()))
4455	return DAG.getConstant(0, DL, VT);
4456	}
4457
4458	// fold (mulhu x, 0) -> 0
4459	if (isNullConstant(N1))
4460	return N1;
4461	// fold (mulhu x, 1) -> 0
4462	if (isOneConstant(N1))
4463	return DAG.getConstant(0, DL, N0.getValueType());
4464	// fold (mulhu x, undef) -> 0
4465	if (N0.isUndef() \|\| N1.isUndef())
4466	return DAG.getConstant(0, DL, VT);
4467
4468	// fold (mulhu x, (1 << c)) -> x >> (bitwidth - c)
4469	if (isConstantOrConstantVector(N1, /NoOpaques/ true) &&
4470	DAG.isKnownToBeAPowerOfTwo(N1) && hasOperation(ISD::SRL, VT)) {
4471	unsigned NumEltBits = VT.getScalarSizeInBits();
4472	SDValue LogBase2 = BuildLogBase2(N1, DL);
4473	SDValue SRLAmt = DAG.getNode(
4474	ISD::SUB, DL, VT, DAG.getConstant(NumEltBits, DL, VT), LogBase2);
4475	EVT ShiftVT = getShiftAmountTy(N0.getValueType());
4476	SDValue Trunc = DAG.getZExtOrTrunc(SRLAmt, DL, ShiftVT);
4477	return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
4478	}
4479
4480	// If the type twice as wide is legal, transform the mulhu to a wider multiply
4481	// plus a shift.
4482	if (!TLI.isOperationLegalOrCustom(ISD::MULHU, VT) && VT.isSimple() &&
4483	!VT.isVector()) {
4484	MVT Simple = VT.getSimpleVT();
4485	unsigned SimpleSize = Simple.getSizeInBits();
4486	EVT NewVT = EVT::getIntegerVT(DAG.getContext(), SimpleSize2);
4487	if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
4488	N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
4489	N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
4490	N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
4491	N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
4492	DAG.getConstant(SimpleSize, DL,
4493	getShiftAmountTy(N1.getValueType())));
4494	return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
4495	}
4496	}
4497
4498	return SDValue();
4499	}
4500
4501	/// Perform optimizations common to nodes that compute two values. LoOp and HiOp
4502	/// give the opcodes for the two computations that are being performed. Return
4503	/// true if a simplification was made.
4504	SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
4505	unsigned HiOp) {
4506	// If the high half is not needed, just compute the low half.
4507	bool HiExists = N->hasAnyUseOfValue(1);
4508	if (!HiExists && (!LegalOperations \|\|
4509	TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) {
4510	SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
4511	return CombineTo(N, Res, Res);
4512	}
4513
4514	// If the low half is not needed, just compute the high half.
4515	bool LoExists = N->hasAnyUseOfValue(0);
4516	if (!LoExists && (!LegalOperations \|\|
4517	TLI.isOperationLegalOrCustom(HiOp, N->getValueType(1)))) {
4518	SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
4519	return CombineTo(N, Res, Res);
4520	}
4521
4522	// If both halves are used, return as it is.
4523	if (LoExists && HiExists)
4524	return SDValue();
4525
4526	// If the two computed results can be simplified separately, separate them.
4527	if (LoExists) {
4528	SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
4529	AddToWorklist(Lo.getNode());
4530	SDValue LoOpt = combine(Lo.getNode());
4531	if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
4532	(!LegalOperations \|\|
4533	TLI.isOperationLegalOrCustom(LoOpt.getOpcode(), LoOpt.getValueType())))
4534	return CombineTo(N, LoOpt, LoOpt);
4535	}
4536
4537	if (HiExists) {
4538	SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
4539	AddToWorklist(Hi.getNode());
4540	SDValue HiOpt = combine(Hi.getNode());
4541	if (HiOpt.getNode() && HiOpt != Hi &&
4542	(!LegalOperations \|\|
4543	TLI.isOperationLegalOrCustom(HiOpt.getOpcode(), HiOpt.getValueType())))
4544	return CombineTo(N, HiOpt, HiOpt);
4545	}
4546
4547	return SDValue();
4548	}
4549
4550	SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
4551	if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS))
4552	return Res;
4553
4554	EVT VT = N->getValueType(0);
4555	SDLoc DL(N);
4556
4557	// If the type is twice as wide is legal, transform the mulhu to a wider
4558	// multiply plus a shift.
4559	if (VT.isSimple() && !VT.isVector()) {
4560	MVT Simple = VT.getSimpleVT();
4561	unsigned SimpleSize = Simple.getSizeInBits();
4562	EVT NewVT = EVT::getIntegerVT(DAG.getContext(), SimpleSize2);
4563	if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
4564	SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(0));
4565	SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(1));
4566	Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
4567	// Compute the high part as N1.
4568	Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
4569	DAG.getConstant(SimpleSize, DL,
4570	getShiftAmountTy(Lo.getValueType())));
4571	Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
4572	// Compute the low part as N0.
4573	Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
4574	return CombineTo(N, Lo, Hi);
4575	}
4576	}
4577
4578	return SDValue();
4579	}
4580
4581	SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
4582	if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU))
4583	return Res;
4584
4585	EVT VT = N->getValueType(0);
4586	SDLoc DL(N);
4587
4588	// (umul_lohi N0, 0) -> (0, 0)
4589	if (isNullConstant(N->getOperand(1))) {
4590	SDValue Zero = DAG.getConstant(0, DL, VT);
4591	return CombineTo(N, Zero, Zero);
4592	}
4593
4594	// (umul_lohi N0, 1) -> (N0, 0)
4595	if (isOneConstant(N->getOperand(1))) {
4596	SDValue Zero = DAG.getConstant(0, DL, VT);
4597	return CombineTo(N, N->getOperand(0), Zero);
4598	}
4599
4600	// If the type is twice as wide is legal, transform the mulhu to a wider
4601	// multiply plus a shift.
4602	if (VT.isSimple() && !VT.isVector()) {
4603	MVT Simple = VT.getSimpleVT();
4604	unsigned SimpleSize = Simple.getSizeInBits();
4605	EVT NewVT = EVT::getIntegerVT(DAG.getContext(), SimpleSize2);
4606	if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
4607	SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(0));
4608	SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(1));
4609	Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
4610	// Compute the high part as N1.
4611	Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
4612	DAG.getConstant(SimpleSize, DL,
4613	getShiftAmountTy(Lo.getValueType())));
4614	Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
4615	// Compute the low part as N0.
4616	Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
4617	return CombineTo(N, Lo, Hi);
4618	}
4619	}
4620
4621	return SDValue();
4622	}
4623
4624	SDValue DAGCombiner::visitMULO(SDNode *N) {
4625	SDValue N0 = N->getOperand(0);
4626	SDValue N1 = N->getOperand(1);
4627	EVT VT = N0.getValueType();
4628	bool IsSigned = (ISD::SMULO == N->getOpcode());
4629
4630	EVT CarryVT = N->getValueType(1);
4631	SDLoc DL(N);
4632
4633	ConstantSDNode *N0C = isConstOrConstSplat(N0);
4634	ConstantSDNode *N1C = isConstOrConstSplat(N1);
4635
4636	// fold operation with constant operands.
4637	// TODO: Move this to FoldConstantArithmetic when it supports nodes with
4638	// multiple results.
4639	if (N0C && N1C) {
4640	bool Overflow;
4641	APInt Result =
4642	IsSigned ? N0C->getAPIntValue().smul_ov(N1C->getAPIntValue(), Overflow)
4643	: N0C->getAPIntValue().umul_ov(N1C->getAPIntValue(), Overflow);
4644	return CombineTo(N, DAG.getConstant(Result, DL, VT),
4645	DAG.getBoolConstant(Overflow, DL, CarryVT, CarryVT));
4646	}
4647
4648	// canonicalize constant to RHS.
4649	if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4650	!DAG.isConstantIntBuildVectorOrConstantInt(N1))
4651	return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
4652
4653	// fold (mulo x, 0) -> 0 + no carry out
4654	if (isNullOrNullSplat(N1))
4655	return CombineTo(N, DAG.getConstant(0, DL, VT),
4656	DAG.getConstant(0, DL, CarryVT));
4657
4658	// (mulo x, 2) -> (addo x, x)
4659	if (N1C && N1C->getAPIntValue() == 2)
4660	return DAG.getNode(IsSigned ? ISD::SADDO : ISD::UADDO, DL,
4661	N->getVTList(), N0, N0);
4662
4663	if (IsSigned) {
4664	// Multiplying n * m significant bits yields a result of n + m significant
4665	// bits. If the total number of significant bits does not exceed the
4666	// result bit width (minus 1), there is no overflow.
4667	unsigned SignBits = DAG.ComputeNumSignBits(N0);
4668	if (SignBits > 1)
4669	SignBits += DAG.ComputeNumSignBits(N1);
4670	if (SignBits > VT.getScalarSizeInBits() + 1)
4671	return CombineTo(N, DAG.getNode(ISD::MUL, DL, VT, N0, N1),
4672	DAG.getConstant(0, DL, CarryVT));
4673	} else {
4674	KnownBits N1Known = DAG.computeKnownBits(N1);
4675	KnownBits N0Known = DAG.computeKnownBits(N0);
4676	bool Overflow;
4677	(void)N0Known.getMaxValue().umul_ov(N1Known.getMaxValue(), Overflow);
4678	if (!Overflow)
4679	return CombineTo(N, DAG.getNode(ISD::MUL, DL, VT, N0, N1),
4680	DAG.getConstant(0, DL, CarryVT));
4681	}
4682
4683	return SDValue();
4684	}
4685
4686	SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
4687	SDValue N0 = N->getOperand(0);
4688	SDValue N1 = N->getOperand(1);
4689	EVT VT = N0.getValueType();
4690	unsigned Opcode = N->getOpcode();
4691
4692	// fold vector ops
4693	if (VT.isVector())
4694	if (SDValue FoldedVOp = SimplifyVBinOp(N))
4695	return FoldedVOp;
4696
4697	// fold operation with constant operands.
4698	if (SDValue C = DAG.FoldConstantArithmetic(Opcode, SDLoc(N), VT, {N0, N1}))
4699	return C;
4700
4701	// canonicalize constant to RHS
4702	if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4703	!DAG.isConstantIntBuildVectorOrConstantInt(N1))
4704	return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
4705
4706	// Is sign bits are zero, flip between UMIN/UMAX and SMIN/SMAX.
4707	// Only do this if the current op isn't legal and the flipped is.
4708	if (!TLI.isOperationLegal(Opcode, VT) &&
4709	(N0.isUndef() \|\| DAG.SignBitIsZero(N0)) &&
4710	(N1.isUndef() \|\| DAG.SignBitIsZero(N1))) {
4711	unsigned AltOpcode;
4712	switch (Opcode) {
4713	case ISD::SMIN: AltOpcode = ISD::UMIN; break;
4714	case ISD::SMAX: AltOpcode = ISD::UMAX; break;
4715	case ISD::UMIN: AltOpcode = ISD::SMIN; break;
4716	case ISD::UMAX: AltOpcode = ISD::SMAX; break;
4717	default: llvm_unreachable("Unknown MINMAX opcode")::llvm::llvm_unreachable_internal("Unknown MINMAX opcode", "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 4717);
4718	}
4719	if (TLI.isOperationLegal(AltOpcode, VT))
4720	return DAG.getNode(AltOpcode, SDLoc(N), VT, N0, N1);
4721	}
4722
4723	// Simplify the operands using demanded-bits information.
4724	if (SimplifyDemandedBits(SDValue(N, 0)))
4725	return SDValue(N, 0);
4726
4727	return SDValue();
4728	}
4729
4730	/// If this is a bitwise logic instruction and both operands have the same
4731	/// opcode, try to sink the other opcode after the logic instruction.
4732	SDValue DAGCombiner::hoistLogicOpWithSameOpcodeHands(SDNode *N) {
4733	SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
4734	EVT VT = N0.getValueType();
4735	unsigned LogicOpcode = N->getOpcode();
4736	unsigned HandOpcode = N0.getOpcode();
4737	assert((LogicOpcode == ISD::AND \|\| LogicOpcode == ISD::OR \|\|(((LogicOpcode == ISD::AND \|\| LogicOpcode == ISD::OR \|\| LogicOpcode == ISD::XOR) && "Expected logic opcode") ? static_cast <void> (0) : __assert_fail ("(LogicOpcode == ISD::AND \|\| LogicOpcode == ISD::OR \|\| LogicOpcode == ISD::XOR) && \"Expected logic opcode\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 4738, __PRETTY_FUNCTION__))
4738	LogicOpcode == ISD::XOR) && "Expected logic opcode")(((LogicOpcode == ISD::AND \|\| LogicOpcode == ISD::OR \|\| LogicOpcode == ISD::XOR) && "Expected logic opcode") ? static_cast <void> (0) : __assert_fail ("(LogicOpcode == ISD::AND \|\| LogicOpcode == ISD::OR \|\| LogicOpcode == ISD::XOR) && \"Expected logic opcode\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 4738, __PRETTY_FUNCTION__));
4739	assert(HandOpcode == N1.getOpcode() && "Bad input!")((HandOpcode == N1.getOpcode() && "Bad input!") ? static_cast <void> (0) : __assert_fail ("HandOpcode == N1.getOpcode() && \"Bad input!\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 4739, __PRETTY_FUNCTION__));
4740
4741	// Bail early if none of these transforms apply.
4742	if (N0.getNumOperands() == 0)
4743	return SDValue();
4744
4745	// FIXME: We should check number of uses of the operands to not increase
4746	// the instruction count for all transforms.
4747
4748	// Handle size-changing casts.
4749	SDValue X = N0.getOperand(0);
4750	SDValue Y = N1.getOperand(0);
4751	EVT XVT = X.getValueType();
4752	SDLoc DL(N);
4753	if (HandOpcode == ISD::ANY_EXTEND \|\| HandOpcode == ISD::ZERO_EXTEND \|\|
4754	HandOpcode == ISD::SIGN_EXTEND) {
4755	// If both operands have other uses, this transform would create extra
4756	// instructions without eliminating anything.
4757	if (!N0.hasOneUse() && !N1.hasOneUse())
4758	return SDValue();
4759	// We need matching integer source types.
4760	if (XVT != Y.getValueType())
4761	return SDValue();
4762	// Don't create an illegal op during or after legalization. Don't ever
4763	// create an unsupported vector op.
4764	if ((VT.isVector() \|\| LegalOperations) &&
4765	!TLI.isOperationLegalOrCustom(LogicOpcode, XVT))
4766	return SDValue();
4767	// Avoid infinite looping with PromoteIntBinOp.
4768	// TODO: Should we apply desirable/legal constraints to all opcodes?
4769	if (HandOpcode == ISD::ANY_EXTEND && LegalTypes &&
4770	!TLI.isTypeDesirableForOp(LogicOpcode, XVT))
4771	return SDValue();
4772	// logic_op (hand_op X), (hand_op Y) --> hand_op (logic_op X, Y)
4773	SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4774	return DAG.getNode(HandOpcode, DL, VT, Logic);
4775	}
4776
4777	// logic_op (truncate x), (truncate y) --> truncate (logic_op x, y)
4778	if (HandOpcode == ISD::TRUNCATE) {
4779	// If both operands have other uses, this transform would create extra
4780	// instructions without eliminating anything.
4781	if (!N0.hasOneUse() && !N1.hasOneUse())
4782	return SDValue();
4783	// We need matching source types.
4784	if (XVT != Y.getValueType())
4785	return SDValue();
4786	// Don't create an illegal op during or after legalization.
4787	if (LegalOperations && !TLI.isOperationLegal(LogicOpcode, XVT))
4788	return SDValue();
4789	// Be extra careful sinking truncate. If it's free, there's no benefit in
4790	// widening a binop. Also, don't create a logic op on an illegal type.
4791	if (TLI.isZExtFree(VT, XVT) && TLI.isTruncateFree(XVT, VT))
4792	return SDValue();
4793	if (!TLI.isTypeLegal(XVT))
4794	return SDValue();
4795	SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4796	return DAG.getNode(HandOpcode, DL, VT, Logic);
4797	}
4798
4799	// For binops SHL/SRL/SRA/AND:
4800	// logic_op (OP x, z), (OP y, z) --> OP (logic_op x, y), z
4801	if ((HandOpcode == ISD::SHL \|\| HandOpcode == ISD::SRL \|\|
4802	HandOpcode == ISD::SRA \|\| HandOpcode == ISD::AND) &&
4803	N0.getOperand(1) == N1.getOperand(1)) {
4804	// If either operand has other uses, this transform is not an improvement.
4805	if (!N0.hasOneUse() \|\| !N1.hasOneUse())
4806	return SDValue();
4807	SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4808	return DAG.getNode(HandOpcode, DL, VT, Logic, N0.getOperand(1));
4809	}
4810
4811	// Unary ops: logic_op (bswap x), (bswap y) --> bswap (logic_op x, y)
4812	if (HandOpcode == ISD::BSWAP) {
4813	// If either operand has other uses, this transform is not an improvement.
4814	if (!N0.hasOneUse() \|\| !N1.hasOneUse())
4815	return SDValue();
4816	SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4817	return DAG.getNode(HandOpcode, DL, VT, Logic);
4818	}
4819
4820	// Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
4821	// Only perform this optimization up until type legalization, before
4822	// LegalizeVectorOprs. LegalizeVectorOprs promotes vector operations by
4823	// adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and
4824	// we don't want to undo this promotion.
4825	// We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
4826	// on scalars.
4827	if ((HandOpcode == ISD::BITCAST \|\| HandOpcode == ISD::SCALAR_TO_VECTOR) &&
4828	Level <= AfterLegalizeTypes) {
4829	// Input types must be integer and the same.
4830	if (XVT.isInteger() && XVT == Y.getValueType() &&
4831	!(VT.isVector() && TLI.isTypeLegal(VT) &&
4832	!XVT.isVector() && !TLI.isTypeLegal(XVT))) {
4833	SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4834	return DAG.getNode(HandOpcode, DL, VT, Logic);
4835	}
4836	}
4837
4838	// Xor/and/or are indifferent to the swizzle operation (shuffle of one value).
4839	// Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B))
4840	// If both shuffles use the same mask, and both shuffle within a single
4841	// vector, then it is worthwhile to move the swizzle after the operation.
4842	// The type-legalizer generates this pattern when loading illegal
4843	// vector types from memory. In many cases this allows additional shuffle
4844	// optimizations.
4845	// There are other cases where moving the shuffle after the xor/and/or
4846	// is profitable even if shuffles don't perform a swizzle.
4847	// If both shuffles use the same mask, and both shuffles have the same first
4848	// or second operand, then it might still be profitable to move the shuffle
4849	// after the xor/and/or operation.
4850	if (HandOpcode == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) {
4851	auto *SVN0 = cast<ShuffleVectorSDNode>(N0);
4852	auto *SVN1 = cast<ShuffleVectorSDNode>(N1);
4853	assert(X.getValueType() == Y.getValueType() &&((X.getValueType() == Y.getValueType() && "Inputs to shuffles are not the same type" ) ? static_cast<void> (0) : __assert_fail ("X.getValueType() == Y.getValueType() && \"Inputs to shuffles are not the same type\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 4854, __PRETTY_FUNCTION__))
4854	"Inputs to shuffles are not the same type")((X.getValueType() == Y.getValueType() && "Inputs to shuffles are not the same type" ) ? static_cast<void> (0) : __assert_fail ("X.getValueType() == Y.getValueType() && \"Inputs to shuffles are not the same type\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 4854, __PRETTY_FUNCTION__));
4855
4856	// Check that both shuffles use the same mask. The masks are known to be of
4857	// the same length because the result vector type is the same.
4858	// Check also that shuffles have only one use to avoid introducing extra
4859	// instructions.
4860	if (!SVN0->hasOneUse() \|\| !SVN1->hasOneUse() \|\|
4861	!SVN0->getMask().equals(SVN1->getMask()))
4862	return SDValue();
4863
4864	// Don't try to fold this node if it requires introducing a
4865	// build vector of all zeros that might be illegal at this stage.
4866	SDValue ShOp = N0.getOperand(1);
4867	if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
4868	ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
4869
4870	// (logic_op (shuf (A, C), shuf (B, C))) --> shuf (logic_op (A, B), C)
4871	if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) {
4872	SDValue Logic = DAG.getNode(LogicOpcode, DL, VT,
4873	N0.getOperand(0), N1.getOperand(0));
4874	return DAG.getVectorShuffle(VT, DL, Logic, ShOp, SVN0->getMask());
4875	}
4876
4877	// Don't try to fold this node if it requires introducing a
4878	// build vector of all zeros that might be illegal at this stage.
4879	ShOp = N0.getOperand(0);
4880	if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
4881	ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
4882
4883	// (logic_op (shuf (C, A), shuf (C, B))) --> shuf (C, logic_op (A, B))
4884	if (N0.getOperand(0) == N1.getOperand(0) && ShOp.getNode()) {
4885	SDValue Logic = DAG.getNode(LogicOpcode, DL, VT, N0.getOperand(1),
4886	N1.getOperand(1));
4887	return DAG.getVectorShuffle(VT, DL, ShOp, Logic, SVN0->getMask());
4888	}
4889	}
4890
4891	return SDValue();
4892	}
4893
4894	/// Try to make (and/or setcc (LL, LR), setcc (RL, RR)) more efficient.
4895	SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
4896	const SDLoc &DL) {
4897	SDValue LL, LR, RL, RR, N0CC, N1CC;
4898	if (!isSetCCEquivalent(N0, LL, LR, N0CC) \|\|
4899	!isSetCCEquivalent(N1, RL, RR, N1CC))
4900	return SDValue();
4901
4902	assert(N0.getValueType() == N1.getValueType() &&((N0.getValueType() == N1.getValueType() && "Unexpected operand types for bitwise logic op" ) ? static_cast<void> (0) : __assert_fail ("N0.getValueType() == N1.getValueType() && \"Unexpected operand types for bitwise logic op\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 4903, __PRETTY_FUNCTION__))
4903	"Unexpected operand types for bitwise logic op")((N0.getValueType() == N1.getValueType() && "Unexpected operand types for bitwise logic op" ) ? static_cast<void> (0) : __assert_fail ("N0.getValueType() == N1.getValueType() && \"Unexpected operand types for bitwise logic op\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 4903, __PRETTY_FUNCTION__));
4904	assert(LL.getValueType() == LR.getValueType() &&((LL.getValueType() == LR.getValueType() && RL.getValueType () == RR.getValueType() && "Unexpected operand types for setcc" ) ? static_cast<void> (0) : __assert_fail ("LL.getValueType() == LR.getValueType() && RL.getValueType() == RR.getValueType() && \"Unexpected operand types for setcc\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 4906, __PRETTY_FUNCTION__))
4905	RL.getValueType() == RR.getValueType() &&((LL.getValueType() == LR.getValueType() && RL.getValueType () == RR.getValueType() && "Unexpected operand types for setcc" ) ? static_cast<void> (0) : __assert_fail ("LL.getValueType() == LR.getValueType() && RL.getValueType() == RR.getValueType() && \"Unexpected operand types for setcc\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 4906, __PRETTY_FUNCTION__))
4906	"Unexpected operand types for setcc")((LL.getValueType() == LR.getValueType() && RL.getValueType () == RR.getValueType() && "Unexpected operand types for setcc" ) ? static_cast<void> (0) : __assert_fail ("LL.getValueType() == LR.getValueType() && RL.getValueType() == RR.getValueType() && \"Unexpected operand types for setcc\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 4906, __PRETTY_FUNCTION__));
4907
4908	// If we're here post-legalization or the logic op type is not i1, the logic
4909	// op type must match a setcc result type. Also, all folds require new
4910	// operations on the left and right operands, so those types must match.
4911	EVT VT = N0.getValueType();
4912	EVT OpVT = LL.getValueType();
4913	if (LegalOperations \|\| VT.getScalarType() != MVT::i1)
4914	if (VT != getSetCCResultType(OpVT))
4915	return SDValue();
4916	if (OpVT != RL.getValueType())
4917	return SDValue();
4918
4919	ISD::CondCode CC0 = cast<CondCodeSDNode>(N0CC)->get();
4920	ISD::CondCode CC1 = cast<CondCodeSDNode>(N1CC)->get();
4921	bool IsInteger = OpVT.isInteger();
4922	if (LR == RR && CC0 == CC1 && IsInteger) {
4923	bool IsZero = isNullOrNullSplat(LR);
4924	bool IsNeg1 = isAllOnesOrAllOnesSplat(LR);
4925
4926	// All bits clear?
4927	bool AndEqZero = IsAnd && CC1 == ISD::SETEQ && IsZero;
4928	// All sign bits clear?
4929	bool AndGtNeg1 = IsAnd && CC1 == ISD::SETGT && IsNeg1;
4930	// Any bits set?
4931	bool OrNeZero = !IsAnd && CC1 == ISD::SETNE && IsZero;
4932	// Any sign bits set?
4933	bool OrLtZero = !IsAnd && CC1 == ISD::SETLT && IsZero;
4934
4935	// (and (seteq X, 0), (seteq Y, 0)) --> (seteq (or X, Y), 0)
4936	// (and (setgt X, -1), (setgt Y, -1)) --> (setgt (or X, Y), -1)
4937	// (or (setne X, 0), (setne Y, 0)) --> (setne (or X, Y), 0)
4938	// (or (setlt X, 0), (setlt Y, 0)) --> (setlt (or X, Y), 0)
4939	if (AndEqZero \|\| AndGtNeg1 \|\| OrNeZero \|\| OrLtZero) {
4940	SDValue Or = DAG.getNode(ISD::OR, SDLoc(N0), OpVT, LL, RL);
4941	AddToWorklist(Or.getNode());
4942	return DAG.getSetCC(DL, VT, Or, LR, CC1);
4943	}
4944
4945	// All bits set?
4946	bool AndEqNeg1 = IsAnd && CC1 == ISD::SETEQ && IsNeg1;
4947	// All sign bits set?
4948	bool AndLtZero = IsAnd && CC1 == ISD::SETLT && IsZero;
4949	// Any bits clear?
4950	bool OrNeNeg1 = !IsAnd && CC1 == ISD::SETNE && IsNeg1;
4951	// Any sign bits clear?
4952	bool OrGtNeg1 = !IsAnd && CC1 == ISD::SETGT && IsNeg1;
4953
4954	// (and (seteq X, -1), (seteq Y, -1)) --> (seteq (and X, Y), -1)
4955	// (and (setlt X, 0), (setlt Y, 0)) --> (setlt (and X, Y), 0)
4956	// (or (setne X, -1), (setne Y, -1)) --> (setne (and X, Y), -1)
4957	// (or (setgt X, -1), (setgt Y -1)) --> (setgt (and X, Y), -1)
4958	if (AndEqNeg1 \|\| AndLtZero \|\| OrNeNeg1 \|\| OrGtNeg1) {
4959	SDValue And = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, LL, RL);
4960	AddToWorklist(And.getNode());
4961	return DAG.getSetCC(DL, VT, And, LR, CC1);
4962	}
4963	}
4964
4965	// TODO: What is the 'or' equivalent of this fold?
4966	// (and (setne X, 0), (setne X, -1)) --> (setuge (add X, 1), 2)
4967	if (IsAnd && LL == RL && CC0 == CC1 && OpVT.getScalarSizeInBits() > 1 &&
4968	IsInteger && CC0 == ISD::SETNE &&
4969	((isNullConstant(LR) && isAllOnesConstant(RR)) \|\|
4970	(isAllOnesConstant(LR) && isNullConstant(RR)))) {
4971	SDValue One = DAG.getConstant(1, DL, OpVT);
4972	SDValue Two = DAG.getConstant(2, DL, OpVT);
4973	SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N0), OpVT, LL, One);
4974	AddToWorklist(Add.getNode());
4975	return DAG.getSetCC(DL, VT, Add, Two, ISD::SETUGE);
4976	}
4977
4978	// Try more general transforms if the predicates match and the only user of
4979	// the compares is the 'and' or 'or'.
4980	if (IsInteger && TLI.convertSetCCLogicToBitwiseLogic(OpVT) && CC0 == CC1 &&
4981	N0.hasOneUse() && N1.hasOneUse()) {
4982	// and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0
4983	// or (setne A, B), (setne C, D) --> setne (or (xor A, B), (xor C, D)), 0
4984	if ((IsAnd && CC1 == ISD::SETEQ) \|\| (!IsAnd && CC1 == ISD::SETNE)) {
4985	SDValue XorL = DAG.getNode(ISD::XOR, SDLoc(N0), OpVT, LL, LR);
4986	SDValue XorR = DAG.getNode(ISD::XOR, SDLoc(N1), OpVT, RL, RR);
4987	SDValue Or = DAG.getNode(ISD::OR, DL, OpVT, XorL, XorR);
4988	SDValue Zero = DAG.getConstant(0, DL, OpVT);
4989	return DAG.getSetCC(DL, VT, Or, Zero, CC1);
4990	}
4991
4992	// Turn compare of constants whose difference is 1 bit into add+and+setcc.
4993	// TODO - support non-uniform vector amounts.
4994	if ((IsAnd && CC1 == ISD::SETNE) \|\| (!IsAnd && CC1 == ISD::SETEQ)) {
4995	// Match a shared variable operand and 2 non-opaque constant operands.
4996	ConstantSDNode *C0 = isConstOrConstSplat(LR);
4997	ConstantSDNode *C1 = isConstOrConstSplat(RR);
4998	if (LL == RL && C0 && C1 && !C0->isOpaque() && !C1->isOpaque()) {
4999	const APInt &CMax =
5000	APIntOps::umax(C0->getAPIntValue(), C1->getAPIntValue());
5001	const APInt &CMin =
5002	APIntOps::umin(C0->getAPIntValue(), C1->getAPIntValue());
5003	// The difference of the constants must be a single bit.
5004	if ((CMax - CMin).isPowerOf2()) {
5005	// and/or (setcc X, CMax, ne), (setcc X, CMin, ne/eq) -->
5006	// setcc ((sub X, CMin), ~(CMax - CMin)), 0, ne/eq
5007	SDValue Max = DAG.getNode(ISD::UMAX, DL, OpVT, LR, RR);
5008	SDValue Min = DAG.getNode(ISD::UMIN, DL, OpVT, LR, RR);
5009	SDValue Offset = DAG.getNode(ISD::SUB, DL, OpVT, LL, Min);
5010	SDValue Diff = DAG.getNode(ISD::SUB, DL, OpVT, Max, Min);
5011	SDValue Mask = DAG.getNOT(DL, Diff, OpVT);
5012	SDValue And = DAG.getNode(ISD::AND, DL, OpVT, Offset, Mask);
5013	SDValue Zero = DAG.getConstant(0, DL, OpVT);
5014	return DAG.getSetCC(DL, VT, And, Zero, CC0);
5015	}
5016	}
5017	}
5018	}
5019
5020	// Canonicalize equivalent operands to LL == RL.
5021	if (LL == RR && LR == RL) {
5022	CC1 = ISD::getSetCCSwappedOperands(CC1);
5023	std::swap(RL, RR);
5024	}
5025
5026	// (and (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
5027	// (or (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
5028	if (LL == RL && LR == RR) {
5029	ISD::CondCode NewCC = IsAnd ? ISD::getSetCCAndOperation(CC0, CC1, OpVT)
5030	: ISD::getSetCCOrOperation(CC0, CC1, OpVT);
5031	if (NewCC != ISD::SETCC_INVALID &&
5032	(!LegalOperations \|\|
5033	(TLI.isCondCodeLegal(NewCC, LL.getSimpleValueType()) &&
5034	TLI.isOperationLegal(ISD::SETCC, OpVT))))
5035	return DAG.getSetCC(DL, VT, LL, LR, NewCC);
5036	}
5037
5038	return SDValue();
5039	}
5040
5041	/// This contains all DAGCombine rules which reduce two values combined by
5042	/// an And operation to a single value. This makes them reusable in the context
5043	/// of visitSELECT(). Rules involving constants are not included as
5044	/// visitSELECT() already handles those cases.
5045	SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, SDNode *N) {
5046	EVT VT = N1.getValueType();
5047	SDLoc DL(N);
5048
5049	// fold (and x, undef) -> 0
5050	if (N0.isUndef() \|\| N1.isUndef())
5051	return DAG.getConstant(0, DL, VT);
5052
5053	if (SDValue V = foldLogicOfSetCCs(true, N0, N1, DL))
5054	return V;
5055
5056	if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
5057	VT.getSizeInBits() <= 64) {
5058	if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
5059	if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
5060	// Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal
5061	// immediate for an add, but it is legal if its top c2 bits are set,
5062	// transform the ADD so the immediate doesn't need to be materialized
5063	// in a register.
5064	APInt ADDC = ADDI->getAPIntValue();
5065	APInt SRLC = SRLI->getAPIntValue();
5066	if (ADDC.getMinSignedBits() <= 64 &&
5067	SRLC.ult(VT.getSizeInBits()) &&
5068	!TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
5069	APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
5070	SRLC.getZExtValue());
5071	if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
5072	ADDC \|= Mask;
5073	if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
5074	SDLoc DL0(N0);
5075	SDValue NewAdd =
5076	DAG.getNode(ISD::ADD, DL0, VT,
5077	N0.getOperand(0), DAG.getConstant(ADDC, DL, VT));
5078	CombineTo(N0.getNode(), NewAdd);
5079	// Return N so it doesn't get rechecked!
5080	return SDValue(N, 0);
5081	}
5082	}
5083	}
5084	}
5085	}
5086	}
5087
5088	// Reduce bit extract of low half of an integer to the narrower type.
5089	// (and (srl i64:x, K), KMask) ->
5090	// (i64 zero_extend (and (srl (i32 (trunc i64:x)), K)), KMask)
5091	if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
5092	if (ConstantSDNode *CAnd = dyn_cast<ConstantSDNode>(N1)) {
5093	if (ConstantSDNode *CShift = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
5094	unsigned Size = VT.getSizeInBits();
5095	const APInt &AndMask = CAnd->getAPIntValue();
5096	unsigned ShiftBits = CShift->getZExtValue();
5097
5098	// Bail out, this node will probably disappear anyway.
5099	if (ShiftBits == 0)
5100	return SDValue();
5101
5102	unsigned MaskBits = AndMask.countTrailingOnes();
5103	EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), Size / 2);
5104
5105	if (AndMask.isMask() &&
5106	// Required bits must not span the two halves of the integer and
5107	// must fit in the half size type.
5108	(ShiftBits + MaskBits <= Size / 2) &&
5109	TLI.isNarrowingProfitable(VT, HalfVT) &&
5110	TLI.isTypeDesirableForOp(ISD::AND, HalfVT) &&
5111	TLI.isTypeDesirableForOp(ISD::SRL, HalfVT) &&
5112	TLI.isTruncateFree(VT, HalfVT) &&
5113	TLI.isZExtFree(HalfVT, VT)) {
5114	// The isNarrowingProfitable is to avoid regressions on PPC and
5115	// AArch64 which match a few 64-bit bit insert / bit extract patterns
5116	// on downstream users of this. Those patterns could probably be
5117	// extended to handle extensions mixed in.
5118
5119	SDValue SL(N0);
5120	assert(MaskBits <= Size)((MaskBits <= Size) ? static_cast<void> (0) : __assert_fail ("MaskBits <= Size", "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 5120, __PRETTY_FUNCTION__));
5121
5122	// Extracting the highest bit of the low half.
5123	EVT ShiftVT = TLI.getShiftAmountTy(HalfVT, DAG.getDataLayout());
5124	SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, HalfVT,
5125	N0.getOperand(0));
5126
5127	SDValue NewMask = DAG.getConstant(AndMask.trunc(Size / 2), SL, HalfVT);
5128	SDValue ShiftK = DAG.getConstant(ShiftBits, SL, ShiftVT);
5129	SDValue Shift = DAG.getNode(ISD::SRL, SL, HalfVT, Trunc, ShiftK);
5130	SDValue And = DAG.getNode(ISD::AND, SL, HalfVT, Shift, NewMask);
5131	return DAG.getNode(ISD::ZERO_EXTEND, SL, VT, And);
5132	}
5133	}
5134	}
5135	}
5136
5137	return SDValue();
5138	}
5139
5140	bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode AndC, LoadSDNode LoadN,
5141	EVT LoadResultTy, EVT &ExtVT) {
5142	if (!AndC->getAPIntValue().isMask())
5143	return false;
5144
5145	unsigned ActiveBits = AndC->getAPIntValue().countTrailingOnes();
5146
5147	ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
5148	EVT LoadedVT = LoadN->getMemoryVT();
5149
5150	if (ExtVT == LoadedVT &&
5151	(!LegalOperations \|\|
5152	TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))) {
5153	// ZEXTLOAD will match without needing to change the size of the value being
5154	// loaded.
5155	return true;
5156	}
5157
5158	// Do not change the width of a volatile or atomic loads.
5159	if (!LoadN->isSimple())
5160	return false;
5161
5162	// Do not generate loads of non-round integer types since these can
5163	// be expensive (and would be wrong if the type is not byte sized).
5164	if (!LoadedVT.bitsGT(ExtVT) \|\| !ExtVT.isRound())
5165	return false;
5166
5167	if (LegalOperations &&
5168	!TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))
5169	return false;
5170
5171	if (!TLI.shouldReduceLoadWidth(LoadN, ISD::ZEXTLOAD, ExtVT))
5172	return false;
5173
5174	return true;
5175	}
5176
5177	bool DAGCombiner::isLegalNarrowLdSt(LSBaseSDNode *LDST,
5178	ISD::LoadExtType ExtType, EVT &MemVT,
5179	unsigned ShAmt) {
5180	if (!LDST)
5181	return false;
5182	// Only allow byte offsets.
5183	if (ShAmt % 8)
5184	return false;
5185
5186	// Do not generate loads of non-round integer types since these can
5187	// be expensive (and would be wrong if the type is not byte sized).
5188	if (!MemVT.isRound())
5189	return false;
5190
5191	// Don't change the width of a volatile or atomic loads.
5192	if (!LDST->isSimple())
5193	return false;
5194
5195	EVT LdStMemVT = LDST->getMemoryVT();
5196
5197	// Bail out when changing the scalable property, since we can't be sure that
5198	// we're actually narrowing here.
5199	if (LdStMemVT.isScalableVector() != MemVT.isScalableVector())
5200	return false;
5201
5202	// Verify that we are actually reducing a load width here.
5203	if (LdStMemVT.bitsLT(MemVT))
5204	return false;
5205
5206	// Ensure that this isn't going to produce an unsupported memory access.
5207	if (ShAmt) {
5208	assert(ShAmt % 8 == 0 && "ShAmt is byte offset")((ShAmt % 8 == 0 && "ShAmt is byte offset") ? static_cast <void> (0) : __assert_fail ("ShAmt % 8 == 0 && \"ShAmt is byte offset\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 5208, __PRETTY_FUNCTION__));
5209	const unsigned ByteShAmt = ShAmt / 8;
5210	const Align LDSTAlign = LDST->getAlign();
5211	const Align NarrowAlign = commonAlignment(LDSTAlign, ByteShAmt);
5212	if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
5213	LDST->getAddressSpace(), NarrowAlign,
5214	LDST->getMemOperand()->getFlags()))
5215	return false;
5216	}
5217
5218	// It's not possible to generate a constant of extended or untyped type.
5219	EVT PtrType = LDST->getBasePtr().getValueType();
5220	if (PtrType == MVT::Untyped \|\| PtrType.isExtended())
5221	return false;
5222
5223	if (isa<LoadSDNode>(LDST)) {
5224	LoadSDNode *Load = cast<LoadSDNode>(LDST);
5225	// Don't transform one with multiple uses, this would require adding a new
5226	// load.
5227	if (!SDValue(Load, 0).hasOneUse())
5228	return false;
5229
5230	if (LegalOperations &&
5231	!TLI.isLoadExtLegal(ExtType, Load->getValueType(0), MemVT))
5232	return false;
5233
5234	// For the transform to be legal, the load must produce only two values
5235	// (the value loaded and the chain). Don't transform a pre-increment
5236	// load, for example, which produces an extra value. Otherwise the
5237	// transformation is not equivalent, and the downstream logic to replace
5238	// uses gets things wrong.
5239	if (Load->getNumValues() > 2)
5240	return false;
5241
5242	// If the load that we're shrinking is an extload and we're not just
5243	// discarding the extension we can't simply shrink the load. Bail.
5244	// TODO: It would be possible to merge the extensions in some cases.
5245	if (Load->getExtensionType() != ISD::NON_EXTLOAD &&
5246	Load->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
5247	return false;
5248
5249	if (!TLI.shouldReduceLoadWidth(Load, ExtType, MemVT))
5250	return false;
5251	} else {
5252	assert(isa<StoreSDNode>(LDST) && "It is not a Load nor a Store SDNode")((isa<StoreSDNode>(LDST) && "It is not a Load nor a Store SDNode" ) ? static_cast<void> (0) : __assert_fail ("isa<StoreSDNode>(LDST) && \"It is not a Load nor a Store SDNode\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 5252, __PRETTY_FUNCTION__));
5253	StoreSDNode *Store = cast<StoreSDNode>(LDST);
5254	// Can't write outside the original store
5255	if (Store->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
5256	return false;
5257
5258	if (LegalOperations &&
5259	!TLI.isTruncStoreLegal(Store->getValue().getValueType(), MemVT))
5260	return false;
5261	}
5262	return true;
5263	}
5264
5265	bool DAGCombiner::SearchForAndLoads(SDNode *N,
5266	SmallVectorImpl<LoadSDNode*> &Loads,
5267	SmallPtrSetImpl<SDNode*> &NodesWithConsts,
5268	ConstantSDNode *Mask,
5269	SDNode *&NodeToMask) {
5270	// Recursively search for the operands, looking for loads which can be
5271	// narrowed.
5272	for (SDValue Op : N->op_values()) {
5273	if (Op.getValueType().isVector())
5274	return false;
5275
5276	// Some constants may need fixing up later if they are too large.
5277	if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
5278	if ((N->getOpcode() == ISD::OR \|\| N->getOpcode() == ISD::XOR) &&
5279	(Mask->getAPIntValue() & C->getAPIntValue()) != C->getAPIntValue())
5280	NodesWithConsts.insert(N);
5281	continue;
5282	}
5283
5284	if (!Op.hasOneUse())
5285	return false;
5286
5287	switch(Op.getOpcode()) {
5288	case ISD::LOAD: {
5289	auto *Load = cast<LoadSDNode>(Op);
5290	EVT ExtVT;
5291	if (isAndLoadExtLoad(Mask, Load, Load->getValueType(0), ExtVT) &&
5292	isLegalNarrowLdSt(Load, ISD::ZEXTLOAD, ExtVT)) {
5293
5294	// ZEXTLOAD is already small enough.
5295	if (Load->getExtensionType() == ISD::ZEXTLOAD &&
5296	ExtVT.bitsGE(Load->getMemoryVT()))
5297	continue;
5298
5299	// Use LE to convert equal sized loads to zext.
5300	if (ExtVT.bitsLE(Load->getMemoryVT()))
5301	Loads.push_back(Load);
5302
5303	continue;
5304	}
5305	return false;
5306	}
5307	case ISD::ZERO_EXTEND:
5308	case ISD::AssertZext: {
5309	unsigned ActiveBits = Mask->getAPIntValue().countTrailingOnes();
5310	EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
5311	EVT VT = Op.getOpcode() == ISD::AssertZext ?
5312	cast<VTSDNode>(Op.getOperand(1))->getVT() :
5313	Op.getOperand(0).getValueType();
5314
5315	// We can accept extending nodes if the mask is wider or an equal
5316	// width to the original type.
5317	if (ExtVT.bitsGE(VT))
5318	continue;
5319	break;
5320	}
5321	case ISD::OR:
5322	case ISD::XOR:
5323	case ISD::AND:
5324	if (!SearchForAndLoads(Op.getNode(), Loads, NodesWithConsts, Mask,
5325	NodeToMask))
5326	return false;
5327	continue;
5328	}
5329
5330	// Allow one node which will masked along with any loads found.
5331	if (NodeToMask)
5332	return false;
5333
5334	// Also ensure that the node to be masked only produces one data result.
5335	NodeToMask = Op.getNode();
5336	if (NodeToMask->getNumValues() > 1) {
5337	bool HasValue = false;
5338	for (unsigned i = 0, e = NodeToMask->getNumValues(); i < e; ++i) {
5339	MVT VT = SDValue(NodeToMask, i).getSimpleValueType();
5340	if (VT != MVT::Glue && VT != MVT::Other) {
5341	if (HasValue) {
5342	NodeToMask = nullptr;
5343	return false;
5344	}
5345	HasValue = true;
5346	}
5347	}
5348	assert(HasValue && "Node to be masked has no data result?")((HasValue && "Node to be masked has no data result?" ) ? static_cast<void> (0) : __assert_fail ("HasValue && \"Node to be masked has no data result?\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 5348, __PRETTY_FUNCTION__));
5349	}
5350	}
5351	return true;
5352	}
5353
5354	bool DAGCombiner::BackwardsPropagateMask(SDNode *N) {
5355	auto *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
5356	if (!Mask)
5357	return false;
5358
5359	if (!Mask->getAPIntValue().isMask())
5360	return false;
5361
5362	// No need to do anything if the and directly uses a load.
5363	if (isa<LoadSDNode>(N->getOperand(0)))
5364	return false;
5365
5366	SmallVector<LoadSDNode*, 8> Loads;
5367	SmallPtrSet<SDNode*, 2> NodesWithConsts;
5368	SDNode *FixupNode = nullptr;
5369	if (SearchForAndLoads(N, Loads, NodesWithConsts, Mask, FixupNode)) {
5370	if (Loads.size() == 0)
5371	return false;
5372
5373	LLVM_DEBUG(dbgs() << "Backwards propagate AND: "; N->dump())do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("dagcombine")) { dbgs() << "Backwards propagate AND: " ; N->dump(); } } while (false);
5374	SDValue MaskOp = N->getOperand(1);
5375
5376	// If it exists, fixup the single node we allow in the tree that needs
5377	// masking.
5378	if (FixupNode) {
5379	LLVM_DEBUG(dbgs() << "First, need to fix up: "; FixupNode->dump())do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("dagcombine")) { dbgs() << "First, need to fix up: "; FixupNode ->dump(); } } while (false);
5380	SDValue And = DAG.getNode(ISD::AND, SDLoc(FixupNode),
5381	FixupNode->getValueType(0),
5382	SDValue(FixupNode, 0), MaskOp);
5383	DAG.ReplaceAllUsesOfValueWith(SDValue(FixupNode, 0), And);
5384	if (And.getOpcode() == ISD ::AND)
5385	DAG.UpdateNodeOperands(And.getNode(), SDValue(FixupNode, 0), MaskOp);
5386	}
5387
5388	// Narrow any constants that need it.
5389	for (auto *LogicN : NodesWithConsts) {
5390	SDValue Op0 = LogicN->getOperand(0);
5391	SDValue Op1 = LogicN->getOperand(1);
5392
5393	if (isa<ConstantSDNode>(Op0))
5394	std::swap(Op0, Op1);
5395
5396	SDValue And = DAG.getNode(ISD::AND, SDLoc(Op1), Op1.getValueType(),
5397	Op1, MaskOp);
5398
5399	DAG.UpdateNodeOperands(LogicN, Op0, And);
5400	}
5401
5402	// Create narrow loads.
5403	for (auto *Load : Loads) {
5404	LLVM_DEBUG(dbgs() << "Propagate AND back to: "; Load->dump())do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("dagcombine")) { dbgs() << "Propagate AND back to: "; Load ->dump(); } } while (false);
5405	SDValue And = DAG.getNode(ISD::AND, SDLoc(Load), Load->getValueType(0),
5406	SDValue(Load, 0), MaskOp);
5407	DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), And);
5408	if (And.getOpcode() == ISD ::AND)
5409	And = SDValue(
5410	DAG.UpdateNodeOperands(And.getNode(), SDValue(Load, 0), MaskOp), 0);
5411	SDValue NewLoad = ReduceLoadWidth(And.getNode());
5412	assert(NewLoad &&((NewLoad && "Shouldn't be masking the load if it can't be narrowed" ) ? static_cast<void> (0) : __assert_fail ("NewLoad && \"Shouldn't be masking the load if it can't be narrowed\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 5413, __PRETTY_FUNCTION__))
5413	"Shouldn't be masking the load if it can't be narrowed")((NewLoad && "Shouldn't be masking the load if it can't be narrowed" ) ? static_cast<void> (0) : __assert_fail ("NewLoad && \"Shouldn't be masking the load if it can't be narrowed\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 5413, __PRETTY_FUNCTION__));
5414	CombineTo(Load, NewLoad, NewLoad.getValue(1));
5415	}
5416	DAG.ReplaceAllUsesWith(N, N->getOperand(0).getNode());
5417	return true;
5418	}
5419	return false;
5420	}
5421
5422	// Unfold
5423	// x & (-1 'logical shift' y)
5424	// To
5425	// (x 'opposite logical shift' y) 'logical shift' y
5426	// if it is better for performance.
5427	SDValue DAGCombiner::unfoldExtremeBitClearingToShifts(SDNode *N) {
5428	assert(N->getOpcode() == ISD::AND)((N->getOpcode() == ISD::AND) ? static_cast<void> (0 ) : __assert_fail ("N->getOpcode() == ISD::AND", "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 5428, __PRETTY_FUNCTION__));
5429
5430	SDValue N0 = N->getOperand(0);
5431	SDValue N1 = N->getOperand(1);
5432
5433	// Do we actually prefer shifts over mask?
5434	if (!TLI.shouldFoldMaskToVariableShiftPair(N0))
5435	return SDValue();
5436
5437	// Try to match (-1 '[outer] logical shift' y)
5438	unsigned OuterShift;
5439	unsigned InnerShift; // The opposite direction to the OuterShift.
5440	SDValue Y; // Shift amount.
5441	auto matchMask = [&OuterShift, &InnerShift, &Y](SDValue M) -> bool {
5442	if (!M.hasOneUse())
5443	return false;
5444	OuterShift = M->getOpcode();
5445	if (OuterShift == ISD::SHL)
5446	InnerShift = ISD::SRL;
5447	else if (OuterShift == ISD::SRL)
5448	InnerShift = ISD::SHL;
5449	else
5450	return false;
5451	if (!isAllOnesConstant(M->getOperand(0)))
5452	return false;
5453	Y = M->getOperand(1);
5454	return true;
5455	};
5456
5457	SDValue X;
5458	if (matchMask(N1))
5459	X = N0;
5460	else if (matchMask(N0))
5461	X = N1;
5462	else
5463	return SDValue();
5464
5465	SDLoc DL(N);
5466	EVT VT = N->getValueType(0);
5467
5468	// tmp = x 'opposite logical shift' y
5469	SDValue T0 = DAG.getNode(InnerShift, DL, VT, X, Y);
5470	// ret = tmp 'logical shift' y
5471	SDValue T1 = DAG.getNode(OuterShift, DL, VT, T0, Y);
5472
5473	return T1;
5474	}
5475
5476	/// Try to replace shift/logic that tests if a bit is clear with mask + setcc.
5477	/// For a target with a bit test, this is expected to become test + set and save
5478	/// at least 1 instruction.
5479	static SDValue combineShiftAnd1ToBitTest(SDNode *And, SelectionDAG &DAG) {
5480	assert(And->getOpcode() == ISD::AND && "Expected an 'and' op")((And->getOpcode() == ISD::AND && "Expected an 'and' op" ) ? static_cast<void> (0) : __assert_fail ("And->getOpcode() == ISD::AND && \"Expected an 'and' op\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 5480, __PRETTY_FUNCTION__));
5481
5482	// This is probably not worthwhile without a supported type.
5483	EVT VT = And->getValueType(0);
5484	const TargetLowering &TLI = DAG.getTargetLoweringInfo();
5485	if (!TLI.isTypeLegal(VT))
5486	return SDValue();
5487
5488	// Look through an optional extension and find a 'not'.
5489	// TODO: Should we favor test+set even without the 'not' op?
5490	SDValue Not = And->getOperand(0), And1 = And->getOperand(1);
5491	if (Not.getOpcode() == ISD::ANY_EXTEND)
5492	Not = Not.getOperand(0);
5493	if (!isBitwiseNot(Not) \|\| !Not.hasOneUse() \|\| !isOneConstant(And1))
5494	return SDValue();
5495
5496	// Look though an optional truncation. The source operand may not be the same
5497	// type as the original 'and', but that is ok because we are masking off
5498	// everything but the low bit.
5499	SDValue Srl = Not.getOperand(0);
5500	if (Srl.getOpcode() == ISD::TRUNCATE)
5501	Srl = Srl.getOperand(0);
5502
5503	// Match a shift-right by constant.
5504	if (Srl.getOpcode() != ISD::SRL \|\| !Srl.hasOneUse() \|\|
5505	!isa<ConstantSDNode>(Srl.getOperand(1)))
5506	return SDValue();
5507
5508	// We might have looked through casts that make this transform invalid.
5509	// TODO: If the source type is wider than the result type, do the mask and
5510	// compare in the source type.
5511	const APInt &ShiftAmt = Srl.getConstantOperandAPInt(1);
5512	unsigned VTBitWidth = VT.getSizeInBits();
5513	if (ShiftAmt.uge(VTBitWidth))
5514	return SDValue();
5515
5516	// Turn this into a bit-test pattern using mask op + setcc:
5517	// and (not (srl X, C)), 1 --> (and X, 1<<C) == 0
5518	SDLoc DL(And);
5519	SDValue X = DAG.getZExtOrTrunc(Srl.getOperand(0), DL, VT);
5520	EVT CCVT = TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
5521	SDValue Mask = DAG.getConstant(
5522	APInt::getOneBitSet(VTBitWidth, ShiftAmt.getZExtValue()), DL, VT);
5523	SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, X, Mask);
5524	SDValue Zero = DAG.getConstant(0, DL, VT);
5525	SDValue Setcc = DAG.getSetCC(DL, CCVT, NewAnd, Zero, ISD::SETEQ);
5526	return DAG.getZExtOrTrunc(Setcc, DL, VT);
5527	}
5528
5529	SDValue DAGCombiner::visitAND(SDNode *N) {
5530	SDValue N0 = N->getOperand(0);
5531	SDValue N1 = N->getOperand(1);
5532	EVT VT = N1.getValueType();
5533
5534	// x & x --> x
5535	if (N0 == N1)
5536	return N0;
5537
5538	// fold vector ops
5539	if (VT.isVector()) {
5540	if (SDValue FoldedVOp = SimplifyVBinOp(N))
5541	return FoldedVOp;
5542
5543	// fold (and x, 0) -> 0, vector edition
5544	if (ISD::isBuildVectorAllZeros(N0.getNode()))
5545	// do not return N0, because undef node may exist in N0
5546	return DAG.getConstant(APInt::getNullValue(N0.getScalarValueSizeInBits()),
5547	SDLoc(N), N0.getValueType());
5548	if (ISD::isBuildVectorAllZeros(N1.getNode()))
5549	// do not return N1, because undef node may exist in N1
5550	return DAG.getConstant(APInt::getNullValue(N1.getScalarValueSizeInBits()),
5551	SDLoc(N), N1.getValueType());
5552
5553	// fold (and x, -1) -> x, vector edition
5554	if (ISD::isBuildVectorAllOnes(N0.getNode()))
5555	return N1;
5556	if (ISD::isBuildVectorAllOnes(N1.getNode()))
5557	return N0;
5558
5559	// fold (and (masked_load) (build_vec (x, ...))) to zext_masked_load
5560	auto *MLoad = dyn_cast<MaskedLoadSDNode>(N0);
5561	auto *BVec = dyn_cast<BuildVectorSDNode>(N1);
5562	if (MLoad && BVec && MLoad->getExtensionType() == ISD::EXTLOAD &&
5563	N0.hasOneUse() && N1.hasOneUse()) {
5564	EVT LoadVT = MLoad->getMemoryVT();
5565	EVT ExtVT = VT;
5566	if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT, LoadVT)) {
5567	// For this AND to be a zero extension of the masked load the elements
5568	// of the BuildVec must mask the bottom bits of the extended element
5569	// type
5570	if (ConstantSDNode *Splat = BVec->getConstantSplatNode()) {
5571	uint64_t ElementSize =
5572	LoadVT.getVectorElementType().getScalarSizeInBits();
5573	if (Splat->getAPIntValue().isMask(ElementSize)) {
5574	return DAG.getMaskedLoad(
5575	ExtVT, SDLoc(N), MLoad->getChain(), MLoad->getBasePtr(),
5576	MLoad->getOffset(), MLoad->getMask(), MLoad->getPassThru(),
5577	LoadVT, MLoad->getMemOperand(), MLoad->getAddressingMode(),
5578	ISD::ZEXTLOAD, MLoad->isExpandingLoad());
5579	}
5580	}
5581	}
5582	}
5583	}
5584
5585	// fold (and c1, c2) -> c1&c2
5586	ConstantSDNode *N1C = isConstOrConstSplat(N1);
5587	if (SDValue C = DAG.FoldConstantArithmetic(ISD::AND, SDLoc(N), VT, {N0, N1}))
5588	return C;
5589
5590	// canonicalize constant to RHS
5591	if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5592	!DAG.isConstantIntBuildVectorOrConstantInt(N1))
5593	return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0);
5594
5595	// fold (and x, -1) -> x
5596	if (isAllOnesConstant(N1))
5597	return N0;
5598
5599	// if (and x, c) is known to be zero, return 0
5600	unsigned BitWidth = VT.getScalarSizeInBits();
5601	if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
5602	APInt::getAllOnesValue(BitWidth)))
5603	return DAG.getConstant(0, SDLoc(N), VT);
5604
5605	if (SDValue NewSel = foldBinOpIntoSelect(N))
5606	return NewSel;
5607
5608	// reassociate and
5609	if (SDValue RAND = reassociateOps(ISD::AND, SDLoc(N), N0, N1, N->getFlags()))
5610	return RAND;
5611
5612	// Try to convert a constant mask AND into a shuffle clear mask.
5613	if (VT.isVector())
5614	if (SDValue Shuffle = XformToShuffleWithZero(N))
5615	return Shuffle;
5616
5617	if (SDValue Combined = combineCarryDiamond(*this, DAG, TLI, N0, N1, N))
5618	return Combined;
5619
5620	// fold (and (or x, C), D) -> D if (C & D) == D
5621	auto MatchSubset = [](ConstantSDNode LHS, ConstantSDNode RHS) {
5622	return RHS->getAPIntValue().isSubsetOf(LHS->getAPIntValue());
5623	};
5624	if (N0.getOpcode() == ISD::OR &&
5625	ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchSubset))
5626	return N1;
5627	// fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
5628	if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
5629	SDValue N0Op0 = N0.getOperand(0);
5630	APInt Mask = ~N1C->getAPIntValue();
5631	Mask = Mask.trunc(N0Op0.getScalarValueSizeInBits());
5632	if (DAG.MaskedValueIsZero(N0Op0, Mask)) {
5633	SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N),
5634	N0.getValueType(), N0Op0);
5635
5636	// Replace uses of the AND with uses of the Zero extend node.
5637	CombineTo(N, Zext);
5638
5639	// We actually want to replace all uses of the any_extend with the
5640	// zero_extend, to avoid duplicating things. This will later cause this
5641	// AND to be folded.
5642	CombineTo(N0.getNode(), Zext);
5643	return SDValue(N, 0); // Return N so it doesn't get rechecked!
5644	}
5645	}
5646
5647	// similarly fold (and (X (load ([non_ext\|any_ext\|zero_ext] V))), c) ->
5648	// (X (load ([non_ext\|zero_ext] V))) if 'and' only clears top bits which must
5649	// already be zero by virtue of the width of the base type of the load.
5650	//
5651	// the 'X' node here can either be nothing or an extract_vector_elt to catch
5652	// more cases.
5653	if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
5654	N0.getValueSizeInBits() == N0.getOperand(0).getScalarValueSizeInBits() &&
5655	N0.getOperand(0).getOpcode() == ISD::LOAD &&
5656	N0.getOperand(0).getResNo() == 0) \|\|
5657	(N0.getOpcode() == ISD::LOAD && N0.getResNo() == 0)) {
5658	LoadSDNode *Load = cast<LoadSDNode>( (N0.getOpcode() == ISD::LOAD) ?
5659	N0 : N0.getOperand(0) );
5660
5661	// Get the constant (if applicable) the zero'th operand is being ANDed with.
5662	// This can be a pure constant or a vector splat, in which case we treat the
5663	// vector as a scalar and use the splat value.
5664	APInt Constant = APInt::getNullValue(1);
5665	if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
5666	Constant = C->getAPIntValue();
5667	} else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) {
5668	APInt SplatValue, SplatUndef;
5669	unsigned SplatBitSize;
5670	bool HasAnyUndefs;
5671	bool IsSplat = Vector->isConstantSplat(SplatValue, SplatUndef,
5672	SplatBitSize, HasAnyUndefs);
5673	if (IsSplat) {
5674	// Undef bits can contribute to a possible optimisation if set, so
5675	// set them.
5676	SplatValue \|= SplatUndef;
5677
5678	// The splat value may be something like "0x00FFFFFF", which means 0 for
5679	// the first vector value and FF for the rest, repeating. We need a mask
5680	// that will apply equally to all members of the vector, so AND all the
5681	// lanes of the constant together.
5682	unsigned EltBitWidth = Vector->getValueType(0).getScalarSizeInBits();
5683
5684	// If the splat value has been compressed to a bitlength lower
5685	// than the size of the vector lane, we need to re-expand it to
5686	// the lane size.
5687	if (EltBitWidth > SplatBitSize)
5688	for (SplatValue = SplatValue.zextOrTrunc(EltBitWidth);
5689	SplatBitSize < EltBitWidth; SplatBitSize = SplatBitSize * 2)
5690	SplatValue \|= SplatValue.shl(SplatBitSize);
5691
5692	// Make sure that variable 'Constant' is only set if 'SplatBitSize' is a
5693	// multiple of 'BitWidth'. Otherwise, we could propagate a wrong value.
5694	if ((SplatBitSize % EltBitWidth) == 0) {
5695	Constant = APInt::getAllOnesValue(EltBitWidth);
5696	for (unsigned i = 0, n = (SplatBitSize / EltBitWidth); i < n; ++i)
5697	Constant &= SplatValue.extractBits(EltBitWidth, i * EltBitWidth);
5698	}
5699	}
5700	}
5701
5702	// If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is
5703	// actually legal and isn't going to get expanded, else this is a false
5704	// optimisation.
5705	bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD,
5706	Load->getValueType(0),
5707	Load->getMemoryVT());
5708
5709	// Resize the constant to the same size as the original memory access before
5710	// extension. If it is still the AllOnesValue then this AND is completely
5711	// unneeded.
5712	Constant = Constant.zextOrTrunc(Load->getMemoryVT().getScalarSizeInBits());
5713
5714	bool B;
5715	switch (Load->getExtensionType()) {
5716	default: B = false; break;
5717	case ISD::EXTLOAD: B = CanZextLoadProfitably; break;
5718	case ISD::ZEXTLOAD:
5719	case ISD::NON_EXTLOAD: B = true; break;
5720	}
5721
5722	if (B && Constant.isAllOnesValue()) {
5723	// If the load type was an EXTLOAD, convert to ZEXTLOAD in order to
5724	// preserve semantics once we get rid of the AND.
5725	SDValue NewLoad(Load, 0);
5726
5727	// Fold the AND away. NewLoad may get replaced immediately.
5728	CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0);
5729
5730	if (Load->getExtensionType() == ISD::EXTLOAD) {
5731	NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD,
5732	Load->getValueType(0), SDLoc(Load),
5733	Load->getChain(), Load->getBasePtr(),
5734	Load->getOffset(), Load->getMemoryVT(),
5735	Load->getMemOperand());
5736	// Replace uses of the EXTLOAD with the new ZEXTLOAD.
5737	if (Load->getNumValues() == 3) {
5738	// PRE/POST_INC loads have 3 values.
5739	SDValue To[] = { NewLoad.getValue(0), NewLoad.getValue(1),
5740	NewLoad.getValue(2) };
5741	CombineTo(Load, To, 3, true);
5742	} else {
5743	CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1));
5744	}
5745	}
5746
5747	return SDValue(N, 0); // Return N so it doesn't get rechecked!
5748	}
5749	}
5750
5751	// fold (and (masked_gather x)) -> (zext_masked_gather x)
5752	if (auto *GN0 = dyn_cast<MaskedGatherSDNode>(N0)) {
5753	EVT MemVT = GN0->getMemoryVT();
5754	EVT ScalarVT = MemVT.getScalarType();
5755
5756	if (SDValue(GN0, 0).hasOneUse() &&
5757	isConstantSplatVectorMaskForType(N1.getNode(), ScalarVT) &&
5758	TLI.isVectorLoadExtDesirable(SDValue(SDValue(GN0, 0)))) {
5759	SDValue Ops[] = {GN0->getChain(), GN0->getPassThru(), GN0->getMask(),
5760	GN0->getBasePtr(), GN0->getIndex(), GN0->getScale()};
5761
5762	SDValue ZExtLoad = DAG.getMaskedGather(
5763	DAG.getVTList(VT, MVT::Other), MemVT, SDLoc(N), Ops,
5764	GN0->getMemOperand(), GN0->getIndexType(), ISD::ZEXTLOAD);
5765
5766	CombineTo(N, ZExtLoad);
5767	AddToWorklist(ZExtLoad.getNode());
5768	// Avoid recheck of N.
5769	return SDValue(N, 0);
5770	}
5771	}
5772
5773	// fold (and (load x), 255) -> (zextload x, i8)
5774	// fold (and (extload x, i16), 255) -> (zextload x, i8)
5775	// fold (and (any_ext (extload x, i16)), 255) -> (zextload x, i8)
5776	if (!VT.isVector() && N1C && (N0.getOpcode() == ISD::LOAD \|\|
5777	(N0.getOpcode() == ISD::ANY_EXTEND &&
5778	N0.getOperand(0).getOpcode() == ISD::LOAD))) {
5779	if (SDValue Res = ReduceLoadWidth(N)) {
5780	LoadSDNode *LN0 = N0->getOpcode() == ISD::ANY_EXTEND
5781	? cast<LoadSDNode>(N0.getOperand(0)) : cast<LoadSDNode>(N0);
5782	AddToWorklist(N);
5783	DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 0), Res);
5784	return SDValue(N, 0);
5785	}
5786	}
5787
5788	if (LegalTypes) {
5789	// Attempt to propagate the AND back up to the leaves which, if they're
5790	// loads, can be combined to narrow loads and the AND node can be removed.
5791	// Perform after legalization so that extend nodes will already be
5792	// combined into the loads.
5793	if (BackwardsPropagateMask(N))
5794	return SDValue(N, 0);
5795	}
5796
5797	if (SDValue Combined = visitANDLike(N0, N1, N))
5798	return Combined;
5799
5800	// Simplify: (and (op x...), (op y...)) -> (op (and x, y))
5801	if (N0.getOpcode() == N1.getOpcode())
5802	if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
5803	return V;
5804
5805	// Masking the negated extension of a boolean is just the zero-extended
5806	// boolean:
5807	// and (sub 0, zext(bool X)), 1 --> zext(bool X)
5808	// and (sub 0, sext(bool X)), 1 --> zext(bool X)
5809	//
5810	// Note: the SimplifyDemandedBits fold below can make an information-losing
5811	// transform, and then we have no way to find this better fold.
5812	if (N1C && N1C->isOne() && N0.getOpcode() == ISD::SUB) {
5813	if (isNullOrNullSplat(N0.getOperand(0))) {
5814	SDValue SubRHS = N0.getOperand(1);
5815	if (SubRHS.getOpcode() == ISD::ZERO_EXTEND &&
5816	SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
5817	return SubRHS;
5818	if (SubRHS.getOpcode() == ISD::SIGN_EXTEND &&
5819	SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
5820	return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, SubRHS.getOperand(0));
5821	}
5822	}
5823
5824	// fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
5825	// fold (and (sra)) -> (and (srl)) when possible.
5826	if (SimplifyDemandedBits(SDValue(N, 0)))
5827	return SDValue(N, 0);
5828
5829	// fold (zext_inreg (extload x)) -> (zextload x)
5830	// fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
5831	if (ISD::isUNINDEXEDLoad(N0.getNode()) &&
5832	(ISD::isEXTLoad(N0.getNode()) \|\|
5833	(ISD::isSEXTLoad(N0.getNode()) && N0.hasOneUse()))) {
5834	LoadSDNode *LN0 = cast<LoadSDNode>(N0);
5835	EVT MemVT = LN0->getMemoryVT();
5836	// If we zero all the possible extended bits, then we can turn this into
5837	// a zextload if we are running before legalize or the operation is legal.
5838	unsigned ExtBitSize = N1.getScalarValueSizeInBits();
5839	unsigned MemBitSize = MemVT.getScalarSizeInBits();
5840	APInt ExtBits = APInt::getHighBitsSet(ExtBitSize, ExtBitSize - MemBitSize);
5841	if (DAG.MaskedValueIsZero(N1, ExtBits) &&
5842	((!LegalOperations && LN0->isSimple()) \|\|
5843	TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
5844	SDValue ExtLoad =
5845	DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, LN0->getChain(),
5846	LN0->getBasePtr(), MemVT, LN0->getMemOperand());
5847	AddToWorklist(N);
5848	CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
5849	return SDValue(N, 0); // Return N so it doesn't get rechecked!
5850	}
5851	}
5852
5853	// fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const)
5854	if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) {
5855	if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
5856	N0.getOperand(1), false))
5857	return BSwap;
5858	}
5859
5860	if (SDValue Shifts = unfoldExtremeBitClearingToShifts(N))
5861	return Shifts;
5862
5863	if (TLI.hasBitTest(N0, N1))
5864	if (SDValue V = combineShiftAnd1ToBitTest(N, DAG))
5865	return V;
5866
5867	// Recognize the following pattern:
5868	//
5869	// AndVT = (and (sign_extend NarrowVT to AndVT) #bitmask)
5870	//
5871	// where bitmask is a mask that clears the upper bits of AndVT. The
5872	// number of bits in bitmask must be a power of two.
5873	auto IsAndZeroExtMask = [](SDValue LHS, SDValue RHS) {
5874	if (LHS->getOpcode() != ISD::SIGN_EXTEND)
5875	return false;
5876
5877	auto *C = dyn_cast<ConstantSDNode>(RHS);
5878	if (!C)
5879	return false;
5880
5881	if (!C->getAPIntValue().isMask(
5882	LHS.getOperand(0).getValueType().getFixedSizeInBits()))
5883	return false;
5884
5885	return true;
5886	};
5887
5888	// Replace (and (sign_extend ...) #bitmask) with (zero_extend ...).
5889	if (IsAndZeroExtMask(N0, N1))
5890	return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, N0.getOperand(0));
5891
5892	return SDValue();
5893	}
5894
5895	/// Match (a >> 8) \| (a << 8) as (bswap a) >> 16.
5896	SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
5897	bool DemandHighBits) {
5898	if (!LegalOperations)
5899	return SDValue();
5900
5901	EVT VT = N->getValueType(0);
5902	if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16)
5903	return SDValue();
5904	if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
5905	return SDValue();
5906
5907	// Recognize (and (shl a, 8), 0xff00), (and (srl a, 8), 0xff)
5908	bool LookPassAnd0 = false;
5909	bool LookPassAnd1 = false;
5910	if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL)
5911	std::swap(N0, N1);
5912	if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL)
5913	std::swap(N0, N1);
5914	if (N0.getOpcode() == ISD::AND) {
5915	if (!N0.getNode()->hasOneUse())
5916	return SDValue();
5917	ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
5918	// Also handle 0xffff since the LHS is guaranteed to have zeros there.
5919	// This is needed for X86.
5920	if (!N01C \|\| (N01C->getZExtValue() != 0xFF00 &&
5921	N01C->getZExtValue() != 0xFFFF))
5922	return SDValue();
5923	N0 = N0.getOperand(0);
5924	LookPassAnd0 = true;
5925	}
5926
5927	if (N1.getOpcode() == ISD::AND) {
5928	if (!N1.getNode()->hasOneUse())
5929	return SDValue();
5930	ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
5931	if (!N11C \|\| N11C->getZExtValue() != 0xFF)
5932	return SDValue();
5933	N1 = N1.getOperand(0);
5934	LookPassAnd1 = true;
5935	}
5936
5937	if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
5938	std::swap(N0, N1);
5939	if (N0.getOpcode() != ISD::SHL \|\| N1.getOpcode() != ISD::SRL)
5940	return SDValue();
5941	if (!N0.getNode()->hasOneUse() \|\| !N1.getNode()->hasOneUse())
5942	return SDValue();
5943
5944	ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
5945	ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
5946	if (!N01C \|\| !N11C)
5947	return SDValue();
5948	if (N01C->getZExtValue() != 8 \|\| N11C->getZExtValue() != 8)
5949	return SDValue();
5950
5951	// Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8)
5952	SDValue N00 = N0->getOperand(0);
5953	if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) {
5954	if (!N00.getNode()->hasOneUse())
5955	return SDValue();
5956	ConstantSDNode *N001C = dyn_cast<ConstantSDNode>(N00.getOperand(1));
5957	if (!N001C \|\| N001C->getZExtValue() != 0xFF)
5958	return SDValue();
5959	N00 = N00.getOperand(0);
5960	LookPassAnd0 = true;
5961	}
5962
5963	SDValue N10 = N1->getOperand(0);
5964	if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) {
5965	if (!N10.getNode()->hasOneUse())
5966	return SDValue();
5967	ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N10.getOperand(1));
5968	// Also allow 0xFFFF since the bits will be shifted out. This is needed
5969	// for X86.
5970	if (!N101C \|\| (N101C->getZExtValue() != 0xFF00 &&
5971	N101C->getZExtValue() != 0xFFFF))
5972	return SDValue();
5973	N10 = N10.getOperand(0);
5974	LookPassAnd1 = true;
5975	}
5976
5977	if (N00 != N10)
5978	return SDValue();
5979
5980	// Make sure everything beyond the low halfword gets set to zero since the SRL
5981	// 16 will clear the top bits.
5982	unsigned OpSizeInBits = VT.getSizeInBits();
5983	if (DemandHighBits && OpSizeInBits > 16) {
5984	// If the left-shift isn't masked out then the only way this is a bswap is
5985	// if all bits beyond the low 8 are 0. In that case the entire pattern
5986	// reduces to a left shift anyway: leave it for other parts of the combiner.
5987	if (!LookPassAnd0)
5988	return SDValue();
5989
5990	// However, if the right shift isn't masked out then it might be because
5991	// it's not needed. See if we can spot that too.
5992	if (!LookPassAnd1 &&
5993	!DAG.MaskedValueIsZero(
5994	N10, APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - 16)))
5995	return SDValue();
5996	}
5997
5998	SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00);
5999	if (OpSizeInBits > 16) {
6000	SDLoc DL(N);
6001	Res = DAG.getNode(ISD::SRL, DL, VT, Res,
6002	DAG.getConstant(OpSizeInBits - 16, DL,
6003	getShiftAmountTy(VT)));
6004	}
6005	return Res;
6006	}
6007
6008	/// Return true if the specified node is an element that makes up a 32-bit
6009	/// packed halfword byteswap.
6010	/// ((x & 0x000000ff) << 8) \|
6011	/// ((x & 0x0000ff00) >> 8) \|
6012	/// ((x & 0x00ff0000) << 8) \|
6013	/// ((x & 0xff000000) >> 8)
6014	static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) {
6015	if (!N.getNode()->hasOneUse())
6016	return false;
6017
6018	unsigned Opc = N.getOpcode();
6019	if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL)
6020	return false;
6021
6022	SDValue N0 = N.getOperand(0);
6023	unsigned Opc0 = N0.getOpcode();
6024	if (Opc0 != ISD::AND && Opc0 != ISD::SHL && Opc0 != ISD::SRL)
6025	return false;
6026
6027	ConstantSDNode *N1C = nullptr;
6028	// SHL or SRL: look upstream for AND mask operand
6029	if (Opc == ISD::AND)
6030	N1C = dyn_cast<ConstantSDNode>(N.getOperand(1));
6031	else if (Opc0 == ISD::AND)
6032	N1C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
6033	if (!N1C)
6034	return false;
6035
6036	unsigned MaskByteOffset;
6037	switch (N1C->getZExtValue()) {
6038	default:
6039	return false;
6040	case 0xFF: MaskByteOffset = 0; break;
6041	case 0xFF00: MaskByteOffset = 1; break;
6042	case 0xFFFF:
6043	// In case demanded bits didn't clear the bits that will be shifted out.
6044	// This is needed for X86.
6045	if (Opc == ISD::SRL \|\| (Opc == ISD::AND && Opc0 == ISD::SHL)) {
6046	MaskByteOffset = 1;
6047	break;
6048	}
6049	return false;
6050	case 0xFF0000: MaskByteOffset = 2; break;
6051	case 0xFF000000: MaskByteOffset = 3; break;
6052	}
6053
6054	// Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00).
6055	if (Opc == ISD::AND) {
6056	if (MaskByteOffset == 0 \|\| MaskByteOffset == 2) {
6057	// (x >> 8) & 0xff
6058	// (x >> 8) & 0xff0000
6059	if (Opc0 != ISD::SRL)
6060	return false;
6061	ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
6062	if (!C \|\| C->getZExtValue() != 8)
6063	return false;
6064	} else {
6065	// (x << 8) & 0xff00
6066	// (x << 8) & 0xff000000
6067	if (Opc0 != ISD::SHL)
6068	return false;
6069	ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
6070	if (!C \|\| C->getZExtValue() != 8)
6071	return false;
6072	}
6073	} else if (Opc == ISD::SHL) {
6074	// (x & 0xff) << 8
6075	// (x & 0xff0000) << 8
6076	if (MaskByteOffset != 0 && MaskByteOffset != 2)
6077	return false;
6078	ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
6079	if (!C \|\| C->getZExtValue() != 8)
6080	return false;
6081	} else { // Opc == ISD::SRL
6082	// (x & 0xff00) >> 8
6083	// (x & 0xff000000) >> 8
6084	if (MaskByteOffset != 1 && MaskByteOffset != 3)
6085	return false;
6086	ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
6087	if (!C \|\| C->getZExtValue() != 8)
6088	return false;
6089	}
6090
6091	if (Parts[MaskByteOffset])
6092	return false;
6093
6094	Parts[MaskByteOffset] = N0.getOperand(0).getNode();
6095	return true;
6096	}
6097
6098	// Match 2 elements of a packed halfword bswap.
6099	static bool isBSwapHWordPair(SDValue N, MutableArrayRef<SDNode *> Parts) {
6100	if (N.getOpcode() == ISD::OR)
6101	return isBSwapHWordElement(N.getOperand(0), Parts) &&
6102	isBSwapHWordElement(N.getOperand(1), Parts);
6103
6104	if (N.getOpcode() == ISD::SRL && N.getOperand(0).getOpcode() == ISD::BSWAP) {
6105	ConstantSDNode *C = isConstOrConstSplat(N.getOperand(1));
6106	if (!C \|\| C->getAPIntValue() != 16)
6107	return false;
6108	Parts[0] = Parts[1] = N.getOperand(0).getOperand(0).getNode();
6109	return true;
6110	}
6111
6112	return false;
6113	}
6114
6115	// Match this pattern:
6116	// (or (and (shl (A, 8)), 0xff00ff00), (and (srl (A, 8)), 0x00ff00ff))
6117	// And rewrite this to:
6118	// (rotr (bswap A), 16)
6119	static SDValue matchBSwapHWordOrAndAnd(const TargetLowering &TLI,
6120	SelectionDAG &DAG, SDNode *N, SDValue N0,
6121	SDValue N1, EVT VT, EVT ShiftAmountTy) {
6122	assert(N->getOpcode() == ISD::OR && VT == MVT::i32 &&((N->getOpcode() == ISD::OR && VT == MVT::i32 && "MatchBSwapHWordOrAndAnd: expecting i32") ? static_cast<void > (0) : __assert_fail ("N->getOpcode() == ISD::OR && VT == MVT::i32 && \"MatchBSwapHWordOrAndAnd: expecting i32\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 6123, __PRETTY_FUNCTION__))
6123	"MatchBSwapHWordOrAndAnd: expecting i32")((N->getOpcode() == ISD::OR && VT == MVT::i32 && "MatchBSwapHWordOrAndAnd: expecting i32") ? static_cast<void > (0) : __assert_fail ("N->getOpcode() == ISD::OR && VT == MVT::i32 && \"MatchBSwapHWordOrAndAnd: expecting i32\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 6123, __PRETTY_FUNCTION__));
6124	if (!TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
6125	return SDValue();
6126	if (N0.getOpcode() != ISD::AND \|\| N1.getOpcode() != ISD::AND)
6127	return SDValue();
6128	// TODO: this is too restrictive; lifting this restriction requires more tests
6129	if (!N0->hasOneUse() \|\| !N1->hasOneUse())
6130	return SDValue();
6131	ConstantSDNode *Mask0 = isConstOrConstSplat(N0.getOperand(1));
6132	ConstantSDNode *Mask1 = isConstOrConstSplat(N1.getOperand(1));
6133	if (!Mask0 \|\| !Mask1)
6134	return SDValue();
6135	if (Mask0->getAPIntValue() != 0xff00ff00 \|\|
6136	Mask1->getAPIntValue() != 0x00ff00ff)
6137	return SDValue();
6138	SDValue Shift0 = N0.getOperand(0);
6139	SDValue Shift1 = N1.getOperand(0);
6140	if (Shift0.getOpcode() != ISD::SHL \|\| Shift1.getOpcode() != ISD::SRL)
6141	return SDValue();
6142	ConstantSDNode *ShiftAmt0 = isConstOrConstSplat(Shift0.getOperand(1));
6143	ConstantSDNode *ShiftAmt1 = isConstOrConstSplat(Shift1.getOperand(1));
6144	if (!ShiftAmt0 \|\| !ShiftAmt1)
6145	return SDValue();
6146	if (ShiftAmt0->getAPIntValue() != 8 \|\| ShiftAmt1->getAPIntValue() != 8)
6147	return SDValue();
6148	if (Shift0.getOperand(0) != Shift1.getOperand(0))
6149	return SDValue();
6150
6151	SDLoc DL(N);
6152	SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, Shift0.getOperand(0));
6153	SDValue ShAmt = DAG.getConstant(16, DL, ShiftAmountTy);
6154	return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
6155	}
6156
6157	/// Match a 32-bit packed halfword bswap. That is
6158	/// ((x & 0x000000ff) << 8) \|
6159	/// ((x & 0x0000ff00) >> 8) \|
6160	/// ((x & 0x00ff0000) << 8) \|
6161	/// ((x & 0xff000000) >> 8)
6162	/// => (rotl (bswap x), 16)
6163	SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
6164	if (!LegalOperations)
6165	return SDValue();
6166
6167	EVT VT = N->getValueType(0);
6168	if (VT != MVT::i32)
6169	return SDValue();
6170	if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
6171	return SDValue();
6172
6173	if (SDValue BSwap = matchBSwapHWordOrAndAnd(TLI, DAG, N, N0, N1, VT,
6174	getShiftAmountTy(VT)))
6175	return BSwap;
6176
6177	// Try again with commuted operands.
6178	if (SDValue BSwap = matchBSwapHWordOrAndAnd(TLI, DAG, N, N1, N0, VT,
6179	getShiftAmountTy(VT)))
6180	return BSwap;
6181
6182
6183	// Look for either
6184	// (or (bswaphpair), (bswaphpair))
6185	// (or (or (bswaphpair), (and)), (and))
6186	// (or (or (and), (bswaphpair)), (and))
6187	SDNode *Parts[4] = {};
6188
6189	if (isBSwapHWordPair(N0, Parts)) {
6190	// (or (or (and), (and)), (or (and), (and)))
6191	if (!isBSwapHWordPair(N1, Parts))
6192	return SDValue();
6193	} else if (N0.getOpcode() == ISD::OR) {
6194	// (or (or (or (and), (and)), (and)), (and))
6195	if (!isBSwapHWordElement(N1, Parts))
6196	return SDValue();
6197	SDValue N00 = N0.getOperand(0);
6198	SDValue N01 = N0.getOperand(1);
6199	if (!(isBSwapHWordElement(N01, Parts) && isBSwapHWordPair(N00, Parts)) &&
6200	!(isBSwapHWordElement(N00, Parts) && isBSwapHWordPair(N01, Parts)))
6201	return SDValue();
6202	} else
6203	return SDValue();
6204
6205	// Make sure the parts are all coming from the same node.
6206	if (Parts[0] != Parts[1] \|\| Parts[0] != Parts[2] \|\| Parts[0] != Parts[3])
6207	return SDValue();
6208
6209	SDLoc DL(N);
6210	SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT,
6211	SDValue(Parts[0], 0));
6212
6213	// Result of the bswap should be rotated by 16. If it's not legal, then
6214	// do (x << 16) \| (x >> 16).
6215	SDValue ShAmt = DAG.getConstant(16, DL, getShiftAmountTy(VT));
6216	if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT))
6217	return DAG.getNode(ISD::ROTL, DL, VT, BSwap, ShAmt);
6218	if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
6219	return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
6220	return DAG.getNode(ISD::OR, DL, VT,
6221	DAG.getNode(ISD::SHL, DL, VT, BSwap, ShAmt),
6222	DAG.getNode(ISD::SRL, DL, VT, BSwap, ShAmt));
6223	}
6224
6225	/// This contains all DAGCombine rules which reduce two values combined by
6226	/// an Or operation to a single value \see visitANDLike().
6227	SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *N) {
6228	EVT VT = N1.getValueType();
6229	SDLoc DL(N);
6230
6231	// fold (or x, undef) -> -1
6232	if (!LegalOperations && (N0.isUndef() \|\| N1.isUndef()))
6233	return DAG.getAllOnesConstant(DL, VT);
6234
6235	if (SDValue V = foldLogicOfSetCCs(false, N0, N1, DL))
6236	return V;
6237
6238	// (or (and X, C1), (and Y, C2)) -> (and (or X, Y), C3) if possible.
6239	if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
6240	// Don't increase # computations.
6241	(N0.getNode()->hasOneUse() \|\| N1.getNode()->hasOneUse())) {
6242	// We can only do this xform if we know that bits from X that are set in C2
6243	// but not in C1 are already zero. Likewise for Y.
6244	if (const ConstantSDNode *N0O1C =
6245	getAsNonOpaqueConstant(N0.getOperand(1))) {
6246	if (const ConstantSDNode *N1O1C =
6247	getAsNonOpaqueConstant(N1.getOperand(1))) {
6248	// We can only do this xform if we know that bits from X that are set in
6249	// C2 but not in C1 are already zero. Likewise for Y.
6250	const APInt &LHSMask = N0O1C->getAPIntValue();
6251	const APInt &RHSMask = N1O1C->getAPIntValue();
6252
6253	if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
6254	DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
6255	SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
6256	N0.getOperand(0), N1.getOperand(0));
6257	return DAG.getNode(ISD::AND, DL, VT, X,
6258	DAG.getConstant(LHSMask \| RHSMask, DL, VT));
6259	}
6260	}
6261	}
6262	}
6263
6264	// (or (and X, M), (and X, N)) -> (and X, (or M, N))
6265	if (N0.getOpcode() == ISD::AND &&
6266	N1.getOpcode() == ISD::AND &&
6267	N0.getOperand(0) == N1.getOperand(0) &&
6268	// Don't increase # computations.
6269	(N0.getNode()->hasOneUse() \|\| N1.getNode()->hasOneUse())) {
6270	SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
6271	N0.getOperand(1), N1.getOperand(1));
6272	return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), X);
6273	}
6274
6275	return SDValue();
6276	}
6277
6278	/// OR combines for which the commuted variant will be tried as well.
6279	static SDValue visitORCommutative(
6280	SelectionDAG &DAG, SDValue N0, SDValue N1, SDNode *N) {
6281	EVT VT = N0.getValueType();
6282	if (N0.getOpcode() == ISD::AND) {
6283	// fold (or (and X, (xor Y, -1)), Y) -> (or X, Y)
6284	if (isBitwiseNot(N0.getOperand(1)) && N0.getOperand(1).getOperand(0) == N1)
6285	return DAG.getNode(ISD::OR, SDLoc(N), VT, N0.getOperand(0), N1);
6286
6287	// fold (or (and (xor Y, -1), X), Y) -> (or X, Y)
6288	if (isBitwiseNot(N0.getOperand(0)) && N0.getOperand(0).getOperand(0) == N1)
6289	return DAG.getNode(ISD::OR, SDLoc(N), VT, N0.getOperand(1), N1);
6290	}
6291
6292	return SDValue();
6293	}
6294
6295	SDValue DAGCombiner::visitOR(SDNode *N) {
6296	SDValue N0 = N->getOperand(0);
6297	SDValue N1 = N->getOperand(1);
6298	EVT VT = N1.getValueType();
6299
6300	// x \| x --> x
6301	if (N0 == N1)
6302	return N0;
6303
6304	// fold vector ops
6305	if (VT.isVector()) {
6306	if (SDValue FoldedVOp = SimplifyVBinOp(N))
6307	return FoldedVOp;
6308
6309	// fold (or x, 0) -> x, vector edition
6310	if (ISD::isBuildVectorAllZeros(N0.getNode()))
6311	return N1;
6312	if (ISD::isBuildVectorAllZeros(N1.getNode()))
6313	return N0;
6314
6315	// fold (or x, -1) -> -1, vector edition
6316	if (ISD::isBuildVectorAllOnes(N0.getNode()))
6317	// do not return N0, because undef node may exist in N0
6318	return DAG.getAllOnesConstant(SDLoc(N), N0.getValueType());
6319	if (ISD::isBuildVectorAllOnes(N1.getNode()))
6320	// do not return N1, because undef node may exist in N1
6321	return DAG.getAllOnesConstant(SDLoc(N), N1.getValueType());
6322
6323	// fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask)
6324	// Do this only if the resulting shuffle is legal.
6325	if (isa<ShuffleVectorSDNode>(N0) &&
6326	isa<ShuffleVectorSDNode>(N1) &&
6327	// Avoid folding a node with illegal type.
6328	TLI.isTypeLegal(VT)) {
6329	bool ZeroN00 = ISD::isBuildVectorAllZeros(N0.getOperand(0).getNode());
6330	bool ZeroN01 = ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode());
6331	bool ZeroN10 = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
6332	bool ZeroN11 = ISD::isBuildVectorAllZeros(N1.getOperand(1).getNode());
6333	// Ensure both shuffles have a zero input.
6334	if ((ZeroN00 != ZeroN01) && (ZeroN10 != ZeroN11)) {
6335	assert((!ZeroN00 \|\| !ZeroN01) && "Both inputs zero!")(((!ZeroN00 \|\| !ZeroN01) && "Both inputs zero!") ? static_cast <void> (0) : __assert_fail ("(!ZeroN00 \|\| !ZeroN01) && \"Both inputs zero!\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 6335, __PRETTY_FUNCTION__));
6336	assert((!ZeroN10 \|\| !ZeroN11) && "Both inputs zero!")(((!ZeroN10 \|\| !ZeroN11) && "Both inputs zero!") ? static_cast <void> (0) : __assert_fail ("(!ZeroN10 \|\| !ZeroN11) && \"Both inputs zero!\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 6336, __PRETTY_FUNCTION__));
6337	const ShuffleVectorSDNode *SV0 = cast<ShuffleVectorSDNode>(N0);
6338	const ShuffleVectorSDNode *SV1 = cast<ShuffleVectorSDNode>(N1);
6339	bool CanFold = true;
6340	int NumElts = VT.getVectorNumElements();
6341	SmallVector<int, 4> Mask(NumElts);
6342
6343	for (int i = 0; i != NumElts; ++i) {
6344	int M0 = SV0->getMaskElt(i);
6345	int M1 = SV1->getMaskElt(i);
6346
6347	// Determine if either index is pointing to a zero vector.
6348	bool M0Zero = M0 < 0 \|\| (ZeroN00 == (M0 < NumElts));
6349	bool M1Zero = M1 < 0 \|\| (ZeroN10 == (M1 < NumElts));
6350
6351	// If one element is zero and the otherside is undef, keep undef.
6352	// This also handles the case that both are undef.
6353	if ((M0Zero && M1 < 0) \|\| (M1Zero && M0 < 0)) {
6354	Mask[i] = -1;
6355	continue;
6356	}
6357
6358	// Make sure only one of the elements is zero.
6359	if (M0Zero == M1Zero) {
6360	CanFold = false;
6361	break;
6362	}
6363
6364	assert((M0 >= 0 \|\| M1 >= 0) && "Undef index!")(((M0 >= 0 \|\| M1 >= 0) && "Undef index!") ? static_cast <void> (0) : __assert_fail ("(M0 >= 0 \|\| M1 >= 0) && \"Undef index!\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 6364, __PRETTY_FUNCTION__));
6365
6366	// We have a zero and non-zero element. If the non-zero came from
6367	// SV0 make the index a LHS index. If it came from SV1, make it
6368	// a RHS index. We need to mod by NumElts because we don't care
6369	// which operand it came from in the original shuffles.
6370	Mask[i] = M1Zero ? M0 % NumElts : (M1 % NumElts) + NumElts;
6371	}
6372
6373	if (CanFold) {
6374	SDValue NewLHS = ZeroN00 ? N0.getOperand(1) : N0.getOperand(0);
6375	SDValue NewRHS = ZeroN10 ? N1.getOperand(1) : N1.getOperand(0);
6376
6377	SDValue LegalShuffle =
6378	TLI.buildLegalVectorShuffle(VT, SDLoc(N), NewLHS, NewRHS,
6379	Mask, DAG);
6380	if (LegalShuffle)
6381	return LegalShuffle;
6382	}
6383	}
6384	}
6385	}
6386
6387	// fold (or c1, c2) -> c1\|c2
6388	ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
6389	if (SDValue C = DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N), VT, {N0, N1}))
6390	return C;
6391
6392	// canonicalize constant to RHS
6393	if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
6394	!DAG.isConstantIntBuildVectorOrConstantInt(N1))
6395	return DAG.getNode(ISD::OR, SDLoc(N), VT, N1, N0);
6396
6397	// fold (or x, 0) -> x
6398	if (isNullConstant(N1))
6399	return N0;
6400
6401	// fold (or x, -1) -> -1
6402	if (isAllOnesConstant(N1))
6403	return N1;
6404
6405	if (SDValue NewSel = foldBinOpIntoSelect(N))
6406	return NewSel;
6407
6408	// fold (or x, c) -> c iff (x & ~c) == 0
6409	if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
6410	return N1;
6411
6412	if (SDValue Combined = visitORLike(N0, N1, N))
6413	return Combined;
6414
6415	if (SDValue Combined = combineCarryDiamond(*this, DAG, TLI, N0, N1, N))
6416	return Combined;
6417
6418	// Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16)
6419	if (SDValue BSwap = MatchBSwapHWord(N, N0, N1))
6420	return BSwap;
6421	if (SDValue BSwap = MatchBSwapHWordLow(N, N0, N1))
6422	return BSwap;
6423
6424	// reassociate or
6425	if (SDValue ROR = reassociateOps(ISD::OR, SDLoc(N), N0, N1, N->getFlags()))
6426	return ROR;
6427
6428	// Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1\|c2)
6429	// iff (c1 & c2) != 0 or c1/c2 are undef.
6430	auto MatchIntersect = [](ConstantSDNode C1, ConstantSDNode C2) {
6431	return !C1 \|\| !C2 \|\| C1->getAPIntValue().intersects(C2->getAPIntValue());
6432	};
6433	if (N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
6434	ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchIntersect, true)) {
6435	if (SDValue COR = DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N1), VT,
6436	{N1, N0.getOperand(1)})) {
6437	SDValue IOR = DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1);
6438	AddToWorklist(IOR.getNode());
6439	return DAG.getNode(ISD::AND, SDLoc(N), VT, COR, IOR);
6440	}
6441	}
6442
6443	if (SDValue Combined = visitORCommutative(DAG, N0, N1, N))
6444	return Combined;
6445	if (SDValue Combined = visitORCommutative(DAG, N1, N0, N))
6446	return Combined;
6447
6448	// Simplify: (or (op x...), (op y...)) -> (op (or x, y))
6449	if (N0.getOpcode() == N1.getOpcode())
6450	if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
6451	return V;
6452
6453	// See if this is some rotate idiom.
6454	if (SDValue Rot = MatchRotate(N0, N1, SDLoc(N)))
6455	return Rot;
6456
6457	if (SDValue Load = MatchLoadCombine(N))
6458	return Load;
6459
6460	// Simplify the operands using demanded-bits information.
6461	if (SimplifyDemandedBits(SDValue(N, 0)))
6462	return SDValue(N, 0);
6463
6464	// If OR can be rewritten into ADD, try combines based on ADD.
6465	if ((!LegalOperations \|\| TLI.isOperationLegal(ISD::ADD, VT)) &&
6466	DAG.haveNoCommonBitsSet(N0, N1))
6467	if (SDValue Combined = visitADDLike(N))
6468	return Combined;
6469
6470	return SDValue();
6471	}
6472
6473	static SDValue stripConstantMask(SelectionDAG &DAG, SDValue Op, SDValue &Mask) {
6474	if (Op.getOpcode() == ISD::AND &&
6475	DAG.isConstantIntBuildVectorOrConstantInt(Op.getOperand(1))) {
6476	Mask = Op.getOperand(1);
6477	return Op.getOperand(0);
6478	}
6479	return Op;
6480	}
6481
6482	/// Match "(X shl/srl V1) & V2" where V2 may not be present.
6483	static bool matchRotateHalf(SelectionDAG &DAG, SDValue Op, SDValue &Shift,
6484	SDValue &Mask) {
6485	Op = stripConstantMask(DAG, Op, Mask);
6486	if (Op.getOpcode() == ISD::SRL \|\| Op.getOpcode() == ISD::SHL) {
6487	Shift = Op;
6488	return true;
6489	}
6490	return false;
6491	}
6492
6493	/// Helper function for visitOR to extract the needed side of a rotate idiom
6494	/// from a shl/srl/mul/udiv. This is meant to handle cases where
6495	/// InstCombine merged some outside op with one of the shifts from
6496	/// the rotate pattern.
6497	/// \returns An empty \c SDValue if the needed shift couldn't be extracted.
6498	/// Otherwise, returns an expansion of \p ExtractFrom based on the following
6499	/// patterns:
6500	///
6501	/// (or (add v v) (shrl v bitwidth-1)):
6502	/// expands (add v v) -> (shl v 1)
6503	///
6504	/// (or (mul v c0) (shrl (mul v c1) c2)):
6505	/// expands (mul v c0) -> (shl (mul v c1) c3)
6506	///
6507	/// (or (udiv v c0) (shl (udiv v c1) c2)):
6508	/// expands (udiv v c0) -> (shrl (udiv v c1) c3)
6509	///
6510	/// (or (shl v c0) (shrl (shl v c1) c2)):
6511	/// expands (shl v c0) -> (shl (shl v c1) c3)
6512	///
6513	/// (or (shrl v c0) (shl (shrl v c1) c2)):
6514	/// expands (shrl v c0) -> (shrl (shrl v c1) c3)
6515	///
6516	/// Such that in all cases, c3+c2==bitwidth(op v c1).
6517	static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift,
6518	SDValue ExtractFrom, SDValue &Mask,
6519	const SDLoc &DL) {
6520	assert(OppShift && ExtractFrom && "Empty SDValue")((OppShift && ExtractFrom && "Empty SDValue") ? static_cast<void> (0) : __assert_fail ("OppShift && ExtractFrom && \"Empty SDValue\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 6520, __PRETTY_FUNCTION__));
6521	assert((((OppShift.getOpcode() == ISD::SHL \|\| OppShift.getOpcode() == ISD::SRL) && "Existing shift must be valid as a rotate half" ) ? static_cast<void> (0) : __assert_fail ("(OppShift.getOpcode() == ISD::SHL \|\| OppShift.getOpcode() == ISD::SRL) && \"Existing shift must be valid as a rotate half\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 6523, __PRETTY_FUNCTION__))
6522	(OppShift.getOpcode() == ISD::SHL \|\| OppShift.getOpcode() == ISD::SRL) &&(((OppShift.getOpcode() == ISD::SHL \|\| OppShift.getOpcode() == ISD::SRL) && "Existing shift must be valid as a rotate half" ) ? static_cast<void> (0) : __assert_fail ("(OppShift.getOpcode() == ISD::SHL \|\| OppShift.getOpcode() == ISD::SRL) && \"Existing shift must be valid as a rotate half\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 6523, __PRETTY_FUNCTION__))
6523	"Existing shift must be valid as a rotate half")(((OppShift.getOpcode() == ISD::SHL \|\| OppShift.getOpcode() == ISD::SRL) && "Existing shift must be valid as a rotate half" ) ? static_cast<void> (0) : __assert_fail ("(OppShift.getOpcode() == ISD::SHL \|\| OppShift.getOpcode() == ISD::SRL) && \"Existing shift must be valid as a rotate half\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 6523, __PRETTY_FUNCTION__));
6524
6525	ExtractFrom = stripConstantMask(DAG, ExtractFrom, Mask);
6526
6527	// Value and Type of the shift.
6528	SDValue OppShiftLHS = OppShift.getOperand(0);
6529	EVT ShiftedVT = OppShiftLHS.getValueType();
6530
6531	// Amount of the existing shift.
6532	ConstantSDNode *OppShiftCst = isConstOrConstSplat(OppShift.getOperand(1));
6533
6534	// (add v v) -> (shl v 1)
6535	// TODO: Should this be a general DAG canonicalization?
6536	if (OppShift.getOpcode() == ISD::SRL && OppShiftCst &&
6537	ExtractFrom.getOpcode() == ISD::ADD &&
6538	ExtractFrom.getOperand(0) == ExtractFrom.getOperand(1) &&
6539	ExtractFrom.getOperand(0) == OppShiftLHS &&
6540	OppShiftCst->getAPIntValue() == ShiftedVT.getScalarSizeInBits() - 1)
6541	return DAG.getNode(ISD::SHL, DL, ShiftedVT, OppShiftLHS,
6542	DAG.getShiftAmountConstant(1, ShiftedVT, DL));
6543
6544	// Preconditions:
6545	// (or (op0 v c0) (shiftl/r (op0 v c1) c2))
6546	//
6547	// Find opcode of the needed shift to be extracted from (op0 v c0).
6548	unsigned Opcode = ISD::DELETED_NODE;
6549	bool IsMulOrDiv = false;
6550	// Set Opcode and IsMulOrDiv if the extract opcode matches the needed shift
6551	// opcode or its arithmetic (mul or udiv) variant.
6552	auto SelectOpcode = [&](unsigned NeededShift, unsigned MulOrDivVariant) {
6553	IsMulOrDiv = ExtractFrom.getOpcode() == MulOrDivVariant;
6554	if (!IsMulOrDiv && ExtractFrom.getOpcode() != NeededShift)
6555	return false;
6556	Opcode = NeededShift;
6557	return true;
6558	};
6559	// op0 must be either the needed shift opcode or the mul/udiv equivalent
6560	// that the needed shift can be extracted from.
6561	if ((OppShift.getOpcode() != ISD::SRL \|\| !SelectOpcode(ISD::SHL, ISD::MUL)) &&
6562	(OppShift.getOpcode() != ISD::SHL \|\| !SelectOpcode(ISD::SRL, ISD::UDIV)))
6563	return SDValue();
6564
6565	// op0 must be the same opcode on both sides, have the same LHS argument,
6566	// and produce the same value type.
6567	if (OppShiftLHS.getOpcode() != ExtractFrom.getOpcode() \|\|
6568	OppShiftLHS.getOperand(0) != ExtractFrom.getOperand(0) \|\|
6569	ShiftedVT != ExtractFrom.getValueType())
6570	return SDValue();
6571
6572	// Constant mul/udiv/shift amount from the RHS of the shift's LHS op.
6573	ConstantSDNode *OppLHSCst = isConstOrConstSplat(OppShiftLHS.getOperand(1));
6574	// Constant mul/udiv/shift amount from the RHS of the ExtractFrom op.
6575	ConstantSDNode *ExtractFromCst =
6576	isConstOrConstSplat(ExtractFrom.getOperand(1));
6577	// TODO: We should be able to handle non-uniform constant vectors for these values
6578	// Check that we have constant values.
6579	if (!OppShiftCst \|\| !OppShiftCst->getAPIntValue() \|\|
6580	!OppLHSCst \|\| !OppLHSCst->getAPIntValue() \|\|
6581	!ExtractFromCst \|\| !ExtractFromCst->getAPIntValue())
6582	return SDValue();
6583
6584	// Compute the shift amount we need to extract to complete the rotate.
6585	const unsigned VTWidth = ShiftedVT.getScalarSizeInBits();
6586	if (OppShiftCst->getAPIntValue().ugt(VTWidth))
6587	return SDValue();
6588	APInt NeededShiftAmt = VTWidth - OppShiftCst->getAPIntValue();
6589	// Normalize the bitwidth of the two mul/udiv/shift constant operands.
6590	APInt ExtractFromAmt = ExtractFromCst->getAPIntValue();
6591	APInt OppLHSAmt = OppLHSCst->getAPIntValue();
6592	zeroExtendToMatch(ExtractFromAmt, OppLHSAmt);
6593
6594	// Now try extract the needed shift from the ExtractFrom op and see if the
6595	// result matches up with the existing shift's LHS op.
6596	if (IsMulOrDiv) {
6597	// Op to extract from is a mul or udiv by a constant.
6598	// Check:
6599	// c2 / (1 << (bitwidth(op0 v c0) - c1)) == c0
6600	// c2 % (1 << (bitwidth(op0 v c0) - c1)) == 0
6601	const APInt ExtractDiv = APInt::getOneBitSet(ExtractFromAmt.getBitWidth(),
6602	NeededShiftAmt.getZExtValue());
6603	APInt ResultAmt;
6604	APInt Rem;
6605	APInt::udivrem(ExtractFromAmt, ExtractDiv, ResultAmt, Rem);
6606	if (Rem != 0 \|\| ResultAmt != OppLHSAmt)
6607	return SDValue();
6608	} else {
6609	// Op to extract from is a shift by a constant.
6610	// Check:
6611	// c2 - (bitwidth(op0 v c0) - c1) == c0
6612	if (OppLHSAmt != ExtractFromAmt - NeededShiftAmt.zextOrTrunc(
6613	ExtractFromAmt.getBitWidth()))
6614	return SDValue();
6615	}
6616
6617	// Return the expanded shift op that should allow a rotate to be formed.
6618	EVT ShiftVT = OppShift.getOperand(1).getValueType();
6619	EVT ResVT = ExtractFrom.getValueType();
6620	SDValue NewShiftNode = DAG.getConstant(NeededShiftAmt, DL, ShiftVT);
6621	return DAG.getNode(Opcode, DL, ResVT, OppShiftLHS, NewShiftNode);
6622	}
6623
6624	// Return true if we can prove that, whenever Neg and Pos are both in the
6625	// range [0, EltSize), Neg == (Pos == 0 ? 0 : EltSize - Pos). This means that
6626	// for two opposing shifts shift1 and shift2 and a value X with OpBits bits:
6627	//
6628	// (or (shift1 X, Neg), (shift2 X, Pos))
6629	//
6630	// reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate
6631	// in direction shift1 by Neg. The range [0, EltSize) means that we only need
6632	// to consider shift amounts with defined behavior.
6633	//
6634	// The IsRotate flag should be set when the LHS of both shifts is the same.
6635	// Otherwise if matching a general funnel shift, it should be clear.
6636	static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize,
6637	SelectionDAG &DAG, bool IsRotate) {
6638	// If EltSize is a power of 2 then:
6639	//
6640	// (a) (Pos == 0 ? 0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1)
6641	// (b) Neg == Neg & (EltSize - 1) whenever Neg is in [0, EltSize).
6642	//
6643	// So if EltSize is a power of 2 and Neg is (and Neg', EltSize-1), we check
6644	// for the stronger condition:
6645	//
6646	// Neg & (EltSize - 1) == (EltSize - Pos) & (EltSize - 1) [A]
6647	//
6648	// for all Neg and Pos. Since Neg & (EltSize - 1) == Neg' & (EltSize - 1)
6649	// we can just replace Neg with Neg' for the rest of the function.
6650	//
6651	// In other cases we check for the even stronger condition:
6652	//
6653	// Neg == EltSize - Pos [B]
6654	//
6655	// for all Neg and Pos. Note that the (or ...) then invokes undefined
6656	// behavior if Pos == 0 (and consequently Neg == EltSize).
6657	//
6658	// We could actually use [A] whenever EltSize is a power of 2, but the
6659	// only extra cases that it would match are those uninteresting ones
6660	// where Neg and Pos are never in range at the same time. E.g. for
6661	// EltSize == 32, using [A] would allow a Neg of the form (sub 64, Pos)
6662	// as well as (sub 32, Pos), but:
6663	//
6664	// (or (shift1 X, (sub 64, Pos)), (shift2 X, Pos))
6665	//
6666	// always invokes undefined behavior for 32-bit X.
6667	//
6668	// Below, Mask == EltSize - 1 when using [A] and is all-ones otherwise.
6669	//
6670	// NOTE: We can only do this when matching an AND and not a general
6671	// funnel shift.
6672	unsigned MaskLoBits = 0;
6673	if (IsRotate && Neg.getOpcode() == ISD::AND && isPowerOf2_64(EltSize)) {
6674	if (ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(1))) {
6675	KnownBits Known = DAG.computeKnownBits(Neg.getOperand(0));
6676	unsigned Bits = Log2_64(EltSize);
6677	if (NegC->getAPIntValue().getActiveBits() <= Bits &&
6678	((NegC->getAPIntValue() \| Known.Zero).countTrailingOnes() >= Bits)) {
6679	Neg = Neg.getOperand(0);
6680	MaskLoBits = Bits;
6681	}
6682	}
6683	}
6684
6685	// Check whether Neg has the form (sub NegC, NegOp1) for some NegC and NegOp1.
6686	if (Neg.getOpcode() != ISD::SUB)
6687	return false;
6688	ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(0));
6689	if (!NegC)
6690	return false;
6691	SDValue NegOp1 = Neg.getOperand(1);
6692
6693	// On the RHS of [A], if Pos is Pos' & (EltSize - 1), just replace Pos with
6694	// Pos'. The truncation is redundant for the purpose of the equality.
6695	if (MaskLoBits && Pos.getOpcode() == ISD::AND) {
6696	if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1))) {
6697	KnownBits Known = DAG.computeKnownBits(Pos.getOperand(0));
6698	if (PosC->getAPIntValue().getActiveBits() <= MaskLoBits &&
6699	((PosC->getAPIntValue() \| Known.Zero).countTrailingOnes() >=
6700	MaskLoBits))
6701	Pos = Pos.getOperand(0);
6702	}
6703	}
6704
6705	// The condition we need is now:
6706	//
6707	// (NegC - NegOp1) & Mask == (EltSize - Pos) & Mask
6708	//
6709	// If NegOp1 == Pos then we need:
6710	//
6711	// EltSize & Mask == NegC & Mask
6712	//
6713	// (because "x & Mask" is a truncation and distributes through subtraction).
6714	//
6715	// We also need to account for a potential truncation of NegOp1 if the amount
6716	// has already been legalized to a shift amount type.
6717	APInt Width;
6718	if ((Pos == NegOp1) \|\|
6719	(NegOp1.getOpcode() == ISD::TRUNCATE && Pos == NegOp1.getOperand(0)))
6720	Width = NegC->getAPIntValue();
6721
6722	// Check for cases where Pos has the form (add NegOp1, PosC) for some PosC.
6723	// Then the condition we want to prove becomes:
6724	//
6725	// (NegC - NegOp1) & Mask == (EltSize - (NegOp1 + PosC)) & Mask
6726	//
6727	// which, again because "x & Mask" is a truncation, becomes:
6728	//
6729	// NegC & Mask == (EltSize - PosC) & Mask
6730	// EltSize & Mask == (NegC + PosC) & Mask
6731	else if (Pos.getOpcode() == ISD::ADD && Pos.getOperand(0) == NegOp1) {
6732	if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1)))
6733	Width = PosC->getAPIntValue() + NegC->getAPIntValue();
6734	else
6735	return false;
6736	} else
6737	return false;
6738
6739	// Now we just need to check that EltSize & Mask == Width & Mask.
6740	if (MaskLoBits)
6741	// EltSize & Mask is 0 since Mask is EltSize - 1.
6742	return Width.getLoBits(MaskLoBits) == 0;
6743	return Width == EltSize;
6744	}
6745
6746	// A subroutine of MatchRotate used once we have found an OR of two opposite
6747	// shifts of Shifted. If Neg == <operand size> - Pos then the OR reduces
6748	// to both (PosOpcode Shifted, Pos) and (NegOpcode Shifted, Neg), with the
6749	// former being preferred if supported. InnerPos and InnerNeg are Pos and
6750	// Neg with outer conversions stripped away.
6751	SDValue DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
6752	SDValue Neg, SDValue InnerPos,
6753	SDValue InnerNeg, unsigned PosOpcode,
6754	unsigned NegOpcode, const SDLoc &DL) {
6755	// fold (or (shl x, (*ext y)),
6756	// (srl x, (*ext (sub 32, y)))) ->
6757	// (rotl x, y) or (rotr x, (sub 32, y))
6758	//
6759	// fold (or (shl x, (*ext (sub 32, y))),
6760	// (srl x, (*ext y))) ->
6761	// (rotr x, y) or (rotl x, (sub 32, y))
6762	EVT VT = Shifted.getValueType();
6763	if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits(), DAG,
6764	/IsRotate/ true)) {
6765	bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT);
6766	return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted,
6767	HasPos ? Pos : Neg);
6768	}
6769
6770	return SDValue();
6771	}
6772
6773	// A subroutine of MatchRotate used once we have found an OR of two opposite
6774	// shifts of N0 + N1. If Neg == <operand size> - Pos then the OR reduces
6775	// to both (PosOpcode N0, N1, Pos) and (NegOpcode N0, N1, Neg), with the
6776	// former being preferred if supported. InnerPos and InnerNeg are Pos and
6777	// Neg with outer conversions stripped away.
6778	// TODO: Merge with MatchRotatePosNeg.
6779	SDValue DAGCombiner::MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos,
6780	SDValue Neg, SDValue InnerPos,
6781	SDValue InnerNeg, unsigned PosOpcode,
6782	unsigned NegOpcode, const SDLoc &DL) {
6783	EVT VT = N0.getValueType();
6784	unsigned EltBits = VT.getScalarSizeInBits();
6785
6786	// fold (or (shl x0, (*ext y)),
6787	// (srl x1, (*ext (sub 32, y)))) ->
6788	// (fshl x0, x1, y) or (fshr x0, x1, (sub 32, y))
6789	//
6790	// fold (or (shl x0, (*ext (sub 32, y))),
6791	// (srl x1, (*ext y))) ->
6792	// (fshr x0, x1, y) or (fshl x0, x1, (sub 32, y))
6793	if (matchRotateSub(InnerPos, InnerNeg, EltBits, DAG, /IsRotate/ N0 == N1)) {
6794	bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT);
6795	return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, N0, N1,
6796	HasPos ? Pos : Neg);
6797	}
6798
6799	// Matching the shift+xor cases, we can't easily use the xor'd shift amount
6800	// so for now just use the PosOpcode case if its legal.
6801	// TODO: When can we use the NegOpcode case?
6802	if (PosOpcode == ISD::FSHL && isPowerOf2_32(EltBits)) {
6803	auto IsBinOpImm = [](SDValue Op, unsigned BinOpc, unsigned Imm) {
6804	if (Op.getOpcode() != BinOpc)
6805	return false;
6806	ConstantSDNode *Cst = isConstOrConstSplat(Op.getOperand(1));
6807	return Cst && (Cst->getAPIntValue() == Imm);
6808	};
6809
6810	// fold (or (shl x0, y), (srl (srl x1, 1), (xor y, 31)))
6811	// -> (fshl x0, x1, y)
6812	if (IsBinOpImm(N1, ISD::SRL, 1) &&
6813	IsBinOpImm(InnerNeg, ISD::XOR, EltBits - 1) &&
6814	InnerPos == InnerNeg.getOperand(0) &&
6815	TLI.isOperationLegalOrCustom(ISD::FSHL, VT)) {
6816	return DAG.getNode(ISD::FSHL, DL, VT, N0, N1.getOperand(0), Pos);
6817	}
6818
6819	// fold (or (shl (shl x0, 1), (xor y, 31)), (srl x1, y))
6820	// -> (fshr x0, x1, y)
6821	if (IsBinOpImm(N0, ISD::SHL, 1) &&
6822	IsBinOpImm(InnerPos, ISD::XOR, EltBits - 1) &&
6823	InnerNeg == InnerPos.getOperand(0) &&
6824	TLI.isOperationLegalOrCustom(ISD::FSHR, VT)) {
6825	return DAG.getNode(ISD::FSHR, DL, VT, N0.getOperand(0), N1, Neg);
6826	}
6827
6828	// fold (or (shl (add x0, x0), (xor y, 31)), (srl x1, y))
6829	// -> (fshr x0, x1, y)
6830	// TODO: Should add(x,x) -> shl(x,1) be a general DAG canonicalization?
6831	if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N0.getOperand(1) &&
6832	IsBinOpImm(InnerPos, ISD::XOR, EltBits - 1) &&
6833	InnerNeg == InnerPos.getOperand(0) &&
6834	TLI.isOperationLegalOrCustom(ISD::FSHR, VT)) {
6835	return DAG.getNode(ISD::FSHR, DL, VT, N0.getOperand(0), N1, Neg);
6836	}
6837	}
6838
6839	return SDValue();
6840	}
6841
6842	// MatchRotate - Handle an 'or' of two operands. If this is one of the many
6843	// idioms for rotate, and if the target supports rotation instructions, generate
6844	// a rot[lr]. This also matches funnel shift patterns, similar to rotation but
6845	// with different shifted sources.
6846	SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
6847	// Must be a legal type. Expanded 'n promoted things won't work with rotates.
6848	EVT VT = LHS.getValueType();
6849	if (!TLI.isTypeLegal(VT))
6850	return SDValue();
6851
6852	// The target must have at least one rotate/funnel flavor.
6853	bool HasROTL = hasOperation(ISD::ROTL, VT);
6854	bool HasROTR = hasOperation(ISD::ROTR, VT);
6855	bool HasFSHL = hasOperation(ISD::FSHL, VT);
6856	bool HasFSHR = hasOperation(ISD::FSHR, VT);
6857	if (!HasROTL && !HasROTR && !HasFSHL && !HasFSHR)
6858	return SDValue();
6859
6860	// Check for truncated rotate.
6861	if (LHS.getOpcode() == ISD::TRUNCATE && RHS.getOpcode() == ISD::TRUNCATE &&
6862	LHS.getOperand(0).getValueType() == RHS.getOperand(0).getValueType()) {
6863	assert(LHS.getValueType() == RHS.getValueType())((LHS.getValueType() == RHS.getValueType()) ? static_cast< void> (0) : __assert_fail ("LHS.getValueType() == RHS.getValueType()" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 6863, __PRETTY_FUNCTION__));
6864	if (SDValue Rot = MatchRotate(LHS.getOperand(0), RHS.getOperand(0), DL)) {
6865	return DAG.getNode(ISD::TRUNCATE, SDLoc(LHS), LHS.getValueType(), Rot);
6866	}
6867	}
6868
6869	// Match "(X shl/srl V1) & V2" where V2 may not be present.
6870	SDValue LHSShift; // The shift.
6871	SDValue LHSMask; // AND value if any.
6872	matchRotateHalf(DAG, LHS, LHSShift, LHSMask);
6873
6874	SDValue RHSShift; // The shift.
6875	SDValue RHSMask; // AND value if any.
6876	matchRotateHalf(DAG, RHS, RHSShift, RHSMask);
6877
6878	// If neither side matched a rotate half, bail
6879	if (!LHSShift && !RHSShift)
6880	return SDValue();
6881
6882	// InstCombine may have combined a constant shl, srl, mul, or udiv with one
6883	// side of the rotate, so try to handle that here. In all cases we need to
6884	// pass the matched shift from the opposite side to compute the opcode and
6885	// needed shift amount to extract. We still want to do this if both sides
6886	// matched a rotate half because one half may be a potential overshift that
6887	// can be broken down (ie if InstCombine merged two shl or srl ops into a
6888	// single one).
6889
6890	// Have LHS side of the rotate, try to extract the needed shift from the RHS.
6891	if (LHSShift)
6892	if (SDValue NewRHSShift =
6893	extractShiftForRotate(DAG, LHSShift, RHS, RHSMask, DL))
6894	RHSShift = NewRHSShift;
6895	// Have RHS side of the rotate, try to extract the needed shift from the LHS.
6896	if (RHSShift)
6897	if (SDValue NewLHSShift =
6898	extractShiftForRotate(DAG, RHSShift, LHS, LHSMask, DL))
6899	LHSShift = NewLHSShift;
6900
6901	// If a side is still missing, nothing else we can do.
6902	if (!RHSShift \|\| !LHSShift)
6903	return SDValue();
6904
6905	// At this point we've matched or extracted a shift op on each side.
6906
6907	if (LHSShift.getOpcode() == RHSShift.getOpcode())
6908	return SDValue(); // Shifts must disagree.
6909
6910	bool IsRotate = LHSShift.getOperand(0) == RHSShift.getOperand(0);
6911	if (!IsRotate && !(HasFSHL \|\| HasFSHR))
6912	return SDValue(); // Requires funnel shift support.
6913
6914	// Canonicalize shl to left side in a shl/srl pair.
6915	if (RHSShift.getOpcode() == ISD::SHL) {
6916	std::swap(LHS, RHS);
6917	std::swap(LHSShift, RHSShift);
6918	std::swap(LHSMask, RHSMask);
6919	}
6920
6921	unsigned EltSizeInBits = VT.getScalarSizeInBits();
6922	SDValue LHSShiftArg = LHSShift.getOperand(0);
6923	SDValue LHSShiftAmt = LHSShift.getOperand(1);
6924	SDValue RHSShiftArg = RHSShift.getOperand(0);
6925	SDValue RHSShiftAmt = RHSShift.getOperand(1);
6926
6927	// fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
6928	// fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2)
6929	// fold (or (shl x, C1), (srl y, C2)) -> (fshl x, y, C1)
6930	// fold (or (shl x, C1), (srl y, C2)) -> (fshr x, y, C2)
6931	// iff C1+C2 == EltSizeInBits
6932	auto MatchRotateSum = [EltSizeInBits](ConstantSDNode *LHS,
6933	ConstantSDNode *RHS) {
6934	return (LHS->getAPIntValue() + RHS->getAPIntValue()) == EltSizeInBits;
6935	};
6936	if (ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {
6937	SDValue Res;
6938	if (IsRotate && (HasROTL \|\| HasROTR))
6939	Res = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg,
6940	HasROTL ? LHSShiftAmt : RHSShiftAmt);
6941	else
6942	Res = DAG.getNode(HasFSHL ? ISD::FSHL : ISD::FSHR, DL, VT, LHSShiftArg,
6943	RHSShiftArg, HasFSHL ? LHSShiftAmt : RHSShiftAmt);
6944
6945	// If there is an AND of either shifted operand, apply it to the result.
6946	if (LHSMask.getNode() \|\| RHSMask.getNode()) {
6947	SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
6948	SDValue Mask = AllOnes;
6949
6950	if (LHSMask.getNode()) {
6951	SDValue RHSBits = DAG.getNode(ISD::SRL, DL, VT, AllOnes, RHSShiftAmt);
6952	Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
6953	DAG.getNode(ISD::OR, DL, VT, LHSMask, RHSBits));
6954	}
6955	if (RHSMask.getNode()) {
6956	SDValue LHSBits = DAG.getNode(ISD::SHL, DL, VT, AllOnes, LHSShiftAmt);
6957	Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
6958	DAG.getNode(ISD::OR, DL, VT, RHSMask, LHSBits));
6959	}
6960
6961	Res = DAG.getNode(ISD::AND, DL, VT, Res, Mask);
6962	}
6963
6964	return Res;
6965	}
6966
6967	// If there is a mask here, and we have a variable shift, we can't be sure
6968	// that we're masking out the right stuff.
6969	if (LHSMask.getNode() \|\| RHSMask.getNode())
6970	return SDValue();
6971
6972	// If the shift amount is sign/zext/any-extended just peel it off.
6973	SDValue LExtOp0 = LHSShiftAmt;
6974	SDValue RExtOp0 = RHSShiftAmt;
6975	if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND \|\|
6976	LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND \|\|
6977	LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND \|\|
6978	LHSShiftAmt.getOpcode() == ISD::TRUNCATE) &&
6979	(RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND \|\|
6980	RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND \|\|
6981	RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND \|\|
6982	RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
6983	LExtOp0 = LHSShiftAmt.getOperand(0);
6984	RExtOp0 = RHSShiftAmt.getOperand(0);
6985	}
6986
6987	if (IsRotate && (HasROTL \|\| HasROTR)) {
6988	SDValue TryL =
6989	MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt, LExtOp0,
6990	RExtOp0, ISD::ROTL, ISD::ROTR, DL);
6991	if (TryL)
6992	return TryL;
6993
6994	SDValue TryR =
6995	MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt, RExtOp0,
6996	LExtOp0, ISD::ROTR, ISD::ROTL, DL);
6997	if (TryR)
6998	return TryR;
6999	}
7000
7001	SDValue TryL =
7002	MatchFunnelPosNeg(LHSShiftArg, RHSShiftArg, LHSShiftAmt, RHSShiftAmt,
7003	LExtOp0, RExtOp0, ISD::FSHL, ISD::FSHR, DL);
7004	if (TryL)
7005	return TryL;
7006
7007	SDValue TryR =
7008	MatchFunnelPosNeg(LHSShiftArg, RHSShiftArg, RHSShiftAmt, LHSShiftAmt,
7009	RExtOp0, LExtOp0, ISD::FSHR, ISD::FSHL, DL);
7010	if (TryR)
7011	return TryR;
7012
7013	return SDValue();
7014	}
7015
7016	namespace {
7017
7018	/// Represents known origin of an individual byte in load combine pattern. The
7019	/// value of the byte is either constant zero or comes from memory.
7020	struct ByteProvider {
7021	// For constant zero providers Load is set to nullptr. For memory providers
7022	// Load represents the node which loads the byte from memory.
7023	// ByteOffset is the offset of the byte in the value produced by the load.
7024	LoadSDNode *Load = nullptr;
7025	unsigned ByteOffset = 0;
7026
7027	ByteProvider() = default;
7028
7029	static ByteProvider getMemory(LoadSDNode *Load, unsigned ByteOffset) {
7030	return ByteProvider(Load, ByteOffset);
7031	}
7032
7033	static ByteProvider getConstantZero() { return ByteProvider(nullptr, 0); }
7034
7035	bool isConstantZero() const { return !Load; }
7036	bool isMemory() const { return Load; }
7037
7038	bool operator==(const ByteProvider &Other) const {
7039	return Other.Load == Load && Other.ByteOffset == ByteOffset;
7040	}
7041
7042	private:
7043	ByteProvider(LoadSDNode *Load, unsigned ByteOffset)
7044	: Load(Load), ByteOffset(ByteOffset) {}
7045	};
7046
7047	} // end anonymous namespace
7048
7049	/// Recursively traverses the expression calculating the origin of the requested
7050	/// byte of the given value. Returns None if the provider can't be calculated.
7051	///
7052	/// For all the values except the root of the expression verifies that the value
7053	/// has exactly one use and if it's not true return None. This way if the origin
7054	/// of the byte is returned it's guaranteed that the values which contribute to
7055	/// the byte are not used outside of this expression.
7056	///
7057	/// Because the parts of the expression are not allowed to have more than one
7058	/// use this function iterates over trees, not DAGs. So it never visits the same
7059	/// node more than once.
7060	static const Optional<ByteProvider>
7061	calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth,
7062	bool Root = false) {
7063	// Typical i64 by i8 pattern requires recursion up to 8 calls depth
7064	if (Depth == 10)
7065	return None;
7066
7067	if (!Root && !Op.hasOneUse())
7068	return None;
7069
7070	assert(Op.getValueType().isScalarInteger() && "can't handle other types")((Op.getValueType().isScalarInteger() && "can't handle other types" ) ? static_cast<void> (0) : __assert_fail ("Op.getValueType().isScalarInteger() && \"can't handle other types\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 7070, __PRETTY_FUNCTION__));
7071	unsigned BitWidth = Op.getValueSizeInBits();
7072	if (BitWidth % 8 != 0)
7073	return None;
7074	unsigned ByteWidth = BitWidth / 8;
7075	assert(Index < ByteWidth && "invalid index requested")((Index < ByteWidth && "invalid index requested") ? static_cast<void> (0) : __assert_fail ("Index < ByteWidth && \"invalid index requested\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 7075, __PRETTY_FUNCTION__));
7076	(void) ByteWidth;
7077
7078	switch (Op.getOpcode()) {
7079	case ISD::OR: {
7080	auto LHS = calculateByteProvider(Op->getOperand(0), Index, Depth + 1);
7081	if (!LHS)
7082	return None;
7083	auto RHS = calculateByteProvider(Op->getOperand(1), Index, Depth + 1);
7084	if (!RHS)
7085	return None;
7086
7087	if (LHS->isConstantZero())
7088	return RHS;
7089	if (RHS->isConstantZero())
7090	return LHS;
7091	return None;
7092	}
7093	case ISD::SHL: {
7094	auto ShiftOp = dyn_cast<ConstantSDNode>(Op->getOperand(1));
7095	if (!ShiftOp)
7096	return None;
7097
7098	uint64_t BitShift = ShiftOp->getZExtValue();
7099	if (BitShift % 8 != 0)
7100	return None;
7101	uint64_t ByteShift = BitShift / 8;
7102
7103	return Index < ByteShift
7104	? ByteProvider::getConstantZero()
7105	: calculateByteProvider(Op->getOperand(0), Index - ByteShift,
7106	Depth + 1);
7107	}
7108	case ISD::ANY_EXTEND:
7109	case ISD::SIGN_EXTEND:
7110	case ISD::ZERO_EXTEND: {
7111	SDValue NarrowOp = Op->getOperand(0);
7112	unsigned NarrowBitWidth = NarrowOp.getScalarValueSizeInBits();
7113	if (NarrowBitWidth % 8 != 0)
7114	return None;
7115	uint64_t NarrowByteWidth = NarrowBitWidth / 8;
7116
7117	if (Index >= NarrowByteWidth)
7118	return Op.getOpcode() == ISD::ZERO_EXTEND
7119	? Optional<ByteProvider>(ByteProvider::getConstantZero())
7120	: None;
7121	return calculateByteProvider(NarrowOp, Index, Depth + 1);
7122	}
7123	case ISD::BSWAP:
7124	return calculateByteProvider(Op->getOperand(0), ByteWidth - Index - 1,
7125	Depth + 1);
7126	case ISD::LOAD: {
7127	auto L = cast<LoadSDNode>(Op.getNode());
7128	if (!L->isSimple() \|\| L->isIndexed())
7129	return None;
7130
7131	unsigned NarrowBitWidth = L->getMemoryVT().getSizeInBits();
7132	if (NarrowBitWidth % 8 != 0)
7133	return None;
7134	uint64_t NarrowByteWidth = NarrowBitWidth / 8;
7135
7136	if (Index >= NarrowByteWidth)
7137	return L->getExtensionType() == ISD::ZEXTLOAD
7138	? Optional<ByteProvider>(ByteProvider::getConstantZero())
7139	: None;
7140	return ByteProvider::getMemory(L, Index);
7141	}
7142	}
7143
7144	return None;
7145	}
7146
// Returns the little-endian byte index for position i of a BW-byte value.
// This is the identity mapping; BW is accepted only so that the signature
// parallels bigEndianByteAt.
static unsigned littleEndianByteAt(unsigned BW, unsigned i) {
  (void)BW;
  return i;
}
7150
// Returns the big-endian byte index for position i of a BW-byte value, i.e.
// the index mirrored within the value: BW - i - 1.
static unsigned bigEndianByteAt(unsigned BW, unsigned i) {
  const unsigned Mirrored = BW - i - 1;
  return Mirrored;
}
7154
7155	// Check if the bytes offsets we are looking at match with either big or
7156	// little endian value loaded. Return true for big endian, false for little
7157	// endian, and None if match failed.
7158	static Optional<bool> isBigEndian(const ArrayRef<int64_t> ByteOffsets,
7159	int64_t FirstOffset) {
7160	// The endian can be decided only when it is 2 bytes at least.
7161	unsigned Width = ByteOffsets.size();
7162	if (Width < 2)
7163	return None;
7164
7165	bool BigEndian = true, LittleEndian = true;
7166	for (unsigned i = 0; i < Width; i++) {
7167	int64_t CurrentByteOffset = ByteOffsets[i] - FirstOffset;
7168	LittleEndian &= CurrentByteOffset == littleEndianByteAt(Width, i);
7169	BigEndian &= CurrentByteOffset == bigEndianByteAt(Width, i);
7170	if (!BigEndian && !LittleEndian)
7171	return None;
7172	}
7173
7174	assert((BigEndian != LittleEndian) && "It should be either big endian or"(((BigEndian != LittleEndian) && "It should be either big endian or" "little endian") ? static_cast<void> (0) : __assert_fail ("(BigEndian != LittleEndian) && \"It should be either big endian or\" \"little endian\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 7175, __PRETTY_FUNCTION__))
7175	"little endian")(((BigEndian != LittleEndian) && "It should be either big endian or" "little endian") ? static_cast<void> (0) : __assert_fail ("(BigEndian != LittleEndian) && \"It should be either big endian or\" \"little endian\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 7175, __PRETTY_FUNCTION__));
7176	return BigEndian;
7177	}
7178
7179	static SDValue stripTruncAndExt(SDValue Value) {
7180	switch (Value.getOpcode()) {
7181	case ISD::TRUNCATE:
7182	case ISD::ZERO_EXTEND:
7183	case ISD::SIGN_EXTEND:
7184	case ISD::ANY_EXTEND:
7185	return stripTruncAndExt(Value.getOperand(0));
7186	}
7187	return Value;
7188	}
7189
7190	/// Match a pattern where a wide type scalar value is stored by several narrow
7191	/// stores. Fold it into a single store or a BSWAP and a store if the targets
7192	/// supports it.
7193	///
7194	/// Assuming little endian target:
7195	/// i8 *p = ...
7196	/// i32 val = ...
7197	/// p[0] = (val >> 0) & 0xFF;
7198	/// p[1] = (val >> 8) & 0xFF;
7199	/// p[2] = (val >> 16) & 0xFF;
7200	/// p[3] = (val >> 24) & 0xFF;
7201	/// =>
7202	/// *((i32)p) = val;
7203	///
7204	/// i8 *p = ...
7205	/// i32 val = ...
7206	/// p[0] = (val >> 24) & 0xFF;
7207	/// p[1] = (val >> 16) & 0xFF;
7208	/// p[2] = (val >> 8) & 0xFF;
7209	/// p[3] = (val >> 0) & 0xFF;
7210	/// =>
7211	/// *((i32)p) = BSWAP(val);
7212	SDValue DAGCombiner::mergeTruncStores(StoreSDNode *N) {
7213	// The matching looks for "store (trunc x)" patterns that appear early but are
7214	// likely to be replaced by truncating store nodes during combining.
7215	// TODO: If there is evidence that running this later would help, this
7216	// limitation could be removed. Legality checks may need to be added
7217	// for the created store and optional bswap/rotate.
7218	if (LegalOperations)
7219	return SDValue();
7220
7221	// Collect all the stores in the chain.
7222	SDValue Chain;
7223	SmallVector<StoreSDNode *, 8> Stores;
7224	for (StoreSDNode *Store = N; Store; Store = dyn_cast<StoreSDNode>(Chain)) {
7225	// TODO: Allow unordered atomics when wider type is legal (see D66309)
7226	EVT MemVT = Store->getMemoryVT();
7227	if (!(MemVT == MVT::i8 \|\| MemVT == MVT::i16 \|\| MemVT == MVT::i32) \|\|
7228	!Store->isSimple() \|\| Store->isIndexed())
7229	return SDValue();
7230	Stores.push_back(Store);
7231	Chain = Store->getChain();
7232	}
7233	// There is no reason to continue if we do not have at least a pair of stores.
7234	if (Stores.size() < 2)
7235	return SDValue();
7236
7237	// Handle simple types only.
7238	LLVMContext &Context = *DAG.getContext();
7239	unsigned NumStores = Stores.size();
7240	unsigned NarrowNumBits = N->getMemoryVT().getScalarSizeInBits();
7241	unsigned WideNumBits = NumStores * NarrowNumBits;
7242	EVT WideVT = EVT::getIntegerVT(Context, WideNumBits);
7243	if (WideVT != MVT::i16 && WideVT != MVT::i32 && WideVT != MVT::i64)
7244	return SDValue();
7245
7246	// Check if all bytes of the source value that we are looking at are stored
7247	// to the same base address. Collect offsets from Base address into OffsetMap.
7248	SDValue SourceValue;
7249	SmallVector<int64_t, 8> OffsetMap(NumStores, INT64_MAX(9223372036854775807L));
7250	int64_t FirstOffset = INT64_MAX(9223372036854775807L);
7251	StoreSDNode *FirstStore = nullptr;
7252	Optional<BaseIndexOffset> Base;
7253	for (auto Store : Stores) {
7254	// All the stores store different parts of the CombinedValue. A truncate is
7255	// required to get the partial value.
7256	SDValue Trunc = Store->getValue();
7257	if (Trunc.getOpcode() != ISD::TRUNCATE)
7258	return SDValue();
7259	// Other than the first/last part, a shift operation is required to get the
7260	// offset.
7261	int64_t Offset = 0;
7262	SDValue WideVal = Trunc.getOperand(0);
7263	if ((WideVal.getOpcode() == ISD::SRL \|\| WideVal.getOpcode() == ISD::SRA) &&
7264	isa<ConstantSDNode>(WideVal.getOperand(1))) {
7265	// The shift amount must be a constant multiple of the narrow type.
7266	// It is translated to the offset address in the wide source value "y".
7267	//
7268	// x = srl y, ShiftAmtC
7269	// i8 z = trunc x
7270	// store z, ...
7271	uint64_t ShiftAmtC = WideVal.getConstantOperandVal(1);
7272	if (ShiftAmtC % NarrowNumBits != 0)
7273	return SDValue();
7274
7275	Offset = ShiftAmtC / NarrowNumBits;
7276	WideVal = WideVal.getOperand(0);
7277	}
7278
7279	// Stores must share the same source value with different offsets.
7280	// Truncate and extends should be stripped to get the single source value.
7281	if (!SourceValue)
7282	SourceValue = WideVal;
7283	else if (stripTruncAndExt(SourceValue) != stripTruncAndExt(WideVal))
7284	return SDValue();
7285	else if (SourceValue.getValueType() != WideVT) {
7286	if (WideVal.getValueType() == WideVT \|\|
7287	WideVal.getScalarValueSizeInBits() >
7288	SourceValue.getScalarValueSizeInBits())
7289	SourceValue = WideVal;
7290	// Give up if the source value type is smaller than the store size.
7291	if (SourceValue.getScalarValueSizeInBits() < WideVT.getScalarSizeInBits())
7292	return SDValue();
7293	}
7294
7295	// Stores must share the same base address.
7296	BaseIndexOffset Ptr = BaseIndexOffset::match(Store, DAG);
7297	int64_t ByteOffsetFromBase = 0;
7298	if (!Base)
7299	Base = Ptr;
7300	else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
7301	return SDValue();
7302
7303	// Remember the first store.
7304	if (ByteOffsetFromBase < FirstOffset) {
7305	FirstStore = Store;
7306	FirstOffset = ByteOffsetFromBase;
7307	}
7308	// Map the offset in the store and the offset in the combined value, and
7309	// early return if it has been set before.
7310	if (Offset < 0 \|\| Offset >= NumStores \|\| OffsetMap[Offset] != INT64_MAX(9223372036854775807L))
7311	return SDValue();
7312	OffsetMap[Offset] = ByteOffsetFromBase;
7313	}
7314
7315	assert(FirstOffset != INT64_MAX && "First byte offset must be set")((FirstOffset != (9223372036854775807L) && "First byte offset must be set" ) ? static_cast<void> (0) : __assert_fail ("FirstOffset != INT64_MAX && \"First byte offset must be set\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 7315, __PRETTY_FUNCTION__));
7316	assert(FirstStore && "First store must be set")((FirstStore && "First store must be set") ? static_cast <void> (0) : __assert_fail ("FirstStore && \"First store must be set\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 7316, __PRETTY_FUNCTION__));
7317
7318	// Check that a store of the wide type is both allowed and fast on the target
7319	const DataLayout &Layout = DAG.getDataLayout();
7320	bool Fast = false;
7321	bool Allowed = TLI.allowsMemoryAccess(Context, Layout, WideVT,
7322	*FirstStore->getMemOperand(), &Fast);
7323	if (!Allowed \|\| !Fast)
7324	return SDValue();
7325
7326	// Check if the pieces of the value are going to the expected places in memory
7327	// to merge the stores.
7328	auto checkOffsets = [&](bool MatchLittleEndian) {
7329	if (MatchLittleEndian) {
7330	for (unsigned i = 0; i != NumStores; ++i)
7331	if (OffsetMap[i] != i * (NarrowNumBits / 8) + FirstOffset)
7332	return false;
7333	} else { // MatchBigEndian by reversing loop counter.
7334	for (unsigned i = 0, j = NumStores - 1; i != NumStores; ++i, --j)
7335	if (OffsetMap[j] != i * (NarrowNumBits / 8) + FirstOffset)
7336	return false;
7337	}
7338	return true;
7339	};
7340
7341	// Check if the offsets line up for the native data layout of this target.
7342	bool NeedBswap = false;
7343	bool NeedRotate = false;
7344	if (!checkOffsets(Layout.isLittleEndian())) {
7345	// Special-case: check if byte offsets line up for the opposite endian.
7346	if (NarrowNumBits == 8 && checkOffsets(Layout.isBigEndian()))
7347	NeedBswap = true;
7348	else if (NumStores == 2 && checkOffsets(Layout.isBigEndian()))
7349	NeedRotate = true;
7350	else
7351	return SDValue();
7352	}
7353
7354	SDLoc DL(N);
7355	if (WideVT != SourceValue.getValueType()) {
7356	assert(SourceValue.getValueType().getScalarSizeInBits() > WideNumBits &&((SourceValue.getValueType().getScalarSizeInBits() > WideNumBits && "Unexpected store value to merge") ? static_cast< void> (0) : __assert_fail ("SourceValue.getValueType().getScalarSizeInBits() > WideNumBits && \"Unexpected store value to merge\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 7357, __PRETTY_FUNCTION__))
7357	"Unexpected store value to merge")((SourceValue.getValueType().getScalarSizeInBits() > WideNumBits && "Unexpected store value to merge") ? static_cast< void> (0) : __assert_fail ("SourceValue.getValueType().getScalarSizeInBits() > WideNumBits && \"Unexpected store value to merge\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 7357, __PRETTY_FUNCTION__));
7358	SourceValue = DAG.getNode(ISD::TRUNCATE, DL, WideVT, SourceValue);
7359	}
7360
7361	// Before legalize we can introduce illegal bswaps/rotates which will be later
7362	// converted to an explicit bswap sequence. This way we end up with a single
7363	// store and byte shuffling instead of several stores and byte shuffling.
7364	if (NeedBswap) {
7365	SourceValue = DAG.getNode(ISD::BSWAP, DL, WideVT, SourceValue);
7366	} else if (NeedRotate) {
7367	assert(WideNumBits % 2 == 0 && "Unexpected type for rotate")((WideNumBits % 2 == 0 && "Unexpected type for rotate" ) ? static_cast<void> (0) : __assert_fail ("WideNumBits % 2 == 0 && \"Unexpected type for rotate\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 7367, __PRETTY_FUNCTION__));
7368	SDValue RotAmt = DAG.getConstant(WideNumBits / 2, DL, WideVT);
7369	SourceValue = DAG.getNode(ISD::ROTR, DL, WideVT, SourceValue, RotAmt);
7370	}
7371
7372	SDValue NewStore =
7373	DAG.getStore(Chain, DL, SourceValue, FirstStore->getBasePtr(),
7374	FirstStore->getPointerInfo(), FirstStore->getAlign());
7375
7376	// Rely on other DAG combine rules to remove the other individual stores.
7377	DAG.ReplaceAllUsesWith(N, NewStore.getNode());
7378	return NewStore;
7379	}
7380
7381	/// Match a pattern where a wide type scalar value is loaded by several narrow
7382	/// loads and combined by shifts and ors. Fold it into a single load or a load
7383	/// and a BSWAP if the targets supports it.
7384	///
7385	/// Assuming little endian target:
7386	/// i8 *a = ...
7387	/// i32 val = a[0] \| (a[1] << 8) \| (a[2] << 16) \| (a[3] << 24)
7388	/// =>
7389	/// i32 val = *((i32)a)
7390	///
7391	/// i8 *a = ...
7392	/// i32 val = (a[0] << 24) \| (a[1] << 16) \| (a[2] << 8) \| a[3]
7393	/// =>
7394	/// i32 val = BSWAP(*((i32)a))
7395	///
7396	/// TODO: This rule matches complex patterns with OR node roots and doesn't
7397	/// interact well with the worklist mechanism. When a part of the pattern is
7398	/// updated (e.g. one of the loads) its direct users are put into the worklist,
7399	/// but the root node of the pattern which triggers the load combine is not
7400	/// necessarily a direct user of the changed node. For example, once the address
7401	/// of t28 load is reassociated load combine won't be triggered:
7402	/// t25: i32 = add t4, Constant:i32<2>
7403	/// t26: i64 = sign_extend t25
7404	/// t27: i64 = add t2, t26
7405	/// t28: i8,ch = load<LD1[%tmp9]> t0, t27, undef:i64
7406	/// t29: i32 = zero_extend t28
7407	/// t32: i32 = shl t29, Constant:i8<8>
7408	/// t33: i32 = or t23, t32
7409	/// As a possible fix visitLoad can check if the load can be a part of a load
7410	/// combine pattern and add corresponding OR roots to the worklist.
7411	SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
7412	assert(N->getOpcode() == ISD::OR &&((N->getOpcode() == ISD::OR && "Can only match load combining against OR nodes" ) ? static_cast<void> (0) : __assert_fail ("N->getOpcode() == ISD::OR && \"Can only match load combining against OR nodes\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 7413, __PRETTY_FUNCTION__))
7413	"Can only match load combining against OR nodes")((N->getOpcode() == ISD::OR && "Can only match load combining against OR nodes" ) ? static_cast<void> (0) : __assert_fail ("N->getOpcode() == ISD::OR && \"Can only match load combining against OR nodes\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 7413, __PRETTY_FUNCTION__));
7414
7415	// Handles simple types only
7416	EVT VT = N->getValueType(0);
7417	if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
7418	return SDValue();
7419	unsigned ByteWidth = VT.getSizeInBits() / 8;
7420
7421	bool IsBigEndianTarget = DAG.getDataLayout().isBigEndian();
7422	auto MemoryByteOffset = [&] (ByteProvider P) {
7423	assert(P.isMemory() && "Must be a memory byte provider")((P.isMemory() && "Must be a memory byte provider") ? static_cast<void> (0) : __assert_fail ("P.isMemory() && \"Must be a memory byte provider\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 7423, __PRETTY_FUNCTION__));
7424	unsigned LoadBitWidth = P.Load->getMemoryVT().getSizeInBits();
7425	assert(LoadBitWidth % 8 == 0 &&((LoadBitWidth % 8 == 0 && "can only analyze providers for individual bytes not bit" ) ? static_cast<void> (0) : __assert_fail ("LoadBitWidth % 8 == 0 && \"can only analyze providers for individual bytes not bit\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 7426, __PRETTY_FUNCTION__))
7426	"can only analyze providers for individual bytes not bit")((LoadBitWidth % 8 == 0 && "can only analyze providers for individual bytes not bit" ) ? static_cast<void> (0) : __assert_fail ("LoadBitWidth % 8 == 0 && \"can only analyze providers for individual bytes not bit\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 7426, __PRETTY_FUNCTION__));
7427	unsigned LoadByteWidth = LoadBitWidth / 8;
7428	return IsBigEndianTarget
7429	? bigEndianByteAt(LoadByteWidth, P.ByteOffset)
7430	: littleEndianByteAt(LoadByteWidth, P.ByteOffset);
7431	};
7432
7433	Optional<BaseIndexOffset> Base;
7434	SDValue Chain;
7435
7436	SmallPtrSet<LoadSDNode *, 8> Loads;
7437	Optional<ByteProvider> FirstByteProvider;
7438	int64_t FirstOffset = INT64_MAX(9223372036854775807L);
7439
7440	// Check if all the bytes of the OR we are looking at are loaded from the same
7441	// base address. Collect bytes offsets from Base address in ByteOffsets.
7442	SmallVector<int64_t, 8> ByteOffsets(ByteWidth);
7443	unsigned ZeroExtendedBytes = 0;
7444	for (int i = ByteWidth - 1; i >= 0; --i) {
7445	auto P = calculateByteProvider(SDValue(N, 0), i, 0, /Root=/true);
7446	if (!P)
7447	return SDValue();
7448
7449	if (P->isConstantZero()) {
7450	// It's OK for the N most significant bytes to be 0, we can just
7451	// zero-extend the load.
7452	if (++ZeroExtendedBytes != (ByteWidth - static_cast<unsigned>(i)))
7453	return SDValue();
7454	continue;
7455	}
7456	assert(P->isMemory() && "provenance should either be memory or zero")((P->isMemory() && "provenance should either be memory or zero" ) ? static_cast<void> (0) : __assert_fail ("P->isMemory() && \"provenance should either be memory or zero\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 7456, __PRETTY_FUNCTION__));
7457
7458	LoadSDNode *L = P->Load;
7459	assert(L->hasNUsesOfValue(1, 0) && L->isSimple() &&((L->hasNUsesOfValue(1, 0) && L->isSimple() && !L->isIndexed() && "Must be enforced by calculateByteProvider" ) ? static_cast<void> (0) : __assert_fail ("L->hasNUsesOfValue(1, 0) && L->isSimple() && !L->isIndexed() && \"Must be enforced by calculateByteProvider\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 7461, __PRETTY_FUNCTION__))
7460	!L->isIndexed() &&((L->hasNUsesOfValue(1, 0) && L->isSimple() && !L->isIndexed() && "Must be enforced by calculateByteProvider" ) ? static_cast<void> (0) : __assert_fail ("L->hasNUsesOfValue(1, 0) && L->isSimple() && !L->isIndexed() && \"Must be enforced by calculateByteProvider\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 7461, __PRETTY_FUNCTION__))
7461	"Must be enforced by calculateByteProvider")((L->hasNUsesOfValue(1, 0) && L->isSimple() && !L->isIndexed() && "Must be enforced by calculateByteProvider" ) ? static_cast<void> (0) : __assert_fail ("L->hasNUsesOfValue(1, 0) && L->isSimple() && !L->isIndexed() && \"Must be enforced by calculateByteProvider\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 7461, __PRETTY_FUNCTION__));
7462	assert(L->getOffset().isUndef() && "Unindexed load must have undef offset")((L->getOffset().isUndef() && "Unindexed load must have undef offset" ) ? static_cast<void> (0) : __assert_fail ("L->getOffset().isUndef() && \"Unindexed load must have undef offset\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 7462, __PRETTY_FUNCTION__));
7463
7464	// All loads must share the same chain
7465	SDValue LChain = L->getChain();
7466	if (!Chain)
7467	Chain = LChain;
7468	else if (Chain != LChain)
7469	return SDValue();
7470
7471	// Loads must share the same base address
7472	BaseIndexOffset Ptr = BaseIndexOffset::match(L, DAG);
7473	int64_t ByteOffsetFromBase = 0;
7474	if (!Base)
7475	Base = Ptr;
7476	else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
7477	return SDValue();
7478
7479	// Calculate the offset of the current byte from the base address
7480	ByteOffsetFromBase += MemoryByteOffset(*P);
7481	ByteOffsets[i] = ByteOffsetFromBase;
7482
7483	// Remember the first byte load
7484	if (ByteOffsetFromBase < FirstOffset) {
7485	FirstByteProvider = P;
7486	FirstOffset = ByteOffsetFromBase;
7487	}
7488
7489	Loads.insert(L);
7490	}
7491	assert(!Loads.empty() && "All the bytes of the value must be loaded from "((!Loads.empty() && "All the bytes of the value must be loaded from " "memory, so there must be at least one load which produces the value" ) ? static_cast<void> (0) : __assert_fail ("!Loads.empty() && \"All the bytes of the value must be loaded from \" \"memory, so there must be at least one load which produces the value\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 7492, __PRETTY_FUNCTION__))
7492	"memory, so there must be at least one load which produces the value")((!Loads.empty() && "All the bytes of the value must be loaded from " "memory, so there must be at least one load which produces the value" ) ? static_cast<void> (0) : __assert_fail ("!Loads.empty() && \"All the bytes of the value must be loaded from \" \"memory, so there must be at least one load which produces the value\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 7492, __PRETTY_FUNCTION__));
7493	assert(Base && "Base address of the accessed memory location must be set")((Base && "Base address of the accessed memory location must be set" ) ? static_cast<void> (0) : __assert_fail ("Base && \"Base address of the accessed memory location must be set\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 7493, __PRETTY_FUNCTION__));
7494	assert(FirstOffset != INT64_MAX && "First byte offset must be set")((FirstOffset != (9223372036854775807L) && "First byte offset must be set" ) ? static_cast<void> (0) : __assert_fail ("FirstOffset != INT64_MAX && \"First byte offset must be set\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 7494, __PRETTY_FUNCTION__));
7495
7496	bool NeedsZext = ZeroExtendedBytes > 0;
7497
7498	EVT MemVT =
7499	EVT::getIntegerVT(DAG.getContext(), (ByteWidth - ZeroExtendedBytes) 8);
7500
7501	if (!MemVT.isSimple())
7502	return SDValue();
7503
7504	// Before legalize we can introduce too wide illegal loads which will be later
7505	// split into legal sized loads. This enables us to combine i64 load by i8
7506	// patterns to a couple of i32 loads on 32 bit targets.
7507	if (LegalOperations &&
7508	!TLI.isOperationLegal(NeedsZext ? ISD::ZEXTLOAD : ISD::NON_EXTLOAD,
7509	MemVT))
7510	return SDValue();
7511
7512	// Check if the bytes of the OR we are looking at match with either big or
7513	// little endian value load
7514	Optional<bool> IsBigEndian = isBigEndian(
7515	makeArrayRef(ByteOffsets).drop_back(ZeroExtendedBytes), FirstOffset);
7516	if (!IsBigEndian.hasValue())
7517	return SDValue();
7518
7519	assert(FirstByteProvider && "must be set")((FirstByteProvider && "must be set") ? static_cast< void> (0) : __assert_fail ("FirstByteProvider && \"must be set\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 7519, __PRETTY_FUNCTION__));
7520
7521	// Ensure that the first byte is loaded from zero offset of the first load.
7522	// So the combined value can be loaded from the first load address.
7523	if (MemoryByteOffset(*FirstByteProvider) != 0)
7524	return SDValue();
7525	LoadSDNode *FirstLoad = FirstByteProvider->Load;
7526
7527	// The node we are looking at matches with the pattern, check if we can
7528	// replace it with a single (possibly zero-extended) load and bswap + shift if
7529	// needed.
7530
7531	// If the load needs byte swap check if the target supports it
7532	bool NeedsBswap = IsBigEndianTarget != *IsBigEndian;
7533
7534	// Before legalize we can introduce illegal bswaps which will be later
7535	// converted to an explicit bswap sequence. This way we end up with a single
7536	// load and byte shuffling instead of several loads and byte shuffling.
7537	// We do not introduce illegal bswaps when zero-extending as this tends to
7538	// introduce too many arithmetic instructions.
7539	if (NeedsBswap && (LegalOperations \|\| NeedsZext) &&
7540	!TLI.isOperationLegal(ISD::BSWAP, VT))
7541	return SDValue();
7542
7543	// If we need to bswap and zero extend, we have to insert a shift. Check that
7544	// it is legal.
7545	if (NeedsBswap && NeedsZext && LegalOperations &&
7546	!TLI.isOperationLegal(ISD::SHL, VT))
7547	return SDValue();
7548
7549	// Check that a load of the wide type is both allowed and fast on the target
7550	bool Fast = false;
7551	bool Allowed =
7552	TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
7553	*FirstLoad->getMemOperand(), &Fast);
7554	if (!Allowed \|\| !Fast)
7555	return SDValue();
7556
7557	SDValue NewLoad =
7558	DAG.getExtLoad(NeedsZext ? ISD::ZEXTLOAD : ISD::NON_EXTLOAD, SDLoc(N), VT,
7559	Chain, FirstLoad->getBasePtr(),
7560	FirstLoad->getPointerInfo(), MemVT, FirstLoad->getAlign());
7561
7562	// Transfer chain users from old loads to the new load.
7563	for (LoadSDNode *L : Loads)
7564	DAG.ReplaceAllUsesOfValueWith(SDValue(L, 1), SDValue(NewLoad.getNode(), 1));
7565
7566	if (!NeedsBswap)
7567	return NewLoad;
7568
7569	SDValue ShiftedLoad =
7570	NeedsZext
7571	? DAG.getNode(ISD::SHL, SDLoc(N), VT, NewLoad,
7572	DAG.getShiftAmountConstant(ZeroExtendedBytes * 8, VT,
7573	SDLoc(N), LegalOperations))
7574	: NewLoad;
7575	return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, ShiftedLoad);
7576	}
7577
7578	// If the target has andn, bsl, or a similar bit-select instruction,
7579	// we want to unfold masked merge, with canonical pattern of:
7580	// \| A \| \|B\|
7581	// ((x ^ y) & m) ^ y
7582	// \| D \|
7583	// Into:
7584	// (x & m) \| (y & ~m)
7585	// If y is a constant, and the 'andn' does not work with immediates,
7586	// we unfold into a different pattern:
7587	// ~(~x & m) & (m \| y)
7588	// NOTE: we don't unfold the pattern if 'xor' is actually a 'not', because at
7589	// the very least that breaks andnpd / andnps patterns, and because those
7590	// patterns are simplified in IR and shouldn't be created in the DAG
7591	SDValue DAGCombiner::unfoldMaskedMerge(SDNode *N) {
7592	assert(N->getOpcode() == ISD::XOR)((N->getOpcode() == ISD::XOR) ? static_cast<void> (0 ) : __assert_fail ("N->getOpcode() == ISD::XOR", "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 7592, __PRETTY_FUNCTION__));
7593
7594	// Don't touch 'not' (i.e. where y = -1).
7595	if (isAllOnesOrAllOnesSplat(N->getOperand(1)))
7596	return SDValue();
7597
7598	EVT VT = N->getValueType(0);
7599
7600	// There are 3 commutable operators in the pattern,
7601	// so we have to deal with 8 possible variants of the basic pattern.
7602	SDValue X, Y, M;
7603	auto matchAndXor = [&X, &Y, &M](SDValue And, unsigned XorIdx, SDValue Other) {
7604	if (And.getOpcode() != ISD::AND \|\| !And.hasOneUse())
7605	return false;
7606	SDValue Xor = And.getOperand(XorIdx);
7607	if (Xor.getOpcode() != ISD::XOR \|\| !Xor.hasOneUse())
7608	return false;
7609	SDValue Xor0 = Xor.getOperand(0);
7610	SDValue Xor1 = Xor.getOperand(1);
7611	// Don't touch 'not' (i.e. where y = -1).
7612	if (isAllOnesOrAllOnesSplat(Xor1))
7613	return false;
7614	if (Other == Xor0)
7615	std::swap(Xor0, Xor1);
7616	if (Other != Xor1)
7617	return false;
7618	X = Xor0;
7619	Y = Xor1;
7620	M = And.getOperand(XorIdx ? 0 : 1);
7621	return true;
7622	};
7623
7624	SDValue N0 = N->getOperand(0);
7625	SDValue N1 = N->getOperand(1);
7626	if (!matchAndXor(N0, 0, N1) && !matchAndXor(N0, 1, N1) &&
7627	!matchAndXor(N1, 0, N0) && !matchAndXor(N1, 1, N0))
7628	return SDValue();
7629
7630	// Don't do anything if the mask is constant. This should not be reachable.
7631	// InstCombine should have already unfolded this pattern, and DAGCombiner
7632	// probably shouldn't produce it, too.
7633	if (isa<ConstantSDNode>(M.getNode()))
7634	return SDValue();
7635
7636	// We can transform if the target has AndNot
7637	if (!TLI.hasAndNot(M))
7638	return SDValue();
7639
7640	SDLoc DL(N);
7641
7642	// If Y is a constant, check that 'andn' works with immediates.
7643	if (!TLI.hasAndNot(Y)) {
7644	assert(TLI.hasAndNot(X) && "Only mask is a variable? Unreachable.")((TLI.hasAndNot(X) && "Only mask is a variable? Unreachable." ) ? static_cast<void> (0) : __assert_fail ("TLI.hasAndNot(X) && \"Only mask is a variable? Unreachable.\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 7644, __PRETTY_FUNCTION__));
7645	// If not, we need to do a bit more work to make sure andn is still used.
7646	SDValue NotX = DAG.getNOT(DL, X, VT);
7647	SDValue LHS = DAG.getNode(ISD::AND, DL, VT, NotX, M);
7648	SDValue NotLHS = DAG.getNOT(DL, LHS, VT);
7649	SDValue RHS = DAG.getNode(ISD::OR, DL, VT, M, Y);
7650	return DAG.getNode(ISD::AND, DL, VT, NotLHS, RHS);
7651	}
7652
7653	SDValue LHS = DAG.getNode(ISD::AND, DL, VT, X, M);
7654	SDValue NotM = DAG.getNOT(DL, M, VT);
7655	SDValue RHS = DAG.getNode(ISD::AND, DL, VT, Y, NotM);
7656
7657	return DAG.getNode(ISD::OR, DL, VT, LHS, RHS);
7658	}
7659
7660	SDValue DAGCombiner::visitXOR(SDNode *N) {
7661	SDValue N0 = N->getOperand(0);
7662	SDValue N1 = N->getOperand(1);
7663	EVT VT = N0.getValueType();
7664
7665	// fold vector ops
7666	if (VT.isVector()) {
7667	if (SDValue FoldedVOp = SimplifyVBinOp(N))
7668	return FoldedVOp;
7669
7670	// fold (xor x, 0) -> x, vector edition
7671	if (ISD::isBuildVectorAllZeros(N0.getNode()))
7672	return N1;
7673	if (ISD::isBuildVectorAllZeros(N1.getNode()))
7674	return N0;
7675	}
7676
7677	// fold (xor undef, undef) -> 0. This is a common idiom (misuse).
7678	SDLoc DL(N);
7679	if (N0.isUndef() && N1.isUndef())
7680	return DAG.getConstant(0, DL, VT);
7681
7682	// fold (xor x, undef) -> undef
7683	if (N0.isUndef())
7684	return N0;
7685	if (N1.isUndef())
7686	return N1;
7687
7688	// fold (xor c1, c2) -> c1^c2
7689	if (SDValue C = DAG.FoldConstantArithmetic(ISD::XOR, DL, VT, {N0, N1}))
7690	return C;
7691
7692	// canonicalize constant to RHS
7693	if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
7694	!DAG.isConstantIntBuildVectorOrConstantInt(N1))
7695	return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
7696
7697	// fold (xor x, 0) -> x
7698	if (isNullConstant(N1))
7699	return N0;
7700
7701	if (SDValue NewSel = foldBinOpIntoSelect(N))
7702	return NewSel;
7703
7704	// reassociate xor
7705	if (SDValue RXOR = reassociateOps(ISD::XOR, DL, N0, N1, N->getFlags()))
7706	return RXOR;
7707
7708	// fold !(x cc y) -> (x !cc y)
7709	unsigned N0Opcode = N0.getOpcode();
7710	SDValue LHS, RHS, CC;
7711	if (TLI.isConstTrueVal(N1.getNode()) &&
7712	isSetCCEquivalent(N0, LHS, RHS, CC, /MatchStrict/true)) {
7713	ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
7714	LHS.getValueType());
7715	if (!LegalOperations \|\|
7716	TLI.isCondCodeLegal(NotCC, LHS.getSimpleValueType())) {
7717	switch (N0Opcode) {
7718	default:
7719	llvm_unreachable("Unhandled SetCC Equivalent!")::llvm::llvm_unreachable_internal("Unhandled SetCC Equivalent!" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 7719);
7720	case ISD::SETCC:
7721	return DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC);
7722	case ISD::SELECT_CC:
7723	return DAG.getSelectCC(SDLoc(N0), LHS, RHS, N0.getOperand(2),
7724	N0.getOperand(3), NotCC);
7725	case ISD::STRICT_FSETCC:
7726	case ISD::STRICT_FSETCCS: {
7727	if (N0.hasOneUse()) {
7728	// FIXME Can we handle multiple uses? Could we token factor the chain
7729	// results from the new/old setcc?
7730	SDValue SetCC =
7731	DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC,
7732	N0.getOperand(0), N0Opcode == ISD::STRICT_FSETCCS);
7733	CombineTo(N, SetCC);
7734	DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), SetCC.getValue(1));
7735	recursivelyDeleteUnusedNodes(N0.getNode());
7736	return SDValue(N, 0); // Return N so it doesn't get rechecked!
7737	}
7738	break;
7739	}
7740	}
7741	}
7742	}
7743
7744	// fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y)))
7745	if (isOneConstant(N1) && N0Opcode == ISD::ZERO_EXTEND && N0.hasOneUse() &&
7746	isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){
7747	SDValue V = N0.getOperand(0);
7748	SDLoc DL0(N0);
7749	V = DAG.getNode(ISD::XOR, DL0, V.getValueType(), V,
7750	DAG.getConstant(1, DL0, V.getValueType()));
7751	AddToWorklist(V.getNode());
7752	return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, V);
7753	}
7754
7755	// fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc
7756	if (isOneConstant(N1) && VT == MVT::i1 && N0.hasOneUse() &&
7757	(N0Opcode == ISD::OR \|\| N0Opcode == ISD::AND)) {
7758	SDValue N00 = N0.getOperand(0), N01 = N0.getOperand(1);
7759	if (isOneUseSetCC(N01) \|\| isOneUseSetCC(N00)) {
7760	unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
7761	N00 = DAG.getNode(ISD::XOR, SDLoc(N00), VT, N00, N1); // N00 = ~N00
7762	N01 = DAG.getNode(ISD::XOR, SDLoc(N01), VT, N01, N1); // N01 = ~N01
7763	AddToWorklist(N00.getNode()); AddToWorklist(N01.getNode());
7764	return DAG.getNode(NewOpcode, DL, VT, N00, N01);
7765	}
7766	}
7767	// fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants
7768	if (isAllOnesConstant(N1) && N0.hasOneUse() &&
7769	(N0Opcode == ISD::OR \|\| N0Opcode == ISD::AND)) {
7770	SDValue N00 = N0.getOperand(0), N01 = N0.getOperand(1);
7771	if (isa<ConstantSDNode>(N01) \|\| isa<ConstantSDNode>(N00)) {
7772	unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
7773	N00 = DAG.getNode(ISD::XOR, SDLoc(N00), VT, N00, N1); // N00 = ~N00
7774	N01 = DAG.getNode(ISD::XOR, SDLoc(N01), VT, N01, N1); // N01 = ~N01
7775	AddToWorklist(N00.getNode()); AddToWorklist(N01.getNode());
7776	return DAG.getNode(NewOpcode, DL, VT, N00, N01);
7777	}
7778	}
7779
7780	// fold (not (neg x)) -> (add X, -1)
7781	// FIXME: This can be generalized to (not (sub Y, X)) -> (add X, ~Y) if
7782	// Y is a constant or the subtract has a single use.
7783	if (isAllOnesConstant(N1) && N0.getOpcode() == ISD::SUB &&
7784	isNullConstant(N0.getOperand(0))) {
7785	return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1),
7786	DAG.getAllOnesConstant(DL, VT));
7787	}
7788
7789	// fold (not (add X, -1)) -> (neg X)
7790	if (isAllOnesConstant(N1) && N0.getOpcode() == ISD::ADD &&
7791	isAllOnesOrAllOnesSplat(N0.getOperand(1))) {
7792	return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
7793	N0.getOperand(0));
7794	}
7795
7796	// fold (xor (and x, y), y) -> (and (not x), y)
7797	if (N0Opcode == ISD::AND && N0.hasOneUse() && N0->getOperand(1) == N1) {
7798	SDValue X = N0.getOperand(0);
7799	SDValue NotX = DAG.getNOT(SDLoc(X), X, VT);
7800	AddToWorklist(NotX.getNode());
7801	return DAG.getNode(ISD::AND, DL, VT, NotX, N1);
7802	}
7803
7804	if ((N0Opcode == ISD::SRL \|\| N0Opcode == ISD::SHL) && N0.hasOneUse()) {
7805	ConstantSDNode *XorC = isConstOrConstSplat(N1);
7806	ConstantSDNode *ShiftC = isConstOrConstSplat(N0.getOperand(1));
7807	unsigned BitWidth = VT.getScalarSizeInBits();
7808	if (XorC && ShiftC) {
7809	// Don't crash on an oversized shift. We can not guarantee that a bogus
7810	// shift has been simplified to undef.
7811	uint64_t ShiftAmt = ShiftC->getLimitedValue();
7812	if (ShiftAmt < BitWidth) {
7813	APInt Ones = APInt::getAllOnesValue(BitWidth);
7814	Ones = N0Opcode == ISD::SHL ? Ones.shl(ShiftAmt) : Ones.lshr(ShiftAmt);
7815	if (XorC->getAPIntValue() == Ones) {
7816	// If the xor constant is a shifted -1, do a 'not' before the shift:
7817	// xor (X << ShiftC), XorC --> (not X) << ShiftC
7818	// xor (X >> ShiftC), XorC --> (not X) >> ShiftC
7819	SDValue Not = DAG.getNOT(DL, N0.getOperand(0), VT);
7820	return DAG.getNode(N0Opcode, DL, VT, Not, N0.getOperand(1));
7821	}
7822	}
7823	}
7824	}
7825
7826	// fold Y = sra (X, size(X)-1); xor (add (X, Y), Y) -> (abs X)
7827	if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
7828	SDValue A = N0Opcode == ISD::ADD ? N0 : N1;
7829	SDValue S = N0Opcode == ISD::SRA ? N0 : N1;
7830	if (A.getOpcode() == ISD::ADD && S.getOpcode() == ISD::SRA) {
7831	SDValue A0 = A.getOperand(0), A1 = A.getOperand(1);
7832	SDValue S0 = S.getOperand(0);
7833	if ((A0 == S && A1 == S0) \|\| (A1 == S && A0 == S0))
7834	if (ConstantSDNode *C = isConstOrConstSplat(S.getOperand(1)))
7835	if (C->getAPIntValue() == (VT.getScalarSizeInBits() - 1))
7836	return DAG.getNode(ISD::ABS, DL, VT, S0);
7837	}
7838	}
7839
7840	// fold (xor x, x) -> 0
7841	if (N0 == N1)
7842	return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
7843
7844	// fold (xor (shl 1, x), -1) -> (rotl ~1, x)
7845	// Here is a concrete example of this equivalence:
7846	// i16 x == 14
7847	// i16 shl == 1 << 14 == 16384 == 0b0100000000000000
7848	// i16 xor == ~(1 << 14) == 49151 == 0b1011111111111111
7849	//
7850	// =>
7851	//
7852	// i16 ~1 == 0b1111111111111110
7853	// i16 rol(~1, 14) == 0b1011111111111111
7854	//
7855	// Some additional tips to help conceptualize this transform:
7856	// - Try to see the operation as placing a single zero in a value of all ones.
7857	// - There exists no value for x which would allow the result to contain zero.
7858	// - Values of x larger than the bitwidth are undefined and do not require a
7859	// consistent result.
7860	// - Pushing the zero left requires shifting one bits in from the right.
7861	// A rotate left of ~1 is a nice way of achieving the desired result.
7862	if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT) && N0Opcode == ISD::SHL &&
7863	isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0))) {
7864	return DAG.getNode(ISD::ROTL, DL, VT, DAG.getConstant(~1, DL, VT),
7865	N0.getOperand(1));
7866	}
7867
7868	// Simplify: xor (op x...), (op y...) -> (op (xor x, y))
7869	if (N0Opcode == N1.getOpcode())
7870	if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
7871	return V;
7872
7873	// Unfold ((x ^ y) & m) ^ y into (x & m) \| (y & ~m) if profitable
7874	if (SDValue MM = unfoldMaskedMerge(N))
7875	return MM;
7876
7877	// Simplify the expression using non-local knowledge.
7878	if (SimplifyDemandedBits(SDValue(N, 0)))
7879	return SDValue(N, 0);
7880
7881	if (SDValue Combined = combineCarryDiamond(*this, DAG, TLI, N0, N1, N))
7882	return Combined;
7883
7884	return SDValue();
7885	}
7886
7887	/// If we have a shift-by-constant of a bitwise logic op that itself has a
7888	/// shift-by-constant operand with identical opcode, we may be able to convert
7889	/// that into 2 independent shifts followed by the logic op. This is a
7890	/// throughput improvement.
7891	static SDValue combineShiftOfShiftedLogic(SDNode *Shift, SelectionDAG &DAG) {
7892	// Match a one-use bitwise logic op.
7893	SDValue LogicOp = Shift->getOperand(0);
7894	if (!LogicOp.hasOneUse())
7895	return SDValue();
7896
7897	unsigned LogicOpcode = LogicOp.getOpcode();
7898	if (LogicOpcode != ISD::AND && LogicOpcode != ISD::OR &&
7899	LogicOpcode != ISD::XOR)
7900	return SDValue();
7901
7902	// Find a matching one-use shift by constant.
7903	unsigned ShiftOpcode = Shift->getOpcode();
7904	SDValue C1 = Shift->getOperand(1);
7905	ConstantSDNode *C1Node = isConstOrConstSplat(C1);
7906	assert(C1Node && "Expected a shift with constant operand")((C1Node && "Expected a shift with constant operand") ? static_cast<void> (0) : __assert_fail ("C1Node && \"Expected a shift with constant operand\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 7906, __PRETTY_FUNCTION__));
7907	const APInt &C1Val = C1Node->getAPIntValue();
7908	auto matchFirstShift = [&](SDValue V, SDValue &ShiftOp,
7909	const APInt *&ShiftAmtVal) {
7910	if (V.getOpcode() != ShiftOpcode \|\| !V.hasOneUse())
7911	return false;
7912
7913	ConstantSDNode *ShiftCNode = isConstOrConstSplat(V.getOperand(1));
7914	if (!ShiftCNode)
7915	return false;
7916
7917	// Capture the shifted operand and shift amount value.
7918	ShiftOp = V.getOperand(0);
7919	ShiftAmtVal = &ShiftCNode->getAPIntValue();
7920
7921	// Shift amount types do not have to match their operand type, so check that
7922	// the constants are the same width.
7923	if (ShiftAmtVal->getBitWidth() != C1Val.getBitWidth())
7924	return false;
7925
7926	// The fold is not valid if the sum of the shift values exceeds bitwidth.
7927	if ((*ShiftAmtVal + C1Val).uge(V.getScalarValueSizeInBits()))
7928	return false;
7929
7930	return true;
7931	};
7932
7933	// Logic ops are commutative, so check each operand for a match.
7934	SDValue X, Y;
7935	const APInt *C0Val;
7936	if (matchFirstShift(LogicOp.getOperand(0), X, C0Val))
7937	Y = LogicOp.getOperand(1);
7938	else if (matchFirstShift(LogicOp.getOperand(1), X, C0Val))
7939	Y = LogicOp.getOperand(0);
7940	else
7941	return SDValue();
7942
7943	// shift (logic (shift X, C0), Y), C1 -> logic (shift X, C0+C1), (shift Y, C1)
7944	SDLoc DL(Shift);
7945	EVT VT = Shift->getValueType(0);
7946	EVT ShiftAmtVT = Shift->getOperand(1).getValueType();
7947	SDValue ShiftSumC = DAG.getConstant(*C0Val + C1Val, DL, ShiftAmtVT);
7948	SDValue NewShift1 = DAG.getNode(ShiftOpcode, DL, VT, X, ShiftSumC);
7949	SDValue NewShift2 = DAG.getNode(ShiftOpcode, DL, VT, Y, C1);
7950	return DAG.getNode(LogicOpcode, DL, VT, NewShift1, NewShift2);
7951	}
7952
7953	/// Handle transforms common to the three shifts, when the shift amount is a
7954	/// constant.
7955	/// We are looking for: (shift being one of shl/sra/srl)
7956	/// shift (binop X, C0), C1
7957	/// And want to transform into:
7958	/// binop (shift X, C1), (shift C0, C1)
7959	SDValue DAGCombiner::visitShiftByConstant(SDNode *N) {
7960	assert(isConstOrConstSplat(N->getOperand(1)) && "Expected constant operand")((isConstOrConstSplat(N->getOperand(1)) && "Expected constant operand" ) ? static_cast<void> (0) : __assert_fail ("isConstOrConstSplat(N->getOperand(1)) && \"Expected constant operand\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 7960, __PRETTY_FUNCTION__));
7961
7962	// Do not turn a 'not' into a regular xor.
7963	if (isBitwiseNot(N->getOperand(0)))
7964	return SDValue();
7965
7966	// The inner binop must be one-use, since we want to replace it.
7967	SDValue LHS = N->getOperand(0);
7968	if (!LHS.hasOneUse() \|\| !TLI.isDesirableToCommuteWithShift(N, Level))
7969	return SDValue();
7970
7971	// TODO: This is limited to early combining because it may reveal regressions
7972	// otherwise. But since we just checked a target hook to see if this is
7973	// desirable, that should have filtered out cases where this interferes
7974	// with some other pattern matching.
7975	if (!LegalTypes)
7976	if (SDValue R = combineShiftOfShiftedLogic(N, DAG))
7977	return R;
7978
7979	// We want to pull some binops through shifts, so that we have (and (shift))
7980	// instead of (shift (and)), likewise for add, or, xor, etc. This sort of
7981	// thing happens with address calculations, so it's important to canonicalize
7982	// it.
7983	switch (LHS.getOpcode()) {
7984	default:
7985	return SDValue();
7986	case ISD::OR:
7987	case ISD::XOR:
7988	case ISD::AND:
7989	break;
7990	case ISD::ADD:
7991	if (N->getOpcode() != ISD::SHL)
7992	return SDValue(); // only shl(add) not sr[al](add).
7993	break;
7994	}
7995
7996	// We require the RHS of the binop to be a constant and not opaque as well.
7997	ConstantSDNode *BinOpCst = getAsNonOpaqueConstant(LHS.getOperand(1));
7998	if (!BinOpCst)
7999	return SDValue();
8000
8001	// FIXME: disable this unless the input to the binop is a shift by a constant
8002	// or is copy/select. Enable this in other cases when figure out it's exactly
8003	// profitable.
8004	SDValue BinOpLHSVal = LHS.getOperand(0);
8005	bool IsShiftByConstant = (BinOpLHSVal.getOpcode() == ISD::SHL \|\|
8006	BinOpLHSVal.getOpcode() == ISD::SRA \|\|
8007	BinOpLHSVal.getOpcode() == ISD::SRL) &&
8008	isa<ConstantSDNode>(BinOpLHSVal.getOperand(1));
8009	bool IsCopyOrSelect = BinOpLHSVal.getOpcode() == ISD::CopyFromReg \|\|
8010	BinOpLHSVal.getOpcode() == ISD::SELECT;
8011
8012	if (!IsShiftByConstant && !IsCopyOrSelect)
8013	return SDValue();
8014
8015	if (IsCopyOrSelect && N->hasOneUse())
8016	return SDValue();
8017
8018	// Fold the constants, shifting the binop RHS by the shift amount.
8019	SDLoc DL(N);
8020	EVT VT = N->getValueType(0);
8021	SDValue NewRHS = DAG.getNode(N->getOpcode(), DL, VT, LHS.getOperand(1),
8022	N->getOperand(1));
8023	assert(isa<ConstantSDNode>(NewRHS) && "Folding was not successful!")((isa<ConstantSDNode>(NewRHS) && "Folding was not successful!" ) ? static_cast<void> (0) : __assert_fail ("isa<ConstantSDNode>(NewRHS) && \"Folding was not successful!\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 8023, __PRETTY_FUNCTION__));
8024
8025	SDValue NewShift = DAG.getNode(N->getOpcode(), DL, VT, LHS.getOperand(0),
8026	N->getOperand(1));
8027	return DAG.getNode(LHS.getOpcode(), DL, VT, NewShift, NewRHS);
8028	}
8029
8030	SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) {
8031	assert(N->getOpcode() == ISD::TRUNCATE)((N->getOpcode() == ISD::TRUNCATE) ? static_cast<void> (0) : __assert_fail ("N->getOpcode() == ISD::TRUNCATE", "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 8031, __PRETTY_FUNCTION__));
8032	assert(N->getOperand(0).getOpcode() == ISD::AND)((N->getOperand(0).getOpcode() == ISD::AND) ? static_cast< void> (0) : __assert_fail ("N->getOperand(0).getOpcode() == ISD::AND" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 8032, __PRETTY_FUNCTION__));
8033
8034	// (truncate:TruncVT (and N00, N01C)) -> (and (truncate:TruncVT N00), TruncC)
8035	EVT TruncVT = N->getValueType(0);
8036	if (N->hasOneUse() && N->getOperand(0).hasOneUse() &&
8037	TLI.isTypeDesirableForOp(ISD::AND, TruncVT)) {
8038	SDValue N01 = N->getOperand(0).getOperand(1);
8039	if (isConstantOrConstantVector(N01, /* NoOpaques */ true)) {
8040	SDLoc DL(N);
8041	SDValue N00 = N->getOperand(0).getOperand(0);
8042	SDValue Trunc00 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N00);
8043	SDValue Trunc01 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N01);
8044	AddToWorklist(Trunc00.getNode());
8045	AddToWorklist(Trunc01.getNode());
8046	return DAG.getNode(ISD::AND, DL, TruncVT, Trunc00, Trunc01);
8047	}
8048	}
8049
8050	return SDValue();
8051	}
8052
8053	SDValue DAGCombiner::visitRotate(SDNode *N) {
8054	SDLoc dl(N);
8055	SDValue N0 = N->getOperand(0);
8056	SDValue N1 = N->getOperand(1);
8057	EVT VT = N->getValueType(0);
8058	unsigned Bitsize = VT.getScalarSizeInBits();
8059
8060	// fold (rot x, 0) -> x
8061	if (isNullOrNullSplat(N1))
8062	return N0;
8063
8064	// fold (rot x, c) -> x iff (c % BitSize) == 0
8065	if (isPowerOf2_32(Bitsize) && Bitsize > 1) {
8066	APInt ModuloMask(N1.getScalarValueSizeInBits(), Bitsize - 1);
8067	if (DAG.MaskedValueIsZero(N1, ModuloMask))
8068	return N0;
8069	}
8070
8071	// fold (rot x, c) -> (rot x, c % BitSize)
8072	bool OutOfRange = false;
8073	auto MatchOutOfRange = [Bitsize, &OutOfRange](ConstantSDNode *C) {
8074	OutOfRange \|= C->getAPIntValue().uge(Bitsize);
8075	return true;
8076	};
8077	if (ISD::matchUnaryPredicate(N1, MatchOutOfRange) && OutOfRange) {
8078	EVT AmtVT = N1.getValueType();
8079	SDValue Bits = DAG.getConstant(Bitsize, dl, AmtVT);
8080	if (SDValue Amt =
8081	DAG.FoldConstantArithmetic(ISD::UREM, dl, AmtVT, {N1, Bits}))
8082	return DAG.getNode(N->getOpcode(), dl, VT, N0, Amt);
8083	}
8084
8085	// rot i16 X, 8 --> bswap X
8086	auto *RotAmtC = isConstOrConstSplat(N1);
8087	if (RotAmtC && RotAmtC->getAPIntValue() == 8 &&
8088	VT.getScalarSizeInBits() == 16 && hasOperation(ISD::BSWAP, VT))
8089	return DAG.getNode(ISD::BSWAP, dl, VT, N0);
8090
8091	// Simplify the operands using demanded-bits information.
8092	if (SimplifyDemandedBits(SDValue(N, 0)))
8093	return SDValue(N, 0);
8094
8095	// fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))).
8096	if (N1.getOpcode() == ISD::TRUNCATE &&
8097	N1.getOperand(0).getOpcode() == ISD::AND) {
8098	if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
8099	return DAG.getNode(N->getOpcode(), dl, VT, N0, NewOp1);
8100	}
8101
8102	unsigned NextOp = N0.getOpcode();
8103	// fold (rot* (rot* x, c2), c1) -> (rot* x, c1 +- c2 % bitsize)
8104	if (NextOp == ISD::ROTL \|\| NextOp == ISD::ROTR) {
8105	SDNode *C1 = DAG.isConstantIntBuildVectorOrConstantInt(N1);
8106	SDNode *C2 = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1));
8107	if (C1 && C2 && C1->getValueType(0) == C2->getValueType(0)) {
8108	EVT ShiftVT = C1->getValueType(0);
8109	bool SameSide = (N->getOpcode() == NextOp);
8110	unsigned CombineOp = SameSide ? ISD::ADD : ISD::SUB;
8111	if (SDValue CombinedShift = DAG.FoldConstantArithmetic(
8112	CombineOp, dl, ShiftVT, {N1, N0.getOperand(1)})) {
8113	SDValue BitsizeC = DAG.getConstant(Bitsize, dl, ShiftVT);
8114	SDValue CombinedShiftNorm = DAG.FoldConstantArithmetic(
8115	ISD::SREM, dl, ShiftVT, {CombinedShift, BitsizeC});
8116	return DAG.getNode(N->getOpcode(), dl, VT, N0->getOperand(0),
8117	CombinedShiftNorm);
8118	}
8119	}
8120	}
8121	return SDValue();
8122	}
8123
8124	SDValue DAGCombiner::visitSHL(SDNode *N) {
8125	SDValue N0 = N->getOperand(0);
8126	SDValue N1 = N->getOperand(1);
8127	if (SDValue V = DAG.simplifyShift(N0, N1))
8128	return V;
8129
8130	EVT VT = N0.getValueType();
8131	EVT ShiftVT = N1.getValueType();
8132	unsigned OpSizeInBits = VT.getScalarSizeInBits();
8133
8134	// fold vector ops
8135	if (VT.isVector()) {
8136	if (SDValue FoldedVOp = SimplifyVBinOp(N))
8137	return FoldedVOp;
8138
8139	BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(N1);
8140	// If setcc produces all-one true value then:
8141	// (shl (and (setcc) N01CV) N1CV) -> (and (setcc) N01CV<<N1CV)
8142	if (N1CV && N1CV->isConstant()) {
8143	if (N0.getOpcode() == ISD::AND) {
8144	SDValue N00 = N0->getOperand(0);
8145	SDValue N01 = N0->getOperand(1);
8146	BuildVectorSDNode *N01CV = dyn_cast<BuildVectorSDNode>(N01);
8147
8148	if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC &&
8149	TLI.getBooleanContents(N00.getOperand(0).getValueType()) ==
8150	TargetLowering::ZeroOrNegativeOneBooleanContent) {
8151	if (SDValue C =
8152	DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, {N01, N1}))
8153	return DAG.getNode(ISD::AND, SDLoc(N), VT, N00, C);
8154	}
8155	}
8156	}
8157	}
8158
8159	ConstantSDNode *N1C = isConstOrConstSplat(N1);
8160
8161	// fold (shl c1, c2) -> c1<<c2
8162	if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, {N0, N1}))
8163	return C;
8164
8165	if (SDValue NewSel = foldBinOpIntoSelect(N))
8166	return NewSel;
8167
8168	// if (shl x, c) is known to be zero, return 0
8169	if (DAG.MaskedValueIsZero(SDValue(N, 0),
8170	APInt::getAllOnesValue(OpSizeInBits)))
8171	return DAG.getConstant(0, SDLoc(N), VT);
8172
8173	// fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
8174	if (N1.getOpcode() == ISD::TRUNCATE &&
8175	N1.getOperand(0).getOpcode() == ISD::AND) {
8176	if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
8177	return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, NewOp1);
8178	}
8179
8180	if (SimplifyDemandedBits(SDValue(N, 0)))
8181	return SDValue(N, 0);
8182
8183	// fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
8184	if (N0.getOpcode() == ISD::SHL) {
8185	auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
8186	ConstantSDNode *RHS) {
8187	APInt c1 = LHS->getAPIntValue();
8188	APInt c2 = RHS->getAPIntValue();
8189	zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
8190	return (c1 + c2).uge(OpSizeInBits);
8191	};
8192	if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
8193	return DAG.getConstant(0, SDLoc(N), VT);
8194
8195	auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
8196	ConstantSDNode *RHS) {
8197	APInt c1 = LHS->getAPIntValue();
8198	APInt c2 = RHS->getAPIntValue();
8199	zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
8200	return (c1 + c2).ult(OpSizeInBits);
8201	};
8202	if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
8203	SDLoc DL(N);
8204	SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
8205	return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Sum);
8206	}
8207	}
8208
8209	// fold (shl (ext (shl x, c1)), c2) -> (shl (ext x), (add c1, c2))
8210	// For this to be valid, the second form must not preserve any of the bits
8211	// that are shifted out by the inner shift in the first form. This means
8212	// the outer shift size must be >= the number of bits added by the ext.
8213	// As a corollary, we don't care what kind of ext it is.
8214	if ((N0.getOpcode() == ISD::ZERO_EXTEND \|\|
8215	N0.getOpcode() == ISD::ANY_EXTEND \|\|
8216	N0.getOpcode() == ISD::SIGN_EXTEND) &&
8217	N0.getOperand(0).getOpcode() == ISD::SHL) {
8218	SDValue N0Op0 = N0.getOperand(0);
8219	SDValue InnerShiftAmt = N0Op0.getOperand(1);
8220	EVT InnerVT = N0Op0.getValueType();
8221	uint64_t InnerBitwidth = InnerVT.getScalarSizeInBits();
8222
8223	auto MatchOutOfRange = [OpSizeInBits, InnerBitwidth](ConstantSDNode *LHS,
8224	ConstantSDNode *RHS) {
8225	APInt c1 = LHS->getAPIntValue();
8226	APInt c2 = RHS->getAPIntValue();
8227	zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
8228	return c2.uge(OpSizeInBits - InnerBitwidth) &&
8229	(c1 + c2).uge(OpSizeInBits);
8230	};
8231	if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchOutOfRange,
8232	/AllowUndefs/ false,
8233	/AllowTypeMismatch/ true))
8234	return DAG.getConstant(0, SDLoc(N), VT);
8235
8236	auto MatchInRange = [OpSizeInBits, InnerBitwidth](ConstantSDNode *LHS,
8237	ConstantSDNode *RHS) {
8238	APInt c1 = LHS->getAPIntValue();
8239	APInt c2 = RHS->getAPIntValue();
8240	zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
8241	return c2.uge(OpSizeInBits - InnerBitwidth) &&
8242	(c1 + c2).ult(OpSizeInBits);
8243	};
8244	if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchInRange,
8245	/AllowUndefs/ false,
8246	/AllowTypeMismatch/ true)) {
8247	SDLoc DL(N);
8248	SDValue Ext = DAG.getNode(N0.getOpcode(), DL, VT, N0Op0.getOperand(0));
8249	SDValue Sum = DAG.getZExtOrTrunc(InnerShiftAmt, DL, ShiftVT);
8250	Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, Sum, N1);
8251	return DAG.getNode(ISD::SHL, DL, VT, Ext, Sum);
8252	}
8253	}
8254
8255	// fold (shl (zext (srl x, C)), C) -> (zext (shl (srl x, C), C))
8256	// Only fold this if the inner zext has no other uses to avoid increasing
8257	// the total number of instructions.
8258	if (N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() &&
8259	N0.getOperand(0).getOpcode() == ISD::SRL) {
8260	SDValue N0Op0 = N0.getOperand(0);
8261	SDValue InnerShiftAmt = N0Op0.getOperand(1);
8262
8263	auto MatchEqual = [VT](ConstantSDNode LHS, ConstantSDNode RHS) {
8264	APInt c1 = LHS->getAPIntValue();
8265	APInt c2 = RHS->getAPIntValue();
8266	zeroExtendToMatch(c1, c2);
8267	return c1.ult(VT.getScalarSizeInBits()) && (c1 == c2);
8268	};
8269	if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchEqual,
8270	/AllowUndefs/ false,
8271	/AllowTypeMismatch/ true)) {
8272	SDLoc DL(N);
8273	EVT InnerShiftAmtVT = N0Op0.getOperand(1).getValueType();
8274	SDValue NewSHL = DAG.getZExtOrTrunc(N1, DL, InnerShiftAmtVT);
8275	NewSHL = DAG.getNode(ISD::SHL, DL, N0Op0.getValueType(), N0Op0, NewSHL);
8276	AddToWorklist(NewSHL.getNode());
8277	return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL);
8278	}
8279	}
8280
8281	// fold (shl (sr[la] exact X, C1), C2) -> (shl X, (C2-C1)) if C1 <= C2
8282	// fold (shl (sr[la] exact X, C1), C2) -> (sr[la] X, (C2-C1)) if C1 > C2
8283	// TODO - support non-uniform vector shift amounts.
8284	if (N1C && (N0.getOpcode() == ISD::SRL \|\| N0.getOpcode() == ISD::SRA) &&
8285	N0->getFlags().hasExact()) {
8286	if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
8287	uint64_t C1 = N0C1->getZExtValue();
8288	uint64_t C2 = N1C->getZExtValue();
8289	SDLoc DL(N);
8290	if (C1 <= C2)
8291	return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
8292	DAG.getConstant(C2 - C1, DL, ShiftVT));
8293	return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0),
8294	DAG.getConstant(C1 - C2, DL, ShiftVT));
8295	}
8296	}
8297
8298	// fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1), MASK) or
8299	// (and (srl x, (sub c1, c2), MASK)
8300	// Only fold this if the inner shift has no other uses -- if it does, folding
8301	// this will increase the total number of instructions.
8302	// TODO - drop hasOneUse requirement if c1 == c2?
8303	// TODO - support non-uniform vector shift amounts.
8304	if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse() &&
8305	TLI.shouldFoldConstantShiftPairToMask(N, Level)) {
8306	if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
8307	if (N0C1->getAPIntValue().ult(OpSizeInBits)) {
8308	uint64_t c1 = N0C1->getZExtValue();
8309	uint64_t c2 = N1C->getZExtValue();
8310	APInt Mask = APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - c1);
8311	SDValue Shift;
8312	if (c2 > c1) {
8313	Mask <<= c2 - c1;
8314	SDLoc DL(N);
8315	Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
8316	DAG.getConstant(c2 - c1, DL, ShiftVT));
8317	} else {
8318	Mask.lshrInPlace(c1 - c2);
8319	SDLoc DL(N);
8320	Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0),
8321	DAG.getConstant(c1 - c2, DL, ShiftVT));
8322	}
8323	SDLoc DL(N0);
8324	return DAG.getNode(ISD::AND, DL, VT, Shift,
8325	DAG.getConstant(Mask, DL, VT));
8326	}
8327	}
8328	}
8329
8330	// fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
8331	if (N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1) &&
8332	isConstantOrConstantVector(N1, /* No Opaques */ true)) {
8333	SDLoc DL(N);
8334	SDValue AllBits = DAG.getAllOnesConstant(DL, VT);
8335	SDValue HiBitsMask = DAG.getNode(ISD::SHL, DL, VT, AllBits, N1);
8336	return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), HiBitsMask);
8337	}
8338
8339	// fold (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
8340	// fold (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
8341	// Variant of version done on multiply, except mul by a power of 2 is turned
8342	// into a shift.
8343	if ((N0.getOpcode() == ISD::ADD \|\| N0.getOpcode() == ISD::OR) &&
8344	N0.getNode()->hasOneUse() &&
8345	isConstantOrConstantVector(N1, /* No Opaques */ true) &&
8346	isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true) &&
8347	TLI.isDesirableToCommuteWithShift(N, Level)) {
8348	SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1);
8349	SDValue Shl1 = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
8350	AddToWorklist(Shl0.getNode());
8351	AddToWorklist(Shl1.getNode());
8352	return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, Shl0, Shl1);
8353	}
8354
8355	// fold (shl (mul x, c1), c2) -> (mul x, c1 << c2)
8356	if (N0.getOpcode() == ISD::MUL && N0.getNode()->hasOneUse() &&
8357	isConstantOrConstantVector(N1, /* No Opaques */ true) &&
8358	isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true)) {
8359	SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
8360	if (isConstantOrConstantVector(Shl))
8361	return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), Shl);
8362	}
8363
8364	if (N1C && !N1C->isOpaque())
8365	if (SDValue NewSHL = visitShiftByConstant(N))
8366	return NewSHL;
8367
8368	// Fold (shl (vscale * C0), C1) to (vscale * (C0 << C1)).
8369	if (N0.getOpcode() == ISD::VSCALE)
8370	if (ConstantSDNode *NC1 = isConstOrConstSplat(N->getOperand(1))) {
8371	const APInt &C0 = N0.getConstantOperandAPInt(0);
8372	const APInt &C1 = NC1->getAPIntValue();
8373	return DAG.getVScale(SDLoc(N), VT, C0 << C1);
8374	}
8375
8376	return SDValue();
8377	}
8378
8379	// Transform a right shift of a multiply into a multiply-high.
8380	// Examples:
8381	// (srl (mul (zext i32:$a to i64), (zext i32:$a to i64)), 32) -> (mulhu $a, $b)
8382	// (sra (mul (sext i32:$a to i64), (sext i32:$a to i64)), 32) -> (mulhs $a, $b)
8383	static SDValue combineShiftToMULH(SDNode *N, SelectionDAG &DAG,
8384	const TargetLowering &TLI) {
8385	assert((N->getOpcode() == ISD::SRL \|\| N->getOpcode() == ISD::SRA) &&(((N->getOpcode() == ISD::SRL \|\| N->getOpcode() == ISD:: SRA) && "SRL or SRA node is required here!") ? static_cast <void> (0) : __assert_fail ("(N->getOpcode() == ISD::SRL \|\| N->getOpcode() == ISD::SRA) && \"SRL or SRA node is required here!\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 8386, __PRETTY_FUNCTION__))
8386	"SRL or SRA node is required here!")(((N->getOpcode() == ISD::SRL \|\| N->getOpcode() == ISD:: SRA) && "SRL or SRA node is required here!") ? static_cast <void> (0) : __assert_fail ("(N->getOpcode() == ISD::SRL \|\| N->getOpcode() == ISD::SRA) && \"SRL or SRA node is required here!\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 8386, __PRETTY_FUNCTION__));
8387
8388	// Check the shift amount. Proceed with the transformation if the shift
8389	// amount is constant.
8390	ConstantSDNode *ShiftAmtSrc = isConstOrConstSplat(N->getOperand(1));
8391	if (!ShiftAmtSrc)
8392	return SDValue();
8393
8394	SDLoc DL(N);
8395
8396	// The operation feeding into the shift must be a multiply.
8397	SDValue ShiftOperand = N->getOperand(0);
8398	if (ShiftOperand.getOpcode() != ISD::MUL)
8399	return SDValue();
8400
8401	// Both operands must be equivalent extend nodes.
8402	SDValue LeftOp = ShiftOperand.getOperand(0);
8403	SDValue RightOp = ShiftOperand.getOperand(1);
8404	bool IsSignExt = LeftOp.getOpcode() == ISD::SIGN_EXTEND;
8405	bool IsZeroExt = LeftOp.getOpcode() == ISD::ZERO_EXTEND;
8406
8407	if ((!(IsSignExt \|\| IsZeroExt)) \|\| LeftOp.getOpcode() != RightOp.getOpcode())
8408	return SDValue();
8409
8410	EVT WideVT1 = LeftOp.getValueType();
8411	EVT WideVT2 = RightOp.getValueType();
8412	(void)WideVT2;
8413	// Proceed with the transformation if the wide types match.
8414	assert((WideVT1 == WideVT2) &&(((WideVT1 == WideVT2) && "Cannot have a multiply node with two different operand types." ) ? static_cast<void> (0) : __assert_fail ("(WideVT1 == WideVT2) && \"Cannot have a multiply node with two different operand types.\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 8415, __PRETTY_FUNCTION__))
8415	"Cannot have a multiply node with two different operand types.")(((WideVT1 == WideVT2) && "Cannot have a multiply node with two different operand types." ) ? static_cast<void> (0) : __assert_fail ("(WideVT1 == WideVT2) && \"Cannot have a multiply node with two different operand types.\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 8415, __PRETTY_FUNCTION__));
8416
8417	EVT NarrowVT = LeftOp.getOperand(0).getValueType();
8418	// Check that the two extend nodes are the same type.
8419	if (NarrowVT != RightOp.getOperand(0).getValueType())
8420	return SDValue();
8421
8422	// Proceed with the transformation if the wide type is twice as large
8423	// as the narrow type.
8424	unsigned NarrowVTSize = NarrowVT.getScalarSizeInBits();
8425	if (WideVT1.getScalarSizeInBits() != 2 * NarrowVTSize)
8426	return SDValue();
8427
8428	// Check the shift amount with the narrow type size.
8429	// Proceed with the transformation if the shift amount is the width
8430	// of the narrow type.
8431	unsigned ShiftAmt = ShiftAmtSrc->getZExtValue();
8432	if (ShiftAmt != NarrowVTSize)
8433	return SDValue();
8434
8435	// If the operation feeding into the MUL is a sign extend (sext),
8436	// we use mulhs. Othewise, zero extends (zext) use mulhu.
8437	unsigned MulhOpcode = IsSignExt ? ISD::MULHS : ISD::MULHU;
8438
8439	// Combine to mulh if mulh is legal/custom for the narrow type on the target.
8440	if (!TLI.isOperationLegalOrCustom(MulhOpcode, NarrowVT))
8441	return SDValue();
8442
8443	SDValue Result = DAG.getNode(MulhOpcode, DL, NarrowVT, LeftOp.getOperand(0),
8444	RightOp.getOperand(0));
8445	return (N->getOpcode() == ISD::SRA ? DAG.getSExtOrTrunc(Result, DL, WideVT1)
8446	: DAG.getZExtOrTrunc(Result, DL, WideVT1));
8447	}
8448
8449	SDValue DAGCombiner::visitSRA(SDNode *N) {
8450	SDValue N0 = N->getOperand(0);
8451	SDValue N1 = N->getOperand(1);
8452	if (SDValue V = DAG.simplifyShift(N0, N1))
8453	return V;
8454
8455	EVT VT = N0.getValueType();
8456	unsigned OpSizeInBits = VT.getScalarSizeInBits();
8457
8458	// Arithmetic shifting an all-sign-bit value is a no-op.
8459	// fold (sra 0, x) -> 0
8460	// fold (sra -1, x) -> -1
8461	if (DAG.ComputeNumSignBits(N0) == OpSizeInBits)
8462	return N0;
8463
8464	// fold vector ops
8465	if (VT.isVector())
8466	if (SDValue FoldedVOp = SimplifyVBinOp(N))
8467	return FoldedVOp;
8468
8469	ConstantSDNode *N1C = isConstOrConstSplat(N1);
8470
8471	// fold (sra c1, c2) -> (sra c1, c2)
8472	if (SDValue C = DAG.FoldConstantArithmetic(ISD::SRA, SDLoc(N), VT, {N0, N1}))
8473	return C;
8474
8475	if (SDValue NewSel = foldBinOpIntoSelect(N))
8476	return NewSel;
8477
8478	// fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target supports
8479	// sext_inreg.
8480	if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) {
8481	unsigned LowBits = OpSizeInBits - (unsigned)N1C->getZExtValue();
8482	EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), LowBits);
8483	if (VT.isVector())
8484	ExtVT = EVT::getVectorVT(*DAG.getContext(), ExtVT,
8485	VT.getVectorElementCount());
8486	if (!LegalOperations \|\|
8487	TLI.getOperationAction(ISD::SIGN_EXTEND_INREG, ExtVT) ==
8488	TargetLowering::Legal)
8489	return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
8490	N0.getOperand(0), DAG.getValueType(ExtVT));
8491	// Even if we can't convert to sext_inreg, we might be able to remove
8492	// this shift pair if the input is already sign extended.
8493	if (DAG.ComputeNumSignBits(N0.getOperand(0)) > N1C->getZExtValue())
8494	return N0.getOperand(0);
8495	}
8496
8497	// fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
8498	// clamp (add c1, c2) to max shift.
8499	if (N0.getOpcode() == ISD::SRA) {
8500	SDLoc DL(N);
8501	EVT ShiftVT = N1.getValueType();
8502	EVT ShiftSVT = ShiftVT.getScalarType();
8503	SmallVector<SDValue, 16> ShiftValues;
8504
8505	auto SumOfShifts = [&](ConstantSDNode LHS, ConstantSDNode RHS) {
8506	APInt c1 = LHS->getAPIntValue();
8507	APInt c2 = RHS->getAPIntValue();
8508	zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
8509	APInt Sum = c1 + c2;
8510	unsigned ShiftSum =
8511	Sum.uge(OpSizeInBits) ? (OpSizeInBits - 1) : Sum.getZExtValue();
8512	ShiftValues.push_back(DAG.getConstant(ShiftSum, DL, ShiftSVT));
8513	return true;
8514	};
8515	if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), SumOfShifts)) {
8516	SDValue ShiftValue;
8517	if (VT.isVector())
8518	ShiftValue = DAG.getBuildVector(ShiftVT, DL, ShiftValues);
8519	else
8520	ShiftValue = ShiftValues[0];
8521	return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0), ShiftValue);
8522	}
8523	}
8524
8525	// fold (sra (shl X, m), (sub result_size, n))
8526	// -> (sign_extend (trunc (shl X, (sub (sub result_size, n), m)))) for
8527	// result_size - n != m.
8528	// If truncate is free for the target sext(shl) is likely to result in better
8529	// code.
8530	if (N0.getOpcode() == ISD::SHL && N1C) {
8531	// Get the two constanst of the shifts, CN0 = m, CN = n.
8532	const ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1));
8533	if (N01C) {
8534	LLVMContext &Ctx = *DAG.getContext();
8535	// Determine what the truncate's result bitsize and type would be.
8536	EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - N1C->getZExtValue());
8537
8538	if (VT.isVector())
8539	TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorElementCount());
8540
8541	// Determine the residual right-shift amount.
8542	int ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();
8543
8544	// If the shift is not a no-op (in which case this should be just a sign
8545	// extend already), the truncated to type is legal, sign_extend is legal
8546	// on that type, and the truncate to that type is both legal and free,
8547	// perform the transform.
8548	if ((ShiftAmt > 0) &&
8549	TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND, TruncVT) &&
8550	TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) &&
8551	TLI.isTruncateFree(VT, TruncVT)) {
8552	SDLoc DL(N);
8553	SDValue Amt = DAG.getConstant(ShiftAmt, DL,
8554	getShiftAmountTy(N0.getOperand(0).getValueType()));
8555	SDValue Shift = DAG.getNode(ISD::SRL, DL, VT,
8556	N0.getOperand(0), Amt);
8557	SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, TruncVT,
8558	Shift);
8559	return DAG.getNode(ISD::SIGN_EXTEND, DL,
8560	N->getValueType(0), Trunc);
8561	}
8562	}
8563	}
8564
8565	// We convert trunc/ext to opposing shifts in IR, but casts may be cheaper.
8566	// sra (add (shl X, N1C), AddC), N1C -->
8567	// sext (add (trunc X to (width - N1C)), AddC')
8568	if (N0.getOpcode() == ISD::ADD && N0.hasOneUse() && N1C &&
8569	N0.getOperand(0).getOpcode() == ISD::SHL &&
8570	N0.getOperand(0).getOperand(1) == N1 && N0.getOperand(0).hasOneUse()) {
8571	if (ConstantSDNode *AddC = isConstOrConstSplat(N0.getOperand(1))) {
8572	SDValue Shl = N0.getOperand(0);
8573	// Determine what the truncate's type would be and ask the target if that
8574	// is a free operation.
8575	LLVMContext &Ctx = *DAG.getContext();
8576	unsigned ShiftAmt = N1C->getZExtValue();
8577	EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - ShiftAmt);
8578	if (VT.isVector())
8579	TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorElementCount());
8580
8581	// TODO: The simple type check probably belongs in the default hook
8582	// implementation and/or target-specific overrides (because
8583	// non-simple types likely require masking when legalized), but that
8584	// restriction may conflict with other transforms.
8585	if (TruncVT.isSimple() && isTypeLegal(TruncVT) &&
8586	TLI.isTruncateFree(VT, TruncVT)) {
8587	SDLoc DL(N);
8588	SDValue Trunc = DAG.getZExtOrTrunc(Shl.getOperand(0), DL, TruncVT);
8589	SDValue ShiftC = DAG.getConstant(AddC->getAPIntValue().lshr(ShiftAmt).
8590	trunc(TruncVT.getScalarSizeInBits()), DL, TruncVT);
8591	SDValue Add = DAG.getNode(ISD::ADD, DL, TruncVT, Trunc, ShiftC);
8592	return DAG.getSExtOrTrunc(Add, DL, VT);
8593	}
8594	}
8595	}
8596
8597	// fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
8598	if (N1.getOpcode() == ISD::TRUNCATE &&
8599	N1.getOperand(0).getOpcode() == ISD::AND) {
8600	if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
8601	return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, NewOp1);
8602	}
8603
8604	// fold (sra (trunc (sra x, c1)), c2) -> (trunc (sra x, c1 + c2))
8605	// fold (sra (trunc (srl x, c1)), c2) -> (trunc (sra x, c1 + c2))
8606	// if c1 is equal to the number of bits the trunc removes
8607	// TODO - support non-uniform vector shift amounts.
8608	if (N0.getOpcode() == ISD::TRUNCATE &&
8609	(N0.getOperand(0).getOpcode() == ISD::SRL \|\|
8610	N0.getOperand(0).getOpcode() == ISD::SRA) &&
8611	N0.getOperand(0).hasOneUse() &&
8612	N0.getOperand(0).getOperand(1).hasOneUse() && N1C) {
8613	SDValue N0Op0 = N0.getOperand(0);
8614	if (ConstantSDNode *LargeShift = isConstOrConstSplat(N0Op0.getOperand(1))) {
8615	EVT LargeVT = N0Op0.getValueType();
8616	unsigned TruncBits = LargeVT.getScalarSizeInBits() - OpSizeInBits;
8617	if (LargeShift->getAPIntValue() == TruncBits) {
8618	SDLoc DL(N);
8619	SDValue Amt = DAG.getConstant(N1C->getZExtValue() + TruncBits, DL,
8620	getShiftAmountTy(LargeVT));
8621	SDValue SRA =
8622	DAG.getNode(ISD::SRA, DL, LargeVT, N0Op0.getOperand(0), Amt);
8623	return DAG.getNode(ISD::TRUNCATE, DL, VT, SRA);
8624	}
8625	}
8626	}
8627
8628	// Simplify, based on bits shifted out of the LHS.
8629	if (SimplifyDemandedBits(SDValue(N, 0)))
8630	return SDValue(N, 0);
8631
8632	// If the sign bit is known to be zero, switch this to a SRL.
8633	if (DAG.SignBitIsZero(N0))
8634	return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1);
8635
8636	if (N1C && !N1C->isOpaque())
8637	if (SDValue NewSRA = visitShiftByConstant(N))
8638	return NewSRA;
8639
8640	// Try to transform this shift into a multiply-high if
8641	// it matches the appropriate pattern detected in combineShiftToMULH.
8642	if (SDValue MULH = combineShiftToMULH(N, DAG, TLI))
8643	return MULH;
8644
8645	return SDValue();
8646	}
8647
8648	SDValue DAGCombiner::visitSRL(SDNode *N) {
8649	SDValue N0 = N->getOperand(0);
8650	SDValue N1 = N->getOperand(1);
8651	if (SDValue V = DAG.simplifyShift(N0, N1))
8652	return V;
8653
8654	EVT VT = N0.getValueType();
8655	unsigned OpSizeInBits = VT.getScalarSizeInBits();
8656
8657	// fold vector ops
8658	if (VT.isVector())
8659	if (SDValue FoldedVOp = SimplifyVBinOp(N))
8660	return FoldedVOp;
8661
8662	ConstantSDNode *N1C = isConstOrConstSplat(N1);
8663
8664	// fold (srl c1, c2) -> c1 >>u c2
8665	if (SDValue C = DAG.FoldConstantArithmetic(ISD::SRL, SDLoc(N), VT, {N0, N1}))
8666	return C;
8667
8668	if (SDValue NewSel = foldBinOpIntoSelect(N))
8669	return NewSel;
8670
8671	// if (srl x, c) is known to be zero, return 0
8672	if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
8673	APInt::getAllOnesValue(OpSizeInBits)))
8674	return DAG.getConstant(0, SDLoc(N), VT);
8675
8676	// fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
8677	if (N0.getOpcode() == ISD::SRL) {
8678	auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
8679	ConstantSDNode *RHS) {
8680	APInt c1 = LHS->getAPIntValue();
8681	APInt c2 = RHS->getAPIntValue();
8682	zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
8683	return (c1 + c2).uge(OpSizeInBits);
8684	};
8685	if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
8686	return DAG.getConstant(0, SDLoc(N), VT);
8687
8688	auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
8689	ConstantSDNode *RHS) {
8690	APInt c1 = LHS->getAPIntValue();
8691	APInt c2 = RHS->getAPIntValue();
8692	zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
8693	return (c1 + c2).ult(OpSizeInBits);
8694	};
8695	if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
8696	SDLoc DL(N);
8697	EVT ShiftVT = N1.getValueType();
8698	SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
8699	return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Sum);
8700	}
8701	}
8702
8703	if (N1C && N0.getOpcode() == ISD::TRUNCATE &&
8704	N0.getOperand(0).getOpcode() == ISD::SRL) {
8705	SDValue InnerShift = N0.getOperand(0);
8706	// TODO - support non-uniform vector shift amounts.
8707	if (auto *N001C = isConstOrConstSplat(InnerShift.getOperand(1))) {
8708	uint64_t c1 = N001C->getZExtValue();
8709	uint64_t c2 = N1C->getZExtValue();
8710	EVT InnerShiftVT = InnerShift.getValueType();
8711	EVT ShiftAmtVT = InnerShift.getOperand(1).getValueType();
8712	uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
8713	// srl (trunc (srl x, c1)), c2 --> 0 or (trunc (srl x, (add c1, c2)))
8714	// This is only valid if the OpSizeInBits + c1 = size of inner shift.
8715	if (c1 + OpSizeInBits == InnerShiftSize) {
8716	SDLoc DL(N);
8717	if (c1 + c2 >= InnerShiftSize)
8718	return DAG.getConstant(0, DL, VT);
8719	SDValue NewShiftAmt = DAG.getConstant(c1 + c2, DL, ShiftAmtVT);
8720	SDValue NewShift = DAG.getNode(ISD::SRL, DL, InnerShiftVT,
8721	InnerShift.getOperand(0), NewShiftAmt);
8722	return DAG.getNode(ISD::TRUNCATE, DL, VT, NewShift);
8723	}
8724	// In the more general case, we can clear the high bits after the shift:
8725	// srl (trunc (srl x, c1)), c2 --> trunc (and (srl x, (c1+c2)), Mask)
8726	if (N0.hasOneUse() && InnerShift.hasOneUse() &&
8727	c1 + c2 < InnerShiftSize) {
8728	SDLoc DL(N);
8729	SDValue NewShiftAmt = DAG.getConstant(c1 + c2, DL, ShiftAmtVT);
8730	SDValue NewShift = DAG.getNode(ISD::SRL, DL, InnerShiftVT,
8731	InnerShift.getOperand(0), NewShiftAmt);
8732	SDValue Mask = DAG.getConstant(APInt::getLowBitsSet(InnerShiftSize,
8733	OpSizeInBits - c2),
8734	DL, InnerShiftVT);
8735	SDValue And = DAG.getNode(ISD::AND, DL, InnerShiftVT, NewShift, Mask);
8736	return DAG.getNode(ISD::TRUNCATE, DL, VT, And);
8737	}
8738	}
8739	}
8740
8741	// fold (srl (shl x, c), c) -> (and x, cst2)
8742	// TODO - (srl (shl x, c1), c2).
8743	if (N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1 &&
8744	isConstantOrConstantVector(N1, /* NoOpaques */ true)) {
8745	SDLoc DL(N);
8746	SDValue Mask =
8747	DAG.getNode(ISD::SRL, DL, VT, DAG.getAllOnesConstant(DL, VT), N1);
8748	AddToWorklist(Mask.getNode());
8749	return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), Mask);
8750	}
8751
8752	// fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask)
8753	// TODO - support non-uniform vector shift amounts.
8754	if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
8755	// Shifting in all undef bits?
8756	EVT SmallVT = N0.getOperand(0).getValueType();
8757	unsigned BitSize = SmallVT.getScalarSizeInBits();
8758	if (N1C->getAPIntValue().uge(BitSize))
8759	return DAG.getUNDEF(VT);
8760
8761	if (!LegalTypes \|\| TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) {
8762	uint64_t ShiftAmt = N1C->getZExtValue();
8763	SDLoc DL0(N0);
8764	SDValue SmallShift = DAG.getNode(ISD::SRL, DL0, SmallVT,
8765	N0.getOperand(0),
8766	DAG.getConstant(ShiftAmt, DL0,
8767	getShiftAmountTy(SmallVT)));
8768	AddToWorklist(SmallShift.getNode());
8769	APInt Mask = APInt::getLowBitsSet(OpSizeInBits, OpSizeInBits - ShiftAmt);
8770	SDLoc DL(N);
8771	return DAG.getNode(ISD::AND, DL, VT,
8772	DAG.getNode(ISD::ANY_EXTEND, DL, VT, SmallShift),
8773	DAG.getConstant(Mask, DL, VT));
8774	}
8775	}
8776
8777	// fold (srl (sra X, Y), 31) -> (srl X, 31). This srl only looks at the sign
8778	// bit, which is unmodified by sra.
8779	if (N1C && N1C->getAPIntValue() == (OpSizeInBits - 1)) {
8780	if (N0.getOpcode() == ISD::SRA)
8781	return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), N1);
8782	}
8783
8784	// fold (srl (ctlz x), "5") -> x iff x has one bit set (the low bit).
8785	if (N1C && N0.getOpcode() == ISD::CTLZ &&
8786	N1C->getAPIntValue() == Log2_32(OpSizeInBits)) {
8787	KnownBits Known = DAG.computeKnownBits(N0.getOperand(0));
8788
8789	// If any of the input bits are KnownOne, then the input couldn't be all
8790	// zeros, thus the result of the srl will always be zero.
8791	if (Known.One.getBoolValue()) return DAG.getConstant(0, SDLoc(N0), VT);
8792
8793	// If all of the bits input to the ctlz node are known to be zero, then
8794	// the result of the ctlz is "32" and the result of the shift is one.
8795	APInt UnknownBits = ~Known.Zero;
8796	if (UnknownBits == 0) return DAG.getConstant(1, SDLoc(N0), VT);
8797
8798	// Otherwise, check to see if there is exactly one bit input to the ctlz.
8799	if (UnknownBits.isPowerOf2()) {
8800	// Okay, we know that only that the single bit specified by UnknownBits
8801	// could be set on input to the CTLZ node. If this bit is set, the SRL
8802	// will return 0, if it is clear, it returns 1. Change the CTLZ/SRL pair
8803	// to an SRL/XOR pair, which is likely to simplify more.
8804	unsigned ShAmt = UnknownBits.countTrailingZeros();
8805	SDValue Op = N0.getOperand(0);
8806
8807	if (ShAmt) {
8808	SDLoc DL(N0);
8809	Op = DAG.getNode(ISD::SRL, DL, VT, Op,
8810	DAG.getConstant(ShAmt, DL,
8811	getShiftAmountTy(Op.getValueType())));
8812	AddToWorklist(Op.getNode());
8813	}
8814
8815	SDLoc DL(N);
8816	return DAG.getNode(ISD::XOR, DL, VT,
8817	Op, DAG.getConstant(1, DL, VT));
8818	}
8819	}
8820
8821	// fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))).
8822	if (N1.getOpcode() == ISD::TRUNCATE &&
8823	N1.getOperand(0).getOpcode() == ISD::AND) {
8824	if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
8825	return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, NewOp1);
8826	}
8827
8828	// fold operands of srl based on knowledge that the low bits are not
8829	// demanded.
8830	if (SimplifyDemandedBits(SDValue(N, 0)))
8831	return SDValue(N, 0);
8832
8833	if (N1C && !N1C->isOpaque())
8834	if (SDValue NewSRL = visitShiftByConstant(N))
8835	return NewSRL;
8836
8837	// Attempt to convert a srl of a load into a narrower zero-extending load.
8838	if (SDValue NarrowLoad = ReduceLoadWidth(N))
8839	return NarrowLoad;
8840
8841	// Here is a common situation. We want to optimize:
8842	//
8843	// %a = ...
8844	// %b = and i32 %a, 2
8845	// %c = srl i32 %b, 1
8846	// brcond i32 %c ...
8847	//
8848	// into
8849	//
8850	// %a = ...
8851	// %b = and %a, 2
8852	// %c = setcc eq %b, 0
8853	// brcond %c ...
8854	//
8855	// However when after the source operand of SRL is optimized into AND, the SRL
8856	// itself may not be optimized further. Look for it and add the BRCOND into
8857	// the worklist.
8858	if (N->hasOneUse()) {
8859	SDNode Use = N->use_begin();
8860	if (Use->getOpcode() == ISD::BRCOND)
8861	AddToWorklist(Use);
8862	else if (Use->getOpcode() == ISD::TRUNCATE && Use->hasOneUse()) {
8863	// Also look past the truncate.
8864	Use = *Use->use_begin();
8865	if (Use->getOpcode() == ISD::BRCOND)
8866	AddToWorklist(Use);
8867	}
8868	}
8869
8870	// Try to transform this shift into a multiply-high if
8871	// it matches the appropriate pattern detected in combineShiftToMULH.
8872	if (SDValue MULH = combineShiftToMULH(N, DAG, TLI))
8873	return MULH;
8874
8875	return SDValue();
8876	}
8877
8878	SDValue DAGCombiner::visitFunnelShift(SDNode *N) {
8879	EVT VT = N->getValueType(0);
8880	SDValue N0 = N->getOperand(0);
8881	SDValue N1 = N->getOperand(1);
8882	SDValue N2 = N->getOperand(2);
8883	bool IsFSHL = N->getOpcode() == ISD::FSHL;
8884	unsigned BitWidth = VT.getScalarSizeInBits();
8885
8886	// fold (fshl N0, N1, 0) -> N0
8887	// fold (fshr N0, N1, 0) -> N1
8888	if (isPowerOf2_32(BitWidth))
8889	if (DAG.MaskedValueIsZero(
8890	N2, APInt(N2.getScalarValueSizeInBits(), BitWidth - 1)))
8891	return IsFSHL ? N0 : N1;
8892
8893	auto IsUndefOrZero = [](SDValue V) {
8894	return V.isUndef() \|\| isNullOrNullSplat(V, /AllowUndefs/ true);
8895	};
8896
8897	// TODO - support non-uniform vector shift amounts.
8898	if (ConstantSDNode *Cst = isConstOrConstSplat(N2)) {
8899	EVT ShAmtTy = N2.getValueType();
8900
8901	// fold (fsh* N0, N1, c) -> (fsh* N0, N1, c % BitWidth)
8902	if (Cst->getAPIntValue().uge(BitWidth)) {
8903	uint64_t RotAmt = Cst->getAPIntValue().urem(BitWidth);
8904	return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N0, N1,
8905	DAG.getConstant(RotAmt, SDLoc(N), ShAmtTy));
8906	}
8907
8908	unsigned ShAmt = Cst->getZExtValue();
8909	if (ShAmt == 0)
8910	return IsFSHL ? N0 : N1;
8911
8912	// fold fshl(undef_or_zero, N1, C) -> lshr(N1, BW-C)
8913	// fold fshr(undef_or_zero, N1, C) -> lshr(N1, C)
8914	// fold fshl(N0, undef_or_zero, C) -> shl(N0, C)
8915	// fold fshr(N0, undef_or_zero, C) -> shl(N0, BW-C)
8916	if (IsUndefOrZero(N0))
8917	return DAG.getNode(ISD::SRL, SDLoc(N), VT, N1,
8918	DAG.getConstant(IsFSHL ? BitWidth - ShAmt : ShAmt,
8919	SDLoc(N), ShAmtTy));
8920	if (IsUndefOrZero(N1))
8921	return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0,
8922	DAG.getConstant(IsFSHL ? ShAmt : BitWidth - ShAmt,
8923	SDLoc(N), ShAmtTy));
8924
8925	// fold (fshl ld1, ld0, c) -> (ld0[ofs]) iff ld0 and ld1 are consecutive.
8926	// fold (fshr ld1, ld0, c) -> (ld0[ofs]) iff ld0 and ld1 are consecutive.
8927	// TODO - bigendian support once we have test coverage.
8928	// TODO - can we merge this with CombineConseutiveLoads/MatchLoadCombine?
8929	// TODO - permit LHS EXTLOAD if extensions are shifted out.
8930	if ((BitWidth % 8) == 0 && (ShAmt % 8) == 0 && !VT.isVector() &&
8931	!DAG.getDataLayout().isBigEndian()) {
8932	auto *LHS = dyn_cast<LoadSDNode>(N0);
8933	auto *RHS = dyn_cast<LoadSDNode>(N1);
8934	if (LHS && RHS && LHS->isSimple() && RHS->isSimple() &&
8935	LHS->getAddressSpace() == RHS->getAddressSpace() &&
8936	(LHS->hasOneUse() \|\| RHS->hasOneUse()) && ISD::isNON_EXTLoad(RHS) &&
8937	ISD::isNON_EXTLoad(LHS)) {
8938	if (DAG.areNonVolatileConsecutiveLoads(LHS, RHS, BitWidth / 8, 1)) {
8939	SDLoc DL(RHS);
8940	uint64_t PtrOff =
8941	IsFSHL ? (((BitWidth - ShAmt) % BitWidth) / 8) : (ShAmt / 8);
8942	Align NewAlign = commonAlignment(RHS->getAlign(), PtrOff);
8943	bool Fast = false;
8944	if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
8945	RHS->getAddressSpace(), NewAlign,
8946	RHS->getMemOperand()->getFlags(), &Fast) &&
8947	Fast) {
8948	SDValue NewPtr = DAG.getMemBasePlusOffset(
8949	RHS->getBasePtr(), TypeSize::Fixed(PtrOff), DL);
8950	AddToWorklist(NewPtr.getNode());
8951	SDValue Load = DAG.getLoad(
8952	VT, DL, RHS->getChain(), NewPtr,
8953	RHS->getPointerInfo().getWithOffset(PtrOff), NewAlign,
8954	RHS->getMemOperand()->getFlags(), RHS->getAAInfo());
8955	// Replace the old load's chain with the new load's chain.
8956	WorklistRemover DeadNodes(*this);
8957	DAG.ReplaceAllUsesOfValueWith(N1.getValue(1), Load.getValue(1));
8958	return Load;
8959	}
8960	}
8961	}
8962	}
8963	}
8964
8965	// fold fshr(undef_or_zero, N1, N2) -> lshr(N1, N2)
8966	// fold fshl(N0, undef_or_zero, N2) -> shl(N0, N2)
8967	// iff We know the shift amount is in range.
8968	// TODO: when is it worth doing SUB(BW, N2) as well?
8969	if (isPowerOf2_32(BitWidth)) {
8970	APInt ModuloBits(N2.getScalarValueSizeInBits(), BitWidth - 1);
8971	if (IsUndefOrZero(N0) && !IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
8972	return DAG.getNode(ISD::SRL, SDLoc(N), VT, N1, N2);
8973	if (IsUndefOrZero(N1) && IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
8974	return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, N2);
8975	}
8976
8977	// fold (fshl N0, N0, N2) -> (rotl N0, N2)
8978	// fold (fshr N0, N0, N2) -> (rotr N0, N2)
8979	// TODO: Investigate flipping this rotate if only one is legal, if funnel shift
8980	// is legal as well we might be better off avoiding non-constant (BW - N2).
8981	unsigned RotOpc = IsFSHL ? ISD::ROTL : ISD::ROTR;
8982	if (N0 == N1 && hasOperation(RotOpc, VT))
8983	return DAG.getNode(RotOpc, SDLoc(N), VT, N0, N2);
8984
8985	// Simplify, based on bits shifted out of N0/N1.
8986	if (SimplifyDemandedBits(SDValue(N, 0)))
8987	return SDValue(N, 0);
8988
8989	return SDValue();
8990	}
8991
8992	SDValue DAGCombiner::visitABS(SDNode *N) {
8993	SDValue N0 = N->getOperand(0);
8994	EVT VT = N->getValueType(0);
8995
8996	// fold (abs c1) -> c2
8997	if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
8998	return DAG.getNode(ISD::ABS, SDLoc(N), VT, N0);
8999	// fold (abs (abs x)) -> (abs x)
9000	if (N0.getOpcode() == ISD::ABS)
9001	return N0;
9002	// fold (abs x) -> x iff not-negative
9003	if (DAG.SignBitIsZero(N0))
9004	return N0;
9005	return SDValue();
9006	}
9007
9008	SDValue DAGCombiner::visitBSWAP(SDNode *N) {
9009	SDValue N0 = N->getOperand(0);
9010	EVT VT = N->getValueType(0);
9011
9012	// fold (bswap c1) -> c2
9013	if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
9014	return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N0);
9015	// fold (bswap (bswap x)) -> x
9016	if (N0.getOpcode() == ISD::BSWAP)
9017	return N0->getOperand(0);
9018	return SDValue();
9019	}
9020
9021	SDValue DAGCombiner::visitBITREVERSE(SDNode *N) {
9022	SDValue N0 = N->getOperand(0);
9023	EVT VT = N->getValueType(0);
9024
9025	// fold (bitreverse c1) -> c2
9026	if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
9027	return DAG.getNode(ISD::BITREVERSE, SDLoc(N), VT, N0);
9028	// fold (bitreverse (bitreverse x)) -> x
9029	if (N0.getOpcode() == ISD::BITREVERSE)
9030	return N0.getOperand(0);
9031	return SDValue();
9032	}
9033
9034	SDValue DAGCombiner::visitCTLZ(SDNode *N) {
9035	SDValue N0 = N->getOperand(0);
9036	EVT VT = N->getValueType(0);
9037
9038	// fold (ctlz c1) -> c2
9039	if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
9040	return DAG.getNode(ISD::CTLZ, SDLoc(N), VT, N0);
9041
9042	// If the value is known never to be zero, switch to the undef version.
9043	if (!LegalOperations \|\| TLI.isOperationLegal(ISD::CTLZ_ZERO_UNDEF, VT)) {
9044	if (DAG.isKnownNeverZero(N0))
9045	return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
9046	}
9047
9048	return SDValue();
9049	}
9050
9051	SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) {
9052	SDValue N0 = N->getOperand(0);
9053	EVT VT = N->getValueType(0);
9054
9055	// fold (ctlz_zero_undef c1) -> c2
9056	if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
9057	return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
9058	return SDValue();
9059	}
9060
9061	SDValue DAGCombiner::visitCTTZ(SDNode *N) {
9062	SDValue N0 = N->getOperand(0);
9063	EVT VT = N->getValueType(0);
9064
9065	// fold (cttz c1) -> c2
9066	if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
9067	return DAG.getNode(ISD::CTTZ, SDLoc(N), VT, N0);
9068
9069	// If the value is known never to be zero, switch to the undef version.
9070	if (!LegalOperations \|\| TLI.isOperationLegal(ISD::CTTZ_ZERO_UNDEF, VT)) {
9071	if (DAG.isKnownNeverZero(N0))
9072	return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
9073	}
9074
9075	return SDValue();
9076	}
9077
9078	SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) {
9079	SDValue N0 = N->getOperand(0);
9080	EVT VT = N->getValueType(0);
9081
9082	// fold (cttz_zero_undef c1) -> c2
9083	if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
9084	return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
9085	return SDValue();
9086	}
9087
9088	SDValue DAGCombiner::visitCTPOP(SDNode *N) {
9089	SDValue N0 = N->getOperand(0);
9090	EVT VT = N->getValueType(0);
9091
9092	// fold (ctpop c1) -> c2
9093	if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
9094	return DAG.getNode(ISD::CTPOP, SDLoc(N), VT, N0);
9095	return SDValue();
9096	}
9097
9098	// FIXME: This should be checking for no signed zeros on individual operands, as
9099	// well as no nans.
9100	static bool isLegalToCombineMinNumMaxNum(SelectionDAG &DAG, SDValue LHS,
9101	SDValue RHS,
9102	const TargetLowering &TLI) {
9103	const TargetOptions &Options = DAG.getTarget().Options;
9104	EVT VT = LHS.getValueType();
9105
9106	return Options.NoSignedZerosFPMath && VT.isFloatingPoint() &&
9107	TLI.isProfitableToCombineMinNumMaxNum(VT) &&
9108	DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS);
9109	}
9110
9111	/// Generate Min/Max node
9112	static SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
9113	SDValue RHS, SDValue True, SDValue False,
9114	ISD::CondCode CC, const TargetLowering &TLI,
9115	SelectionDAG &DAG) {
9116	if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True))
9117	return SDValue();
9118
9119	EVT TransformVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
9120	switch (CC) {
9121	case ISD::SETOLT:
9122	case ISD::SETOLE:
9123	case ISD::SETLT:
9124	case ISD::SETLE:
9125	case ISD::SETULT:
9126	case ISD::SETULE: {
9127	// Since it's known never nan to get here already, either fminnum or
9128	// fminnum_ieee are OK. Try the ieee version first, since it's fminnum is
9129	// expanded in terms of it.
9130	unsigned IEEEOpcode = (LHS == True) ? ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
9131	if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
9132	return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);
9133
9134	unsigned Opcode = (LHS == True) ? ISD::FMINNUM : ISD::FMAXNUM;
9135	if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
9136	return DAG.getNode(Opcode, DL, VT, LHS, RHS);
9137	return SDValue();
9138	}
9139	case ISD::SETOGT:
9140	case ISD::SETOGE:
9141	case ISD::SETGT:
9142	case ISD::SETGE:
9143	case ISD::SETUGT:
9144	case ISD::SETUGE: {
9145	unsigned IEEEOpcode = (LHS == True) ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE;
9146	if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
9147	return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);
9148
9149	unsigned Opcode = (LHS == True) ? ISD::FMAXNUM : ISD::FMINNUM;
9150	if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
9151	return DAG.getNode(Opcode, DL, VT, LHS, RHS);
9152	return SDValue();
9153	}
9154	default:
9155	return SDValue();
9156	}
9157	}
9158
9159	/// If a (v)select has a condition value that is a sign-bit test, try to smear
9160	/// the condition operand sign-bit across the value width and use it as a mask.
9161	static SDValue foldSelectOfConstantsUsingSra(SDNode *N, SelectionDAG &DAG) {
9162	SDValue Cond = N->getOperand(0);
9163	SDValue C1 = N->getOperand(1);
9164	SDValue C2 = N->getOperand(2);
9165	assert(isConstantOrConstantVector(C1) && isConstantOrConstantVector(C2) &&((isConstantOrConstantVector(C1) && isConstantOrConstantVector (C2) && "Expected select-of-constants") ? static_cast <void> (0) : __assert_fail ("isConstantOrConstantVector(C1) && isConstantOrConstantVector(C2) && \"Expected select-of-constants\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 9166, __PRETTY_FUNCTION__))
9166	"Expected select-of-constants")((isConstantOrConstantVector(C1) && isConstantOrConstantVector (C2) && "Expected select-of-constants") ? static_cast <void> (0) : __assert_fail ("isConstantOrConstantVector(C1) && isConstantOrConstantVector(C2) && \"Expected select-of-constants\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 9166, __PRETTY_FUNCTION__));
9167
9168	EVT VT = N->getValueType(0);
9169	if (Cond.getOpcode() != ISD::SETCC \|\| !Cond.hasOneUse() \|\|
9170	VT != Cond.getOperand(0).getValueType())
9171	return SDValue();
9172
9173	// The inverted-condition + commuted-select variants of these patterns are
9174	// canonicalized to these forms in IR.
9175	SDValue X = Cond.getOperand(0);
9176	SDValue CondC = Cond.getOperand(1);
9177	ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
9178	if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(CondC) &&
9179	isAllOnesOrAllOnesSplat(C2)) {
9180	// i32 X > -1 ? C1 : -1 --> (X >>s 31) \| C1
9181	SDLoc DL(N);
9182	SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT);
9183	SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC);
9184	return DAG.getNode(ISD::OR, DL, VT, Sra, C1);
9185	}
9186	if (CC == ISD::SETLT && isNullOrNullSplat(CondC) && isNullOrNullSplat(C2)) {
9187	// i8 X < 0 ? C1 : 0 --> (X >>s 7) & C1
9188	SDLoc DL(N);
9189	SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT);
9190	SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC);
9191	return DAG.getNode(ISD::AND, DL, VT, Sra, C1);
9192	}
9193	return SDValue();
9194	}
9195
9196	SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
9197	SDValue Cond = N->getOperand(0);
9198	SDValue N1 = N->getOperand(1);
9199	SDValue N2 = N->getOperand(2);
9200	EVT VT = N->getValueType(0);
9201	EVT CondVT = Cond.getValueType();
9202	SDLoc DL(N);
9203
9204	if (!VT.isInteger())
9205	return SDValue();
9206
9207	auto *C1 = dyn_cast<ConstantSDNode>(N1);
9208	auto *C2 = dyn_cast<ConstantSDNode>(N2);
9209	if (!C1 \|\| !C2)
9210	return SDValue();
9211
9212	// Only do this before legalization to avoid conflicting with target-specific
9213	// transforms in the other direction (create a select from a zext/sext). There
9214	// is also a target-independent combine here in DAGCombiner in the other
9215	// direction for (select Cond, -1, 0) when the condition is not i1.
9216	if (CondVT == MVT::i1 && !LegalOperations) {
9217	if (C1->isNullValue() && C2->isOne()) {
9218	// select Cond, 0, 1 --> zext (!Cond)
9219	SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
9220	if (VT != MVT::i1)
9221	NotCond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, NotCond);
9222	return NotCond;
9223	}
9224	if (C1->isNullValue() && C2->isAllOnesValue()) {
9225	// select Cond, 0, -1 --> sext (!Cond)
9226	SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
9227	if (VT != MVT::i1)
9228	NotCond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NotCond);
9229	return NotCond;
9230	}
9231	if (C1->isOne() && C2->isNullValue()) {
9232	// select Cond, 1, 0 --> zext (Cond)
9233	if (VT != MVT::i1)
9234	Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
9235	return Cond;
9236	}
9237	if (C1->isAllOnesValue() && C2->isNullValue()) {
9238	// select Cond, -1, 0 --> sext (Cond)
9239	if (VT != MVT::i1)
9240	Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
9241	return Cond;
9242	}
9243
9244	// Use a target hook because some targets may prefer to transform in the
9245	// other direction.
9246	if (TLI.convertSelectOfConstantsToMath(VT)) {
9247	// For any constants that differ by 1, we can transform the select into an
9248	// extend and add.
9249	const APInt &C1Val = C1->getAPIntValue();
9250	const APInt &C2Val = C2->getAPIntValue();
9251	if (C1Val - 1 == C2Val) {
9252	// select Cond, C1, C1-1 --> add (zext Cond), C1-1
9253	if (VT != MVT::i1)
9254	Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
9255	return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
9256	}
9257	if (C1Val + 1 == C2Val) {
9258	// select Cond, C1, C1+1 --> add (sext Cond), C1+1
9259	if (VT != MVT::i1)
9260	Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
9261	return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
9262	}
9263
9264	// select Cond, Pow2, 0 --> (zext Cond) << log2(Pow2)
9265	if (C1Val.isPowerOf2() && C2Val.isNullValue()) {
9266	if (VT != MVT::i1)
9267	Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
9268	SDValue ShAmtC = DAG.getConstant(C1Val.exactLogBase2(), DL, VT);
9269	return DAG.getNode(ISD::SHL, DL, VT, Cond, ShAmtC);
9270	}
9271
9272	if (SDValue V = foldSelectOfConstantsUsingSra(N, DAG))
9273	return V;
9274	}
9275
9276	return SDValue();
9277	}
9278
9279	// fold (select Cond, 0, 1) -> (xor Cond, 1)
9280	// We can't do this reliably if integer based booleans have different contents
9281	// to floating point based booleans. This is because we can't tell whether we
9282	// have an integer-based boolean or a floating-point-based boolean unless we
9283	// can find the SETCC that produced it and inspect its operands. This is
9284	// fairly easy if C is the SETCC node, but it can potentially be
9285	// undiscoverable (or not reasonably discoverable). For example, it could be
9286	// in another basic block or it could require searching a complicated
9287	// expression.
9288	if (CondVT.isInteger() &&
9289	TLI.getBooleanContents(/isVec/false, /isFloat/true) ==
9290	TargetLowering::ZeroOrOneBooleanContent &&
9291	TLI.getBooleanContents(/isVec/false, /isFloat/false) ==
9292	TargetLowering::ZeroOrOneBooleanContent &&
9293	C1->isNullValue() && C2->isOne()) {
9294	SDValue NotCond =
9295	DAG.getNode(ISD::XOR, DL, CondVT, Cond, DAG.getConstant(1, DL, CondVT));
9296	if (VT.bitsEq(CondVT))
9297	return NotCond;
9298	return DAG.getZExtOrTrunc(NotCond, DL, VT);
9299	}
9300
9301	return SDValue();
9302	}
9303
9304	SDValue DAGCombiner::visitSELECT(SDNode *N) {
9305	SDValue N0 = N->getOperand(0);
9306	SDValue N1 = N->getOperand(1);
9307	SDValue N2 = N->getOperand(2);
9308	EVT VT = N->getValueType(0);
9309	EVT VT0 = N0.getValueType();
9310	SDLoc DL(N);
9311	SDNodeFlags Flags = N->getFlags();
9312
9313	if (SDValue V = DAG.simplifySelect(N0, N1, N2))
9314	return V;
9315
9316	// fold (select X, X, Y) -> (or X, Y)
9317	// fold (select X, 1, Y) -> (or C, Y)
9318	if (VT == VT0 && VT == MVT::i1 && (N0 == N1 \|\| isOneConstant(N1)))
9319	return DAG.getNode(ISD::OR, DL, VT, N0, N2);
9320
9321	if (SDValue V = foldSelectOfConstants(N))
9322	return V;
9323
9324	// fold (select C, 0, X) -> (and (not C), X)
9325	if (VT == VT0 && VT == MVT::i1 && isNullConstant(N1)) {
9326	SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
9327	AddToWorklist(NOTNode.getNode());
9328	return DAG.getNode(ISD::AND, DL, VT, NOTNode, N2);
9329	}
9330	// fold (select C, X, 1) -> (or (not C), X)
9331	if (VT == VT0 && VT == MVT::i1 && isOneConstant(N2)) {
9332	SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
9333	AddToWorklist(NOTNode.getNode());
9334	return DAG.getNode(ISD::OR, DL, VT, NOTNode, N1);
9335	}
9336	// fold (select X, Y, X) -> (and X, Y)
9337	// fold (select X, Y, 0) -> (and X, Y)
9338	if (VT == VT0 && VT == MVT::i1 && (N0 == N2 \|\| isNullConstant(N2)))
9339	return DAG.getNode(ISD::AND, DL, VT, N0, N1);
9340
9341	// If we can fold this based on the true/false value, do so.
9342	if (SimplifySelectOps(N, N1, N2))
9343	return SDValue(N, 0); // Don't revisit N.
9344
9345	if (VT0 == MVT::i1) {
9346	// The code in this block deals with the following 2 equivalences:
9347	// select(C0\|C1, x, y) <=> select(C0, x, select(C1, x, y))
9348	// select(C0&C1, x, y) <=> select(C0, select(C1, x, y), y)
9349	// The target can specify its preferred form with the
9350	// shouldNormalizeToSelectSequence() callback. However we always transform
9351	// to the right anyway if we find the inner select exists in the DAG anyway
9352	// and we always transform to the left side if we know that we can further
9353	// optimize the combination of the conditions.
9354	bool normalizeToSequence =
9355	TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT);
9356	// select (and Cond0, Cond1), X, Y
9357	// -> select Cond0, (select Cond1, X, Y), Y
9358	if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) {
9359	SDValue Cond0 = N0->getOperand(0);
9360	SDValue Cond1 = N0->getOperand(1);
9361	SDValue InnerSelect =
9362	DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2, Flags);
9363	if (normalizeToSequence \|\| !InnerSelect.use_empty())
9364	return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0,
9365	InnerSelect, N2, Flags);
9366	// Cleanup on failure.
9367	if (InnerSelect.use_empty())
9368	recursivelyDeleteUnusedNodes(InnerSelect.getNode());
9369	}
9370	// select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y)
9371	if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) {
9372	SDValue Cond0 = N0->getOperand(0);
9373	SDValue Cond1 = N0->getOperand(1);
9374	SDValue InnerSelect = DAG.getNode(ISD::SELECT, DL, N1.getValueType(),
9375	Cond1, N1, N2, Flags);
9376	if (normalizeToSequence \|\| !InnerSelect.use_empty())
9377	return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0, N1,
9378	InnerSelect, Flags);
9379	// Cleanup on failure.
9380	if (InnerSelect.use_empty())
9381	recursivelyDeleteUnusedNodes(InnerSelect.getNode());
9382	}
9383
9384	// select Cond0, (select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y
9385	if (N1->getOpcode() == ISD::SELECT && N1->hasOneUse()) {
9386	SDValue N1_0 = N1->getOperand(0);
9387	SDValue N1_1 = N1->getOperand(1);
9388	SDValue N1_2 = N1->getOperand(2);
9389	if (N1_2 == N2 && N0.getValueType() == N1_0.getValueType()) {
9390	// Create the actual and node if we can generate good code for it.
9391	if (!normalizeToSequence) {
9392	SDValue And = DAG.getNode(ISD::AND, DL, N0.getValueType(), N0, N1_0);
9393	return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), And, N1_1,
9394	N2, Flags);
9395	}
9396	// Otherwise see if we can optimize the "and" to a better pattern.
9397	if (SDValue Combined = visitANDLike(N0, N1_0, N)) {
9398	return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1_1,
9399	N2, Flags);
9400	}
9401	}
9402	}
9403	// select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y
9404	if (N2->getOpcode() == ISD::SELECT && N2->hasOneUse()) {
9405	SDValue N2_0 = N2->getOperand(0);
9406	SDValue N2_1 = N2->getOperand(1);
9407	SDValue N2_2 = N2->getOperand(2);
9408	if (N2_1 == N1 && N0.getValueType() == N2_0.getValueType()) {
9409	// Create the actual or node if we can generate good code for it.
9410	if (!normalizeToSequence) {
9411	SDValue Or = DAG.getNode(ISD::OR, DL, N0.getValueType(), N0, N2_0);
9412	return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Or, N1,
9413	N2_2, Flags);
9414	}
9415	// Otherwise see if we can optimize to a better pattern.
9416	if (SDValue Combined = visitORLike(N0, N2_0, N))
9417	return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1,
9418	N2_2, Flags);
9419	}
9420	}
9421	}
9422
9423	// select (not Cond), N1, N2 -> select Cond, N2, N1
9424	if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false)) {
9425	SDValue SelectOp = DAG.getSelect(DL, VT, F, N2, N1);
9426	SelectOp->setFlags(Flags);
9427	return SelectOp;
9428	}
9429
9430	// Fold selects based on a setcc into other things, such as min/max/abs.
9431	if (N0.getOpcode() == ISD::SETCC) {
9432	SDValue Cond0 = N0.getOperand(0), Cond1 = N0.getOperand(1);
9433	ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
9434
9435	// select (fcmp lt x, y), x, y -> fminnum x, y
9436	// select (fcmp gt x, y), x, y -> fmaxnum x, y
9437	//
9438	// This is OK if we don't care what happens if either operand is a NaN.
9439	if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, N1, N2, TLI))
9440	if (SDValue FMinMax = combineMinNumMaxNum(DL, VT, Cond0, Cond1, N1, N2,
9441	CC, TLI, DAG))
9442	return FMinMax;
9443
9444	// Use 'unsigned add with overflow' to optimize an unsigned saturating add.
9445	// This is conservatively limited to pre-legal-operations to give targets
9446	// a chance to reverse the transform if they want to do that. Also, it is
9447	// unlikely that the pattern would be formed late, so it's probably not
9448	// worth going through the other checks.
9449	if (!LegalOperations && TLI.isOperationLegalOrCustom(ISD::UADDO, VT) &&
9450	CC == ISD::SETUGT && N0.hasOneUse() && isAllOnesConstant(N1) &&
9451	N2.getOpcode() == ISD::ADD && Cond0 == N2.getOperand(0)) {
9452	auto *C = dyn_cast<ConstantSDNode>(N2.getOperand(1));
9453	auto *NotC = dyn_cast<ConstantSDNode>(Cond1);
9454	if (C && NotC && C->getAPIntValue() == ~NotC->getAPIntValue()) {
9455	// select (setcc Cond0, ~C, ugt), -1, (add Cond0, C) -->
9456	// uaddo Cond0, C; select uaddo.1, -1, uaddo.0
9457	//
9458	// The IR equivalent of this transform would have this form:
9459	// %a = add %x, C
9460	// %c = icmp ugt %x, ~C
9461	// %r = select %c, -1, %a
9462	// =>
9463	// %u = call {iN,i1} llvm.uadd.with.overflow(%x, C)
9464	// %u0 = extractvalue %u, 0
9465	// %u1 = extractvalue %u, 1
9466	// %r = select %u1, -1, %u0
9467	SDVTList VTs = DAG.getVTList(VT, VT0);
9468	SDValue UAO = DAG.getNode(ISD::UADDO, DL, VTs, Cond0, N2.getOperand(1));
9469	return DAG.getSelect(DL, VT, UAO.getValue(1), N1, UAO.getValue(0));
9470	}
9471	}
9472
9473	if (TLI.isOperationLegal(ISD::SELECT_CC, VT) \|\|
9474	(!LegalOperations &&
9475	TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT))) {
9476	// Any flags available in a select/setcc fold will be on the setcc as they
9477	// migrated from fcmp
9478	Flags = N0.getNode()->getFlags();
9479	SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, VT, Cond0, Cond1, N1,
9480	N2, N0.getOperand(2));
9481	SelectNode->setFlags(Flags);
9482	return SelectNode;
9483	}
9484
9485	return SimplifySelect(DL, N0, N1, N2);
9486	}
9487
9488	return SDValue();
9489	}
9490
9491	// This function assumes all the vselect's arguments are CONCAT_VECTOR
9492	// nodes and that the condition is a BV of ConstantSDNodes (or undefs).
9493	static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
9494	SDLoc DL(N);
9495	SDValue Cond = N->getOperand(0);
9496	SDValue LHS = N->getOperand(1);
9497	SDValue RHS = N->getOperand(2);
9498	EVT VT = N->getValueType(0);
9499	int NumElems = VT.getVectorNumElements();
9500	assert(LHS.getOpcode() == ISD::CONCAT_VECTORS &&((LHS.getOpcode() == ISD::CONCAT_VECTORS && RHS.getOpcode () == ISD::CONCAT_VECTORS && Cond.getOpcode() == ISD:: BUILD_VECTOR) ? static_cast<void> (0) : __assert_fail ( "LHS.getOpcode() == ISD::CONCAT_VECTORS && RHS.getOpcode() == ISD::CONCAT_VECTORS && Cond.getOpcode() == ISD::BUILD_VECTOR" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 9502, __PRETTY_FUNCTION__))
9501	RHS.getOpcode() == ISD::CONCAT_VECTORS &&((LHS.getOpcode() == ISD::CONCAT_VECTORS && RHS.getOpcode () == ISD::CONCAT_VECTORS && Cond.getOpcode() == ISD:: BUILD_VECTOR) ? static_cast<void> (0) : __assert_fail ( "LHS.getOpcode() == ISD::CONCAT_VECTORS && RHS.getOpcode() == ISD::CONCAT_VECTORS && Cond.getOpcode() == ISD::BUILD_VECTOR" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 9502, __PRETTY_FUNCTION__))
9502	Cond.getOpcode() == ISD::BUILD_VECTOR)((LHS.getOpcode() == ISD::CONCAT_VECTORS && RHS.getOpcode () == ISD::CONCAT_VECTORS && Cond.getOpcode() == ISD:: BUILD_VECTOR) ? static_cast<void> (0) : __assert_fail ( "LHS.getOpcode() == ISD::CONCAT_VECTORS && RHS.getOpcode() == ISD::CONCAT_VECTORS && Cond.getOpcode() == ISD::BUILD_VECTOR" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 9502, __PRETTY_FUNCTION__));
9503
9504	// CONCAT_VECTOR can take an arbitrary number of arguments. We only care about
9505	// binary ones here.
9506	if (LHS->getNumOperands() != 2 \|\| RHS->getNumOperands() != 2)
9507	return SDValue();
9508
9509	// We're sure we have an even number of elements due to the
9510	// concat_vectors we have as arguments to vselect.
9511	// Skip BV elements until we find one that's not an UNDEF
9512	// After we find an UNDEF element, keep looping until we get to half the
9513	// length of the BV and see if all the non-undef nodes are the same.
9514	ConstantSDNode *BottomHalf = nullptr;
9515	for (int i = 0; i < NumElems / 2; ++i) {
9516	if (Cond->getOperand(i)->isUndef())
9517	continue;
9518
9519	if (BottomHalf == nullptr)
9520	BottomHalf = cast<ConstantSDNode>(Cond.getOperand(i));
9521	else if (Cond->getOperand(i).getNode() != BottomHalf)
9522	return SDValue();
9523	}
9524
9525	// Do the same for the second half of the BuildVector
9526	ConstantSDNode *TopHalf = nullptr;
9527	for (int i = NumElems / 2; i < NumElems; ++i) {
9528	if (Cond->getOperand(i)->isUndef())
9529	continue;
9530
9531	if (TopHalf == nullptr)
9532	TopHalf = cast<ConstantSDNode>(Cond.getOperand(i));
9533	else if (Cond->getOperand(i).getNode() != TopHalf)
9534	return SDValue();
9535	}
9536
9537	assert(TopHalf && BottomHalf &&((TopHalf && BottomHalf && "One half of the selector was all UNDEFs and the other was all the " "same value. This should have been addressed before this function." ) ? static_cast<void> (0) : __assert_fail ("TopHalf && BottomHalf && \"One half of the selector was all UNDEFs and the other was all the \" \"same value. This should have been addressed before this function.\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 9539, __PRETTY_FUNCTION__))
9538	"One half of the selector was all UNDEFs and the other was all the "((TopHalf && BottomHalf && "One half of the selector was all UNDEFs and the other was all the " "same value. This should have been addressed before this function." ) ? static_cast<void> (0) : __assert_fail ("TopHalf && BottomHalf && \"One half of the selector was all UNDEFs and the other was all the \" \"same value. This should have been addressed before this function.\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 9539, __PRETTY_FUNCTION__))
9539	"same value. This should have been addressed before this function.")((TopHalf && BottomHalf && "One half of the selector was all UNDEFs and the other was all the " "same value. This should have been addressed before this function." ) ? static_cast<void> (0) : __assert_fail ("TopHalf && BottomHalf && \"One half of the selector was all UNDEFs and the other was all the \" \"same value. This should have been addressed before this function.\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 9539, __PRETTY_FUNCTION__));
9540	return DAG.getNode(
9541	ISD::CONCAT_VECTORS, DL, VT,
9542	BottomHalf->isNullValue() ? RHS->getOperand(0) : LHS->getOperand(0),
9543	TopHalf->isNullValue() ? RHS->getOperand(1) : LHS->getOperand(1));
9544	}
9545
9546	bool refineUniformBase(SDValue &BasePtr, SDValue &Index, SelectionDAG &DAG) {
9547	if (!isNullConstant(BasePtr) \|\| Index.getOpcode() != ISD::ADD)
9548	return false;
9549
9550	// For now we check only the LHS of the add.
9551	SDValue LHS = Index.getOperand(0);
9552	SDValue SplatVal = DAG.getSplatValue(LHS);
9553	if (!SplatVal)
9554	return false;
9555
9556	BasePtr = SplatVal;
9557	Index = Index.getOperand(1);
9558	return true;
9559	}
9560
9561	// Fold sext/zext of index into index type.
9562	bool refineIndexType(MaskedGatherScatterSDNode *MGS, SDValue &Index,
9563	bool Scaled, SelectionDAG &DAG) {
9564	const TargetLowering &TLI = DAG.getTargetLoweringInfo();
9565
9566	if (Index.getOpcode() == ISD::ZERO_EXTEND) {
9567	SDValue Op = Index.getOperand(0);
9568	MGS->setIndexType(Scaled ? ISD::UNSIGNED_SCALED : ISD::UNSIGNED_UNSCALED);
9569	if (TLI.shouldRemoveExtendFromGSIndex(Op.getValueType())) {
9570	Index = Op;
9571	return true;
9572	}
9573	}
9574
9575	if (Index.getOpcode() == ISD::SIGN_EXTEND) {
9576	SDValue Op = Index.getOperand(0);
9577	MGS->setIndexType(Scaled ? ISD::SIGNED_SCALED : ISD::SIGNED_UNSCALED);
9578	if (TLI.shouldRemoveExtendFromGSIndex(Op.getValueType())) {
9579	Index = Op;
9580	return true;
9581	}
9582	}
9583
9584	return false;
9585	}
9586
9587	SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
9588	MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N);
9589	SDValue Mask = MSC->getMask();
9590	SDValue Chain = MSC->getChain();
9591	SDValue Index = MSC->getIndex();
9592	SDValue Scale = MSC->getScale();
9593	SDValue StoreVal = MSC->getValue();
9594	SDValue BasePtr = MSC->getBasePtr();
9595	SDLoc DL(N);
9596
9597	// Zap scatters with a zero mask.
9598	if (ISD::isBuildVectorAllZeros(Mask.getNode()))
9599	return Chain;
9600
9601	if (refineUniformBase(BasePtr, Index, DAG)) {
9602	SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, Scale};
9603	return DAG.getMaskedScatter(
9604	DAG.getVTList(MVT::Other), StoreVal.getValueType(), DL, Ops,
9605	MSC->getMemOperand(), MSC->getIndexType(), MSC->isTruncatingStore());
9606	}
9607
9608	if (refineIndexType(MSC, Index, MSC->isIndexScaled(), DAG)) {
9609	SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, Scale};
9610	return DAG.getMaskedScatter(
9611	DAG.getVTList(MVT::Other), StoreVal.getValueType(), DL, Ops,
9612	MSC->getMemOperand(), MSC->getIndexType(), MSC->isTruncatingStore());
9613	}
9614
9615	return SDValue();
9616	}
9617
9618	SDValue DAGCombiner::visitMSTORE(SDNode *N) {
9619	MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N);
9620	SDValue Mask = MST->getMask();
9621	SDValue Chain = MST->getChain();
9622	SDLoc DL(N);
9623
9624	// Zap masked stores with a zero mask.
9625	if (ISD::isBuildVectorAllZeros(Mask.getNode()))
9626	return Chain;
9627
9628	// If this is a masked load with an all ones mask, we can use a unmasked load.
9629	// FIXME: Can we do this for indexed, compressing, or truncating stores?
9630	if (ISD::isBuildVectorAllOnes(Mask.getNode()) &&
9631	MST->isUnindexed() && !MST->isCompressingStore() &&
9632	!MST->isTruncatingStore())
9633	return DAG.getStore(MST->getChain(), SDLoc(N), MST->getValue(),
9634	MST->getBasePtr(), MST->getMemOperand());
9635
9636	// Try transforming N to an indexed store.
9637	if (CombineToPreIndexedLoadStore(N) \|\| CombineToPostIndexedLoadStore(N))
9638	return SDValue(N, 0);
9639
9640	return SDValue();
9641	}
9642
9643	SDValue DAGCombiner::visitMGATHER(SDNode *N) {
9644	MaskedGatherSDNode *MGT = cast<MaskedGatherSDNode>(N);
9645	SDValue Mask = MGT->getMask();
9646	SDValue Chain = MGT->getChain();
9647	SDValue Index = MGT->getIndex();
9648	SDValue Scale = MGT->getScale();
9649	SDValue PassThru = MGT->getPassThru();
9650	SDValue BasePtr = MGT->getBasePtr();
9651	SDLoc DL(N);
9652
9653	// Zap gathers with a zero mask.
9654	if (ISD::isBuildVectorAllZeros(Mask.getNode()))
9655	return CombineTo(N, PassThru, MGT->getChain());
9656
9657	if (refineUniformBase(BasePtr, Index, DAG)) {
9658	SDValue Ops[] = {Chain, PassThru, Mask, BasePtr, Index, Scale};
9659	return DAG.getMaskedGather(DAG.getVTList(N->getValueType(0), MVT::Other),
9660	PassThru.getValueType(), DL, Ops,
9661	MGT->getMemOperand(), MGT->getIndexType(),
9662	MGT->getExtensionType());
9663	}
9664
9665	if (refineIndexType(MGT, Index, MGT->isIndexScaled(), DAG)) {
9666	SDValue Ops[] = {Chain, PassThru, Mask, BasePtr, Index, Scale};
9667	return DAG.getMaskedGather(DAG.getVTList(N->getValueType(0), MVT::Other),
9668	PassThru.getValueType(), DL, Ops,
9669	MGT->getMemOperand(), MGT->getIndexType(),
9670	MGT->getExtensionType());
9671	}
9672
9673	return SDValue();
9674	}
9675
9676	SDValue DAGCombiner::visitMLOAD(SDNode *N) {
9677	MaskedLoadSDNode *MLD = cast<MaskedLoadSDNode>(N);
9678	SDValue Mask = MLD->getMask();
9679	SDLoc DL(N);
9680
9681	// Zap masked loads with a zero mask.
9682	if (ISD::isBuildVectorAllZeros(Mask.getNode()))
9683	return CombineTo(N, MLD->getPassThru(), MLD->getChain());
9684
9685	// If this is a masked load with an all ones mask, we can use a unmasked load.
9686	// FIXME: Can we do this for indexed, expanding, or extending loads?
9687	if (ISD::isBuildVectorAllOnes(Mask.getNode()) &&
9688	MLD->isUnindexed() && !MLD->isExpandingLoad() &&
9689	MLD->getExtensionType() == ISD::NON_EXTLOAD) {
9690	SDValue NewLd = DAG.getLoad(N->getValueType(0), SDLoc(N), MLD->getChain(),
9691	MLD->getBasePtr(), MLD->getMemOperand());
9692	return CombineTo(N, NewLd, NewLd.getValue(1));
9693	}
9694
9695	// Try transforming N to an indexed load.
9696	if (CombineToPreIndexedLoadStore(N) \|\| CombineToPostIndexedLoadStore(N))
9697	return SDValue(N, 0);
9698
9699	return SDValue();
9700	}
9701
9702	/// A vector select of 2 constant vectors can be simplified to math/logic to
9703	/// avoid a variable select instruction and possibly avoid constant loads.
9704	SDValue DAGCombiner::foldVSelectOfConstants(SDNode *N) {
9705	SDValue Cond = N->getOperand(0);
9706	SDValue N1 = N->getOperand(1);
9707	SDValue N2 = N->getOperand(2);
9708	EVT VT = N->getValueType(0);
9709	if (!Cond.hasOneUse() \|\| Cond.getScalarValueSizeInBits() != 1 \|\|
9710	!TLI.convertSelectOfConstantsToMath(VT) \|\|
9711	!ISD::isBuildVectorOfConstantSDNodes(N1.getNode()) \|\|
9712	!ISD::isBuildVectorOfConstantSDNodes(N2.getNode()))
9713	return SDValue();
9714
9715	// Check if we can use the condition value to increment/decrement a single
9716	// constant value. This simplifies a select to an add and removes a constant
9717	// load/materialization from the general case.
9718	bool AllAddOne = true;
9719	bool AllSubOne = true;
9720	unsigned Elts = VT.getVectorNumElements();
9721	for (unsigned i = 0; i != Elts; ++i) {
9722	SDValue N1Elt = N1.getOperand(i);
9723	SDValue N2Elt = N2.getOperand(i);
9724	if (N1Elt.isUndef() \|\| N2Elt.isUndef())
9725	continue;
9726	if (N1Elt.getValueType() != N2Elt.getValueType())
9727	continue;
9728
9729	const APInt &C1 = cast<ConstantSDNode>(N1Elt)->getAPIntValue();
9730	const APInt &C2 = cast<ConstantSDNode>(N2Elt)->getAPIntValue();
9731	if (C1 != C2 + 1)
9732	AllAddOne = false;
9733	if (C1 != C2 - 1)
9734	AllSubOne = false;
9735	}
9736
9737	// Further simplifications for the extra-special cases where the constants are
9738	// all 0 or all -1 should be implemented as folds of these patterns.
9739	SDLoc DL(N);
9740	if (AllAddOne \|\| AllSubOne) {
9741	// vselect <N x i1> Cond, C+1, C --> add (zext Cond), C
9742	// vselect <N x i1> Cond, C-1, C --> add (sext Cond), C
9743	auto ExtendOpcode = AllAddOne ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND;
9744	SDValue ExtendedCond = DAG.getNode(ExtendOpcode, DL, VT, Cond);
9745	return DAG.getNode(ISD::ADD, DL, VT, ExtendedCond, N2);
9746	}
9747
9748	// select Cond, Pow2C, 0 --> (zext Cond) << log2(Pow2C)
9749	APInt Pow2C;
9750	if (ISD::isConstantSplatVector(N1.getNode(), Pow2C) && Pow2C.isPowerOf2() &&
9751	isNullOrNullSplat(N2)) {
9752	SDValue ZextCond = DAG.getZExtOrTrunc(Cond, DL, VT);
9753	SDValue ShAmtC = DAG.getConstant(Pow2C.exactLogBase2(), DL, VT);
9754	return DAG.getNode(ISD::SHL, DL, VT, ZextCond, ShAmtC);
9755	}
9756
9757	if (SDValue V = foldSelectOfConstantsUsingSra(N, DAG))
9758	return V;
9759
9760	// The general case for select-of-constants:
9761	// vselect <N x i1> Cond, C1, C2 --> xor (and (sext Cond), (C1^C2)), C2
9762	// ...but that only makes sense if a vselect is slower than 2 logic ops, so
9763	// leave that to a machine-specific pass.
9764	return SDValue();
9765	}
9766
9767	SDValue DAGCombiner::visitVSELECT(SDNode *N) {
9768	SDValue N0 = N->getOperand(0);
9769	SDValue N1 = N->getOperand(1);
9770	SDValue N2 = N->getOperand(2);
9771	EVT VT = N->getValueType(0);
9772	SDLoc DL(N);
9773
9774	if (SDValue V = DAG.simplifySelect(N0, N1, N2))
9775	return V;
9776
9777	// vselect (not Cond), N1, N2 -> vselect Cond, N2, N1
9778	if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false))
9779	return DAG.getSelect(DL, VT, F, N2, N1);
9780
9781	// Canonicalize integer abs.
9782	// vselect (setg[te] X, 0), X, -X ->
9783	// vselect (setgt X, -1), X, -X ->
9784	// vselect (setl[te] X, 0), -X, X ->
9785	// Y = sra (X, size(X)-1); xor (add (X, Y), Y)
9786	if (N0.getOpcode() == ISD::SETCC) {
9787	SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
9788	ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
9789	bool isAbs = false;
9790	bool RHSIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());
9791
9792	if (((RHSIsAllZeros && (CC == ISD::SETGT \|\| CC == ISD::SETGE)) \|\|
9793	(ISD::isBuildVectorAllOnes(RHS.getNode()) && CC == ISD::SETGT)) &&
9794	N1 == LHS && N2.getOpcode() == ISD::SUB && N1 == N2.getOperand(1))
9795	isAbs = ISD::isBuildVectorAllZeros(N2.getOperand(0).getNode());
9796	else if ((RHSIsAllZeros && (CC == ISD::SETLT \|\| CC == ISD::SETLE)) &&
9797	N2 == LHS && N1.getOpcode() == ISD::SUB && N2 == N1.getOperand(1))
9798	isAbs = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
9799
9800	if (isAbs) {
9801	if (TLI.isOperationLegalOrCustom(ISD::ABS, VT))
9802	return DAG.getNode(ISD::ABS, DL, VT, LHS);
9803
9804	SDValue Shift = DAG.getNode(ISD::SRA, DL, VT, LHS,
9805	DAG.getConstant(VT.getScalarSizeInBits() - 1,
9806	DL, getShiftAmountTy(VT)));
9807	SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift);
9808	AddToWorklist(Shift.getNode());
9809	AddToWorklist(Add.getNode());
9810	return DAG.getNode(ISD::XOR, DL, VT, Add, Shift);
9811	}
9812
9813	// vselect x, y (fcmp lt x, y) -> fminnum x, y
9814	// vselect x, y (fcmp gt x, y) -> fmaxnum x, y
9815	//
9816	// This is OK if we don't care about what happens if either operand is a
9817	// NaN.
9818	//
9819	if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, LHS, RHS, TLI)) {
9820	if (SDValue FMinMax =
9821	combineMinNumMaxNum(DL, VT, LHS, RHS, N1, N2, CC, TLI, DAG))
9822	return FMinMax;
9823	}
9824
9825	// If this select has a condition (setcc) with narrower operands than the
9826	// select, try to widen the compare to match the select width.
9827	// TODO: This should be extended to handle any constant.
9828	// TODO: This could be extended to handle non-loading patterns, but that
9829	// requires thorough testing to avoid regressions.
9830	if (isNullOrNullSplat(RHS)) {
9831	EVT NarrowVT = LHS.getValueType();
9832	EVT WideVT = N1.getValueType().changeVectorElementTypeToInteger();
9833	EVT SetCCVT = getSetCCResultType(LHS.getValueType());
9834	unsigned SetCCWidth = SetCCVT.getScalarSizeInBits();
9835	unsigned WideWidth = WideVT.getScalarSizeInBits();
9836	bool IsSigned = isSignedIntSetCC(CC);
9837	auto LoadExtOpcode = IsSigned ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
9838	if (LHS.getOpcode() == ISD::LOAD && LHS.hasOneUse() &&
9839	SetCCWidth != 1 && SetCCWidth < WideWidth &&
9840	TLI.isLoadExtLegalOrCustom(LoadExtOpcode, WideVT, NarrowVT) &&
9841	TLI.isOperationLegalOrCustom(ISD::SETCC, WideVT)) {
9842	// Both compare operands can be widened for free. The LHS can use an
9843	// extended load, and the RHS is a constant:
9844	// vselect (ext (setcc load(X), C)), N1, N2 -->
9845	// vselect (setcc extload(X), C'), N1, N2
9846	auto ExtOpcode = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
9847	SDValue WideLHS = DAG.getNode(ExtOpcode, DL, WideVT, LHS);
9848	SDValue WideRHS = DAG.getNode(ExtOpcode, DL, WideVT, RHS);
9849	EVT WideSetCCVT = getSetCCResultType(WideVT);
9850	SDValue WideSetCC = DAG.getSetCC(DL, WideSetCCVT, WideLHS, WideRHS, CC);
9851	return DAG.getSelect(DL, N1.getValueType(), WideSetCC, N1, N2);
9852	}
9853	}
9854
9855	// Match VSELECTs into add with unsigned saturation.
9856	if (hasOperation(ISD::UADDSAT, VT)) {
9857	// Check if one of the arms of the VSELECT is vector with all bits set.
9858	// If it's on the left side invert the predicate to simplify logic below.
9859	SDValue Other;
9860	ISD::CondCode SatCC = CC;
9861	if (ISD::isBuildVectorAllOnes(N1.getNode())) {
9862	Other = N2;
9863	SatCC = ISD::getSetCCInverse(SatCC, VT.getScalarType());
9864	} else if (ISD::isBuildVectorAllOnes(N2.getNode())) {
9865	Other = N1;
9866	}
9867
9868	if (Other && Other.getOpcode() == ISD::ADD) {
9869	SDValue CondLHS = LHS, CondRHS = RHS;
9870	SDValue OpLHS = Other.getOperand(0), OpRHS = Other.getOperand(1);
9871
9872	// Canonicalize condition operands.
9873	if (SatCC == ISD::SETUGE) {
9874	std::swap(CondLHS, CondRHS);
9875	SatCC = ISD::SETULE;
9876	}
9877
9878	// We can test against either of the addition operands.
9879	// x <= x+y ? x+y : ~0 --> uaddsat x, y
9880	// x+y >= x ? x+y : ~0 --> uaddsat x, y
9881	if (SatCC == ISD::SETULE && Other == CondRHS &&
9882	(OpLHS == CondLHS \|\| OpRHS == CondLHS))
9883	return DAG.getNode(ISD::UADDSAT, DL, VT, OpLHS, OpRHS);
9884
9885	if (isa<BuildVectorSDNode>(OpRHS) && isa<BuildVectorSDNode>(CondRHS) &&
9886	CondLHS == OpLHS) {
9887	// If the RHS is a constant we have to reverse the const
9888	// canonicalization.
9889	// x >= ~C ? x+C : ~0 --> uaddsat x, C
9890	auto MatchUADDSAT = [](ConstantSDNode Op, ConstantSDNode Cond) {
9891	return Cond->getAPIntValue() == ~Op->getAPIntValue();
9892	};
9893	if (SatCC == ISD::SETULE &&
9894	ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchUADDSAT))
9895	return DAG.getNode(ISD::UADDSAT, DL, VT, OpLHS, OpRHS);
9896	}
9897	}
9898	}
9899
9900	// Match VSELECTs into sub with unsigned saturation.
9901	if (hasOperation(ISD::USUBSAT, VT)) {
9902	// Check if one of the arms of the VSELECT is a zero vector. If it's on
9903	// the left side invert the predicate to simplify logic below.
9904	SDValue Other;
9905	ISD::CondCode SatCC = CC;
9906	if (ISD::isBuildVectorAllZeros(N1.getNode())) {
9907	Other = N2;
9908	SatCC = ISD::getSetCCInverse(SatCC, VT.getScalarType());
9909	} else if (ISD::isBuildVectorAllZeros(N2.getNode())) {
9910	Other = N1;
9911	}
9912
9913	if (Other && Other.getNumOperands() == 2) {
9914	SDValue CondRHS = RHS;
9915	SDValue OpLHS = Other.getOperand(0), OpRHS = Other.getOperand(1);
9916
9917	if (Other.getOpcode() == ISD::SUB &&
9918	LHS.getOpcode() == ISD::ZERO_EXTEND && LHS.getOperand(0) == OpLHS &&
9919	OpRHS.getOpcode() == ISD::TRUNCATE && OpRHS.getOperand(0) == RHS) {
9920	// Look for a general sub with unsigned saturation first.
9921	// zext(x) >= y ? x - trunc(y) : 0
9922	// --> usubsat(x,trunc(umin(y,SatLimit)))
9923	// zext(x) > y ? x - trunc(y) : 0
9924	// --> usubsat(x,trunc(umin(y,SatLimit)))
9925	if (SatCC == ISD::SETUGE \|\| SatCC == ISD::SETUGT)
9926	return getTruncatedUSUBSAT(VT, LHS.getValueType(), LHS, RHS, DAG,
9927	DL);
9928	}
9929
9930	if (OpLHS == LHS) {
9931	// Look for a general sub with unsigned saturation first.
9932	// x >= y ? x-y : 0 --> usubsat x, y
9933	// x > y ? x-y : 0 --> usubsat x, y
9934	if ((SatCC == ISD::SETUGE \|\| SatCC == ISD::SETUGT) &&
9935	Other.getOpcode() == ISD::SUB && OpRHS == CondRHS)
9936	return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
9937
9938	if (auto *OpRHSBV = dyn_cast<BuildVectorSDNode>(OpRHS)) {
9939	if (isa<BuildVectorSDNode>(CondRHS)) {
9940	// If the RHS is a constant we have to reverse the const
9941	// canonicalization.
9942	// x > C-1 ? x+-C : 0 --> usubsat x, C
9943	auto MatchUSUBSAT = [](ConstantSDNode Op, ConstantSDNode Cond) {
9944	return (!Op && !Cond) \|\|
9945	(Op && Cond &&
9946	Cond->getAPIntValue() == (-Op->getAPIntValue() - 1));
9947	};
9948	if (SatCC == ISD::SETUGT && Other.getOpcode() == ISD::ADD &&
9949	ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchUSUBSAT,
9950	/AllowUndefs/ true)) {
9951	OpRHS = DAG.getNode(ISD::SUB, DL, VT,
9952	DAG.getConstant(0, DL, VT), OpRHS);
9953	return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
9954	}
9955
9956	// Another special case: If C was a sign bit, the sub has been
9957	// canonicalized into a xor.
9958	// FIXME: Would it be better to use computeKnownBits to determine
9959	// whether it's safe to decanonicalize the xor?
9960	// x s< 0 ? x^C : 0 --> usubsat x, C
9961	if (auto *OpRHSConst = OpRHSBV->getConstantSplatNode()) {
9962	if (SatCC == ISD::SETLT && Other.getOpcode() == ISD::XOR &&
9963	ISD::isBuildVectorAllZeros(CondRHS.getNode()) &&
9964	OpRHSConst->getAPIntValue().isSignMask()) {
9965	// Note that we have to rebuild the RHS constant here to
9966	// ensure we don't rely on particular values of undef lanes.
9967	OpRHS = DAG.getConstant(OpRHSConst->getAPIntValue(), DL, VT);
9968	return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
9969	}
9970	}
9971	}
9972	}
9973	}
9974	}
9975	}
9976	}
9977
9978	if (SimplifySelectOps(N, N1, N2))
9979	return SDValue(N, 0); // Don't revisit N.
9980
9981	// Fold (vselect (build_vector all_ones), N1, N2) -> N1
9982	if (ISD::isBuildVectorAllOnes(N0.getNode()))
9983	return N1;
9984	// Fold (vselect (build_vector all_zeros), N1, N2) -> N2
9985	if (ISD::isBuildVectorAllZeros(N0.getNode()))
9986	return N2;
9987
9988	// The ConvertSelectToConcatVector function is assuming both the above
9989	// checks for (vselect (build_vector all{ones,zeros) ...) have been made
9990	// and addressed.
9991	if (N1.getOpcode() == ISD::CONCAT_VECTORS &&
9992	N2.getOpcode() == ISD::CONCAT_VECTORS &&
9993	ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
9994	if (SDValue CV = ConvertSelectToConcatVector(N, DAG))
9995	return CV;
9996	}
9997
9998	if (SDValue V = foldVSelectOfConstants(N))
9999	return V;
10000
10001	return SDValue();
10002	}
10003
10004	SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
10005	SDValue N0 = N->getOperand(0);
10006	SDValue N1 = N->getOperand(1);
10007	SDValue N2 = N->getOperand(2);
10008	SDValue N3 = N->getOperand(3);
10009	SDValue N4 = N->getOperand(4);
10010	ISD::CondCode CC = cast<CondCodeSDNode>(N4)->get();
10011
10012	// fold select_cc lhs, rhs, x, x, cc -> x
10013	if (N2 == N3)
10014	return N2;
10015
10016	// Determine if the condition we're dealing with is constant
10017	if (SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()), N0, N1,
10018	CC, SDLoc(N), false)) {
10019	AddToWorklist(SCC.getNode());
10020
10021	if (ConstantSDNode *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode())) {
10022	if (!SCCC->isNullValue())
10023	return N2; // cond always true -> true val
10024	else
10025	return N3; // cond always false -> false val
10026	} else if (SCC->isUndef()) {
10027	// When the condition is UNDEF, just return the first operand. This is
10028	// coherent the DAG creation, no setcc node is created in this case
10029	return N2;
10030	} else if (SCC.getOpcode() == ISD::SETCC) {
10031	// Fold to a simpler select_cc
10032	SDValue SelectOp = DAG.getNode(
10033	ISD::SELECT_CC, SDLoc(N), N2.getValueType(), SCC.getOperand(0),
10034	SCC.getOperand(1), N2, N3, SCC.getOperand(2));
10035	SelectOp->setFlags(SCC->getFlags());
10036	return SelectOp;
10037	}
10038	}
10039
10040	// If we can fold this based on the true/false value, do so.
10041	if (SimplifySelectOps(N, N2, N3))
10042	return SDValue(N, 0); // Don't revisit N.
10043
10044	// fold select_cc into other things, such as min/max/abs
10045	return SimplifySelectCC(SDLoc(N), N0, N1, N2, N3, CC);
10046	}
10047
10048	SDValue DAGCombiner::visitSETCC(SDNode *N) {
10049	// setcc is very commonly used as an argument to brcond. This pattern
10050	// also lend itself to numerous combines and, as a result, it is desired
10051	// we keep the argument to a brcond as a setcc as much as possible.
10052	bool PreferSetCC =
10053	N->hasOneUse() && N->use_begin()->getOpcode() == ISD::BRCOND;
10054
10055	SDValue Combined = SimplifySetCC(
10056	N->getValueType(0), N->getOperand(0), N->getOperand(1),
10057	cast<CondCodeSDNode>(N->getOperand(2))->get(), SDLoc(N), !PreferSetCC);
10058
10059	if (!Combined)
10060	return SDValue();
10061
10062	// If we prefer to have a setcc, and we don't, we'll try our best to
10063	// recreate one using rebuildSetCC.
10064	if (PreferSetCC && Combined.getOpcode() != ISD::SETCC) {
10065	SDValue NewSetCC = rebuildSetCC(Combined);
10066
10067	// We don't have anything interesting to combine to.
10068	if (NewSetCC.getNode() == N)
10069	return SDValue();
10070
10071	if (NewSetCC)
10072	return NewSetCC;
10073	}
10074
10075	return Combined;
10076	}
10077
10078	SDValue DAGCombiner::visitSETCCCARRY(SDNode *N) {
10079	SDValue LHS = N->getOperand(0);
10080	SDValue RHS = N->getOperand(1);
10081	SDValue Carry = N->getOperand(2);
10082	SDValue Cond = N->getOperand(3);
10083
10084	// If Carry is false, fold to a regular SETCC.
10085	if (isNullConstant(Carry))
10086	return DAG.getNode(ISD::SETCC, SDLoc(N), N->getVTList(), LHS, RHS, Cond);
10087
10088	return SDValue();
10089	}
10090
10091	/// Check if N satisfies:
10092	/// N is used once.
10093	/// N is a Load.
10094	/// The load is compatible with ExtOpcode. It means
10095	/// If load has explicit zero/sign extension, ExpOpcode must have the same
10096	/// extension.
10097	/// Otherwise returns true.
10098	static bool isCompatibleLoad(SDValue N, unsigned ExtOpcode) {
10099	if (!N.hasOneUse())
10100	return false;
10101
10102	if (!isa<LoadSDNode>(N))
10103	return false;
10104
10105	LoadSDNode *Load = cast<LoadSDNode>(N);
10106	ISD::LoadExtType LoadExt = Load->getExtensionType();
10107	if (LoadExt == ISD::NON_EXTLOAD \|\| LoadExt == ISD::EXTLOAD)
10108	return true;
10109
10110	// Now LoadExt is either SEXTLOAD or ZEXTLOAD, ExtOpcode must have the same
10111	// extension.
10112	if ((LoadExt == ISD::SEXTLOAD && ExtOpcode != ISD::SIGN_EXTEND) \|\|
10113	(LoadExt == ISD::ZEXTLOAD && ExtOpcode != ISD::ZERO_EXTEND))
10114	return false;
10115
10116	return true;
10117	}
10118
10119	/// Fold
10120	/// (sext (select c, load x, load y)) -> (select c, sextload x, sextload y)
10121	/// (zext (select c, load x, load y)) -> (select c, zextload x, zextload y)
10122	/// (aext (select c, load x, load y)) -> (select c, extload x, extload y)
10123	/// This function is called by the DAGCombiner when visiting sext/zext/aext
10124	/// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
10125	static SDValue tryToFoldExtendSelectLoad(SDNode *N, const TargetLowering &TLI,
10126	SelectionDAG &DAG) {
10127	unsigned Opcode = N->getOpcode();
10128	SDValue N0 = N->getOperand(0);
10129	EVT VT = N->getValueType(0);
10130	SDLoc DL(N);
10131
10132	assert((Opcode == ISD::SIGN_EXTEND \|\| Opcode == ISD::ZERO_EXTEND \|\|(((Opcode == ISD::SIGN_EXTEND \|\| Opcode == ISD::ZERO_EXTEND \|\| Opcode == ISD::ANY_EXTEND) && "Expected EXTEND dag node in input!" ) ? static_cast<void> (0) : __assert_fail ("(Opcode == ISD::SIGN_EXTEND \|\| Opcode == ISD::ZERO_EXTEND \|\| Opcode == ISD::ANY_EXTEND) && \"Expected EXTEND dag node in input!\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 10134, __PRETTY_FUNCTION__))
10133	Opcode == ISD::ANY_EXTEND) &&(((Opcode == ISD::SIGN_EXTEND \|\| Opcode == ISD::ZERO_EXTEND \|\| Opcode == ISD::ANY_EXTEND) && "Expected EXTEND dag node in input!" ) ? static_cast<void> (0) : __assert_fail ("(Opcode == ISD::SIGN_EXTEND \|\| Opcode == ISD::ZERO_EXTEND \|\| Opcode == ISD::ANY_EXTEND) && \"Expected EXTEND dag node in input!\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 10134, __PRETTY_FUNCTION__))
10134	"Expected EXTEND dag node in input!")(((Opcode == ISD::SIGN_EXTEND \|\| Opcode == ISD::ZERO_EXTEND \|\| Opcode == ISD::ANY_EXTEND) && "Expected EXTEND dag node in input!" ) ? static_cast<void> (0) : __assert_fail ("(Opcode == ISD::SIGN_EXTEND \|\| Opcode == ISD::ZERO_EXTEND \|\| Opcode == ISD::ANY_EXTEND) && \"Expected EXTEND dag node in input!\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 10134, __PRETTY_FUNCTION__));
10135
10136	if (!(N0->getOpcode() == ISD::SELECT \|\| N0->getOpcode() == ISD::VSELECT) \|\|
10137	!N0.hasOneUse())
10138	return SDValue();
10139
10140	SDValue Op1 = N0->getOperand(1);
10141	SDValue Op2 = N0->getOperand(2);
10142	if (!isCompatibleLoad(Op1, Opcode) \|\| !isCompatibleLoad(Op2, Opcode))
10143	return SDValue();
10144
10145	auto ExtLoadOpcode = ISD::EXTLOAD;
10146	if (Opcode == ISD::SIGN_EXTEND)
10147	ExtLoadOpcode = ISD::SEXTLOAD;
10148	else if (Opcode == ISD::ZERO_EXTEND)
10149	ExtLoadOpcode = ISD::ZEXTLOAD;
10150
10151	LoadSDNode *Load1 = cast<LoadSDNode>(Op1);
10152	LoadSDNode *Load2 = cast<LoadSDNode>(Op2);
10153	if (!TLI.isLoadExtLegal(ExtLoadOpcode, VT, Load1->getMemoryVT()) \|\|
10154	!TLI.isLoadExtLegal(ExtLoadOpcode, VT, Load2->getMemoryVT()))
10155	return SDValue();
10156
10157	SDValue Ext1 = DAG.getNode(Opcode, DL, VT, Op1);
10158	SDValue Ext2 = DAG.getNode(Opcode, DL, VT, Op2);
10159	return DAG.getSelect(DL, VT, N0->getOperand(0), Ext1, Ext2);
10160	}
10161
10162	/// Try to fold a sext/zext/aext dag node into a ConstantSDNode or
10163	/// a build_vector of constants.
10164	/// This function is called by the DAGCombiner when visiting sext/zext/aext
10165	/// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
10166	/// Vector extends are not folded if operations are legal; this is to
10167	/// avoid introducing illegal build_vector dag nodes.
10168	static SDValue tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
10169	SelectionDAG &DAG, bool LegalTypes) {
10170	unsigned Opcode = N->getOpcode();
10171	SDValue N0 = N->getOperand(0);
10172	EVT VT = N->getValueType(0);
10173	SDLoc DL(N);
10174
10175	assert((Opcode == ISD::SIGN_EXTEND \|\| Opcode == ISD::ZERO_EXTEND \|\|(((Opcode == ISD::SIGN_EXTEND \|\| Opcode == ISD::ZERO_EXTEND \|\| Opcode == ISD::ANY_EXTEND \|\| Opcode == ISD::SIGN_EXTEND_VECTOR_INREG \|\| Opcode == ISD::ZERO_EXTEND_VECTOR_INREG) && "Expected EXTEND dag node in input!" ) ? static_cast<void> (0) : __assert_fail ("(Opcode == ISD::SIGN_EXTEND \|\| Opcode == ISD::ZERO_EXTEND \|\| Opcode == ISD::ANY_EXTEND \|\| Opcode == ISD::SIGN_EXTEND_VECTOR_INREG \|\| Opcode == ISD::ZERO_EXTEND_VECTOR_INREG) && \"Expected EXTEND dag node in input!\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 10178, __PRETTY_FUNCTION__))
10176	Opcode == ISD::ANY_EXTEND \|\| Opcode == ISD::SIGN_EXTEND_VECTOR_INREG \|\|(((Opcode == ISD::SIGN_EXTEND \|\| Opcode == ISD::ZERO_EXTEND \|\| Opcode == ISD::ANY_EXTEND \|\| Opcode == ISD::SIGN_EXTEND_VECTOR_INREG \|\| Opcode == ISD::ZERO_EXTEND_VECTOR_INREG) && "Expected EXTEND dag node in input!" ) ? static_cast<void> (0) : __assert_fail ("(Opcode == ISD::SIGN_EXTEND \|\| Opcode == ISD::ZERO_EXTEND \|\| Opcode == ISD::ANY_EXTEND \|\| Opcode == ISD::SIGN_EXTEND_VECTOR_INREG \|\| Opcode == ISD::ZERO_EXTEND_VECTOR_INREG) && \"Expected EXTEND dag node in input!\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 10178, __PRETTY_FUNCTION__))
10177	Opcode == ISD::ZERO_EXTEND_VECTOR_INREG)(((Opcode == ISD::SIGN_EXTEND \|\| Opcode == ISD::ZERO_EXTEND \|\| Opcode == ISD::ANY_EXTEND \|\| Opcode == ISD::SIGN_EXTEND_VECTOR_INREG \|\| Opcode == ISD::ZERO_EXTEND_VECTOR_INREG) && "Expected EXTEND dag node in input!" ) ? static_cast<void> (0) : __assert_fail ("(Opcode == ISD::SIGN_EXTEND \|\| Opcode == ISD::ZERO_EXTEND \|\| Opcode == ISD::ANY_EXTEND \|\| Opcode == ISD::SIGN_EXTEND_VECTOR_INREG \|\| Opcode == ISD::ZERO_EXTEND_VECTOR_INREG) && \"Expected EXTEND dag node in input!\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 10178, __PRETTY_FUNCTION__))
10178	&& "Expected EXTEND dag node in input!")(((Opcode == ISD::SIGN_EXTEND \|\| Opcode == ISD::ZERO_EXTEND \|\| Opcode == ISD::ANY_EXTEND \|\| Opcode == ISD::SIGN_EXTEND_VECTOR_INREG \|\| Opcode == ISD::ZERO_EXTEND_VECTOR_INREG) && "Expected EXTEND dag node in input!" ) ? static_cast<void> (0) : __assert_fail ("(Opcode == ISD::SIGN_EXTEND \|\| Opcode == ISD::ZERO_EXTEND \|\| Opcode == ISD::ANY_EXTEND \|\| Opcode == ISD::SIGN_EXTEND_VECTOR_INREG \|\| Opcode == ISD::ZERO_EXTEND_VECTOR_INREG) && \"Expected EXTEND dag node in input!\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 10178, __PRETTY_FUNCTION__));
10179
10180	// fold (sext c1) -> c1
10181	// fold (zext c1) -> c1
10182	// fold (aext c1) -> c1
10183	if (isa<ConstantSDNode>(N0))
10184	return DAG.getNode(Opcode, DL, VT, N0);
10185
10186	// fold (sext (select cond, c1, c2)) -> (select cond, sext c1, sext c2)
10187	// fold (zext (select cond, c1, c2)) -> (select cond, zext c1, zext c2)
10188	// fold (aext (select cond, c1, c2)) -> (select cond, sext c1, sext c2)
10189	if (N0->getOpcode() == ISD::SELECT) {
10190	SDValue Op1 = N0->getOperand(1);
10191	SDValue Op2 = N0->getOperand(2);
10192	if (isa<ConstantSDNode>(Op1) && isa<ConstantSDNode>(Op2) &&
10193	(Opcode != ISD::ZERO_EXTEND \|\| !TLI.isZExtFree(N0.getValueType(), VT))) {
10194	// For any_extend, choose sign extension of the constants to allow a
10195	// possible further transform to sign_extend_inreg.i.e.
10196	//
10197	// t1: i8 = select t0, Constant:i8<-1>, Constant:i8<0>
10198	// t2: i64 = any_extend t1
10199	// -->
10200	// t3: i64 = select t0, Constant:i64<-1>, Constant:i64<0>
10201	// -->
10202	// t4: i64 = sign_extend_inreg t3
10203	unsigned FoldOpc = Opcode;
10204	if (FoldOpc == ISD::ANY_EXTEND)
10205	FoldOpc = ISD::SIGN_EXTEND;
10206	return DAG.getSelect(DL, VT, N0->getOperand(0),
10207	DAG.getNode(FoldOpc, DL, VT, Op1),
10208	DAG.getNode(FoldOpc, DL, VT, Op2));
10209	}
10210	}
10211
10212	// fold (sext (build_vector AllConstants) -> (build_vector AllConstants)
10213	// fold (zext (build_vector AllConstants) -> (build_vector AllConstants)
10214	// fold (aext (build_vector AllConstants) -> (build_vector AllConstants)
10215	EVT SVT = VT.getScalarType();
10216	if (!(VT.isVector() && (!LegalTypes \|\| TLI.isTypeLegal(SVT)) &&
10217	ISD::isBuildVectorOfConstantSDNodes(N0.getNode())))
10218	return SDValue();
10219
10220	// We can fold this node into a build_vector.
10221	unsigned VTBits = SVT.getSizeInBits();
10222	unsigned EVTBits = N0->getValueType(0).getScalarSizeInBits();
10223	SmallVector<SDValue, 8> Elts;
10224	unsigned NumElts = VT.getVectorNumElements();
10225
10226	// For zero-extensions, UNDEF elements still guarantee to have the upper
10227	// bits set to zero.
10228	bool IsZext =
10229	Opcode == ISD::ZERO_EXTEND \|\| Opcode == ISD::ZERO_EXTEND_VECTOR_INREG;
10230
10231	for (unsigned i = 0; i != NumElts; ++i) {
10232	SDValue Op = N0.getOperand(i);
10233	if (Op.isUndef()) {
10234	Elts.push_back(IsZext ? DAG.getConstant(0, DL, SVT) : DAG.getUNDEF(SVT));
10235	continue;
10236	}
10237
10238	SDLoc DL(Op);
10239	// Get the constant value and if needed trunc it to the size of the type.
10240	// Nodes like build_vector might have constants wider than the scalar type.
10241	APInt C = cast<ConstantSDNode>(Op)->getAPIntValue().zextOrTrunc(EVTBits);
10242	if (Opcode == ISD::SIGN_EXTEND \|\| Opcode == ISD::SIGN_EXTEND_VECTOR_INREG)
10243	Elts.push_back(DAG.getConstant(C.sext(VTBits), DL, SVT));
10244	else
10245	Elts.push_back(DAG.getConstant(C.zext(VTBits), DL, SVT));
10246	}
10247
10248	return DAG.getBuildVector(VT, DL, Elts);
10249	}
10250
10251	// ExtendUsesToFormExtLoad - Trying to extend uses of a load to enable this:
10252	// "fold ({s\|z\|a}ext (load x)) -> ({s\|z\|a}ext (truncate ({s\|z\|a}extload x)))"
10253	// transformation. Returns true if extension are possible and the above
10254	// mentioned transformation is profitable.
10255	static bool ExtendUsesToFormExtLoad(EVT VT, SDNode *N, SDValue N0,
10256	unsigned ExtOpc,
10257	SmallVectorImpl<SDNode *> &ExtendNodes,
10258	const TargetLowering &TLI) {
10259	bool HasCopyToRegUses = false;
10260	bool isTruncFree = TLI.isTruncateFree(VT, N0.getValueType());
10261	for (SDNode::use_iterator UI = N0.getNode()->use_begin(),
10262	UE = N0.getNode()->use_end();
10263	UI != UE; ++UI) {
10264	SDNode User = UI;
10265	if (User == N)
10266	continue;
10267	if (UI.getUse().getResNo() != N0.getResNo())
10268	continue;
10269	// FIXME: Only extend SETCC N, N and SETCC N, c for now.
10270	if (ExtOpc != ISD::ANY_EXTEND && User->getOpcode() == ISD::SETCC) {
10271	ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get();
10272	if (ExtOpc == ISD::ZERO_EXTEND && ISD::isSignedIntSetCC(CC))
10273	// Sign bits will be lost after a zext.
10274	return false;
10275	bool Add = false;
10276	for (unsigned i = 0; i != 2; ++i) {
10277	SDValue UseOp = User->getOperand(i);
10278	if (UseOp == N0)
10279	continue;
10280	if (!isa<ConstantSDNode>(UseOp))
10281	return false;
10282	Add = true;
10283	}
10284	if (Add)
10285	ExtendNodes.push_back(User);
10286	continue;
10287	}
10288	// If truncates aren't free and there are users we can't
10289	// extend, it isn't worthwhile.
10290	if (!isTruncFree)
10291	return false;
10292	// Remember if this value is live-out.
10293	if (User->getOpcode() == ISD::CopyToReg)
10294	HasCopyToRegUses = true;
10295	}
10296
10297	if (HasCopyToRegUses) {
10298	bool BothLiveOut = false;
10299	for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
10300	UI != UE; ++UI) {
10301	SDUse &Use = UI.getUse();
10302	if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) {
10303	BothLiveOut = true;
10304	break;
10305	}
10306	}
10307	if (BothLiveOut)
10308	// Both unextended and extended values are live out. There had better be
10309	// a good reason for the transformation.
10310	return ExtendNodes.size();
10311	}
10312	return true;
10313	}
10314
10315	void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
10316	SDValue OrigLoad, SDValue ExtLoad,
10317	ISD::NodeType ExtType) {
10318	// Extend SetCC uses if necessary.
10319	SDLoc DL(ExtLoad);
10320	for (SDNode *SetCC : SetCCs) {
10321	SmallVector<SDValue, 4> Ops;
10322
10323	for (unsigned j = 0; j != 2; ++j) {
10324	SDValue SOp = SetCC->getOperand(j);
10325	if (SOp == OrigLoad)
10326	Ops.push_back(ExtLoad);
10327	else
10328	Ops.push_back(DAG.getNode(ExtType, DL, ExtLoad->getValueType(0), SOp));
10329	}
10330
10331	Ops.push_back(SetCC->getOperand(2));
10332	CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0), Ops));
10333	}
10334	}
10335
10336	// FIXME: Bring more similar combines here, common to sext/zext (maybe aext?).
10337	SDValue DAGCombiner::CombineExtLoad(SDNode *N) {
10338	SDValue N0 = N->getOperand(0);
10339	EVT DstVT = N->getValueType(0);
10340	EVT SrcVT = N0.getValueType();
10341
10342	assert((N->getOpcode() == ISD::SIGN_EXTEND \|\|(((N->getOpcode() == ISD::SIGN_EXTEND \|\| N->getOpcode() == ISD::ZERO_EXTEND) && "Unexpected node type (not an extend)!" ) ? static_cast<void> (0) : __assert_fail ("(N->getOpcode() == ISD::SIGN_EXTEND \|\| N->getOpcode() == ISD::ZERO_EXTEND) && \"Unexpected node type (not an extend)!\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 10344, __PRETTY_FUNCTION__))
10343	N->getOpcode() == ISD::ZERO_EXTEND) &&(((N->getOpcode() == ISD::SIGN_EXTEND \|\| N->getOpcode() == ISD::ZERO_EXTEND) && "Unexpected node type (not an extend)!" ) ? static_cast<void> (0) : __assert_fail ("(N->getOpcode() == ISD::SIGN_EXTEND \|\| N->getOpcode() == ISD::ZERO_EXTEND) && \"Unexpected node type (not an extend)!\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 10344, __PRETTY_FUNCTION__))
10344	"Unexpected node type (not an extend)!")(((N->getOpcode() == ISD::SIGN_EXTEND \|\| N->getOpcode() == ISD::ZERO_EXTEND) && "Unexpected node type (not an extend)!" ) ? static_cast<void> (0) : __assert_fail ("(N->getOpcode() == ISD::SIGN_EXTEND \|\| N->getOpcode() == ISD::ZERO_EXTEND) && \"Unexpected node type (not an extend)!\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 10344, __PRETTY_FUNCTION__));
10345
10346	// fold (sext (load x)) to multiple smaller sextloads; same for zext.
10347	// For example, on a target with legal v4i32, but illegal v8i32, turn:
10348	// (v8i32 (sext (v8i16 (load x))))
10349	// into:
10350	// (v8i32 (concat_vectors (v4i32 (sextload x)),
10351	// (v4i32 (sextload (x + 16)))))
10352	// Where uses of the original load, i.e.:
10353	// (v8i16 (load x))
10354	// are replaced with:
10355	// (v8i16 (truncate
10356	// (v8i32 (concat_vectors (v4i32 (sextload x)),
10357	// (v4i32 (sextload (x + 16)))))))
10358	//
10359	// This combine is only applicable to illegal, but splittable, vectors.
10360	// All legal types, and illegal non-vector types, are handled elsewhere.
10361	// This combine is controlled by TargetLowering::isVectorLoadExtDesirable.
10362	//
10363	if (N0->getOpcode() != ISD::LOAD)
10364	return SDValue();
10365
10366	LoadSDNode *LN0 = cast<LoadSDNode>(N0);
10367
10368	if (!ISD::isNON_EXTLoad(LN0) \|\| !ISD::isUNINDEXEDLoad(LN0) \|\|
10369	!N0.hasOneUse() \|\| !LN0->isSimple() \|\|
10370	!DstVT.isVector() \|\| !DstVT.isPow2VectorType() \|\|
10371	!TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
10372	return SDValue();
10373
10374	SmallVector<SDNode *, 4> SetCCs;
10375	if (!ExtendUsesToFormExtLoad(DstVT, N, N0, N->getOpcode(), SetCCs, TLI))
10376	return SDValue();
10377
10378	ISD::LoadExtType ExtType =
10379	N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
10380
10381	// Try to split the vector types to get down to legal types.
10382	EVT SplitSrcVT = SrcVT;
10383	EVT SplitDstVT = DstVT;
10384	while (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT) &&
10385	SplitSrcVT.getVectorNumElements() > 1) {
10386	SplitDstVT = DAG.GetSplitDestVTs(SplitDstVT).first;
10387	SplitSrcVT = DAG.GetSplitDestVTs(SplitSrcVT).first;
10388	}
10389
10390	if (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT))
10391	return SDValue();
10392
10393	assert(!DstVT.isScalableVector() && "Unexpected scalable vector type")((!DstVT.isScalableVector() && "Unexpected scalable vector type" ) ? static_cast<void> (0) : __assert_fail ("!DstVT.isScalableVector() && \"Unexpected scalable vector type\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 10393, __PRETTY_FUNCTION__));
10394
10395	SDLoc DL(N);
10396	const unsigned NumSplits =
10397	DstVT.getVectorNumElements() / SplitDstVT.getVectorNumElements();
10398	const unsigned Stride = SplitSrcVT.getStoreSize();
10399	SmallVector<SDValue, 4> Loads;
10400	SmallVector<SDValue, 4> Chains;
10401
10402	SDValue BasePtr = LN0->getBasePtr();
10403	for (unsigned Idx = 0; Idx < NumSplits; Idx++) {
10404	const unsigned Offset = Idx * Stride;
10405	const Align Align = commonAlignment(LN0->getAlign(), Offset);
10406
10407	SDValue SplitLoad = DAG.getExtLoad(
10408	ExtType, SDLoc(LN0), SplitDstVT, LN0->getChain(), BasePtr,
10409	LN0->getPointerInfo().getWithOffset(Offset), SplitSrcVT, Align,
10410	LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
10411
10412	BasePtr = DAG.getMemBasePlusOffset(BasePtr, TypeSize::Fixed(Stride), DL);
10413
10414	Loads.push_back(SplitLoad.getValue(0));
10415	Chains.push_back(SplitLoad.getValue(1));
10416	}
10417
10418	SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
10419	SDValue NewValue = DAG.getNode(ISD::CONCAT_VECTORS, DL, DstVT, Loads);
10420
10421	// Simplify TF.
10422	AddToWorklist(NewChain.getNode());
10423
10424	CombineTo(N, NewValue);
10425
10426	// Replace uses of the original load (before extension)
10427	// with a truncate of the concatenated sextloaded vectors.
10428	SDValue Trunc =
10429	DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), NewValue);
10430	ExtendSetCCUses(SetCCs, N0, NewValue, (ISD::NodeType)N->getOpcode());
10431	CombineTo(N0.getNode(), Trunc, NewChain);
10432	return SDValue(N, 0); // Return N so it doesn't get rechecked!
10433	}
10434
10435	// fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
10436	// (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
10437	SDValue DAGCombiner::CombineZExtLogicopShiftLoad(SDNode *N) {
10438	assert(N->getOpcode() == ISD::ZERO_EXTEND)((N->getOpcode() == ISD::ZERO_EXTEND) ? static_cast<void > (0) : __assert_fail ("N->getOpcode() == ISD::ZERO_EXTEND" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 10438, __PRETTY_FUNCTION__));
10439	EVT VT = N->getValueType(0);
10440	EVT OrigVT = N->getOperand(0).getValueType();
10441	if (TLI.isZExtFree(OrigVT, VT))
10442	return SDValue();
10443
10444	// and/or/xor
10445	SDValue N0 = N->getOperand(0);
10446	if (!(N0.getOpcode() == ISD::AND \|\| N0.getOpcode() == ISD::OR \|\|
10447	N0.getOpcode() == ISD::XOR) \|\|
10448	N0.getOperand(1).getOpcode() != ISD::Constant \|\|
10449	(LegalOperations && !TLI.isOperationLegal(N0.getOpcode(), VT)))
10450	return SDValue();
10451
10452	// shl/shr
10453	SDValue N1 = N0->getOperand(0);
10454	if (!(N1.getOpcode() == ISD::SHL \|\| N1.getOpcode() == ISD::SRL) \|\|
10455	N1.getOperand(1).getOpcode() != ISD::Constant \|\|
10456	(LegalOperations && !TLI.isOperationLegal(N1.getOpcode(), VT)))
10457	return SDValue();
10458
10459	// load
10460	if (!isa<LoadSDNode>(N1.getOperand(0)))
10461	return SDValue();
10462	LoadSDNode *Load = cast<LoadSDNode>(N1.getOperand(0));
10463	EVT MemVT = Load->getMemoryVT();
10464	if (!TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) \|\|
10465	Load->getExtensionType() == ISD::SEXTLOAD \|\| Load->isIndexed())
10466	return SDValue();
10467
10468
10469	// If the shift op is SHL, the logic op must be AND, otherwise the result
10470	// will be wrong.
10471	if (N1.getOpcode() == ISD::SHL && N0.getOpcode() != ISD::AND)
10472	return SDValue();
10473
10474	if (!N0.hasOneUse() \|\| !N1.hasOneUse())
10475	return SDValue();
10476
10477	SmallVector<SDNode*, 4> SetCCs;
10478	if (!ExtendUsesToFormExtLoad(VT, N1.getNode(), N1.getOperand(0),
10479	ISD::ZERO_EXTEND, SetCCs, TLI))
10480	return SDValue();
10481
10482	// Actually do the transformation.
10483	SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(Load), VT,
10484	Load->getChain(), Load->getBasePtr(),
10485	Load->getMemoryVT(), Load->getMemOperand());
10486
10487	SDLoc DL1(N1);
10488	SDValue Shift = DAG.getNode(N1.getOpcode(), DL1, VT, ExtLoad,
10489	N1.getOperand(1));
10490
10491	APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
10492	SDLoc DL0(N0);
10493	SDValue And = DAG.getNode(N0.getOpcode(), DL0, VT, Shift,
10494	DAG.getConstant(Mask, DL0, VT));
10495
10496	ExtendSetCCUses(SetCCs, N1.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
10497	CombineTo(N, And);
10498	if (SDValue(Load, 0).hasOneUse()) {
10499	DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), ExtLoad.getValue(1));
10500	} else {
10501	SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(Load),
10502	Load->getValueType(0), ExtLoad);
10503	CombineTo(Load, Trunc, ExtLoad.getValue(1));
10504	}
10505
10506	// N0 is dead at this point.
10507	recursivelyDeleteUnusedNodes(N0.getNode());
10508
10509	return SDValue(N,0); // Return N so it doesn't get rechecked!
10510	}
10511
10512	/// If we're narrowing or widening the result of a vector select and the final
10513	/// size is the same size as a setcc (compare) feeding the select, then try to
10514	/// apply the cast operation to the select's operands because matching vector
10515	/// sizes for a select condition and other operands should be more efficient.
10516	SDValue DAGCombiner::matchVSelectOpSizesWithSetCC(SDNode *Cast) {
10517	unsigned CastOpcode = Cast->getOpcode();
10518	assert((CastOpcode == ISD::SIGN_EXTEND \|\| CastOpcode == ISD::ZERO_EXTEND \|\|(((CastOpcode == ISD::SIGN_EXTEND \|\| CastOpcode == ISD::ZERO_EXTEND \|\| CastOpcode == ISD::TRUNCATE \|\| CastOpcode == ISD::FP_EXTEND \|\| CastOpcode == ISD::FP_ROUND) && "Unexpected opcode for vector select narrowing/widening" ) ? static_cast<void> (0) : __assert_fail ("(CastOpcode == ISD::SIGN_EXTEND \|\| CastOpcode == ISD::ZERO_EXTEND \|\| CastOpcode == ISD::TRUNCATE \|\| CastOpcode == ISD::FP_EXTEND \|\| CastOpcode == ISD::FP_ROUND) && \"Unexpected opcode for vector select narrowing/widening\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 10521, __PRETTY_FUNCTION__))
10519	CastOpcode == ISD::TRUNCATE \|\| CastOpcode == ISD::FP_EXTEND \|\|(((CastOpcode == ISD::SIGN_EXTEND \|\| CastOpcode == ISD::ZERO_EXTEND \|\| CastOpcode == ISD::TRUNCATE \|\| CastOpcode == ISD::FP_EXTEND \|\| CastOpcode == ISD::FP_ROUND) && "Unexpected opcode for vector select narrowing/widening" ) ? static_cast<void> (0) : __assert_fail ("(CastOpcode == ISD::SIGN_EXTEND \|\| CastOpcode == ISD::ZERO_EXTEND \|\| CastOpcode == ISD::TRUNCATE \|\| CastOpcode == ISD::FP_EXTEND \|\| CastOpcode == ISD::FP_ROUND) && \"Unexpected opcode for vector select narrowing/widening\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 10521, __PRETTY_FUNCTION__))
10520	CastOpcode == ISD::FP_ROUND) &&(((CastOpcode == ISD::SIGN_EXTEND \|\| CastOpcode == ISD::ZERO_EXTEND \|\| CastOpcode == ISD::TRUNCATE \|\| CastOpcode == ISD::FP_EXTEND \|\| CastOpcode == ISD::FP_ROUND) && "Unexpected opcode for vector select narrowing/widening" ) ? static_cast<void> (0) : __assert_fail ("(CastOpcode == ISD::SIGN_EXTEND \|\| CastOpcode == ISD::ZERO_EXTEND \|\| CastOpcode == ISD::TRUNCATE \|\| CastOpcode == ISD::FP_EXTEND \|\| CastOpcode == ISD::FP_ROUND) && \"Unexpected opcode for vector select narrowing/widening\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 10521, __PRETTY_FUNCTION__))
10521	"Unexpected opcode for vector select narrowing/widening")(((CastOpcode == ISD::SIGN_EXTEND \|\| CastOpcode == ISD::ZERO_EXTEND \|\| CastOpcode == ISD::TRUNCATE \|\| CastOpcode == ISD::FP_EXTEND \|\| CastOpcode == ISD::FP_ROUND) && "Unexpected opcode for vector select narrowing/widening" ) ? static_cast<void> (0) : __assert_fail ("(CastOpcode == ISD::SIGN_EXTEND \|\| CastOpcode == ISD::ZERO_EXTEND \|\| CastOpcode == ISD::TRUNCATE \|\| CastOpcode == ISD::FP_EXTEND \|\| CastOpcode == ISD::FP_ROUND) && \"Unexpected opcode for vector select narrowing/widening\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 10521, __PRETTY_FUNCTION__));
10522
10523	// We only do this transform before legal ops because the pattern may be
10524	// obfuscated by target-specific operations after legalization. Do not create
10525	// an illegal select op, however, because that may be difficult to lower.
10526	EVT VT = Cast->getValueType(0);
10527	if (LegalOperations \|\| !TLI.isOperationLegalOrCustom(ISD::VSELECT, VT))
10528	return SDValue();
10529
10530	SDValue VSel = Cast->getOperand(0);
10531	if (VSel.getOpcode() != ISD::VSELECT \|\| !VSel.hasOneUse() \|\|
10532	VSel.getOperand(0).getOpcode() != ISD::SETCC)
10533	return SDValue();
10534
10535	// Does the setcc have the same vector size as the casted select?
10536	SDValue SetCC = VSel.getOperand(0);
10537	EVT SetCCVT = getSetCCResultType(SetCC.getOperand(0).getValueType());
10538	if (SetCCVT.getSizeInBits() != VT.getSizeInBits())
10539	return SDValue();
10540
10541	// cast (vsel (setcc X), A, B) --> vsel (setcc X), (cast A), (cast B)
10542	SDValue A = VSel.getOperand(1);
10543	SDValue B = VSel.getOperand(2);
10544	SDValue CastA, CastB;
10545	SDLoc DL(Cast);
10546	if (CastOpcode == ISD::FP_ROUND) {
10547	// FP_ROUND (fptrunc) has an extra flag operand to pass along.
10548	CastA = DAG.getNode(CastOpcode, DL, VT, A, Cast->getOperand(1));
10549	CastB = DAG.getNode(CastOpcode, DL, VT, B, Cast->getOperand(1));
10550	} else {
10551	CastA = DAG.getNode(CastOpcode, DL, VT, A);
10552	CastB = DAG.getNode(CastOpcode, DL, VT, B);
10553	}
10554	return DAG.getNode(ISD::VSELECT, DL, VT, SetCC, CastA, CastB);
10555	}
10556
10557	// fold ([s\|z]ext ([s\|z]extload x)) -> ([s\|z]ext (truncate ([s\|z]extload x)))
10558	// fold ([s\|z]ext ( extload x)) -> ([s\|z]ext (truncate ([s\|z]extload x)))
10559	static SDValue tryToFoldExtOfExtload(SelectionDAG &DAG, DAGCombiner &Combiner,
10560	const TargetLowering &TLI, EVT VT,
10561	bool LegalOperations, SDNode *N,
10562	SDValue N0, ISD::LoadExtType ExtLoadType) {
10563	SDNode *N0Node = N0.getNode();
10564	bool isAExtLoad = (ExtLoadType == ISD::SEXTLOAD) ? ISD::isSEXTLoad(N0Node)
10565	: ISD::isZEXTLoad(N0Node);
10566	if ((!isAExtLoad && !ISD::isEXTLoad(N0Node)) \|\|
10567	!ISD::isUNINDEXEDLoad(N0Node) \|\| !N0.hasOneUse())
10568	return SDValue();
10569
10570	LoadSDNode *LN0 = cast<LoadSDNode>(N0);
10571	EVT MemVT = LN0->getMemoryVT();
10572	if ((LegalOperations \|\| !LN0->isSimple() \|\|
10573	VT.isVector()) &&
10574	!TLI.isLoadExtLegal(ExtLoadType, VT, MemVT))
10575	return SDValue();
10576
10577	SDValue ExtLoad =
10578	DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
10579	LN0->getBasePtr(), MemVT, LN0->getMemOperand());
10580	Combiner.CombineTo(N, ExtLoad);
10581	DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
10582	if (LN0->use_empty())
10583	Combiner.recursivelyDeleteUnusedNodes(LN0);
10584	return SDValue(N, 0); // Return N so it doesn't get rechecked!
10585	}
10586
10587	// fold ([s\|z]ext (load x)) -> ([s\|z]ext (truncate ([s\|z]extload x)))
10588	// Only generate vector extloads when 1) they're legal, and 2) they are
10589	// deemed desirable by the target.
10590	static SDValue tryToFoldExtOfLoad(SelectionDAG &DAG, DAGCombiner &Combiner,
10591	const TargetLowering &TLI, EVT VT,
10592	bool LegalOperations, SDNode *N, SDValue N0,
10593	ISD::LoadExtType ExtLoadType,
10594	ISD::NodeType ExtOpc) {
10595	if (!ISD::isNON_EXTLoad(N0.getNode()) \|\|
10596	!ISD::isUNINDEXEDLoad(N0.getNode()) \|\|
10597	((LegalOperations \|\| VT.isVector() \|\|
10598	!cast<LoadSDNode>(N0)->isSimple()) &&
10599	!TLI.isLoadExtLegal(ExtLoadType, VT, N0.getValueType())))
10600	return {};
10601
10602	bool DoXform = true;
10603	SmallVector<SDNode *, 4> SetCCs;
10604	if (!N0.hasOneUse())
10605	DoXform = ExtendUsesToFormExtLoad(VT, N, N0, ExtOpc, SetCCs, TLI);
10606	if (VT.isVector())
10607	DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0));
10608	if (!DoXform)
10609	return {};
10610
10611	LoadSDNode *LN0 = cast<LoadSDNode>(N0);
10612	SDValue ExtLoad = DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
10613	LN0->getBasePtr(), N0.getValueType(),
10614	LN0->getMemOperand());
10615	Combiner.ExtendSetCCUses(SetCCs, N0, ExtLoad, ExtOpc);
10616	// If the load value is used only by N, replace it via CombineTo N.
10617	bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse();
10618	Combiner.CombineTo(N, ExtLoad);
10619	if (NoReplaceTrunc) {
10620	DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
10621	Combiner.recursivelyDeleteUnusedNodes(LN0);
10622	} else {
10623	SDValue Trunc =
10624	DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad);
10625	Combiner.CombineTo(LN0, Trunc, ExtLoad.getValue(1));
10626	}
10627	return SDValue(N, 0); // Return N so it doesn't get rechecked!
10628	}
10629
10630	static SDValue tryToFoldExtOfMaskedLoad(SelectionDAG &DAG,
10631	const TargetLowering &TLI, EVT VT,
10632	SDNode *N, SDValue N0,
10633	ISD::LoadExtType ExtLoadType,
10634	ISD::NodeType ExtOpc) {
10635	if (!N0.hasOneUse())
10636	return SDValue();
10637
10638	MaskedLoadSDNode *Ld = dyn_cast<MaskedLoadSDNode>(N0);
10639	if (!Ld \|\| Ld->getExtensionType() != ISD::NON_EXTLOAD)
10640	return SDValue();
10641
10642	if (!TLI.isLoadExtLegal(ExtLoadType, VT, Ld->getValueType(0)))
10643	return SDValue();
10644
10645	if (!TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
10646	return SDValue();
10647
10648	SDLoc dl(Ld);
10649	SDValue PassThru = DAG.getNode(ExtOpc, dl, VT, Ld->getPassThru());
10650	SDValue NewLoad = DAG.getMaskedLoad(
10651	VT, dl, Ld->getChain(), Ld->getBasePtr(), Ld->getOffset(), Ld->getMask(),
10652	PassThru, Ld->getMemoryVT(), Ld->getMemOperand(), Ld->getAddressingMode(),
10653	ExtLoadType, Ld->isExpandingLoad());
10654	DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), SDValue(NewLoad.getNode(), 1));
10655	return NewLoad;
10656	}
10657
10658	static SDValue foldExtendedSignBitTest(SDNode *N, SelectionDAG &DAG,
10659	bool LegalOperations) {
10660	assert((N->getOpcode() == ISD::SIGN_EXTEND \|\|(((N->getOpcode() == ISD::SIGN_EXTEND \|\| N->getOpcode() == ISD::ZERO_EXTEND) && "Expected sext or zext") ? static_cast <void> (0) : __assert_fail ("(N->getOpcode() == ISD::SIGN_EXTEND \|\| N->getOpcode() == ISD::ZERO_EXTEND) && \"Expected sext or zext\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 10661, __PRETTY_FUNCTION__))
10661	N->getOpcode() == ISD::ZERO_EXTEND) && "Expected sext or zext")(((N->getOpcode() == ISD::SIGN_EXTEND \|\| N->getOpcode() == ISD::ZERO_EXTEND) && "Expected sext or zext") ? static_cast <void> (0) : __assert_fail ("(N->getOpcode() == ISD::SIGN_EXTEND \|\| N->getOpcode() == ISD::ZERO_EXTEND) && \"Expected sext or zext\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 10661, __PRETTY_FUNCTION__));
10662
10663	SDValue SetCC = N->getOperand(0);
10664	if (LegalOperations \|\| SetCC.getOpcode() != ISD::SETCC \|\|
10665	!SetCC.hasOneUse() \|\| SetCC.getValueType() != MVT::i1)
10666	return SDValue();
10667
10668	SDValue X = SetCC.getOperand(0);
10669	SDValue Ones = SetCC.getOperand(1);
10670	ISD::CondCode CC = cast<CondCodeSDNode>(SetCC.getOperand(2))->get();
10671	EVT VT = N->getValueType(0);
10672	EVT XVT = X.getValueType();
10673	// setge X, C is canonicalized to setgt, so we do not need to match that
10674	// pattern. The setlt sibling is folded in SimplifySelectCC() because it does
10675	// not require the 'not' op.
10676	if (CC == ISD::SETGT && isAllOnesConstant(Ones) && VT == XVT) {
10677	// Invert and smear/shift the sign bit:
10678	// sext i1 (setgt iN X, -1) --> sra (not X), (N - 1)
10679	// zext i1 (setgt iN X, -1) --> srl (not X), (N - 1)
10680	SDLoc DL(N);
10681	unsigned ShCt = VT.getSizeInBits() - 1;
10682	const TargetLowering &TLI = DAG.getTargetLoweringInfo();
10683	if (!TLI.shouldAvoidTransformToShift(VT, ShCt)) {
10684	SDValue NotX = DAG.getNOT(DL, X, VT);
10685	SDValue ShiftAmount = DAG.getConstant(ShCt, DL, VT);
10686	auto ShiftOpcode =
10687	N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SRA : ISD::SRL;
10688	return DAG.getNode(ShiftOpcode, DL, VT, NotX, ShiftAmount);
10689	}
10690	}
10691	return SDValue();
10692	}
10693
10694	SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
10695	SDValue N0 = N->getOperand(0);
10696	EVT VT = N->getValueType(0);
10697	SDLoc DL(N);
10698
10699	if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
10700	return Res;
10701
10702	// fold (sext (sext x)) -> (sext x)
10703	// fold (sext (aext x)) -> (sext x)
10704	if (N0.getOpcode() == ISD::SIGN_EXTEND \|\| N0.getOpcode() == ISD::ANY_EXTEND)
10705	return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N0.getOperand(0));
10706
10707	if (N0.getOpcode() == ISD::TRUNCATE) {
10708	// fold (sext (truncate (load x))) -> (sext (smaller load x))
10709	// fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n)))
10710	if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
10711	SDNode *oye = N0.getOperand(0).getNode();
10712	if (NarrowLoad.getNode() != N0.getNode()) {
10713	CombineTo(N0.getNode(), NarrowLoad);
10714	// CombineTo deleted the truncate, if needed, but not what's under it.
10715	AddToWorklist(oye);
10716	}
10717	return SDValue(N, 0); // Return N so it doesn't get rechecked!
10718	}
10719
10720	// See if the value being truncated is already sign extended. If so, just
10721	// eliminate the trunc/sext pair.
10722	SDValue Op = N0.getOperand(0);
10723	unsigned OpBits = Op.getScalarValueSizeInBits();
10724	unsigned MidBits = N0.getScalarValueSizeInBits();
10725	unsigned DestBits = VT.getScalarSizeInBits();
10726	unsigned NumSignBits = DAG.ComputeNumSignBits(Op);
10727
10728	if (OpBits == DestBits) {
10729	// Op is i32, Mid is i8, and Dest is i32. If Op has more than 24 sign
10730	// bits, it is already ready.
10731	if (NumSignBits > DestBits-MidBits)
10732	return Op;
10733	} else if (OpBits < DestBits) {
10734	// Op is i32, Mid is i8, and Dest is i64. If Op has more than 24 sign
10735	// bits, just sext from i32.
10736	if (NumSignBits > OpBits-MidBits)
10737	return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op);
10738	} else {
10739	// Op is i64, Mid is i8, and Dest is i32. If Op has more than 56 sign
10740	// bits, just truncate to i32.
10741	if (NumSignBits > OpBits-MidBits)
10742	return DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
10743	}
10744
10745	// fold (sext (truncate x)) -> (sextinreg x).
10746	if (!LegalOperations \|\| TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG,
10747	N0.getValueType())) {
10748	if (OpBits < DestBits)
10749	Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N0), VT, Op);
10750	else if (OpBits > DestBits)
10751	Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT, Op);
10752	return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Op,
10753	DAG.getValueType(N0.getValueType()));
10754	}
10755	}
10756
10757	// Try to simplify (sext (load x)).
10758	if (SDValue foldedExt =
10759	tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
10760	ISD::SEXTLOAD, ISD::SIGN_EXTEND))
10761	return foldedExt;
10762
10763	if (SDValue foldedExt =
10764	tryToFoldExtOfMaskedLoad(DAG, TLI, VT, N, N0, ISD::SEXTLOAD,
10765	ISD::SIGN_EXTEND))
10766	return foldedExt;
10767
10768	// fold (sext (load x)) to multiple smaller sextloads.
10769	// Only on illegal but splittable vectors.
10770	if (SDValue ExtLoad = CombineExtLoad(N))
10771	return ExtLoad;
10772
10773	// Try to simplify (sext (sextload x)).
10774	if (SDValue foldedExt = tryToFoldExtOfExtload(
10775	DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::SEXTLOAD))
10776	return foldedExt;
10777
10778	// fold (sext (and/or/xor (load x), cst)) ->
10779	// (and/or/xor (sextload x), (sext cst))
10780	if ((N0.getOpcode() == ISD::AND \|\| N0.getOpcode() == ISD::OR \|\|
10781	N0.getOpcode() == ISD::XOR) &&
10782	isa<LoadSDNode>(N0.getOperand(0)) &&
10783	N0.getOperand(1).getOpcode() == ISD::Constant &&
10784	(!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
10785	LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
10786	EVT MemVT = LN00->getMemoryVT();
10787	if (TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, MemVT) &&
10788	LN00->getExtensionType() != ISD::ZEXTLOAD && LN00->isUnindexed()) {
10789	SmallVector<SDNode*, 4> SetCCs;
10790	bool DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
10791	ISD::SIGN_EXTEND, SetCCs, TLI);
10792	if (DoXform) {
10793	SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(LN00), VT,
10794	LN00->getChain(), LN00->getBasePtr(),
10795	LN00->getMemoryVT(),
10796	LN00->getMemOperand());
10797	APInt Mask = N0.getConstantOperandAPInt(1).sext(VT.getSizeInBits());
10798	SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
10799	ExtLoad, DAG.getConstant(Mask, DL, VT));
10800	ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::SIGN_EXTEND);
10801	bool NoReplaceTruncAnd = !N0.hasOneUse();
10802	bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
10803	CombineTo(N, And);
10804	// If N0 has multiple uses, change other uses as well.
10805	if (NoReplaceTruncAnd) {
10806	SDValue TruncAnd =
10807	DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
10808	CombineTo(N0.getNode(), TruncAnd);
10809	}
10810	if (NoReplaceTrunc) {
10811	DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
10812	} else {
10813	SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
10814	LN00->getValueType(0), ExtLoad);
10815	CombineTo(LN00, Trunc, ExtLoad.getValue(1));
10816	}
10817	return SDValue(N,0); // Return N so it doesn't get rechecked!
10818	}
10819	}
10820	}
10821
10822	if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
10823	return V;
10824
10825	if (N0.getOpcode() == ISD::SETCC) {
10826	SDValue N00 = N0.getOperand(0);
10827	SDValue N01 = N0.getOperand(1);
10828	ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
10829	EVT N00VT = N00.getValueType();
10830
10831	// sext(setcc) -> sext_in_reg(vsetcc) for vectors.
10832	// Only do this before legalize for now.
10833	if (VT.isVector() && !LegalOperations &&
10834	TLI.getBooleanContents(N00VT) ==
10835	TargetLowering::ZeroOrNegativeOneBooleanContent) {
10836	// On some architectures (such as SSE/NEON/etc) the SETCC result type is
10837	// of the same size as the compared operands. Only optimize sext(setcc())
10838	// if this is the case.
10839	EVT SVT = getSetCCResultType(N00VT);
10840
10841	// If we already have the desired type, don't change it.
10842	if (SVT != N0.getValueType()) {
10843	// We know that the # elements of the results is the same as the
10844	// # elements of the compare (and the # elements of the compare result
10845	// for that matter). Check to see that they are the same size. If so,
10846	// we know that the element size of the sext'd result matches the
10847	// element size of the compare operands.
10848	if (VT.getSizeInBits() == SVT.getSizeInBits())
10849	return DAG.getSetCC(DL, VT, N00, N01, CC);
10850
10851	// If the desired elements are smaller or larger than the source
10852	// elements, we can use a matching integer vector type and then
10853	// truncate/sign extend.
10854	EVT MatchingVecType = N00VT.changeVectorElementTypeToInteger();
10855	if (SVT == MatchingVecType) {
10856	SDValue VsetCC = DAG.getSetCC(DL, MatchingVecType, N00, N01, CC);
10857	return DAG.getSExtOrTrunc(VsetCC, DL, VT);
10858	}
10859	}
10860	}
10861
10862	// sext(setcc x, y, cc) -> (select (setcc x, y, cc), T, 0)
10863	// Here, T can be 1 or -1, depending on the type of the setcc and
10864	// getBooleanContents().
10865	unsigned SetCCWidth = N0.getScalarValueSizeInBits();
10866
10867	// To determine the "true" side of the select, we need to know the high bit
10868	// of the value returned by the setcc if it evaluates to true.
10869	// If the type of the setcc is i1, then the true case of the select is just
10870	// sext(i1 1), that is, -1.
10871	// If the type of the setcc is larger (say, i8) then the value of the high
10872	// bit depends on getBooleanContents(), so ask TLI for a real "true" value
10873	// of the appropriate width.
10874	SDValue ExtTrueVal = (SetCCWidth == 1)
10875	? DAG.getAllOnesConstant(DL, VT)
10876	: DAG.getBoolConstant(true, DL, VT, N00VT);
10877	SDValue Zero = DAG.getConstant(0, DL, VT);
10878	if (SDValue SCC =
10879	SimplifySelectCC(DL, N00, N01, ExtTrueVal, Zero, CC, true))
10880	return SCC;
10881
10882	if (!VT.isVector() && !TLI.convertSelectOfConstantsToMath(VT)) {
10883	EVT SetCCVT = getSetCCResultType(N00VT);
10884	// Don't do this transform for i1 because there's a select transform
10885	// that would reverse it.
10886	// TODO: We should not do this transform at all without a target hook
10887	// because a sext is likely cheaper than a select?
10888	if (SetCCVT.getScalarSizeInBits() != 1 &&
10889	(!LegalOperations \|\| TLI.isOperationLegal(ISD::SETCC, N00VT))) {
10890	SDValue SetCC = DAG.getSetCC(DL, SetCCVT, N00, N01, CC);
10891	return DAG.getSelect(DL, VT, SetCC, ExtTrueVal, Zero);
10892	}
10893	}
10894	}
10895
10896	// fold (sext x) -> (zext x) if the sign bit is known zero.
10897	if ((!LegalOperations \|\| TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) &&
10898	DAG.SignBitIsZero(N0))
10899	return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0);
10900
10901	if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
10902	return NewVSel;
10903
10904	// Eliminate this sign extend by doing a negation in the destination type:
10905	// sext i32 (0 - (zext i8 X to i32)) to i64 --> 0 - (zext i8 X to i64)
10906	if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
10907	isNullOrNullSplat(N0.getOperand(0)) &&
10908	N0.getOperand(1).getOpcode() == ISD::ZERO_EXTEND &&
10909	TLI.isOperationLegalOrCustom(ISD::SUB, VT)) {
10910	SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(1).getOperand(0), DL, VT);
10911	return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Zext);
10912	}
10913	// Eliminate this sign extend by doing a decrement in the destination type:
10914	// sext i32 ((zext i8 X to i32) + (-1)) to i64 --> (zext i8 X to i64) + (-1)
10915	if (N0.getOpcode() == ISD::ADD && N0.hasOneUse() &&
10916	isAllOnesOrAllOnesSplat(N0.getOperand(1)) &&
10917	N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
10918	TLI.isOperationLegalOrCustom(ISD::ADD, VT)) {
10919	SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(0).getOperand(0), DL, VT);
10920	return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT));
10921	}
10922
10923	// fold sext (not i1 X) -> add (zext i1 X), -1
10924	// TODO: This could be extended to handle bool vectors.
10925	if (N0.getValueType() == MVT::i1 && isBitwiseNot(N0) && N0.hasOneUse() &&
10926	(!LegalOperations \|\| (TLI.isOperationLegal(ISD::ZERO_EXTEND, VT) &&
10927	TLI.isOperationLegal(ISD::ADD, VT)))) {
10928	// If we can eliminate the 'not', the sext form should be better
10929	if (SDValue NewXor = visitXOR(N0.getNode())) {
10930	// Returning N0 is a form of in-visit replacement that may have
10931	// invalidated N0.
10932	if (NewXor.getNode() == N0.getNode()) {
10933	// Return SDValue here as the xor should have already been replaced in
10934	// this sext.
10935	return SDValue();
10936	} else {
10937	// Return a new sext with the new xor.
10938	return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NewXor);
10939	}
10940	}
10941
10942	SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
10943	return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT));
10944	}
10945
10946	if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG))
10947	return Res;
10948
10949	return SDValue();
10950	}
10951
10952	// isTruncateOf - If N is a truncate of some other value, return true, record
10953	// the value being truncated in Op and which of Op's bits are zero/one in Known.
10954	// This function computes KnownBits to avoid a duplicated call to
10955	// computeKnownBits in the caller.
10956	static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,
10957	KnownBits &Known) {
10958	if (N->getOpcode() == ISD::TRUNCATE) {
10959	Op = N->getOperand(0);
10960	Known = DAG.computeKnownBits(Op);
10961	return true;
10962	}
10963
10964	if (N.getOpcode() != ISD::SETCC \|\|
10965	N.getValueType().getScalarType() != MVT::i1 \|\|
10966	cast<CondCodeSDNode>(N.getOperand(2))->get() != ISD::SETNE)
10967	return false;
10968
10969	SDValue Op0 = N->getOperand(0);
10970	SDValue Op1 = N->getOperand(1);
10971	assert(Op0.getValueType() == Op1.getValueType())((Op0.getValueType() == Op1.getValueType()) ? static_cast< void> (0) : __assert_fail ("Op0.getValueType() == Op1.getValueType()" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 10971, __PRETTY_FUNCTION__));
10972
10973	if (isNullOrNullSplat(Op0))
10974	Op = Op1;
10975	else if (isNullOrNullSplat(Op1))
10976	Op = Op0;
10977	else
10978	return false;
10979
10980	Known = DAG.computeKnownBits(Op);
10981
10982	return (Known.Zero \| 1).isAllOnesValue();
10983	}
10984
10985	/// Given an extending node with a pop-count operand, if the target does not
10986	/// support a pop-count in the narrow source type but does support it in the
10987	/// destination type, widen the pop-count to the destination type.
10988	static SDValue widenCtPop(SDNode *Extend, SelectionDAG &DAG) {
10989	assert((Extend->getOpcode() == ISD::ZERO_EXTEND \|\|(((Extend->getOpcode() == ISD::ZERO_EXTEND \|\| Extend->getOpcode () == ISD::ANY_EXTEND) && "Expected extend op") ? static_cast <void> (0) : __assert_fail ("(Extend->getOpcode() == ISD::ZERO_EXTEND \|\| Extend->getOpcode() == ISD::ANY_EXTEND) && \"Expected extend op\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 10990, __PRETTY_FUNCTION__))
10990	Extend->getOpcode() == ISD::ANY_EXTEND) && "Expected extend op")(((Extend->getOpcode() == ISD::ZERO_EXTEND \|\| Extend->getOpcode () == ISD::ANY_EXTEND) && "Expected extend op") ? static_cast <void> (0) : __assert_fail ("(Extend->getOpcode() == ISD::ZERO_EXTEND \|\| Extend->getOpcode() == ISD::ANY_EXTEND) && \"Expected extend op\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 10990, __PRETTY_FUNCTION__));
10991
10992	SDValue CtPop = Extend->getOperand(0);
10993	if (CtPop.getOpcode() != ISD::CTPOP \|\| !CtPop.hasOneUse())
10994	return SDValue();
10995
10996	EVT VT = Extend->getValueType(0);
10997	const TargetLowering &TLI = DAG.getTargetLoweringInfo();
10998	if (TLI.isOperationLegalOrCustom(ISD::CTPOP, CtPop.getValueType()) \|\|
10999	!TLI.isOperationLegalOrCustom(ISD::CTPOP, VT))
11000	return SDValue();
11001
11002	// zext (ctpop X) --> ctpop (zext X)
11003	SDLoc DL(Extend);
11004	SDValue NewZext = DAG.getZExtOrTrunc(CtPop.getOperand(0), DL, VT);
11005	return DAG.getNode(ISD::CTPOP, DL, VT, NewZext);
11006	}
11007
/// Combine entry point for ISD::ZERO_EXTEND nodes.
///
/// Tries a fixed sequence of folds on \p N and its operand N0; the first fold
/// that applies wins. Returns:
///  * a new value that should replace N,
///  * SDValue(N, 0) when the replacement was already performed here through
///    CombineTo (so the node is not rechecked), or
///  * an empty SDValue when nothing was changed. Note that two of the folds
///    below (vector setcc, shl-of-zext) return an empty SDValue early, which
///    also skips every fold that follows them.
11008	SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
11009	SDValue N0 = N->getOperand(0);
11010	EVT VT = N->getValueType(0);
11011
11012	if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
11013	return Res;
11014
11015	// fold (zext (zext x)) -> (zext x)
11016	// fold (zext (aext x)) -> (zext x)
11017	if (N0.getOpcode() == ISD::ZERO_EXTEND \|\| N0.getOpcode() == ISD::ANY_EXTEND)
11018	return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT,
11019	N0.getOperand(0));
11020
11021	// fold (zext (truncate x)) -> (zext x) or
11022	// (zext (truncate x)) -> (truncate x)
11023	// This is valid when the truncated bits of x are already zero.
11024	SDValue Op;
11025	KnownBits Known;
11026	if (isTruncateOf(DAG, N0, Op, Known)) {
// TruncatedBits is the set of bits of Op, from N0's scalar width up to
// min(width of Op, width of VT), that must be known zero for the fold. It is
// empty when Op and N0 have the same scalar width.
11027	APInt TruncatedBits =
11028	(Op.getScalarValueSizeInBits() == N0.getScalarValueSizeInBits()) ?
11029	APInt(Op.getScalarValueSizeInBits(), 0) :
11030	APInt::getBitsSet(Op.getScalarValueSizeInBits(),
11031	N0.getScalarValueSizeInBits(),
11032	std::min(Op.getScalarValueSizeInBits(),
11033	VT.getScalarSizeInBits()));
11034	if (TruncatedBits.isSubsetOf(Known.Zero))
11035	return DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
11036	}
11037
11038	// fold (zext (truncate x)) -> (and x, mask)
11039	if (N0.getOpcode() == ISD::TRUNCATE) {
11040	// fold (zext (truncate (load x))) -> (zext (smaller load x))
11041	// fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n)))
11042	if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
// Remember the node under the truncate before N0 is (possibly) replaced, so
// it can be queued for another visit.
11043	SDNode *oye = N0.getOperand(0).getNode();
11044	if (NarrowLoad.getNode() != N0.getNode()) {
11045	CombineTo(N0.getNode(), NarrowLoad);
11046	// CombineTo deleted the truncate, if needed, but not what's under it.
11047	AddToWorklist(oye);
11048	}
11049	return SDValue(N, 0); // Return N so it doesn't get rechecked!
11050	}
11051
11052	EVT SrcVT = N0.getOperand(0).getValueType();
11053	EVT MinVT = N0.getValueType();
11054
11055	// Try to mask before the extension to avoid having to generate a larger mask,
11056	// possibly over several sub-vectors.
11057	if (SrcVT.bitsLT(VT) && VT.isVector()) {
11058	if (!LegalOperations \|\| (TLI.isOperationLegal(ISD::AND, SrcVT) &&
11059	TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) {
11060	SDValue Op = N0.getOperand(0);
11061	Op = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT);
11062	AddToWorklist(Op.getNode());
11063	SDValue ZExtOrTrunc = DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
11064	// Transfer the debug info; the new node is equivalent to N0.
11065	DAG.transferDbgValues(N0, ZExtOrTrunc);
11066	return ZExtOrTrunc;
11067	}
11068	}
11069
// Otherwise bring x to VT first (any-extend or truncate) and clear the bits
// above MinVT afterwards.
11070	if (!LegalOperations \|\| TLI.isOperationLegal(ISD::AND, VT)) {
11071	SDValue Op = DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);
11072	AddToWorklist(Op.getNode());
11073	SDValue And = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT);
11074	// We may safely transfer the debug info describing the truncate node over
11075	// to the equivalent and operation.
11076	DAG.transferDbgValues(N0, And);
11077	return And;
11078	}
11079	}
11080
11081	// Fold (zext (and (trunc x), cst)) -> (and x, cst),
11082	// if either of the casts is not free.
11083	if (N0.getOpcode() == ISD::AND &&
11084	N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
11085	N0.getOperand(1).getOpcode() == ISD::Constant &&
11086	(!TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
11087	N0.getValueType()) \|\|
11088	!TLI.isZExtFree(N0.getValueType(), VT))) {
11089	SDValue X = N0.getOperand(0).getOperand(0);
11090	X = DAG.getAnyExtOrTrunc(X, SDLoc(X), VT);
// The constant is zero-extended to VT's width, so the bits above the
// original mask width are cleared by the new AND.
11091	APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
11092	SDLoc DL(N);
11093	return DAG.getNode(ISD::AND, DL, VT,
11094	X, DAG.getConstant(Mask, DL, VT));
11095	}
11096
11097	// Try to simplify (zext (load x)).
11098	if (SDValue foldedExt =
11099	tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
11100	ISD::ZEXTLOAD, ISD::ZERO_EXTEND))
11101	return foldedExt;
11102
11103	if (SDValue foldedExt =
11104	tryToFoldExtOfMaskedLoad(DAG, TLI, VT, N, N0, ISD::ZEXTLOAD,
11105	ISD::ZERO_EXTEND))
11106	return foldedExt;
11107
11108	// fold (zext (load x)) to multiple smaller zextloads.
11109	// Only on illegal but splittable vectors.
11110	if (SDValue ExtLoad = CombineExtLoad(N))
11111	return ExtLoad;
11112
11113	// fold (zext (and/or/xor (load x), cst)) ->
11114	// (and/or/xor (zextload x), (zext cst))
11115	// Unless (and (load x) cst) will match as a zextload already and has
11116	// additional users.
// NOTE(review): the guard below requires !LegalOperations *and* that the
// logic op is legal in VT (other folds in this function use
// "!LegalOperations || legal") - presumably intentional; confirm upstream.
11117	if ((N0.getOpcode() == ISD::AND \|\| N0.getOpcode() == ISD::OR \|\|
11118	N0.getOpcode() == ISD::XOR) &&
11119	isa<LoadSDNode>(N0.getOperand(0)) &&
11120	N0.getOperand(1).getOpcode() == ISD::Constant &&
11121	(!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
11122	LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
11123	EVT MemVT = LN00->getMemoryVT();
11124	if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) &&
11125	LN00->getExtensionType() != ISD::SEXTLOAD && LN00->isUnindexed()) {
11126	bool DoXform = true;
11127	SmallVector<SDNode*, 4> SetCCs;
11128	if (!N0.hasOneUse()) {
11129	if (N0.getOpcode() == ISD::AND) {
11130	auto *AndC = cast<ConstantSDNode>(N0.getOperand(1));
11131	EVT LoadResultTy = AndC->getValueType(0);
11132	EVT ExtVT;
11133	if (isAndLoadExtLoad(AndC, LN00, LoadResultTy, ExtVT))
11134	DoXform = false;
11135	}
11136	}
11137	if (DoXform)
11138	DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
11139	ISD::ZERO_EXTEND, SetCCs, TLI);
11140	if (DoXform) {
11141	SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN00), VT,
11142	LN00->getChain(), LN00->getBasePtr(),
11143	LN00->getMemoryVT(),
11144	LN00->getMemOperand());
11145	APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
11146	SDLoc DL(N);
11147	SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
11148	ExtLoad, DAG.getConstant(Mask, DL, VT));
11149	ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
// Both use-count queries are made before the CombineTo(N, And) call below;
// keep them ahead of it (presumably CombineTo changes the use lists).
11150	bool NoReplaceTruncAnd = !N0.hasOneUse();
11151	bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
11152	CombineTo(N, And);
11153	// If N0 has multiple uses, change other uses as well.
11154	if (NoReplaceTruncAnd) {
11155	SDValue TruncAnd =
11156	DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
11157	CombineTo(N0.getNode(), TruncAnd);
11158	}
// If the loaded value had a single use, only the load's chain result
// (value 1) is redirected to the new extending load; otherwise the old load
// is replaced by a truncate of the extending load plus the new chain.
11159	if (NoReplaceTrunc) {
11160	DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
11161	} else {
11162	SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
11163	LN00->getValueType(0), ExtLoad);
11164	CombineTo(LN00, Trunc, ExtLoad.getValue(1));
11165	}
11166	return SDValue(N,0); // Return N so it doesn't get rechecked!
11167	}
11168	}
11169	}
11170
11171	// fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
11172	// (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
11173	if (SDValue ZExtLoad = CombineZExtLogicopShiftLoad(N))
11174	return ZExtLoad;
11175
11176	// Try to simplify (zext (zextload x)).
11177	if (SDValue foldedExt = tryToFoldExtOfExtload(
11178	DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::ZEXTLOAD))
11179	return foldedExt;
11180
11181	if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
11182	return V;
11183
11184	if (N0.getOpcode() == ISD::SETCC) {
11185	// Only do this before legalize for now.
11186	if (!LegalOperations && VT.isVector() &&
11187	N0.getValueType().getVectorElementType() == MVT::i1) {
11188	EVT N00VT = N0.getOperand(0).getValueType();
// The setcc already has the result type reported by getSetCCResultType:
// give up on this node entirely (none of the later folds are tried).
11189	if (getSetCCResultType(N00VT) == N0.getValueType())
11190	return SDValue();
11191
11192	// We know that the # elements of the results is the same as the #
11193	// elements of the compare (and the # elements of the compare result for
11194	// that matter). Check to see that they are the same size. If so, we know
11195	// that the element size of the zext'd result matches the element size of
11196	// the compare operands.
11197	SDLoc DL(N);
11198	if (VT.getSizeInBits() == N00VT.getSizeInBits()) {
11199	// zext(setcc) -> zext_in_reg(vsetcc) for vectors.
11200	SDValue VSetCC = DAG.getNode(ISD::SETCC, DL, VT, N0.getOperand(0),
11201	N0.getOperand(1), N0.getOperand(2));
11202	return DAG.getZeroExtendInReg(VSetCC, DL, N0.getValueType());
11203	}
11204
11205	// If the desired elements are smaller or larger than the source
11206	// elements we can use a matching integer vector type and then
11207	// truncate/any extend followed by zext_in_reg.
11208	EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
11209	SDValue VsetCC =
11210	DAG.getNode(ISD::SETCC, DL, MatchingVectorType, N0.getOperand(0),
11211	N0.getOperand(1), N0.getOperand(2));
11212	return DAG.getZeroExtendInReg(DAG.getAnyExtOrTrunc(VsetCC, DL, VT), DL,
11213	N0.getValueType());
11214	}
11215
11216	// zext(setcc x,y,cc) -> zext(select x, y, true, false, cc)
11217	SDLoc DL(N);
11218	EVT N0VT = N0.getValueType();
11219	EVT N00VT = N0.getOperand(0).getValueType();
11220	if (SDValue SCC = SimplifySelectCC(
11221	DL, N0.getOperand(0), N0.getOperand(1),
11222	DAG.getBoolConstant(true, DL, N0VT, N00VT),
11223	DAG.getBoolConstant(false, DL, N0VT, N00VT),
11224	cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
11225	return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, SCC);
11226	}
11227
11228	// (zext (shl (zext x), cst)) -> (shl (zext x), cst)
// The same rewrite is applied to SRL; only the SHL form needs the
// shifted-out-bits check below.
11229	if ((N0.getOpcode() == ISD::SHL \|\| N0.getOpcode() == ISD::SRL) &&
11230	isa<ConstantSDNode>(N0.getOperand(1)) &&
11231	N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
11232	N0.hasOneUse()) {
11233	SDValue ShAmt = N0.getOperand(1);
11234	if (N0.getOpcode() == ISD::SHL) {
11235	SDValue InnerZExt = N0.getOperand(0);
11236	// If the original shl may be shifting out bits, do not perform this
11237	// transformation.
// KnownZeroBits = number of high bits the inner zext added. A larger shift
// amount aborts the whole visit (empty SDValue), not just this fold.
11238	unsigned KnownZeroBits = InnerZExt.getValueSizeInBits() -
11239	InnerZExt.getOperand(0).getValueSizeInBits();
11240	if (cast<ConstantSDNode>(ShAmt)->getAPIntValue().ugt(KnownZeroBits))
11241	return SDValue();
11242	}
11243
11244	SDLoc DL(N);
11245
11246	// Ensure that the shift amount is wide enough for the shifted value.
11247	if (Log2_32_Ceil(VT.getSizeInBits()) > ShAmt.getValueSizeInBits())
11248	ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt);
11249
11250	return DAG.getNode(N0.getOpcode(), DL, VT,
11251	DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)),
11252	ShAmt);
11253	}
11254
11255	if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
11256	return NewVSel;
11257
11258	if (SDValue NewCtPop = widenCtPop(N, DAG))
11259	return NewCtPop;
11260
11261	if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG))
11262	return Res;
11263
11264	return SDValue();
11265	}
11266
/// Combine entry point for ISD::ANY_EXTEND nodes.
///
/// Tries a fixed sequence of folds on \p N and its operand N0; the first fold
/// that applies wins. Returns a replacement value, SDValue(N, 0) when the
/// replacement was already carried out here through CombineTo, or an empty
/// SDValue when nothing was changed (the vector setcc case below can return
/// an empty SDValue early, skipping the remaining folds).
11267	SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
11268	SDValue N0 = N->getOperand(0);
11269	EVT VT = N->getValueType(0);
11270
11271	if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
11272	return Res;
11273
11274	// fold (aext (aext x)) -> (aext x)
11275	// fold (aext (zext x)) -> (zext x)
11276	// fold (aext (sext x)) -> (sext x)
// The inner extend's opcode is kept, so the result has the inner node's
// extension kind.
11277	if (N0.getOpcode() == ISD::ANY_EXTEND \|\|
11278	N0.getOpcode() == ISD::ZERO_EXTEND \|\|
11279	N0.getOpcode() == ISD::SIGN_EXTEND)
11280	return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
11281
11282	// fold (aext (truncate (load x))) -> (aext (smaller load x))
11283	// fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n)))
11284	if (N0.getOpcode() == ISD::TRUNCATE) {
11285	if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
// Remember the node under the truncate before N0 is (possibly) replaced, so
// it can be queued for another visit.
11286	SDNode *oye = N0.getOperand(0).getNode();
11287	if (NarrowLoad.getNode() != N0.getNode()) {
11288	CombineTo(N0.getNode(), NarrowLoad);
11289	// CombineTo deleted the truncate, if needed, but not what's under it.
11290	AddToWorklist(oye);
11291	}
11292	return SDValue(N, 0); // Return N so it doesn't get rechecked!
11293	}
11294	}
11295
11296	// fold (aext (truncate x))
// Reached only when ReduceLoadWidth above did not narrow a load: the
// truncate/extend pair collapses to a single any-extend or truncate of x.
11297	if (N0.getOpcode() == ISD::TRUNCATE)
11298	return DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);
11299
11300	// Fold (aext (and (trunc x), cst)) -> (and x, cst)
11301	// if the trunc is not free.
11302	if (N0.getOpcode() == ISD::AND &&
11303	N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
11304	N0.getOperand(1).getOpcode() == ISD::Constant &&
11305	!TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
11306	N0.getValueType())) {
11307	SDLoc DL(N);
11308	SDValue X = N0.getOperand(0).getOperand(0);
11309	X = DAG.getAnyExtOrTrunc(X, DL, VT);
// The constant is zero-extended to VT's width, so the new AND also clears
// the bits above the original mask width.
11310	APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
11311	return DAG.getNode(ISD::AND, DL, VT,
11312	X, DAG.getConstant(Mask, DL, VT));
11313	}
11314
11315	// fold (aext (load x)) -> (aext (truncate (extload x)))
11316	// None of the supported targets knows how to perform load and any_ext
11317	// on vectors in one instruction, so attempt to fold to zext instead.
11318	if (VT.isVector()) {
11319	// Try to simplify (zext (load x)).
11320	if (SDValue foldedExt =
11321	tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
11322	ISD::ZEXTLOAD, ISD::ZERO_EXTEND))
11323	return foldedExt;
11324	} else if (ISD::isNON_EXTLoad(N0.getNode()) &&
11325	ISD::isUNINDEXEDLoad(N0.getNode()) &&
11326	TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
11327	bool DoXform = true;
11328	SmallVector<SDNode *, 4> SetCCs;
11329	if (!N0.hasOneUse())
11330	DoXform =
11331	ExtendUsesToFormExtLoad(VT, N, N0, ISD::ANY_EXTEND, SetCCs, TLI);
11332	if (DoXform) {
11333	LoadSDNode *LN0 = cast<LoadSDNode>(N0);
11334	SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
11335	LN0->getChain(), LN0->getBasePtr(),
11336	N0.getValueType(), LN0->getMemOperand());
11337	ExtendSetCCUses(SetCCs, N0, ExtLoad, ISD::ANY_EXTEND);
11338	// If the load value is used only by N, replace it via CombineTo N.
// The use count is sampled before CombineTo(N, ExtLoad) below; keep this
// ordering.
11339	bool NoReplaceTrunc = N0.hasOneUse();
11340	CombineTo(N, ExtLoad);
11341	if (NoReplaceTrunc) {
11342	DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
11343	recursivelyDeleteUnusedNodes(LN0);
11344	} else {
// Other users of the original load get a truncate of the extending load,
// and the load's chain result is replaced by the new load's chain.
11345	SDValue Trunc =
11346	DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad);
11347	CombineTo(LN0, Trunc, ExtLoad.getValue(1));
11348	}
11349	return SDValue(N, 0); // Return N so it doesn't get rechecked!
11350	}
11351	}
11352
11353	// fold (aext (zextload x)) -> (aext (truncate (zextload x)))
11354	// fold (aext (sextload x)) -> (aext (truncate (sextload x)))
11355	// fold (aext ( extload x)) -> (aext (truncate (extload x)))
// Only single-use, unindexed extending loads are handled; the new load keeps
// the original extension type and memory type but produces VT directly.
11356	if (N0.getOpcode() == ISD::LOAD && !ISD::isNON_EXTLoad(N0.getNode()) &&
11357	ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
11358	LoadSDNode *LN0 = cast<LoadSDNode>(N0);
11359	ISD::LoadExtType ExtType = LN0->getExtensionType();
11360	EVT MemVT = LN0->getMemoryVT();
11361	if (!LegalOperations \|\| TLI.isLoadExtLegal(ExtType, VT, MemVT)) {
11362	SDValue ExtLoad = DAG.getExtLoad(ExtType, SDLoc(N),
11363	VT, LN0->getChain(), LN0->getBasePtr(),
11364	MemVT, LN0->getMemOperand());
11365	CombineTo(N, ExtLoad);
11366	DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
11367	recursivelyDeleteUnusedNodes(LN0);
11368	return SDValue(N, 0); // Return N so it doesn't get rechecked!
11369	}
11370	}
11371
11372	if (N0.getOpcode() == ISD::SETCC) {
11373	// For vectors:
11374	// aext(setcc) -> vsetcc
11375	// aext(setcc) -> truncate(vsetcc)
11376	// aext(setcc) -> aext(vsetcc)
11377	// Only do this before legalize for now.
11378	if (VT.isVector() && !LegalOperations) {
11379	EVT N00VT = N0.getOperand(0).getValueType();
// The setcc already has the result type reported by getSetCCResultType:
// give up on this node entirely (none of the later folds are tried).
11380	if (getSetCCResultType(N00VT) == N0.getValueType())
11381	return SDValue();
11382
11383	// We know that the # elements of the results is the same as the
11384	// # elements of the compare (and the # elements of the compare result
11385	// for that matter). Check to see that they are the same size. If so,
11386	// we know that the element size of the extended result matches the
11387	// element size of the compare operands.
11388	if (VT.getSizeInBits() == N00VT.getSizeInBits())
11389	return DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0),
11390	N0.getOperand(1),
11391	cast<CondCodeSDNode>(N0.getOperand(2))->get());
11392
11393	// If the desired elements are smaller or larger than the source
11394	// elements we can use a matching integer vector type and then
11395	// truncate/any extend
11396	EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
11397	SDValue VsetCC =
11398	DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0),
11399	N0.getOperand(1),
11400	cast<CondCodeSDNode>(N0.getOperand(2))->get());
11401	return DAG.getAnyExtOrTrunc(VsetCC, SDLoc(N), VT);
11402	}
11403
11404	// aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
11405	SDLoc DL(N);
11406	if (SDValue SCC = SimplifySelectCC(
11407	DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
11408	DAG.getConstant(0, DL, VT),
11409	cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
11410	return SCC;
11411	}
11412
11413	if (SDValue NewCtPop = widenCtPop(N, DAG))
11414	return NewCtPop;
11415
11416	if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG))
11417	return Res;
11418
11419	return SDValue();
11420	}
11421
11422	SDValue DAGCombiner::visitAssertExt(SDNode *N) {
11423	unsigned Opcode = N->getOpcode();
11424	SDValue N0 = N->getOperand(0);
11425	SDValue N1 = N->getOperand(1);
11426	EVT AssertVT = cast<VTSDNode>(N1)->getVT();
11427
11428	// fold (assert?ext (assert?ext x, vt), vt) -> (assert?ext x, vt)
11429	if (N0.getOpcode() == Opcode &&
11430	AssertVT == cast<VTSDNode>(N0.getOperand(1))->getVT())
11431	return N0;
11432
11433	if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
11434	N0.getOperand(0).getOpcode() == Opcode) {
11435	// We have an assert, truncate, assert sandwich. Make one stronger assert
11436	// by asserting on the smallest asserted type to the larger source type.
11437	// This eliminates the later assert:
11438	// assert (trunc (assert X, i8) to iN), i1 --> trunc (assert X, i1) to iN
11439	// assert (trunc (assert X, i1) to iN), i8 --> trunc (assert X, i1) to iN
11440	SDValue BigA = N0.getOperand(0);
11441	EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
11442	assert(BigA_AssertVT.bitsLE(N0.getValueType()) &&((BigA_AssertVT.bitsLE(N0.getValueType()) && "Asserting zero/sign-extended bits to a type larger than the " "truncated destination does not provide information") ? static_cast <void> (0) : __assert_fail ("BigA_AssertVT.bitsLE(N0.getValueType()) && \"Asserting zero/sign-extended bits to a type larger than the \" \"truncated destination does not provide information\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 11444, __PRETTY_FUNCTION__))
11443	"Asserting zero/sign-extended bits to a type larger than the "((BigA_AssertVT.bitsLE(N0.getValueType()) && "Asserting zero/sign-extended bits to a type larger than the " "truncated destination does not provide information") ? static_cast <void> (0) : __assert_fail ("BigA_AssertVT.bitsLE(N0.getValueType()) && \"Asserting zero/sign-extended bits to a type larger than the \" \"truncated destination does not provide information\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 11444, __PRETTY_FUNCTION__))
11444	"truncated destination does not provide information")((BigA_AssertVT.bitsLE(N0.getValueType()) && "Asserting zero/sign-extended bits to a type larger than the " "truncated destination does not provide information") ? static_cast <void> (0) : __assert_fail ("BigA_AssertVT.bitsLE(N0.getValueType()) && \"Asserting zero/sign-extended bits to a type larger than the \" \"truncated destination does not provide information\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 11444, __PRETTY_FUNCTION__));
11445
11446	SDLoc DL(N);
11447	EVT MinAssertVT = AssertVT.bitsLT(BigA_AssertVT) ? AssertVT : BigA_AssertVT;
11448	SDValue MinAssertVTVal = DAG.getValueType(MinAssertVT);
11449	SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
11450	BigA.getOperand(0), MinAssertVTVal);
11451	return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
11452	}
11453
11454	// If we have (AssertZext (truncate (AssertSext X, iX)), iY) and Y is smaller
11455	// than X. Just move the AssertZext in front of the truncate and drop the
11456	// AssertSExt.
11457	if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
11458	N0.getOperand(0).getOpcode() == ISD::AssertSext &&
11459	Opcode == ISD::AssertZext) {
11460	SDValue BigA = N0.getOperand(0);
11461	EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
11462	assert(BigA_AssertVT.bitsLE(N0.getValueType()) &&((BigA_AssertVT.bitsLE(N0.getValueType()) && "Asserting zero/sign-extended bits to a type larger than the " "truncated destination does not provide information") ? static_cast <void> (0) : __assert_fail ("BigA_AssertVT.bitsLE(N0.getValueType()) && \"Asserting zero/sign-extended bits to a type larger than the \" \"truncated destination does not provide information\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 11464, __PRETTY_FUNCTION__))
11463	"Asserting zero/sign-extended bits to a type larger than the "((BigA_AssertVT.bitsLE(N0.getValueType()) && "Asserting zero/sign-extended bits to a type larger than the " "truncated destination does not provide information") ? static_cast <void> (0) : __assert_fail ("BigA_AssertVT.bitsLE(N0.getValueType()) && \"Asserting zero/sign-extended bits to a type larger than the \" \"truncated destination does not provide information\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 11464, __PRETTY_FUNCTION__))
11464	"truncated destination does not provide information")((BigA_AssertVT.bitsLE(N0.getValueType()) && "Asserting zero/sign-extended bits to a type larger than the " "truncated destination does not provide information") ? static_cast <void> (0) : __assert_fail ("BigA_AssertVT.bitsLE(N0.getValueType()) && \"Asserting zero/sign-extended bits to a type larger than the \" \"truncated destination does not provide information\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 11464, __PRETTY_FUNCTION__));
11465
11466	if (AssertVT.bitsLT(BigA_AssertVT)) {
11467	SDLoc DL(N);
11468	SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
11469	BigA.getOperand(0), N1);
11470	return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
11471	}
11472	}
11473
11474	return SDValue();
11475	}
11476
11477	SDValue DAGCombiner::visitAssertAlign(SDNode *N) {
11478	SDLoc DL(N);
11479
11480	Align AL = cast<AssertAlignSDNode>(N)->getAlign();
11481	SDValue N0 = N->getOperand(0);
11482
11483	// Fold (assertalign (assertalign x, AL0), AL1) ->
11484	// (assertalign x, max(AL0, AL1))
11485	if (auto *AAN = dyn_cast<AssertAlignSDNode>(N0))
11486	return DAG.getAssertAlign(DL, N0.getOperand(0),
11487	std::max(AL, AAN->getAlign()));
11488
11489	// In rare cases, there are trivial arithmetic ops in source operands. Sink
11490	// this assert down to source operands so that those arithmetic ops could be
11491	// exposed to the DAG combining.
11492	switch (N0.getOpcode()) {
11493	default:
11494	break;
11495	case ISD::ADD:
11496	case ISD::SUB: {
11497	unsigned AlignShift = Log2(AL);
11498	SDValue LHS = N0.getOperand(0);
11499	SDValue RHS = N0.getOperand(1);
11500	unsigned LHSAlignShift = DAG.computeKnownBits(LHS).countMinTrailingZeros();
11501	unsigned RHSAlignShift = DAG.computeKnownBits(RHS).countMinTrailingZeros();
11502	if (LHSAlignShift >= AlignShift \|\| RHSAlignShift >= AlignShift) {
11503	if (LHSAlignShift < AlignShift)
11504	LHS = DAG.getAssertAlign(DL, LHS, AL);
11505	if (RHSAlignShift < AlignShift)
11506	RHS = DAG.getAssertAlign(DL, RHS, AL);
11507	return DAG.getNode(N0.getOpcode(), DL, N0.getValueType(), LHS, RHS);
11508	}
11509	break;
11510	}
11511	}
11512
11513	return SDValue();
11514	}
11515
11516	/// If the result of a wider load is shifted to right of N bits and then
11517	/// truncated to a narrower type and where N is a multiple of number of bits of
11518	/// the narrower type, transform it to a narrower load from address + N / num of
11519	/// bits of new type. Also narrow the load if the result is masked with an AND
11520	/// to effectively produce a smaller type. If the result is to be extended, also
11521	/// fold the extension to form a extending load.
11522	SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
11523	unsigned Opc = N->getOpcode();
11524
11525	ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
11526	SDValue N0 = N->getOperand(0);
11527	EVT VT = N->getValueType(0);
11528	EVT ExtVT = VT;
11529
11530	// This transformation isn't valid for vector loads.
11531	if (VT.isVector())
11532	return SDValue();
11533
11534	unsigned ShAmt = 0;
11535	bool HasShiftedOffset = false;
11536	// Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then
11537	// extended to VT.
11538	if (Opc == ISD::SIGN_EXTEND_INREG) {
11539	ExtType = ISD::SEXTLOAD;
11540	ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
11541	} else if (Opc == ISD::SRL) {
11542	// Another special-case: SRL is basically zero-extending a narrower value,
11543	// or it maybe shifting a higher subword, half or byte into the lowest
11544	// bits.
11545	ExtType = ISD::ZEXTLOAD;
11546	N0 = SDValue(N, 0);
11547
11548	auto *LN0 = dyn_cast<LoadSDNode>(N0.getOperand(0));
11549	auto *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
11550	if (!N01 \|\| !LN0)
11551	return SDValue();
11552
11553	uint64_t ShiftAmt = N01->getZExtValue();
11554	uint64_t MemoryWidth = LN0->getMemoryVT().getScalarSizeInBits();
11555	if (LN0->getExtensionType() != ISD::SEXTLOAD && MemoryWidth > ShiftAmt)
11556	ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShiftAmt);
11557	else
11558	ExtVT = EVT::getIntegerVT(*DAG.getContext(),
11559	VT.getScalarSizeInBits() - ShiftAmt);
11560	} else if (Opc == ISD::AND) {
11561	// An AND with a constant mask is the same as a truncate + zero-extend.
11562	auto AndC = dyn_cast<ConstantSDNode>(N->getOperand(1));
11563	if (!AndC)
11564	return SDValue();
11565
11566	const APInt &Mask = AndC->getAPIntValue();
11567	unsigned ActiveBits = 0;
11568	if (Mask.isMask()) {
11569	ActiveBits = Mask.countTrailingOnes();
11570	} else if (Mask.isShiftedMask()) {
11571	ShAmt = Mask.countTrailingZeros();
11572	APInt ShiftedMask = Mask.lshr(ShAmt);
11573	ActiveBits = ShiftedMask.countTrailingOnes();
11574	HasShiftedOffset = true;
11575	} else
11576	return SDValue();
11577
11578	ExtType = ISD::ZEXTLOAD;
11579	ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
11580	}
11581
11582	if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
11583	SDValue SRL = N0;
11584	if (auto *ConstShift = dyn_cast<ConstantSDNode>(SRL.getOperand(1))) {
11585	ShAmt = ConstShift->getZExtValue();
11586	unsigned EVTBits = ExtVT.getScalarSizeInBits();
11587	// Is the shift amount a multiple of size of VT?
11588	if ((ShAmt & (EVTBits-1)) == 0) {
11589	N0 = N0.getOperand(0);
11590	// Is the load width a multiple of size of VT?
11591	if ((N0.getScalarValueSizeInBits() & (EVTBits - 1)) != 0)
11592	return SDValue();
11593	}
11594
11595	// At this point, we must have a load or else we can't do the transform.
11596	auto *LN0 = dyn_cast<LoadSDNode>(N0);
11597	if (!LN0) return SDValue();
11598
11599	// Because a SRL must be assumed to need to zero-extend the high bits
11600	// (as opposed to anyext the high bits), we can't combine the zextload
11601	// lowering of SRL and an sextload.
11602	if (LN0->getExtensionType() == ISD::SEXTLOAD)
11603	return SDValue();
11604
11605	// If the shift amount is larger than the input type then we're not
11606	// accessing any of the loaded bytes. If the load was a zextload/extload
11607	// then the result of the shift+trunc is zero/undef (handled elsewhere).
11608	if (ShAmt >= LN0->getMemoryVT().getSizeInBits())
11609	return SDValue();
11610
11611	// If the SRL is only used by a masking AND, we may be able to adjust
11612	// the ExtVT to make the AND redundant.
11613	SDNode Mask = (SRL->use_begin());
11614	if (Mask->getOpcode() == ISD::AND &&
11615	isa<ConstantSDNode>(Mask->getOperand(1))) {
11616	const APInt& ShiftMask = Mask->getConstantOperandAPInt(1);
11617	if (ShiftMask.isMask()) {
11618	EVT MaskedVT = EVT::getIntegerVT(*DAG.getContext(),
11619	ShiftMask.countTrailingOnes());
11620	// If the mask is smaller, recompute the type.
11621	if ((ExtVT.getScalarSizeInBits() > MaskedVT.getScalarSizeInBits()) &&
11622	TLI.isLoadExtLegal(ExtType, N0.getValueType(), MaskedVT))
11623	ExtVT = MaskedVT;
11624	}
11625	}
11626	}
11627	}
11628
11629	// If the load is shifted left (and the result isn't shifted back right),
11630	// we can fold the truncate through the shift.
11631	unsigned ShLeftAmt = 0;
11632	if (ShAmt == 0 && N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
11633	ExtVT == VT && TLI.isNarrowingProfitable(N0.getValueType(), VT)) {
11634	if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
11635	ShLeftAmt = N01->getZExtValue();
11636	N0 = N0.getOperand(0);
11637	}
11638	}
11639
11640	// If we haven't found a load, we can't narrow it.
11641	if (!isa<LoadSDNode>(N0))
11642	return SDValue();
11643
11644	LoadSDNode *LN0 = cast<LoadSDNode>(N0);
11645	// Reducing the width of a volatile load is illegal. For atomics, we may be
11646	// able to reduce the width provided we never widen again. (see D66309)
11647	if (!LN0->isSimple() \|\|
11648	!isLegalNarrowLdSt(LN0, ExtType, ExtVT, ShAmt))
11649	return SDValue();
11650
11651	auto AdjustBigEndianShift = [&](unsigned ShAmt) {
11652	unsigned LVTStoreBits =
11653	LN0->getMemoryVT().getStoreSizeInBits().getFixedSize();
11654	unsigned EVTStoreBits = ExtVT.getStoreSizeInBits().getFixedSize();
11655	return LVTStoreBits - EVTStoreBits - ShAmt;
11656	};
11657
11658	// For big endian targets, we need to adjust the offset to the pointer to
11659	// load the correct bytes.
11660	if (DAG.getDataLayout().isBigEndian())
11661	ShAmt = AdjustBigEndianShift(ShAmt);
11662
11663	uint64_t PtrOff = ShAmt / 8;
11664	Align NewAlign = commonAlignment(LN0->getAlign(), PtrOff);
11665	SDLoc DL(LN0);
11666	// The original load itself didn't wrap, so an offset within it doesn't.
11667	SDNodeFlags Flags;
11668	Flags.setNoUnsignedWrap(true);
11669	SDValue NewPtr = DAG.getMemBasePlusOffset(LN0->getBasePtr(),
11670	TypeSize::Fixed(PtrOff), DL, Flags);
11671	AddToWorklist(NewPtr.getNode());
11672
11673	SDValue Load;
11674	if (ExtType == ISD::NON_EXTLOAD)
11675	Load = DAG.getLoad(VT, DL, LN0->getChain(), NewPtr,
11676	LN0->getPointerInfo().getWithOffset(PtrOff), NewAlign,
11677	LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
11678	else
11679	Load = DAG.getExtLoad(ExtType, DL, VT, LN0->getChain(), NewPtr,
11680	LN0->getPointerInfo().getWithOffset(PtrOff), ExtVT,
11681	NewAlign, LN0->getMemOperand()->getFlags(),
11682	LN0->getAAInfo());
11683
11684	// Replace the old load's chain with the new load's chain.
11685	WorklistRemover DeadNodes(*this);
11686	DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
11687
11688	// Shift the result left, if we've swallowed a left shift.
11689	SDValue Result = Load;
11690	if (ShLeftAmt != 0) {
11691	EVT ShImmTy = getShiftAmountTy(Result.getValueType());
11692	if (!isUIntN(ShImmTy.getScalarSizeInBits(), ShLeftAmt))
11693	ShImmTy = VT;
11694	// If the shift amount is as large as the result size (but, presumably,
11695	// no larger than the source) then the useful bits of the result are
11696	// zero; we can't simply return the shortened shift, because the result
11697	// of that operation is undefined.
11698	if (ShLeftAmt >= VT.getScalarSizeInBits())
11699	Result = DAG.getConstant(0, DL, VT);
11700	else
11701	Result = DAG.getNode(ISD::SHL, DL, VT,
11702	Result, DAG.getConstant(ShLeftAmt, DL, ShImmTy));
11703	}
11704
11705	if (HasShiftedOffset) {
11706	// Recalculate the shift amount after it has been altered to calculate
11707	// the offset.
11708	if (DAG.getDataLayout().isBigEndian())
11709	ShAmt = AdjustBigEndianShift(ShAmt);
11710
11711	// We're using a shifted mask, so the load now has an offset. This means
11712	// that data has been loaded into the lower bytes than it would have been
11713	// before, so we need to shl the loaded data into the correct position in the
11714	// register.
11715	SDValue ShiftC = DAG.getConstant(ShAmt, DL, VT);
11716	Result = DAG.getNode(ISD::SHL, DL, VT, Result, ShiftC);
11717	DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
11718	}
11719
11720	// Return the new loaded value.
11721	return Result;
11722	}
11723
11724	SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
11725	SDValue N0 = N->getOperand(0);
11726	SDValue N1 = N->getOperand(1);
11727	EVT VT = N->getValueType(0);
11728	EVT ExtVT = cast<VTSDNode>(N1)->getVT();
11729	unsigned VTBits = VT.getScalarSizeInBits();
11730	unsigned ExtVTBits = ExtVT.getScalarSizeInBits();
11731
11732	// sext_vector_inreg(undef) = 0 because the top bit will all be the same.
11733	if (N0.isUndef())
11734	return DAG.getConstant(0, SDLoc(N), VT);
11735
11736	// fold (sext_in_reg c1) -> c1
11737	if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
11738	return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0, N1);
11739
11740	// If the input is already sign extended, just drop the extension.
11741	if (DAG.ComputeNumSignBits(N0) >= (VTBits - ExtVTBits + 1))
11742	return N0;
11743
11744	// fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2
11745	if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
11746	ExtVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT()))
11747	return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0.getOperand(0),
11748	N1);
11749
11750	// fold (sext_in_reg (sext x)) -> (sext x)
11751	// fold (sext_in_reg (aext x)) -> (sext x)
11752	// if x is small enough or if we know that x has more than 1 sign bit and the
11753	// sign_extend_inreg is extending from one of them.
11754	if (N0.getOpcode() == ISD::SIGN_EXTEND \|\| N0.getOpcode() == ISD::ANY_EXTEND) {
11755	SDValue N00 = N0.getOperand(0);
11756	unsigned N00Bits = N00.getScalarValueSizeInBits();
11757	if ((N00Bits <= ExtVTBits \|\|
11758	(N00Bits - DAG.ComputeNumSignBits(N00)) < ExtVTBits) &&
11759	(!LegalOperations \|\| TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
11760	return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00);
11761	}
11762
11763	// fold (sext_in_reg (*_extend_vector_inreg x)) -> (sext_vector_inreg x)
11764	if ((N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG \|\|
11765	N0.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG \|\|
11766	N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) &&
11767	N0.getOperand(0).getScalarValueSizeInBits() == ExtVTBits) {
11768	if (!LegalOperations \|\|
11769	TLI.isOperationLegal(ISD::SIGN_EXTEND_VECTOR_INREG, VT))
11770	return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, SDLoc(N), VT,
11771	N0.getOperand(0));
11772	}
11773
11774	// fold (sext_in_reg (zext x)) -> (sext x)
11775	// iff we are extending the source sign bit.
11776	if (N0.getOpcode() == ISD::ZERO_EXTEND) {
11777	SDValue N00 = N0.getOperand(0);
11778	if (N00.getScalarValueSizeInBits() == ExtVTBits &&
11779	(!LegalOperations \|\| TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
11780	return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00, N1);
11781	}
11782
11783	// fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero.
11784	if (DAG.MaskedValueIsZero(N0, APInt::getOneBitSet(VTBits, ExtVTBits - 1)))
11785	return DAG.getZeroExtendInReg(N0, SDLoc(N), ExtVT);
11786
11787	// fold operands of sext_in_reg based on knowledge that the top bits are not
11788	// demanded.
11789	if (SimplifyDemandedBits(SDValue(N, 0)))
11790	return SDValue(N, 0);
11791
11792	// fold (sext_in_reg (load x)) -> (smaller sextload x)
11793	// fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits))
11794	if (SDValue NarrowLoad = ReduceLoadWidth(N))
11795	return NarrowLoad;
11796
11797	// fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24)
11798	// fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible.
11799	// We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above.
11800	if (N0.getOpcode() == ISD::SRL) {
11801	if (auto *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
11802	if (ShAmt->getAPIntValue().ule(VTBits - ExtVTBits)) {
11803	// We can turn this into an SRA iff the input to the SRL is already sign
11804	// extended enough.
11805	unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0));
11806	if (((VTBits - ExtVTBits) - ShAmt->getZExtValue()) < InSignBits)
11807	return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0.getOperand(0),
11808	N0.getOperand(1));
11809	}
11810	}
11811
11812	// fold (sext_inreg (extload x)) -> (sextload x)
11813	// If sextload is not supported by target, we can only do the combine when
11814	// load has one use. Doing otherwise can block folding the extload with other
11815	// extends that the target does support.
11816	if (ISD::isEXTLoad(N0.getNode()) &&
11817	ISD::isUNINDEXEDLoad(N0.getNode()) &&
11818	ExtVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
11819	((!LegalOperations && cast<LoadSDNode>(N0)->isSimple() &&
11820	N0.hasOneUse()) \|\|
11821	TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT))) {
11822	LoadSDNode *LN0 = cast<LoadSDNode>(N0);
11823	SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
11824	LN0->getChain(),
11825	LN0->getBasePtr(), ExtVT,
11826	LN0->getMemOperand());
11827	CombineTo(N, ExtLoad);
11828	CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
11829	AddToWorklist(ExtLoad.getNode());
11830	return SDValue(N, 0); // Return N so it doesn't get rechecked!
11831	}
11832	// fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use
11833	if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
11834	N0.hasOneUse() &&
11835	ExtVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
11836	((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) &&
11837	TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT))) {
11838	LoadSDNode *LN0 = cast<LoadSDNode>(N0);
11839	SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
11840	LN0->getChain(),
11841	LN0->getBasePtr(), ExtVT,
11842	LN0->getMemOperand());
11843	CombineTo(N, ExtLoad);
11844	CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
11845	return SDValue(N, 0); // Return N so it doesn't get rechecked!
11846	}
11847
11848	// fold (sext_inreg (masked_load x)) -> (sext_masked_load x)
11849	// ignore it if the masked load is already sign extended
11850	if (MaskedLoadSDNode *Ld = dyn_cast<MaskedLoadSDNode>(N0)) {
11851	if (ExtVT == Ld->getMemoryVT() && N0.hasOneUse() &&
11852	Ld->getExtensionType() != ISD::LoadExtType::NON_EXTLOAD &&
11853	TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT)) {
11854	SDValue ExtMaskedLoad = DAG.getMaskedLoad(
11855	VT, SDLoc(N), Ld->getChain(), Ld->getBasePtr(), Ld->getOffset(),
11856	Ld->getMask(), Ld->getPassThru(), ExtVT, Ld->getMemOperand(),
11857	Ld->getAddressingMode(), ISD::SEXTLOAD, Ld->isExpandingLoad());
11858	CombineTo(N, ExtMaskedLoad);
11859	CombineTo(N0.getNode(), ExtMaskedLoad, ExtMaskedLoad.getValue(1));
11860	return SDValue(N, 0); // Return N so it doesn't get rechecked!
11861	}
11862	}
11863
11864	// fold (sext_inreg (masked_gather x)) -> (sext_masked_gather x)
11865	if (auto *GN0 = dyn_cast<MaskedGatherSDNode>(N0)) {
11866	if (SDValue(GN0, 0).hasOneUse() &&
11867	ExtVT == GN0->getMemoryVT() &&
11868	TLI.isVectorLoadExtDesirable(SDValue(SDValue(GN0, 0)))) {
11869	SDValue Ops[] = {GN0->getChain(), GN0->getPassThru(), GN0->getMask(),
11870	GN0->getBasePtr(), GN0->getIndex(), GN0->getScale()};
11871
11872	SDValue ExtLoad = DAG.getMaskedGather(
11873	DAG.getVTList(VT, MVT::Other), ExtVT, SDLoc(N), Ops,
11874	GN0->getMemOperand(), GN0->getIndexType(), ISD::SEXTLOAD);
11875
11876	CombineTo(N, ExtLoad);
11877	CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
11878	AddToWorklist(ExtLoad.getNode());
11879	return SDValue(N, 0); // Return N so it doesn't get rechecked!
11880	}
11881	}
11882
11883	// Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16))
11884	if (ExtVTBits <= 16 && N0.getOpcode() == ISD::OR) {
11885	if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
11886	N0.getOperand(1), false))
11887	return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, BSwap, N1);
11888	}
11889
11890	return SDValue();
11891	}
11892
11893	SDValue DAGCombiner::visitSIGN_EXTEND_VECTOR_INREG(SDNode *N) {
11894	SDValue N0 = N->getOperand(0);
11895	EVT VT = N->getValueType(0);
11896
11897	// sext_vector_inreg(undef) = 0 because the top bit will all be the same.
11898	if (N0.isUndef())
11899	return DAG.getConstant(0, SDLoc(N), VT);
11900
11901	if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
11902	return Res;
11903
11904	if (SimplifyDemandedVectorElts(SDValue(N, 0)))
11905	return SDValue(N, 0);
11906
11907	return SDValue();
11908	}
11909
11910	SDValue DAGCombiner::visitZERO_EXTEND_VECTOR_INREG(SDNode *N) {
11911	SDValue N0 = N->getOperand(0);
11912	EVT VT = N->getValueType(0);
11913
11914	// zext_vector_inreg(undef) = 0 because the top bits will be zero.
11915	if (N0.isUndef())
11916	return DAG.getConstant(0, SDLoc(N), VT);
11917
11918	if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
11919	return Res;
11920
11921	if (SimplifyDemandedVectorElts(SDValue(N, 0)))
11922	return SDValue(N, 0);
11923
11924	return SDValue();
11925	}
11926
11927	SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
11928	SDValue N0 = N->getOperand(0);
11929	EVT VT = N->getValueType(0);
11930	EVT SrcVT = N0.getValueType();
11931	bool isLE = DAG.getDataLayout().isLittleEndian();
11932
11933	// noop truncate
11934	if (SrcVT == VT)
11935	return N0;
11936
11937	// fold (truncate (truncate x)) -> (truncate x)
11938	if (N0.getOpcode() == ISD::TRUNCATE)
11939	return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
11940
11941	// fold (truncate c1) -> c1
11942	if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
11943	SDValue C = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0);
11944	if (C.getNode() != N)
11945	return C;
11946	}
11947
11948	// fold (truncate (ext x)) -> (ext x) or (truncate x) or x
11949	if (N0.getOpcode() == ISD::ZERO_EXTEND \|\|
11950	N0.getOpcode() == ISD::SIGN_EXTEND \|\|
11951	N0.getOpcode() == ISD::ANY_EXTEND) {
11952	// if the source is smaller than the dest, we still need an extend.
11953	if (N0.getOperand(0).getValueType().bitsLT(VT))
11954	return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
11955	// if the source is larger than the dest, than we just need the truncate.
11956	if (N0.getOperand(0).getValueType().bitsGT(VT))
11957	return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
11958	// if the source and dest are the same type, we can drop both the extend
11959	// and the truncate.
11960	return N0.getOperand(0);
11961	}
11962
11963	// If this is anyext(trunc), don't fold it, allow ourselves to be folded.
11964	if (N->hasOneUse() && (N->use_begin()->getOpcode() == ISD::ANY_EXTEND))
11965	return SDValue();
11966
11967	// Fold extract-and-trunc into a narrow extract. For example:
11968	// i64 x = EXTRACT_VECTOR_ELT(v2i64 val, i32 1)
11969	// i32 y = TRUNCATE(i64 x)
11970	// -- becomes --
11971	// v16i8 b = BITCAST (v2i64 val)
11972	// i8 x = EXTRACT_VECTOR_ELT(v16i8 b, i32 8)
11973	//
11974	// Note: We only run this optimization after type legalization (which often
11975	// creates this pattern) and before operation legalization after which
11976	// we need to be more careful about the vector instructions that we generate.
11977	if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
11978	LegalTypes && !LegalOperations && N0->hasOneUse() && VT != MVT::i1) {
11979	EVT VecTy = N0.getOperand(0).getValueType();
11980	EVT ExTy = N0.getValueType();
11981	EVT TrTy = N->getValueType(0);
11982
11983	auto EltCnt = VecTy.getVectorElementCount();
11984	unsigned SizeRatio = ExTy.getSizeInBits()/TrTy.getSizeInBits();
11985	auto NewEltCnt = EltCnt * SizeRatio;
11986
11987	EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, NewEltCnt);
11988	assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size")((NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size" ) ? static_cast<void> (0) : __assert_fail ("NVT.getSizeInBits() == VecTy.getSizeInBits() && \"Invalid Size\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 11988, __PRETTY_FUNCTION__));
11989
11990	SDValue EltNo = N0->getOperand(1);
11991	if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) {
11992	int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
11993	int Index = isLE ? (EltSizeRatio) : (EltSizeRatio + (SizeRatio-1));
11994
11995	SDLoc DL(N);
11996	return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, TrTy,
11997	DAG.getBitcast(NVT, N0.getOperand(0)),
11998	DAG.getVectorIdxConstant(Index, DL));
11999	}
12000	}
12001
12002	// trunc (select c, a, b) -> select c, (trunc a), (trunc b)
12003	if (N0.getOpcode() == ISD::SELECT && N0.hasOneUse()) {
12004	if ((!LegalOperations \|\| TLI.isOperationLegal(ISD::SELECT, SrcVT)) &&
12005	TLI.isTruncateFree(SrcVT, VT)) {
12006	SDLoc SL(N0);
12007	SDValue Cond = N0.getOperand(0);
12008	SDValue TruncOp0 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
12009	SDValue TruncOp1 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(2));
12010	return DAG.getNode(ISD::SELECT, SDLoc(N), VT, Cond, TruncOp0, TruncOp1);
12011	}
12012	}
12013
12014	// trunc (shl x, K) -> shl (trunc x), K => K < VT.getScalarSizeInBits()
12015	if (N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
12016	(!LegalOperations \|\| TLI.isOperationLegal(ISD::SHL, VT)) &&
12017	TLI.isTypeDesirableForOp(ISD::SHL, VT)) {
12018	SDValue Amt = N0.getOperand(1);
12019	KnownBits Known = DAG.computeKnownBits(Amt);
12020	unsigned Size = VT.getScalarSizeInBits();
12021	if (Known.getBitWidth() - Known.countMinLeadingZeros() <= Log2_32(Size)) {
12022	SDLoc SL(N);
12023	EVT AmtVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
12024
12025	SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
12026	if (AmtVT != Amt.getValueType()) {
12027	Amt = DAG.getZExtOrTrunc(Amt, SL, AmtVT);
12028	AddToWorklist(Amt.getNode());
12029	}
12030	return DAG.getNode(ISD::SHL, SL, VT, Trunc, Amt);
12031	}
12032	}
12033
12034	if (SDValue V = foldSubToUSubSat(VT, N0.getNode()))
12035	return V;
12036
12037	// Attempt to pre-truncate BUILD_VECTOR sources.
12038	if (N0.getOpcode() == ISD::BUILD_VECTOR && !LegalOperations &&
12039	TLI.isTruncateFree(SrcVT.getScalarType(), VT.getScalarType()) &&
12040	// Avoid creating illegal types if running after type legalizer.
12041	(!LegalTypes \|\| TLI.isTypeLegal(VT.getScalarType()))) {
12042	SDLoc DL(N);
12043	EVT SVT = VT.getScalarType();
12044	SmallVector<SDValue, 8> TruncOps;
12045	for (const SDValue &Op : N0->op_values()) {
12046	SDValue TruncOp = DAG.getNode(ISD::TRUNCATE, DL, SVT, Op);
12047	TruncOps.push_back(TruncOp);
12048	}
12049	return DAG.getBuildVector(VT, DL, TruncOps);
12050	}
12051
12052	// Fold a series of buildvector, bitcast, and truncate if possible.
12053	// For example fold
12054	// (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to
12055	// (2xi32 (buildvector x, y)).
12056	if (Level == AfterLegalizeVectorOps && VT.isVector() &&
12057	N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
12058	N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
12059	N0.getOperand(0).hasOneUse()) {
12060	SDValue BuildVect = N0.getOperand(0);
12061	EVT BuildVectEltTy = BuildVect.getValueType().getVectorElementType();
12062	EVT TruncVecEltTy = VT.getVectorElementType();
12063
12064	// Check that the element types match.
12065	if (BuildVectEltTy == TruncVecEltTy) {
12066	// Now we only need to compute the offset of the truncated elements.
12067	unsigned BuildVecNumElts = BuildVect.getNumOperands();
12068	unsigned TruncVecNumElts = VT.getVectorNumElements();
12069	unsigned TruncEltOffset = BuildVecNumElts / TruncVecNumElts;
12070
12071	assert((BuildVecNumElts % TruncVecNumElts) == 0 &&(((BuildVecNumElts % TruncVecNumElts) == 0 && "Invalid number of elements" ) ? static_cast<void> (0) : __assert_fail ("(BuildVecNumElts % TruncVecNumElts) == 0 && \"Invalid number of elements\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 12072, __PRETTY_FUNCTION__))
12072	"Invalid number of elements")(((BuildVecNumElts % TruncVecNumElts) == 0 && "Invalid number of elements" ) ? static_cast<void> (0) : __assert_fail ("(BuildVecNumElts % TruncVecNumElts) == 0 && \"Invalid number of elements\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 12072, __PRETTY_FUNCTION__));
12073
12074	SmallVector<SDValue, 8> Opnds;
12075	for (unsigned i = 0, e = BuildVecNumElts; i != e; i += TruncEltOffset)
12076	Opnds.push_back(BuildVect.getOperand(i));
12077
12078	return DAG.getBuildVector(VT, SDLoc(N), Opnds);
12079	}
12080	}
12081
12082	// See if we can simplify the input to this truncate through knowledge that
12083	// only the low bits are being used.
12084	// For example "trunc (or (shl x, 8), y)" // -> trunc y
12085	// Currently we only perform this optimization on scalars because vectors
12086	// may have different active low bits.
12087	if (!VT.isVector()) {
12088	APInt Mask =
12089	APInt::getLowBitsSet(N0.getValueSizeInBits(), VT.getSizeInBits());
12090	if (SDValue Shorter = DAG.GetDemandedBits(N0, Mask))
12091	return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Shorter);
12092	}
12093
12094	// fold (truncate (load x)) -> (smaller load x)
12095	// fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
12096	if (!LegalTypes \|\| TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) {
12097	if (SDValue Reduced = ReduceLoadWidth(N))
12098	return Reduced;
12099
12100	// Handle the case where the load remains an extending load even
12101	// after truncation.
12102	if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) {
12103	LoadSDNode *LN0 = cast<LoadSDNode>(N0);
12104	if (LN0->isSimple() && LN0->getMemoryVT().bitsLT(VT)) {
12105	SDValue NewLoad = DAG.getExtLoad(LN0->getExtensionType(), SDLoc(LN0),
12106	VT, LN0->getChain(), LN0->getBasePtr(),
12107	LN0->getMemoryVT(),
12108	LN0->getMemOperand());
12109	DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLoad.getValue(1));
12110	return NewLoad;
12111	}
12112	}
12113	}
12114
12115	// fold (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...)),
12116	// where ... are all 'undef'.
12117	if (N0.getOpcode() == ISD::CONCAT_VECTORS && !LegalTypes) {
12118	SmallVector<EVT, 8> VTs;
12119	SDValue V;
12120	unsigned Idx = 0;
12121	unsigned NumDefs = 0;
12122
12123	for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) {
12124	SDValue X = N0.getOperand(i);
12125	if (!X.isUndef()) {
12126	V = X;
12127	Idx = i;
12128	NumDefs++;
12129	}
12130	// Stop if more than one members are non-undef.
12131	if (NumDefs > 1)
12132	break;
12133
12134	VTs.push_back(EVT::getVectorVT(*DAG.getContext(),
12135	VT.getVectorElementType(),
12136	X.getValueType().getVectorElementCount()));
12137	}
12138
12139	if (NumDefs == 0)
12140	return DAG.getUNDEF(VT);
12141
12142	if (NumDefs == 1) {
12143	assert(V.getNode() && "The single defined operand is empty!")((V.getNode() && "The single defined operand is empty!" ) ? static_cast<void> (0) : __assert_fail ("V.getNode() && \"The single defined operand is empty!\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 12143, __PRETTY_FUNCTION__));
12144	SmallVector<SDValue, 8> Opnds;
12145	for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
12146	if (i != Idx) {
12147	Opnds.push_back(DAG.getUNDEF(VTs[i]));
12148	continue;
12149	}
12150	SDValue NV = DAG.getNode(ISD::TRUNCATE, SDLoc(V), VTs[i], V);
12151	AddToWorklist(NV.getNode());
12152	Opnds.push_back(NV);
12153	}
12154	return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Opnds);
12155	}
12156	}
12157
12158	// Fold truncate of a bitcast of a vector to an extract of the low vector
12159	// element.
12160	//
12161	// e.g. trunc (i64 (bitcast v2i32:x)) -> extract_vector_elt v2i32:x, idx
12162	if (N0.getOpcode() == ISD::BITCAST && !VT.isVector()) {
12163	SDValue VecSrc = N0.getOperand(0);
12164	EVT VecSrcVT = VecSrc.getValueType();
12165	if (VecSrcVT.isVector() && VecSrcVT.getScalarType() == VT &&
12166	(!LegalOperations \|\|
12167	TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VecSrcVT))) {
12168	SDLoc SL(N);
12169
12170	unsigned Idx = isLE ? 0 : VecSrcVT.getVectorNumElements() - 1;
12171	return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, VT, VecSrc,
12172	DAG.getVectorIdxConstant(Idx, SL));
12173	}
12174	}
12175
12176	// Simplify the operands using demanded-bits information.
12177	if (SimplifyDemandedBits(SDValue(N, 0)))
12178	return SDValue(N, 0);
12179
12180	// (trunc adde(X, Y, Carry)) -> (adde trunc(X), trunc(Y), Carry)
12181	// (trunc addcarry(X, Y, Carry)) -> (addcarry trunc(X), trunc(Y), Carry)
12182	// When the adde's carry is not used.
12183	if ((N0.getOpcode() == ISD::ADDE \|\| N0.getOpcode() == ISD::ADDCARRY) &&
12184	N0.hasOneUse() && !N0.getNode()->hasAnyUseOfValue(1) &&
12185	// We only do for addcarry before legalize operation
12186	((!LegalOperations && N0.getOpcode() == ISD::ADDCARRY) \|\|
12187	TLI.isOperationLegal(N0.getOpcode(), VT))) {
12188	SDLoc SL(N);
12189	auto X = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
12190	auto Y = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
12191	auto VTs = DAG.getVTList(VT, N0->getValueType(1));
12192	return DAG.getNode(N0.getOpcode(), SL, VTs, X, Y, N0.getOperand(2));
12193	}
12194
12195	// fold (truncate (extract_subvector(ext x))) ->
12196	// (extract_subvector x)
12197	// TODO: This can be generalized to cover cases where the truncate and extract
12198	// do not fully cancel each other out.
12199	if (!LegalTypes && N0.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
12200	SDValue N00 = N0.getOperand(0);
12201	if (N00.getOpcode() == ISD::SIGN_EXTEND \|\|
12202	N00.getOpcode() == ISD::ZERO_EXTEND \|\|
12203	N00.getOpcode() == ISD::ANY_EXTEND) {
12204	if (N00.getOperand(0)->getValueType(0).getVectorElementType() ==
12205	VT.getVectorElementType())
12206	return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N0->getOperand(0)), VT,
12207	N00.getOperand(0), N0.getOperand(1));
12208	}
12209	}
12210
12211	if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
12212	return NewVSel;
12213
12214	// Narrow a suitable binary operation with a non-opaque constant operand by
12215	// moving it ahead of the truncate. This is limited to pre-legalization
12216	// because targets may prefer a wider type during later combines and invert
12217	// this transform.
12218	switch (N0.getOpcode()) {
12219	case ISD::ADD:
12220	case ISD::SUB:
12221	case ISD::MUL:
12222	case ISD::AND:
12223	case ISD::OR:
12224	case ISD::XOR:
12225	if (!LegalOperations && N0.hasOneUse() &&
12226	(isConstantOrConstantVector(N0.getOperand(0), true) \|\|
12227	isConstantOrConstantVector(N0.getOperand(1), true))) {
12228	// TODO: We already restricted this to pre-legalization, but for vectors
12229	// we are extra cautious to not create an unsupported operation.
12230	// Target-specific changes are likely needed to avoid regressions here.
12231	if (VT.isScalarInteger() \|\| TLI.isOperationLegal(N0.getOpcode(), VT)) {
12232	SDLoc DL(N);
12233	SDValue NarrowL = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
12234	SDValue NarrowR = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(1));
12235	return DAG.getNode(N0.getOpcode(), DL, VT, NarrowL, NarrowR);
12236	}
12237	}
12238	break;
12239	case ISD::USUBSAT:
12240	// Truncate the USUBSAT only if LHS is a known zero-extension, its not
12241	// enough to know that the upper bits are zero we must ensure that we don't
12242	// introduce an extra truncate.
12243	if (!LegalOperations && N0.hasOneUse() &&
12244	N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
12245	N0.getOperand(0).getOperand(0).getScalarValueSizeInBits() <=
12246	VT.getScalarSizeInBits() &&
12247	hasOperation(N0.getOpcode(), VT)) {
12248	return getTruncatedUSUBSAT(VT, SrcVT, N0.getOperand(0), N0.getOperand(1),
12249	DAG, SDLoc(N));
12250	}
12251	break;
12252	}
12253
12254	return SDValue();
12255	}
12256
12257	static SDNode getBuildPairElt(SDNode N, unsigned i) {
12258	SDValue Elt = N->getOperand(i);
12259	if (Elt.getOpcode() != ISD::MERGE_VALUES)
12260	return Elt.getNode();
12261	return Elt.getOperand(Elt.getResNo()).getNode();
12262	}
12263
12264	/// build_pair (load, load) -> load
12265	/// if load locations are consecutive.
12266	SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
12267	assert(N->getOpcode() == ISD::BUILD_PAIR)((N->getOpcode() == ISD::BUILD_PAIR) ? static_cast<void > (0) : __assert_fail ("N->getOpcode() == ISD::BUILD_PAIR" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 12267, __PRETTY_FUNCTION__));
12268
12269	LoadSDNode *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
12270	LoadSDNode *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));
12271
12272	// A BUILD_PAIR is always having the least significant part in elt 0 and the
12273	// most significant part in elt 1. So when combining into one large load, we
12274	// need to consider the endianness.
12275	if (DAG.getDataLayout().isBigEndian())
12276	std::swap(LD1, LD2);
12277
12278	if (!LD1 \|\| !LD2 \|\| !ISD::isNON_EXTLoad(LD1) \|\| !LD1->hasOneUse() \|\|
12279	LD1->getAddressSpace() != LD2->getAddressSpace())
12280	return SDValue();
12281	EVT LD1VT = LD1->getValueType(0);
12282	unsigned LD1Bytes = LD1VT.getStoreSize();
12283	if (ISD::isNON_EXTLoad(LD2) && LD2->hasOneUse() &&
12284	DAG.areNonVolatileConsecutiveLoads(LD2, LD1, LD1Bytes, 1)) {
12285	Align Alignment = LD1->getAlign();
12286	Align NewAlign = DAG.getDataLayout().getABITypeAlign(
12287	VT.getTypeForEVT(*DAG.getContext()));
12288
12289	if (NewAlign <= Alignment &&
12290	(!LegalOperations \|\| TLI.isOperationLegal(ISD::LOAD, VT)))
12291	return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), LD1->getBasePtr(),
12292	LD1->getPointerInfo(), Alignment);
12293	}
12294
12295	return SDValue();
12296	}
12297
12298	static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG) {
12299	// On little-endian machines, bitcasting from ppcf128 to i128 does swap the Hi
12300	// and Lo parts; on big-endian machines it doesn't.
12301	return DAG.getDataLayout().isBigEndian() ? 1 : 0;
12302	}
12303
12304	static SDValue foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
12305	const TargetLowering &TLI) {
12306	// If this is not a bitcast to an FP type or if the target doesn't have
12307	// IEEE754-compliant FP logic, we're done.
12308	EVT VT = N->getValueType(0);
12309	if (!VT.isFloatingPoint() \|\| !TLI.hasBitPreservingFPLogic(VT))
12310	return SDValue();
12311
12312	// TODO: Handle cases where the integer constant is a different scalar
12313	// bitwidth to the FP.
12314	SDValue N0 = N->getOperand(0);
12315	EVT SourceVT = N0.getValueType();
12316	if (VT.getScalarSizeInBits() != SourceVT.getScalarSizeInBits())
12317	return SDValue();
12318
12319	unsigned FPOpcode;
12320	APInt SignMask;
12321	switch (N0.getOpcode()) {
12322	case ISD::AND:
12323	FPOpcode = ISD::FABS;
12324	SignMask = ~APInt::getSignMask(SourceVT.getScalarSizeInBits());
12325	break;
12326	case ISD::XOR:
12327	FPOpcode = ISD::FNEG;
12328	SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
12329	break;
12330	case ISD::OR:
12331	FPOpcode = ISD::FABS;
12332	SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
12333	break;
12334	default:
12335	return SDValue();
12336	}
12337
12338	// Fold (bitcast int (and (bitcast fp X to int), 0x7fff...) to fp) -> fabs X
12339	// Fold (bitcast int (xor (bitcast fp X to int), 0x8000...) to fp) -> fneg X
12340	// Fold (bitcast int (or (bitcast fp X to int), 0x8000...) to fp) ->
12341	// fneg (fabs X)
12342	SDValue LogicOp0 = N0.getOperand(0);
12343	ConstantSDNode *LogicOp1 = isConstOrConstSplat(N0.getOperand(1), true);
12344	if (LogicOp1 && LogicOp1->getAPIntValue() == SignMask &&
12345	LogicOp0.getOpcode() == ISD::BITCAST &&
12346	LogicOp0.getOperand(0).getValueType() == VT) {
12347	SDValue FPOp = DAG.getNode(FPOpcode, SDLoc(N), VT, LogicOp0.getOperand(0));
12348	NumFPLogicOpsConv++;
12349	if (N0.getOpcode() == ISD::OR)
12350	return DAG.getNode(ISD::FNEG, SDLoc(N), VT, FPOp);
12351	return FPOp;
12352	}
12353
12354	return SDValue();
12355	}
12356
12357	SDValue DAGCombiner::visitBITCAST(SDNode *N) {
12358	SDValue N0 = N->getOperand(0);
12359	EVT VT = N->getValueType(0);
12360
12361	if (N0.isUndef())
12362	return DAG.getUNDEF(VT);
12363
12364	// If the input is a BUILD_VECTOR with all constant elements, fold this now.
12365	// Only do this before legalize types, unless both types are integer and the
12366	// scalar type is legal. Only do this before legalize ops, since the target
12367	// maybe depending on the bitcast.
12368	// First check to see if this is all constant.
12369	// TODO: Support FP bitcasts after legalize types.
12370	if (VT.isVector() &&
12371	(!LegalTypes \|\|
12372	(!LegalOperations && VT.isInteger() && N0.getValueType().isInteger() &&
12373	TLI.isTypeLegal(VT.getVectorElementType()))) &&
12374	N0.getOpcode() == ISD::BUILD_VECTOR && N0.getNode()->hasOneUse() &&
12375	cast<BuildVectorSDNode>(N0)->isConstant())
12376	return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(),
12377	VT.getVectorElementType());
12378
12379	// If the input is a constant, let getNode fold it.
12380	if (isa<ConstantSDNode>(N0) \|\| isa<ConstantFPSDNode>(N0)) {
12381	// If we can't allow illegal operations, we need to check that this is just
12382	// a fp -> int or int -> conversion and that the resulting operation will
12383	// be legal.
12384	if (!LegalOperations \|\|
12385	(isa<ConstantSDNode>(N0) && VT.isFloatingPoint() && !VT.isVector() &&
12386	TLI.isOperationLegal(ISD::ConstantFP, VT)) \|\|
12387	(isa<ConstantFPSDNode>(N0) && VT.isInteger() && !VT.isVector() &&
12388	TLI.isOperationLegal(ISD::Constant, VT))) {
12389	SDValue C = DAG.getBitcast(VT, N0);
12390	if (C.getNode() != N)
12391	return C;
12392	}
12393	}
12394
12395	// (conv (conv x, t1), t2) -> (conv x, t2)
12396	if (N0.getOpcode() == ISD::BITCAST)
12397	return DAG.getBitcast(VT, N0.getOperand(0));
12398
12399	// fold (conv (load x)) -> (load (conv*)x)
12400	// If the resultant load doesn't need a higher alignment than the original!
12401	if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
12402	// Do not remove the cast if the types differ in endian layout.
12403	TLI.hasBigEndianPartOrdering(N0.getValueType(), DAG.getDataLayout()) ==
12404	TLI.hasBigEndianPartOrdering(VT, DAG.getDataLayout()) &&
12405	// If the load is volatile, we only want to change the load type if the
12406	// resulting load is legal. Otherwise we might increase the number of
12407	// memory accesses. We don't care if the original type was legal or not
12408	// as we assume software couldn't rely on the number of accesses of an
12409	// illegal type.
12410	((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) \|\|
12411	TLI.isOperationLegal(ISD::LOAD, VT))) {
12412	LoadSDNode *LN0 = cast<LoadSDNode>(N0);
12413
12414	if (TLI.isLoadBitCastBeneficial(N0.getValueType(), VT, DAG,
12415	*LN0->getMemOperand())) {
12416	SDValue Load =
12417	DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(),
12418	LN0->getPointerInfo(), LN0->getAlign(),
12419	LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
12420	DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
12421	return Load;
12422	}
12423	}
12424
12425	if (SDValue V = foldBitcastedFPLogic(N, DAG, TLI))
12426	return V;
12427
12428	// fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
12429	// fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
12430	//
12431	// For ppc_fp128:
12432	// fold (bitcast (fneg x)) ->
12433	// flipbit = signbit
12434	// (xor (bitcast x) (build_pair flipbit, flipbit))
12435	//
12436	// fold (bitcast (fabs x)) ->
12437	// flipbit = (and (extract_element (bitcast x), 0), signbit)
12438	// (xor (bitcast x) (build_pair flipbit, flipbit))
12439	// This often reduces constant pool loads.
12440	if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(N0.getValueType())) \|\|
12441	(N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(N0.getValueType()))) &&
12442	N0.getNode()->hasOneUse() && VT.isInteger() &&
12443	!VT.isVector() && !N0.getValueType().isVector()) {
12444	SDValue NewConv = DAG.getBitcast(VT, N0.getOperand(0));
12445	AddToWorklist(NewConv.getNode());
12446
12447	SDLoc DL(N);
12448	if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
12449	assert(VT.getSizeInBits() == 128)((VT.getSizeInBits() == 128) ? static_cast<void> (0) : __assert_fail ("VT.getSizeInBits() == 128", "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 12449, __PRETTY_FUNCTION__));
12450	SDValue SignBit = DAG.getConstant(
12451	APInt::getSignMask(VT.getSizeInBits() / 2), SDLoc(N0), MVT::i64);
12452	SDValue FlipBit;
12453	if (N0.getOpcode() == ISD::FNEG) {
12454	FlipBit = SignBit;
12455	AddToWorklist(FlipBit.getNode());
12456	} else {
12457	assert(N0.getOpcode() == ISD::FABS)((N0.getOpcode() == ISD::FABS) ? static_cast<void> (0) : __assert_fail ("N0.getOpcode() == ISD::FABS", "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 12457, __PRETTY_FUNCTION__));
12458	SDValue Hi =
12459	DAG.getNode(ISD::EXTRACT_ELEMENT, SDLoc(NewConv), MVT::i64, NewConv,
12460	DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
12461	SDLoc(NewConv)));
12462	AddToWorklist(Hi.getNode());
12463	FlipBit = DAG.getNode(ISD::AND, SDLoc(N0), MVT::i64, Hi, SignBit);
12464	AddToWorklist(FlipBit.getNode());
12465	}
12466	SDValue FlipBits =
12467	DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
12468	AddToWorklist(FlipBits.getNode());
12469	return DAG.getNode(ISD::XOR, DL, VT, NewConv, FlipBits);
12470	}
12471	APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
12472	if (N0.getOpcode() == ISD::FNEG)
12473	return DAG.getNode(ISD::XOR, DL, VT,
12474	NewConv, DAG.getConstant(SignBit, DL, VT));
12475	assert(N0.getOpcode() == ISD::FABS)((N0.getOpcode() == ISD::FABS) ? static_cast<void> (0) : __assert_fail ("N0.getOpcode() == ISD::FABS", "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 12475, __PRETTY_FUNCTION__));
12476	return DAG.getNode(ISD::AND, DL, VT,
12477	NewConv, DAG.getConstant(~SignBit, DL, VT));
12478	}
12479
12480	// fold (bitconvert (fcopysign cst, x)) ->
12481	// (or (and (bitconvert x), sign), (and cst, (not sign)))
12482	// Note that we don't handle (copysign x, cst) because this can always be
12483	// folded to an fneg or fabs.
12484	//
12485	// For ppc_fp128:
12486	// fold (bitcast (fcopysign cst, x)) ->
12487	// flipbit = (and (extract_element
12488	// (xor (bitcast cst), (bitcast x)), 0),
12489	// signbit)
12490	// (xor (bitcast cst) (build_pair flipbit, flipbit))
12491	if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse() &&
12492	isa<ConstantFPSDNode>(N0.getOperand(0)) &&
12493	VT.isInteger() && !VT.isVector()) {
12494	unsigned OrigXWidth = N0.getOperand(1).getValueSizeInBits();
12495	EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth);
12496	if (isTypeLegal(IntXVT)) {
12497	SDValue X = DAG.getBitcast(IntXVT, N0.getOperand(1));
12498	AddToWorklist(X.getNode());
12499
12500	// If X has a different width than the result/lhs, sext it or truncate it.
12501	unsigned VTWidth = VT.getSizeInBits();
12502	if (OrigXWidth < VTWidth) {
12503	X = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, X);
12504	AddToWorklist(X.getNode());
12505	} else if (OrigXWidth > VTWidth) {
12506	// To get the sign bit in the right place, we have to shift it right
12507	// before truncating.
12508	SDLoc DL(X);
12509	X = DAG.getNode(ISD::SRL, DL,
12510	X.getValueType(), X,
12511	DAG.getConstant(OrigXWidth-VTWidth, DL,
12512	X.getValueType()));
12513	AddToWorklist(X.getNode());
12514	X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
12515	AddToWorklist(X.getNode());
12516	}
12517
12518	if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
12519	APInt SignBit = APInt::getSignMask(VT.getSizeInBits() / 2);
12520	SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
12521	AddToWorklist(Cst.getNode());
12522	SDValue X = DAG.getBitcast(VT, N0.getOperand(1));
12523	AddToWorklist(X.getNode());
12524	SDValue XorResult = DAG.getNode(ISD::XOR, SDLoc(N0), VT, Cst, X);
12525	AddToWorklist(XorResult.getNode());
12526	SDValue XorResult64 = DAG.getNode(
12527	ISD::EXTRACT_ELEMENT, SDLoc(XorResult), MVT::i64, XorResult,
12528	DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
12529	SDLoc(XorResult)));
12530	AddToWorklist(XorResult64.getNode());
12531	SDValue FlipBit =
12532	DAG.getNode(ISD::AND, SDLoc(XorResult64), MVT::i64, XorResult64,
12533	DAG.getConstant(SignBit, SDLoc(XorResult64), MVT::i64));
12534	AddToWorklist(FlipBit.getNode());
12535	SDValue FlipBits =
12536	DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
12537	AddToWorklist(FlipBits.getNode());
12538	return DAG.getNode(ISD::XOR, SDLoc(N), VT, Cst, FlipBits);
12539	}
12540	APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
12541	X = DAG.getNode(ISD::AND, SDLoc(X), VT,
12542	X, DAG.getConstant(SignBit, SDLoc(X), VT));
12543	AddToWorklist(X.getNode());
12544
12545	SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
12546	Cst = DAG.getNode(ISD::AND, SDLoc(Cst), VT,
12547	Cst, DAG.getConstant(~SignBit, SDLoc(Cst), VT));
12548	AddToWorklist(Cst.getNode());
12549
12550	return DAG.getNode(ISD::OR, SDLoc(N), VT, X, Cst);
12551	}
12552	}
12553
12554	// bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive.
12555	if (N0.getOpcode() == ISD::BUILD_PAIR)
12556	if (SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT))
12557	return CombineLD;
12558
12559	// Remove double bitcasts from shuffles - this is often a legacy of
12560	// XformToShuffleWithZero being used to combine bitmaskings (of
12561	// float vectors bitcast to integer vectors) into shuffles.
12562	// bitcast(shuffle(bitcast(s0),bitcast(s1))) -> shuffle(s0,s1)
12563	if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT) && VT.isVector() &&
12564	N0->getOpcode() == ISD::VECTOR_SHUFFLE && N0.hasOneUse() &&
12565	VT.getVectorNumElements() >= N0.getValueType().getVectorNumElements() &&
12566	!(VT.getVectorNumElements() % N0.getValueType().getVectorNumElements())) {
12567	ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N0);
12568
12569	// If operands are a bitcast, peek through if it casts the original VT.
12570	// If operands are a constant, just bitcast back to original VT.
12571	auto PeekThroughBitcast = [&](SDValue Op) {
12572	if (Op.getOpcode() == ISD::BITCAST &&
12573	Op.getOperand(0).getValueType() == VT)
12574	return SDValue(Op.getOperand(0));
12575	if (Op.isUndef() \|\| ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) \|\|
12576	ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()))
12577	return DAG.getBitcast(VT, Op);
12578	return SDValue();
12579	};
12580
12581	// FIXME: If either input vector is bitcast, try to convert the shuffle to
12582	// the result type of this bitcast. This would eliminate at least one
12583	// bitcast. See the transform in InstCombine.
12584	SDValue SV0 = PeekThroughBitcast(N0->getOperand(0));
12585	SDValue SV1 = PeekThroughBitcast(N0->getOperand(1));
12586	if (!(SV0 && SV1))
12587	return SDValue();
12588
12589	int MaskScale =
12590	VT.getVectorNumElements() / N0.getValueType().getVectorNumElements();
12591	SmallVector<int, 8> NewMask;
12592	for (int M : SVN->getMask())
12593	for (int i = 0; i != MaskScale; ++i)
12594	NewMask.push_back(M < 0 ? -1 : M * MaskScale + i);
12595
12596	SDValue LegalShuffle =
12597	TLI.buildLegalVectorShuffle(VT, SDLoc(N), SV0, SV1, NewMask, DAG);
12598	if (LegalShuffle)
12599	return LegalShuffle;
12600	}
12601
12602	return SDValue();
12603	}
12604
12605	SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {
12606	EVT VT = N->getValueType(0);
12607	return CombineConsecutiveLoads(N, VT);
12608	}
12609
12610	SDValue DAGCombiner::visitFREEZE(SDNode *N) {
12611	SDValue N0 = N->getOperand(0);
12612
12613	// (freeze (freeze x)) -> (freeze x)
12614	if (N0.getOpcode() == ISD::FREEZE)
12615	return N0;
12616
12617	// If the input is a constant, return it.
12618	if (isa<ConstantSDNode>(N0) \|\| isa<ConstantFPSDNode>(N0))
12619	return N0;
12620
12621	return SDValue();
12622	}
12623
12624	/// We know that BV is a build_vector node with Constant, ConstantFP or Undef
12625	/// operands. DstEltVT indicates the destination element value type.
12626	SDValue DAGCombiner::
12627	ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
12628	EVT SrcEltVT = BV->getValueType(0).getVectorElementType();
12629
12630	// If this is already the right type, we're done.
12631	if (SrcEltVT == DstEltVT) return SDValue(BV, 0);
12632
12633	unsigned SrcBitSize = SrcEltVT.getSizeInBits();
12634	unsigned DstBitSize = DstEltVT.getSizeInBits();
12635
12636	// If this is a conversion of N elements of one type to N elements of another
12637	// type, convert each element. This handles FP<->INT cases.
12638	if (SrcBitSize == DstBitSize) {
12639	SmallVector<SDValue, 8> Ops;
12640	for (SDValue Op : BV->op_values()) {
12641	// If the vector element type is not legal, the BUILD_VECTOR operands
12642	// are promoted and implicitly truncated. Make that explicit here.
12643	if (Op.getValueType() != SrcEltVT)
12644	Op = DAG.getNode(ISD::TRUNCATE, SDLoc(BV), SrcEltVT, Op);
12645	Ops.push_back(DAG.getBitcast(DstEltVT, Op));
12646	AddToWorklist(Ops.back().getNode());
12647	}
12648	EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
12649	BV->getValueType(0).getVectorNumElements());
12650	return DAG.getBuildVector(VT, SDLoc(BV), Ops);
12651	}
12652
12653	// Otherwise, we're growing or shrinking the elements. To avoid having to
12654	// handle annoying details of growing/shrinking FP values, we convert them to
12655	// int first.
12656	if (SrcEltVT.isFloatingPoint()) {
12657	// Convert the input float vector to a int vector where the elements are the
12658	// same sizes.
12659	EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits());
12660	BV = ConstantFoldBITCASTofBUILD_VECTOR(BV, IntVT).getNode();
12661	SrcEltVT = IntVT;
12662	}
12663
12664	// Now we know the input is an integer vector. If the output is a FP type,
12665	// convert to integer first, then to FP of the right size.
12666	if (DstEltVT.isFloatingPoint()) {
12667	EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits());
12668	SDNode *Tmp = ConstantFoldBITCASTofBUILD_VECTOR(BV, TmpVT).getNode();
12669
12670	// Next, convert to FP elements of the same size.
12671	return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT);
12672	}
12673
12674	SDLoc DL(BV);
12675
12676	// Okay, we know the src/dst types are both integers of differing types.
12677	// Handling growing first.
12678	assert(SrcEltVT.isInteger() && DstEltVT.isInteger())((SrcEltVT.isInteger() && DstEltVT.isInteger()) ? static_cast <void> (0) : __assert_fail ("SrcEltVT.isInteger() && DstEltVT.isInteger()" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 12678, __PRETTY_FUNCTION__));
12679	if (SrcBitSize < DstBitSize) {
12680	unsigned NumInputsPerOutput = DstBitSize/SrcBitSize;
12681
12682	SmallVector<SDValue, 8> Ops;
12683	for (unsigned i = 0, e = BV->getNumOperands(); i != e;
12684	i += NumInputsPerOutput) {
12685	bool isLE = DAG.getDataLayout().isLittleEndian();
12686	APInt NewBits = APInt(DstBitSize, 0);
12687	bool EltIsUndef = true;
12688	for (unsigned j = 0; j != NumInputsPerOutput; ++j) {
12689	// Shift the previously computed bits over.
12690	NewBits <<= SrcBitSize;
12691	SDValue Op = BV->getOperand(i+ (isLE ? (NumInputsPerOutput-j-1) : j));
12692	if (Op.isUndef()) continue;
12693	EltIsUndef = false;
12694
12695	NewBits \|= cast<ConstantSDNode>(Op)->getAPIntValue().
12696	zextOrTrunc(SrcBitSize).zext(DstBitSize);
12697	}
12698
12699	if (EltIsUndef)
12700	Ops.push_back(DAG.getUNDEF(DstEltVT));
12701	else
12702	Ops.push_back(DAG.getConstant(NewBits, DL, DstEltVT));
12703	}
12704
12705	EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size());
12706	return DAG.getBuildVector(VT, DL, Ops);
12707	}
12708
12709	// Finally, this must be the case where we are shrinking elements: each input
12710	// turns into multiple outputs.
12711	unsigned NumOutputsPerInput = SrcBitSize/DstBitSize;
12712	EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
12713	NumOutputsPerInput*BV->getNumOperands());
12714	SmallVector<SDValue, 8> Ops;
12715
12716	for (const SDValue &Op : BV->op_values()) {
12717	if (Op.isUndef()) {
12718	Ops.append(NumOutputsPerInput, DAG.getUNDEF(DstEltVT));
12719	continue;
12720	}
12721
12722	APInt OpVal = cast<ConstantSDNode>(Op)->
12723	getAPIntValue().zextOrTrunc(SrcBitSize);
12724
12725	for (unsigned j = 0; j != NumOutputsPerInput; ++j) {
12726	APInt ThisVal = OpVal.trunc(DstBitSize);
12727	Ops.push_back(DAG.getConstant(ThisVal, DL, DstEltVT));
12728	OpVal.lshrInPlace(DstBitSize);
12729	}
12730
12731	// For big endian targets, swap the order of the pieces of each element.
12732	if (DAG.getDataLayout().isBigEndian())
12733	std::reverse(Ops.end()-NumOutputsPerInput, Ops.end());
12734	}
12735
12736	return DAG.getBuildVector(VT, DL, Ops);
12737	}
12738
12739	static bool isContractable(SDNode *N) {
12740	SDNodeFlags F = N->getFlags();
12741	return F.hasAllowContract() \|\| F.hasAllowReassociation();
12742	}
12743
12744	/// Try to perform FMA combining on a given FADD node.
12745	SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
12746	SDValue N0 = N->getOperand(0);
12747	SDValue N1 = N->getOperand(1);
12748	EVT VT = N->getValueType(0);
12749	SDLoc SL(N);
12750
12751	const TargetOptions &Options = DAG.getTarget().Options;
12752
12753	// Floating-point multiply-add with intermediate rounding.
12754	bool HasFMAD = (LegalOperations && TLI.isFMADLegal(DAG, N));
12755
12756	// Floating-point multiply-add without intermediate rounding.
12757	bool HasFMA =
12758	TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&
12759	(!LegalOperations \|\| TLI.isOperationLegalOrCustom(ISD::FMA, VT));
12760
12761	// No valid opcode, do not combine.
12762	if (!HasFMAD && !HasFMA)
12763	return SDValue();
12764
12765	bool CanFuse = Options.UnsafeFPMath \|\| isContractable(N);
12766	bool CanReassociate =
12767	Options.UnsafeFPMath \|\| N->getFlags().hasAllowReassociation();
12768	bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast \|\|
12769	CanFuse \|\| HasFMAD);
12770	// If the addition is not contractable, do not combine.
12771	if (!AllowFusionGlobally && !isContractable(N))
12772	return SDValue();
12773
12774	if (TLI.generateFMAsInMachineCombiner(VT, OptLevel))
12775	return SDValue();
12776
12777	// Always prefer FMAD to FMA for precision.
12778	unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
12779	bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
12780
12781	// Is the node an FMUL and contractable either due to global flags or
12782	// SDNodeFlags.
12783	auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
12784	if (N.getOpcode() != ISD::FMUL)
12785	return false;
12786	return AllowFusionGlobally \|\| isContractable(N.getNode());
12787	};
12788	// If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
12789	// prefer to fold the multiply with fewer uses.
12790	if (Aggressive && isContractableFMUL(N0) && isContractableFMUL(N1)) {
12791	if (N0.getNode()->use_size() > N1.getNode()->use_size())
12792	std::swap(N0, N1);
12793	}
12794
12795	// fold (fadd (fmul x, y), z) -> (fma x, y, z)
12796	if (isContractableFMUL(N0) && (Aggressive \|\| N0->hasOneUse())) {
12797	return DAG.getNode(PreferredFusedOpcode, SL, VT, N0.getOperand(0),
12798	N0.getOperand(1), N1);
12799	}
12800
12801	// fold (fadd x, (fmul y, z)) -> (fma y, z, x)
12802	// Note: Commutes FADD operands.
12803	if (isContractableFMUL(N1) && (Aggressive \|\| N1->hasOneUse())) {
12804	return DAG.getNode(PreferredFusedOpcode, SL, VT, N1.getOperand(0),
12805	N1.getOperand(1), N0);
12806	}
12807
12808	// fadd (fma A, B, (fmul C, D)), E --> fma A, B, (fma C, D, E)
12809	// fadd E, (fma A, B, (fmul C, D)) --> fma A, B, (fma C, D, E)
12810	// This requires reassociation because it changes the order of operations.
12811	SDValue FMA, E;
12812	if (CanReassociate && N0.getOpcode() == PreferredFusedOpcode &&
12813	N0.getOperand(2).getOpcode() == ISD::FMUL && N0.hasOneUse() &&
12814	N0.getOperand(2).hasOneUse()) {
12815	FMA = N0;
12816	E = N1;
12817	} else if (CanReassociate && N1.getOpcode() == PreferredFusedOpcode &&
12818	N1.getOperand(2).getOpcode() == ISD::FMUL && N1.hasOneUse() &&
12819	N1.getOperand(2).hasOneUse()) {
12820	FMA = N1;
12821	E = N0;
12822	}
12823	if (FMA && E) {
12824	SDValue A = FMA.getOperand(0);
12825	SDValue B = FMA.getOperand(1);
12826	SDValue C = FMA.getOperand(2).getOperand(0);
12827	SDValue D = FMA.getOperand(2).getOperand(1);
12828	SDValue CDE = DAG.getNode(PreferredFusedOpcode, SL, VT, C, D, E);
12829	return DAG.getNode(PreferredFusedOpcode, SL, VT, A, B, CDE);
12830	}
12831
12832	// Look through FP_EXTEND nodes to do more combining.
12833
12834	// fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
12835	if (N0.getOpcode() == ISD::FP_EXTEND) {
12836	SDValue N00 = N0.getOperand(0);
12837	if (isContractableFMUL(N00) &&
12838	TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
12839	N00.getValueType())) {
12840	return DAG.getNode(PreferredFusedOpcode, SL, VT,
12841	DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
12842	DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)),
12843	N1);
12844	}
12845	}
12846
12847	// fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x)
12848	// Note: Commutes FADD operands.
12849	if (N1.getOpcode() == ISD::FP_EXTEND) {
12850	SDValue N10 = N1.getOperand(0);
12851	if (isContractableFMUL(N10) &&
12852	TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
12853	N10.getValueType())) {
12854	return DAG.getNode(PreferredFusedOpcode, SL, VT,
12855	DAG.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(0)),
12856	DAG.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(1)),
12857	N0);
12858	}
12859	}
12860
12861	// More folding opportunities when target permits.
12862	if (Aggressive) {
12863	// fold (fadd (fma x, y, (fpext (fmul u, v))), z)
12864	// -> (fma x, y, (fma (fpext u), (fpext v), z))
12865	auto FoldFAddFMAFPExtFMul = [&](SDValue X, SDValue Y, SDValue U, SDValue V,
12866	SDValue Z) {
12867	return DAG.getNode(PreferredFusedOpcode, SL, VT, X, Y,
12868	DAG.getNode(PreferredFusedOpcode, SL, VT,
12869	DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
12870	DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
12871	Z));
12872	};
12873	if (N0.getOpcode() == PreferredFusedOpcode) {
12874	SDValue N02 = N0.getOperand(2);
12875	if (N02.getOpcode() == ISD::FP_EXTEND) {
12876	SDValue N020 = N02.getOperand(0);
12877	if (isContractableFMUL(N020) &&
12878	TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
12879	N020.getValueType())) {
12880	return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1),
12881	N020.getOperand(0), N020.getOperand(1),
12882	N1);
12883	}
12884	}
12885	}
12886
12887	// fold (fadd (fpext (fma x, y, (fmul u, v))), z)
12888	// -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
12889	// FIXME: This turns two single-precision and one double-precision
12890	// operation into two double-precision operations, which might not be
12891	// interesting for all targets, especially GPUs.
12892	auto FoldFAddFPExtFMAFMul = [&](SDValue X, SDValue Y, SDValue U, SDValue V,
12893	SDValue Z) {
12894	return DAG.getNode(
12895	PreferredFusedOpcode, SL, VT, DAG.getNode(ISD::FP_EXTEND, SL, VT, X),
12896	DAG.getNode(ISD::FP_EXTEND, SL, VT, Y),
12897	DAG.getNode(PreferredFusedOpcode, SL, VT,
12898	DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
12899	DAG.getNode(ISD::FP_EXTEND, SL, VT, V), Z));
12900	};
12901	if (N0.getOpcode() == ISD::FP_EXTEND) {
12902	SDValue N00 = N0.getOperand(0);
12903	if (N00.getOpcode() == PreferredFusedOpcode) {
12904	SDValue N002 = N00.getOperand(2);
12905	if (isContractableFMUL(N002) &&
12906	TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
12907	N00.getValueType())) {
12908	return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1),
12909	N002.getOperand(0), N002.getOperand(1),
12910	N1);
12911	}
12912	}
12913	}
12914
12915	// fold (fadd x, (fma y, z, (fpext (fmul u, v)))
12916	// -> (fma y, z, (fma (fpext u), (fpext v), x))
12917	if (N1.getOpcode() == PreferredFusedOpcode) {
12918	SDValue N12 = N1.getOperand(2);
12919	if (N12.getOpcode() == ISD::FP_EXTEND) {
12920	SDValue N120 = N12.getOperand(0);
12921	if (isContractableFMUL(N120) &&
12922	TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
12923	N120.getValueType())) {
12924	return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1),
12925	N120.getOperand(0), N120.getOperand(1),
12926	N0);
12927	}
12928	}
12929	}
12930
12931	// fold (fadd x, (fpext (fma y, z, (fmul u, v)))
12932	// -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x))
12933	// FIXME: This turns two single-precision and one double-precision
12934	// operation into two double-precision operations, which might not be
12935	// interesting for all targets, especially GPUs.
12936	if (N1.getOpcode() == ISD::FP_EXTEND) {
12937	SDValue N10 = N1.getOperand(0);
12938	if (N10.getOpcode() == PreferredFusedOpcode) {
12939	SDValue N102 = N10.getOperand(2);
12940	if (isContractableFMUL(N102) &&
12941	TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
12942	N10.getValueType())) {
12943	return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1),
12944	N102.getOperand(0), N102.getOperand(1),
12945	N0);
12946	}
12947	}
12948	}
12949	}
12950
12951	return SDValue();
12952	}
12953
12954	/// Try to perform FMA combining on a given FSUB node.
12955	SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
12956	SDValue N0 = N->getOperand(0);
12957	SDValue N1 = N->getOperand(1);
12958	EVT VT = N->getValueType(0);
12959	SDLoc SL(N);
12960
12961	const TargetOptions &Options = DAG.getTarget().Options;
12962	// Floating-point multiply-add with intermediate rounding.
12963	bool HasFMAD = (LegalOperations && TLI.isFMADLegal(DAG, N));
12964
12965	// Floating-point multiply-add without intermediate rounding.
12966	bool HasFMA =
12967	TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&
12968	(!LegalOperations \|\| TLI.isOperationLegalOrCustom(ISD::FMA, VT));
12969
12970	// No valid opcode, do not combine.
12971	if (!HasFMAD && !HasFMA)
12972	return SDValue();
12973
12974	const SDNodeFlags Flags = N->getFlags();
12975	bool CanFuse = Options.UnsafeFPMath \|\| isContractable(N);
12976	bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast \|\|
12977	CanFuse \|\| HasFMAD);
12978
12979	// If the subtraction is not contractable, do not combine.
12980	if (!AllowFusionGlobally && !isContractable(N))
12981	return SDValue();
12982
12983	if (TLI.generateFMAsInMachineCombiner(VT, OptLevel))
12984	return SDValue();
12985
12986	// Always prefer FMAD to FMA for precision.
12987	unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
12988	bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
12989	bool NoSignedZero = Options.NoSignedZerosFPMath \|\| Flags.hasNoSignedZeros();
12990
12991	// Is the node an FMUL and contractable either due to global flags or
12992	// SDNodeFlags.
12993	auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
12994	if (N.getOpcode() != ISD::FMUL)
12995	return false;
12996	return AllowFusionGlobally \|\| isContractable(N.getNode());
12997	};
12998
12999	// fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
13000	auto tryToFoldXYSubZ = [&](SDValue XY, SDValue Z) {
13001	if (isContractableFMUL(XY) && (Aggressive \|\| XY->hasOneUse())) {
13002	return DAG.getNode(PreferredFusedOpcode, SL, VT, XY.getOperand(0),
13003	XY.getOperand(1), DAG.getNode(ISD::FNEG, SL, VT, Z));
13004	}
13005	return SDValue();
13006	};
13007
13008	// fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
13009	// Note: Commutes FSUB operands.
13010	auto tryToFoldXSubYZ = [&](SDValue X, SDValue YZ) {
13011	if (isContractableFMUL(YZ) && (Aggressive \|\| YZ->hasOneUse())) {
13012	return DAG.getNode(PreferredFusedOpcode, SL, VT,
13013	DAG.getNode(ISD::FNEG, SL, VT, YZ.getOperand(0)),
13014	YZ.getOperand(1), X);
13015	}
13016	return SDValue();
13017	};
13018
13019	// If we have two choices trying to fold (fsub (fmul u, v), (fmul x, y)),
13020	// prefer to fold the multiply with fewer uses.
13021	if (isContractableFMUL(N0) && isContractableFMUL(N1) &&
13022	(N0.getNode()->use_size() > N1.getNode()->use_size())) {
13023	// fold (fsub (fmul a, b), (fmul c, d)) -> (fma (fneg c), d, (fmul a, b))
13024	if (SDValue V = tryToFoldXSubYZ(N0, N1))
13025	return V;
13026	// fold (fsub (fmul a, b), (fmul c, d)) -> (fma a, b, (fneg (fmul c, d)))
13027	if (SDValue V = tryToFoldXYSubZ(N0, N1))
13028	return V;
13029	} else {
13030	// fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
13031	if (SDValue V = tryToFoldXYSubZ(N0, N1))
13032	return V;
13033	// fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
13034	if (SDValue V = tryToFoldXSubYZ(N0, N1))
13035	return V;
13036	}
13037
13038	// fold (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z))
13039	if (N0.getOpcode() == ISD::FNEG && isContractableFMUL(N0.getOperand(0)) &&
13040	(Aggressive \|\| (N0->hasOneUse() && N0.getOperand(0).hasOneUse()))) {
13041	SDValue N00 = N0.getOperand(0).getOperand(0);
13042	SDValue N01 = N0.getOperand(0).getOperand(1);
13043	return DAG.getNode(PreferredFusedOpcode, SL, VT,
13044	DAG.getNode(ISD::FNEG, SL, VT, N00), N01,
13045	DAG.getNode(ISD::FNEG, SL, VT, N1));
13046	}
13047
13048	// Look through FP_EXTEND nodes to do more combining.
13049
13050	// fold (fsub (fpext (fmul x, y)), z)
13051	// -> (fma (fpext x), (fpext y), (fneg z))
13052	if (N0.getOpcode() == ISD::FP_EXTEND) {
13053	SDValue N00 = N0.getOperand(0);
13054	if (isContractableFMUL(N00) &&
13055	TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13056	N00.getValueType())) {
13057	return DAG.getNode(PreferredFusedOpcode, SL, VT,
13058	DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
13059	DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)),
13060	DAG.getNode(ISD::FNEG, SL, VT, N1));
13061	}
13062	}
13063
13064	// fold (fsub x, (fpext (fmul y, z)))
13065	// -> (fma (fneg (fpext y)), (fpext z), x)
13066	// Note: Commutes FSUB operands.
13067	if (N1.getOpcode() == ISD::FP_EXTEND) {
13068	SDValue N10 = N1.getOperand(0);
13069	if (isContractableFMUL(N10) &&
13070	TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13071	N10.getValueType())) {
13072	return DAG.getNode(
13073	PreferredFusedOpcode, SL, VT,
13074	DAG.getNode(ISD::FNEG, SL, VT,
13075	DAG.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(0))),
13076	DAG.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(1)), N0);
13077	}
13078	}
13079
13080	// fold (fsub (fpext (fneg (fmul, x, y))), z)
13081	// -> (fneg (fma (fpext x), (fpext y), z))
13082	// Note: This could be removed with appropriate canonicalization of the
13083	// input expression into (fneg (fadd (fpext (fmul, x, y)), z). However, the
13084	// orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
13085	// from implementing the canonicalization in visitFSUB.
13086	if (N0.getOpcode() == ISD::FP_EXTEND) {
13087	SDValue N00 = N0.getOperand(0);
13088	if (N00.getOpcode() == ISD::FNEG) {
13089	SDValue N000 = N00.getOperand(0);
13090	if (isContractableFMUL(N000) &&
13091	TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13092	N00.getValueType())) {
13093	return DAG.getNode(
13094	ISD::FNEG, SL, VT,
13095	DAG.getNode(PreferredFusedOpcode, SL, VT,
13096	DAG.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(0)),
13097	DAG.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(1)),
13098	N1));
13099	}
13100	}
13101	}
13102
13103	// fold (fsub (fneg (fpext (fmul, x, y))), z)
13104	// -> (fneg (fma (fpext x)), (fpext y), z)
13105	// Note: This could be removed with appropriate canonicalization of the
13106	// input expression into (fneg (fadd (fpext (fmul, x, y)), z). However, the
13107	// orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
13108	// from implementing the canonicalization in visitFSUB.
13109	if (N0.getOpcode() == ISD::FNEG) {
13110	SDValue N00 = N0.getOperand(0);
13111	if (N00.getOpcode() == ISD::FP_EXTEND) {
13112	SDValue N000 = N00.getOperand(0);
13113	if (isContractableFMUL(N000) &&
13114	TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13115	N000.getValueType())) {
13116	return DAG.getNode(
13117	ISD::FNEG, SL, VT,
13118	DAG.getNode(PreferredFusedOpcode, SL, VT,
13119	DAG.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(0)),
13120	DAG.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(1)),
13121	N1));
13122	}
13123	}
13124	}
13125
13126	// More folding opportunities when target permits.
13127	if (Aggressive) {
13128	// fold (fsub (fma x, y, (fmul u, v)), z)
13129	// -> (fma x, y (fma u, v, (fneg z)))
13130	if (CanFuse && N0.getOpcode() == PreferredFusedOpcode &&
13131	isContractableFMUL(N0.getOperand(2)) && N0->hasOneUse() &&
13132	N0.getOperand(2)->hasOneUse()) {
13133	return DAG.getNode(PreferredFusedOpcode, SL, VT, N0.getOperand(0),
13134	N0.getOperand(1),
13135	DAG.getNode(PreferredFusedOpcode, SL, VT,
13136	N0.getOperand(2).getOperand(0),
13137	N0.getOperand(2).getOperand(1),
13138	DAG.getNode(ISD::FNEG, SL, VT, N1)));
13139	}
13140
13141	// fold (fsub x, (fma y, z, (fmul u, v)))
13142	// -> (fma (fneg y), z, (fma (fneg u), v, x))
13143	if (CanFuse && N1.getOpcode() == PreferredFusedOpcode &&
13144	isContractableFMUL(N1.getOperand(2)) &&
13145	N1->hasOneUse() && NoSignedZero) {
13146	SDValue N20 = N1.getOperand(2).getOperand(0);
13147	SDValue N21 = N1.getOperand(2).getOperand(1);
13148	return DAG.getNode(
13149	PreferredFusedOpcode, SL, VT,
13150	DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)), N1.getOperand(1),
13151	DAG.getNode(PreferredFusedOpcode, SL, VT,
13152	DAG.getNode(ISD::FNEG, SL, VT, N20), N21, N0));
13153	}
13154
13155
13156	// fold (fsub (fma x, y, (fpext (fmul u, v))), z)
13157	// -> (fma x, y (fma (fpext u), (fpext v), (fneg z)))
13158	if (N0.getOpcode() == PreferredFusedOpcode &&
13159	N0->hasOneUse()) {
13160	SDValue N02 = N0.getOperand(2);
13161	if (N02.getOpcode() == ISD::FP_EXTEND) {
13162	SDValue N020 = N02.getOperand(0);
13163	if (isContractableFMUL(N020) &&
13164	TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13165	N020.getValueType())) {
13166	return DAG.getNode(
13167	PreferredFusedOpcode, SL, VT, N0.getOperand(0), N0.getOperand(1),
13168	DAG.getNode(
13169	PreferredFusedOpcode, SL, VT,
13170	DAG.getNode(ISD::FP_EXTEND, SL, VT, N020.getOperand(0)),
13171	DAG.getNode(ISD::FP_EXTEND, SL, VT, N020.getOperand(1)),
13172	DAG.getNode(ISD::FNEG, SL, VT, N1)));
13173	}
13174	}
13175	}
13176
13177	// fold (fsub (fpext (fma x, y, (fmul u, v))), z)
13178	// -> (fma (fpext x), (fpext y),
13179	// (fma (fpext u), (fpext v), (fneg z)))
13180	// FIXME: This turns two single-precision and one double-precision
13181	// operation into two double-precision operations, which might not be
13182	// interesting for all targets, especially GPUs.
13183	if (N0.getOpcode() == ISD::FP_EXTEND) {
13184	SDValue N00 = N0.getOperand(0);
13185	if (N00.getOpcode() == PreferredFusedOpcode) {
13186	SDValue N002 = N00.getOperand(2);
13187	if (isContractableFMUL(N002) &&
13188	TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13189	N00.getValueType())) {
13190	return DAG.getNode(
13191	PreferredFusedOpcode, SL, VT,
13192	DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
13193	DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)),
13194	DAG.getNode(
13195	PreferredFusedOpcode, SL, VT,
13196	DAG.getNode(ISD::FP_EXTEND, SL, VT, N002.getOperand(0)),
13197	DAG.getNode(ISD::FP_EXTEND, SL, VT, N002.getOperand(1)),
13198	DAG.getNode(ISD::FNEG, SL, VT, N1)));
13199	}
13200	}
13201	}
13202
13203	// fold (fsub x, (fma y, z, (fpext (fmul u, v))))
13204	// -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x))
13205	if (N1.getOpcode() == PreferredFusedOpcode &&
13206	N1.getOperand(2).getOpcode() == ISD::FP_EXTEND &&
13207	N1->hasOneUse()) {
13208	SDValue N120 = N1.getOperand(2).getOperand(0);
13209	if (isContractableFMUL(N120) &&
13210	TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13211	N120.getValueType())) {
13212	SDValue N1200 = N120.getOperand(0);
13213	SDValue N1201 = N120.getOperand(1);
13214	return DAG.getNode(
13215	PreferredFusedOpcode, SL, VT,
13216	DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)), N1.getOperand(1),
13217	DAG.getNode(PreferredFusedOpcode, SL, VT,
13218	DAG.getNode(ISD::FNEG, SL, VT,
13219	DAG.getNode(ISD::FP_EXTEND, SL, VT, N1200)),
13220	DAG.getNode(ISD::FP_EXTEND, SL, VT, N1201), N0));
13221	}
13222	}
13223
13224	// fold (fsub x, (fpext (fma y, z, (fmul u, v))))
13225	// -> (fma (fneg (fpext y)), (fpext z),
13226	// (fma (fneg (fpext u)), (fpext v), x))
13227	// FIXME: This turns two single-precision and one double-precision
13228	// operation into two double-precision operations, which might not be
13229	// interesting for all targets, especially GPUs.
13230	if (N1.getOpcode() == ISD::FP_EXTEND &&
13231	N1.getOperand(0).getOpcode() == PreferredFusedOpcode) {
13232	SDValue CvtSrc = N1.getOperand(0);
13233	SDValue N100 = CvtSrc.getOperand(0);
13234	SDValue N101 = CvtSrc.getOperand(1);
13235	SDValue N102 = CvtSrc.getOperand(2);
13236	if (isContractableFMUL(N102) &&
13237	TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13238	CvtSrc.getValueType())) {
13239	SDValue N1020 = N102.getOperand(0);
13240	SDValue N1021 = N102.getOperand(1);
13241	return DAG.getNode(
13242	PreferredFusedOpcode, SL, VT,
13243	DAG.getNode(ISD::FNEG, SL, VT,
13244	DAG.getNode(ISD::FP_EXTEND, SL, VT, N100)),
13245	DAG.getNode(ISD::FP_EXTEND, SL, VT, N101),
13246	DAG.getNode(PreferredFusedOpcode, SL, VT,
13247	DAG.getNode(ISD::FNEG, SL, VT,
13248	DAG.getNode(ISD::FP_EXTEND, SL, VT, N1020)),
13249	DAG.getNode(ISD::FP_EXTEND, SL, VT, N1021), N0));
13250	}
13251	}
13252	}
13253
13254	return SDValue();
13255	}
13256
13257	/// Try to perform FMA combining on a given FMUL node based on the distributive
13258	/// law x * (y + 1) = x * y + x and variants thereof (commuted versions,
13259	/// subtraction instead of addition).
13260	SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
13261	SDValue N0 = N->getOperand(0);
13262	SDValue N1 = N->getOperand(1);
13263	EVT VT = N->getValueType(0);
13264	SDLoc SL(N);
13265
13266	assert(N->getOpcode() == ISD::FMUL && "Expected FMUL Operation")((N->getOpcode() == ISD::FMUL && "Expected FMUL Operation" ) ? static_cast<void> (0) : __assert_fail ("N->getOpcode() == ISD::FMUL && \"Expected FMUL Operation\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 13266, __PRETTY_FUNCTION__));
13267
13268	const TargetOptions &Options = DAG.getTarget().Options;
13269
13270	// The transforms below are incorrect when x == 0 and y == inf, because the
13271	// intermediate multiplication produces a nan.
13272	if (!Options.NoInfsFPMath)
13273	return SDValue();
13274
13275	// Floating-point multiply-add without intermediate rounding.
13276	bool HasFMA =
13277	(Options.AllowFPOpFusion == FPOpFusion::Fast \|\| Options.UnsafeFPMath) &&
13278	TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&
13279	(!LegalOperations \|\| TLI.isOperationLegalOrCustom(ISD::FMA, VT));
13280
13281	// Floating-point multiply-add with intermediate rounding. This can result
13282	// in a less precise result due to the changed rounding order.
13283	bool HasFMAD = Options.UnsafeFPMath &&
13284	(LegalOperations && TLI.isFMADLegal(DAG, N));
13285
13286	// No valid opcode, do not combine.
13287	if (!HasFMAD && !HasFMA)
13288	return SDValue();
13289
13290	// Always prefer FMAD to FMA for precision.
13291	unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
13292	bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
13293
13294	// fold (fmul (fadd x0, +1.0), y) -> (fma x0, y, y)
13295	// fold (fmul (fadd x0, -1.0), y) -> (fma x0, y, (fneg y))
13296	auto FuseFADD = [&](SDValue X, SDValue Y) {
13297	if (X.getOpcode() == ISD::FADD && (Aggressive \|\| X->hasOneUse())) {
13298	if (auto *C = isConstOrConstSplatFP(X.getOperand(1), true)) {
13299	if (C->isExactlyValue(+1.0))
13300	return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
13301	Y);
13302	if (C->isExactlyValue(-1.0))
13303	return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
13304	DAG.getNode(ISD::FNEG, SL, VT, Y));
13305	}
13306	}
13307	return SDValue();
13308	};
13309
13310	if (SDValue FMA = FuseFADD(N0, N1))
13311	return FMA;
13312	if (SDValue FMA = FuseFADD(N1, N0))
13313	return FMA;
13314
13315	// fold (fmul (fsub +1.0, x1), y) -> (fma (fneg x1), y, y)
13316	// fold (fmul (fsub -1.0, x1), y) -> (fma (fneg x1), y, (fneg y))
13317	// fold (fmul (fsub x0, +1.0), y) -> (fma x0, y, (fneg y))
13318	// fold (fmul (fsub x0, -1.0), y) -> (fma x0, y, y)
13319	auto FuseFSUB = [&](SDValue X, SDValue Y) {
13320	if (X.getOpcode() == ISD::FSUB && (Aggressive \|\| X->hasOneUse())) {
13321	if (auto *C0 = isConstOrConstSplatFP(X.getOperand(0), true)) {
13322	if (C0->isExactlyValue(+1.0))
13323	return DAG.getNode(PreferredFusedOpcode, SL, VT,
13324	DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
13325	Y);
13326	if (C0->isExactlyValue(-1.0))
13327	return DAG.getNode(PreferredFusedOpcode, SL, VT,
13328	DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
13329	DAG.getNode(ISD::FNEG, SL, VT, Y));
13330	}
13331	if (auto *C1 = isConstOrConstSplatFP(X.getOperand(1), true)) {
13332	if (C1->isExactlyValue(+1.0))
13333	return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
13334	DAG.getNode(ISD::FNEG, SL, VT, Y));
13335	if (C1->isExactlyValue(-1.0))
13336	return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
13337	Y);
13338	}
13339	}
13340	return SDValue();
13341	};
13342
13343	if (SDValue FMA = FuseFSUB(N0, N1))
13344	return FMA;
13345	if (SDValue FMA = FuseFSUB(N1, N0))
13346	return FMA;
13347
13348	return SDValue();
13349	}
13350
13351	SDValue DAGCombiner::visitFADD(SDNode *N) {
13352	SDValue N0 = N->getOperand(0);
13353	SDValue N1 = N->getOperand(1);
13354	bool N0CFP = DAG.isConstantFPBuildVectorOrConstantFP(N0);
13355	bool N1CFP = DAG.isConstantFPBuildVectorOrConstantFP(N1);
13356	EVT VT = N->getValueType(0);
13357	SDLoc DL(N);
13358	const TargetOptions &Options = DAG.getTarget().Options;
13359	SDNodeFlags Flags = N->getFlags();
13360	SelectionDAG::FlagInserter FlagsInserter(DAG, N);
13361
13362	if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
13363	return R;
13364
13365	// fold vector ops
13366	if (VT.isVector())
13367	if (SDValue FoldedVOp = SimplifyVBinOp(N))
13368	return FoldedVOp;
13369
13370	// fold (fadd c1, c2) -> c1 + c2
13371	if (N0CFP && N1CFP)
13372	return DAG.getNode(ISD::FADD, DL, VT, N0, N1);
13373
13374	// canonicalize constant to RHS
13375	if (N0CFP && !N1CFP)
13376	return DAG.getNode(ISD::FADD, DL, VT, N1, N0);
13377
13378	// N0 + -0.0 --> N0 (also allowed with +0.0 and fast-math)
13379	ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1, true);
13380	if (N1C && N1C->isZero())
13381	if (N1C->isNegative() \|\| Options.NoSignedZerosFPMath \|\| Flags.hasNoSignedZeros())
13382	return N0;
13383
13384	if (SDValue NewSel = foldBinOpIntoSelect(N))
13385	return NewSel;
13386
13387	// fold (fadd A, (fneg B)) -> (fsub A, B)
13388	if (!LegalOperations \|\| TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
13389	if (SDValue NegN1 = TLI.getCheaperNegatedExpression(
13390	N1, DAG, LegalOperations, ForCodeSize))
13391	return DAG.getNode(ISD::FSUB, DL, VT, N0, NegN1);
13392
13393	// fold (fadd (fneg A), B) -> (fsub B, A)
13394	if (!LegalOperations \|\| TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
13395	if (SDValue NegN0 = TLI.getCheaperNegatedExpression(
13396	N0, DAG, LegalOperations, ForCodeSize))
13397	return DAG.getNode(ISD::FSUB, DL, VT, N1, NegN0);
13398
13399	auto isFMulNegTwo = [](SDValue FMul) {
13400	if (!FMul.hasOneUse() \|\| FMul.getOpcode() != ISD::FMUL)
13401	return false;
13402	auto *C = isConstOrConstSplatFP(FMul.getOperand(1), true);
13403	return C && C->isExactlyValue(-2.0);
13404	};
13405
13406	// fadd (fmul B, -2.0), A --> fsub A, (fadd B, B)
13407	if (isFMulNegTwo(N0)) {
13408	SDValue B = N0.getOperand(0);
13409	SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B);
13410	return DAG.getNode(ISD::FSUB, DL, VT, N1, Add);
13411	}
13412	// fadd A, (fmul B, -2.0) --> fsub A, (fadd B, B)
13413	if (isFMulNegTwo(N1)) {
13414	SDValue B = N1.getOperand(0);
13415	SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B);
13416	return DAG.getNode(ISD::FSUB, DL, VT, N0, Add);
13417	}
13418
13419	// No FP constant should be created after legalization as Instruction
13420	// Selection pass has a hard time dealing with FP constants.
13421	bool AllowNewConst = (Level < AfterLegalizeDAG);
13422
13423	// If nnan is enabled, fold lots of things.
13424	if ((Options.NoNaNsFPMath \|\| Flags.hasNoNaNs()) && AllowNewConst) {
13425	// If allowed, fold (fadd (fneg x), x) -> 0.0
13426	if (N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)
13427	return DAG.getConstantFP(0.0, DL, VT);
13428
13429	// If allowed, fold (fadd x, (fneg x)) -> 0.0
13430	if (N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0)
13431	return DAG.getConstantFP(0.0, DL, VT);
13432	}
13433
13434	// If 'unsafe math' or reassoc and nsz, fold lots of things.
13435	// TODO: break out portions of the transformations below for which Unsafe is
13436	// considered and which do not require both nsz and reassoc
13437	if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) \|\|
13438	(Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
13439	AllowNewConst) {
13440	// fadd (fadd x, c1), c2 -> fadd x, c1 + c2
13441	if (N1CFP && N0.getOpcode() == ISD::FADD &&
13442	DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
13443	SDValue NewC = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1);
13444	return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0), NewC);
13445	}
13446
13447	// We can fold chains of FADD's of the same value into multiplications.
13448	// This transform is not safe in general because we are reducing the number
13449	// of rounding steps.
13450	if (TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && !N0CFP && !N1CFP) {
13451	if (N0.getOpcode() == ISD::FMUL) {
13452	bool CFP00 = DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
13453	bool CFP01 = DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1));
13454
13455	// (fadd (fmul x, c), x) -> (fmul x, c+1)
13456	if (CFP01 && !CFP00 && N0.getOperand(0) == N1) {
13457	SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
13458	DAG.getConstantFP(1.0, DL, VT));
13459	return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP);
13460	}
13461
13462	// (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2)
13463	if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD &&
13464	N1.getOperand(0) == N1.getOperand(1) &&
13465	N0.getOperand(0) == N1.getOperand(0)) {
13466	SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
13467	DAG.getConstantFP(2.0, DL, VT));
13468	return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP);
13469	}
13470	}
13471
13472	if (N1.getOpcode() == ISD::FMUL) {
13473	bool CFP10 = DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
13474	bool CFP11 = DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(1));
13475
13476	// (fadd x, (fmul x, c)) -> (fmul x, c+1)
13477	if (CFP11 && !CFP10 && N1.getOperand(0) == N0) {
13478	SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
13479	DAG.getConstantFP(1.0, DL, VT));
13480	return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP);
13481	}
13482
13483	// (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2)
13484	if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD &&
13485	N0.getOperand(0) == N0.getOperand(1) &&
13486	N1.getOperand(0) == N0.getOperand(0)) {
13487	SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
13488	DAG.getConstantFP(2.0, DL, VT));
13489	return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP);
13490	}
13491	}
13492
13493	if (N0.getOpcode() == ISD::FADD) {
13494	bool CFP00 = DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
13495	// (fadd (fadd x, x), x) -> (fmul x, 3.0)
13496	if (!CFP00 && N0.getOperand(0) == N0.getOperand(1) &&
13497	(N0.getOperand(0) == N1)) {
13498	return DAG.getNode(ISD::FMUL, DL, VT, N1,
13499	DAG.getConstantFP(3.0, DL, VT));
13500	}
13501	}
13502
13503	if (N1.getOpcode() == ISD::FADD) {
13504	bool CFP10 = DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
13505	// (fadd x, (fadd x, x)) -> (fmul x, 3.0)
13506	if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) &&
13507	N1.getOperand(0) == N0) {
13508	return DAG.getNode(ISD::FMUL, DL, VT, N0,
13509	DAG.getConstantFP(3.0, DL, VT));
13510	}
13511	}
13512
13513	// (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0)
13514	if (N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD &&
13515	N0.getOperand(0) == N0.getOperand(1) &&
13516	N1.getOperand(0) == N1.getOperand(1) &&
13517	N0.getOperand(0) == N1.getOperand(0)) {
13518	return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0),
13519	DAG.getConstantFP(4.0, DL, VT));
13520	}
13521	}
13522	} // enable-unsafe-fp-math
13523
13524	// FADD -> FMA combines:
13525	if (SDValue Fused = visitFADDForFMACombine(N)) {
13526	AddToWorklist(Fused.getNode());
13527	return Fused;
13528	}
13529	return SDValue();
13530	}
13531
13532	SDValue DAGCombiner::visitSTRICT_FADD(SDNode *N) {
13533	SDValue Chain = N->getOperand(0);
13534	SDValue N0 = N->getOperand(1);
13535	SDValue N1 = N->getOperand(2);
13536	EVT VT = N->getValueType(0);
13537	EVT ChainVT = N->getValueType(1);
13538	SDLoc DL(N);
13539	SelectionDAG::FlagInserter FlagsInserter(DAG, N);
13540
13541	// fold (strict_fadd A, (fneg B)) -> (strict_fsub A, B)
13542	if (!LegalOperations \|\| TLI.isOperationLegalOrCustom(ISD::STRICT_FSUB, VT))
13543	if (SDValue NegN1 = TLI.getCheaperNegatedExpression(
13544	N1, DAG, LegalOperations, ForCodeSize)) {
13545	return DAG.getNode(ISD::STRICT_FSUB, DL, DAG.getVTList(VT, ChainVT),
13546	{Chain, N0, NegN1});
13547	}
13548
13549	// fold (strict_fadd (fneg A), B) -> (strict_fsub B, A)
13550	if (!LegalOperations \|\| TLI.isOperationLegalOrCustom(ISD::STRICT_FSUB, VT))
13551	if (SDValue NegN0 = TLI.getCheaperNegatedExpression(
13552	N0, DAG, LegalOperations, ForCodeSize)) {
13553	return DAG.getNode(ISD::STRICT_FSUB, DL, DAG.getVTList(VT, ChainVT),
13554	{Chain, N1, NegN0});
13555	}
13556	return SDValue();
13557	}
13558
13559	SDValue DAGCombiner::visitFSUB(SDNode *N) {
13560	SDValue N0 = N->getOperand(0);
13561	SDValue N1 = N->getOperand(1);
13562	ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, true);
13563	ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
13564	EVT VT = N->getValueType(0);
13565	SDLoc DL(N);
13566	const TargetOptions &Options = DAG.getTarget().Options;
13567	const SDNodeFlags Flags = N->getFlags();
13568	SelectionDAG::FlagInserter FlagsInserter(DAG, N);
13569
13570	if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
13571	return R;
13572
13573	// fold vector ops
13574	if (VT.isVector())
13575	if (SDValue FoldedVOp = SimplifyVBinOp(N))
13576	return FoldedVOp;
13577
13578	// fold (fsub c1, c2) -> c1-c2
13579	if (N0CFP && N1CFP)
13580	return DAG.getNode(ISD::FSUB, DL, VT, N0, N1);
13581
13582	if (SDValue NewSel = foldBinOpIntoSelect(N))
13583	return NewSel;
13584
13585	// (fsub A, 0) -> A
13586	if (N1CFP && N1CFP->isZero()) {
13587	if (!N1CFP->isNegative() \|\| Options.NoSignedZerosFPMath \|\|
13588	Flags.hasNoSignedZeros()) {
13589	return N0;
13590	}
13591	}
13592
13593	if (N0 == N1) {
13594	// (fsub x, x) -> 0.0
13595	if (Options.NoNaNsFPMath \|\| Flags.hasNoNaNs())
13596	return DAG.getConstantFP(0.0f, DL, VT);
13597	}
13598
13599	// (fsub -0.0, N1) -> -N1
13600	if (N0CFP && N0CFP->isZero()) {
13601	if (N0CFP->isNegative() \|\|
13602	(Options.NoSignedZerosFPMath \|\| Flags.hasNoSignedZeros())) {
13603	// We cannot replace an FSUB(+-0.0,X) with FNEG(X) when denormals are
13604	// flushed to zero, unless all users treat denorms as zero (DAZ).
13605	// FIXME: This transform will change the sign of a NaN and the behavior
13606	// of a signaling NaN. It is only valid when a NoNaN flag is present.
13607	DenormalMode DenormMode = DAG.getDenormalMode(VT);
13608	if (DenormMode == DenormalMode::getIEEE()) {
13609	if (SDValue NegN1 =
13610	TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize))
13611	return NegN1;
13612	if (!LegalOperations \|\| TLI.isOperationLegal(ISD::FNEG, VT))
13613	return DAG.getNode(ISD::FNEG, DL, VT, N1);
13614	}
13615	}
13616	}
13617
13618	if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) \|\|
13619	(Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
13620	N1.getOpcode() == ISD::FADD) {
13621	// X - (X + Y) -> -Y
13622	if (N0 == N1->getOperand(0))
13623	return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(1));
13624	// X - (Y + X) -> -Y
13625	if (N0 == N1->getOperand(1))
13626	return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(0));
13627	}
13628
13629	// fold (fsub A, (fneg B)) -> (fadd A, B)
13630	if (SDValue NegN1 =
13631	TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize))
13632	return DAG.getNode(ISD::FADD, DL, VT, N0, NegN1);
13633
13634	// FSUB -> FMA combines:
13635	if (SDValue Fused = visitFSUBForFMACombine(N)) {
13636	AddToWorklist(Fused.getNode());
13637	return Fused;
13638	}
13639
13640	return SDValue();
13641	}
13642
13643	SDValue DAGCombiner::visitFMUL(SDNode *N) {
13644	SDValue N0 = N->getOperand(0);
13645	SDValue N1 = N->getOperand(1);
13646	ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, true);
13647	ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
13648	EVT VT = N->getValueType(0);
13649	SDLoc DL(N);
13650	const TargetOptions &Options = DAG.getTarget().Options;
13651	const SDNodeFlags Flags = N->getFlags();
13652	SelectionDAG::FlagInserter FlagsInserter(DAG, N);
13653
13654	if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
13655	return R;
13656
13657	// fold vector ops
13658	if (VT.isVector()) {
13659	// This just handles C1 * C2 for vectors. Other vector folds are below.
13660	if (SDValue FoldedVOp = SimplifyVBinOp(N))
13661	return FoldedVOp;
13662	}
13663
13664	// fold (fmul c1, c2) -> c1*c2
13665	if (N0CFP && N1CFP)
13666	return DAG.getNode(ISD::FMUL, DL, VT, N0, N1);
13667
13668	// canonicalize constant to RHS
13669	if (DAG.isConstantFPBuildVectorOrConstantFP(N0) &&
13670	!DAG.isConstantFPBuildVectorOrConstantFP(N1))
13671	return DAG.getNode(ISD::FMUL, DL, VT, N1, N0);
13672
13673	if (SDValue NewSel = foldBinOpIntoSelect(N))
13674	return NewSel;
13675
13676	if (Options.UnsafeFPMath \|\| Flags.hasAllowReassociation()) {
13677	// fmul (fmul X, C1), C2 -> fmul X, C1 * C2
13678	if (DAG.isConstantFPBuildVectorOrConstantFP(N1) &&
13679	N0.getOpcode() == ISD::FMUL) {
13680	SDValue N00 = N0.getOperand(0);
13681	SDValue N01 = N0.getOperand(1);
13682	// Avoid an infinite loop by making sure that N00 is not a constant
13683	// (the inner multiply has not been constant folded yet).
13684	if (DAG.isConstantFPBuildVectorOrConstantFP(N01) &&
13685	!DAG.isConstantFPBuildVectorOrConstantFP(N00)) {
13686	SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1);
13687	return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts);
13688	}
13689	}
13690
13691	// Match a special-case: we convert X * 2.0 into fadd.
13692	// fmul (fadd X, X), C -> fmul X, 2.0 * C
13693	if (N0.getOpcode() == ISD::FADD && N0.hasOneUse() &&
13694	N0.getOperand(0) == N0.getOperand(1)) {
13695	const SDValue Two = DAG.getConstantFP(2.0, DL, VT);
13696	SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1);
13697	return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts);
13698	}
13699	}
13700
13701	// fold (fmul X, 2.0) -> (fadd X, X)
13702	if (N1CFP && N1CFP->isExactlyValue(+2.0))
13703	return DAG.getNode(ISD::FADD, DL, VT, N0, N0);
13704
13705	// fold (fmul X, -1.0) -> (fneg X)
13706	if (N1CFP && N1CFP->isExactlyValue(-1.0))
13707	if (!LegalOperations \|\| TLI.isOperationLegal(ISD::FNEG, VT))
13708	return DAG.getNode(ISD::FNEG, DL, VT, N0);
13709
13710	// -N0 * -N1 --> N0 * N1
13711	TargetLowering::NegatibleCost CostN0 =
13712	TargetLowering::NegatibleCost::Expensive;
13713	TargetLowering::NegatibleCost CostN1 =
13714	TargetLowering::NegatibleCost::Expensive;
13715	SDValue NegN0 =
13716	TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
13717	SDValue NegN1 =
13718	TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
13719	if (NegN0 && NegN1 &&
13720	(CostN0 == TargetLowering::NegatibleCost::Cheaper \|\|
13721	CostN1 == TargetLowering::NegatibleCost::Cheaper))
13722	return DAG.getNode(ISD::FMUL, DL, VT, NegN0, NegN1);
13723
13724	// fold (fmul X, (select (fcmp X > 0.0), -1.0, 1.0)) -> (fneg (fabs X))
13725	// fold (fmul X, (select (fcmp X > 0.0), 1.0, -1.0)) -> (fabs X)
13726	if (Flags.hasNoNaNs() && Flags.hasNoSignedZeros() &&
13727	(N0.getOpcode() == ISD::SELECT \|\| N1.getOpcode() == ISD::SELECT) &&
13728	TLI.isOperationLegal(ISD::FABS, VT)) {
13729	SDValue Select = N0, X = N1;
13730	if (Select.getOpcode() != ISD::SELECT)
13731	std::swap(Select, X);
13732
13733	SDValue Cond = Select.getOperand(0);
13734	auto TrueOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(1));
13735	auto FalseOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(2));
13736
13737	if (TrueOpnd && FalseOpnd &&
13738	Cond.getOpcode() == ISD::SETCC && Cond.getOperand(0) == X &&
13739	isa<ConstantFPSDNode>(Cond.getOperand(1)) &&
13740	cast<ConstantFPSDNode>(Cond.getOperand(1))->isExactlyValue(0.0)) {
13741	ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
13742	switch (CC) {
13743	default: break;
13744	case ISD::SETOLT:
13745	case ISD::SETULT:
13746	case ISD::SETOLE:
13747	case ISD::SETULE:
13748	case ISD::SETLT:
13749	case ISD::SETLE:
13750	std::swap(TrueOpnd, FalseOpnd);
13751	LLVM_FALLTHROUGH[[gnu::fallthrough]];
13752	case ISD::SETOGT:
13753	case ISD::SETUGT:
13754	case ISD::SETOGE:
13755	case ISD::SETUGE:
13756	case ISD::SETGT:
13757	case ISD::SETGE:
13758	if (TrueOpnd->isExactlyValue(-1.0) && FalseOpnd->isExactlyValue(1.0) &&
13759	TLI.isOperationLegal(ISD::FNEG, VT))
13760	return DAG.getNode(ISD::FNEG, DL, VT,
13761	DAG.getNode(ISD::FABS, DL, VT, X));
13762	if (TrueOpnd->isExactlyValue(1.0) && FalseOpnd->isExactlyValue(-1.0))
13763	return DAG.getNode(ISD::FABS, DL, VT, X);
13764
13765	break;
13766	}
13767	}
13768	}
13769
13770	// FMUL -> FMA combines:
13771	if (SDValue Fused = visitFMULForFMADistributiveCombine(N)) {
13772	AddToWorklist(Fused.getNode());
13773	return Fused;
13774	}
13775
13776	return SDValue();
13777	}
13778
13779	SDValue DAGCombiner::visitFMA(SDNode *N) {
13780	SDValue N0 = N->getOperand(0);
13781	SDValue N1 = N->getOperand(1);
13782	SDValue N2 = N->getOperand(2);
13783	ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
13784	ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
13785	EVT VT = N->getValueType(0);
13786	SDLoc DL(N);
13787	const TargetOptions &Options = DAG.getTarget().Options;
13788	// FMA nodes have flags that propagate to the created nodes.
13789	SelectionDAG::FlagInserter FlagsInserter(DAG, N);
13790
13791	bool UnsafeFPMath =
13792	Options.UnsafeFPMath \|\| N->getFlags().hasAllowReassociation();
13793
13794	// Constant fold FMA.
13795	if (isa<ConstantFPSDNode>(N0) &&
13796	isa<ConstantFPSDNode>(N1) &&
13797	isa<ConstantFPSDNode>(N2)) {
13798	return DAG.getNode(ISD::FMA, DL, VT, N0, N1, N2);
13799	}
13800
13801	// (-N0 * -N1) + N2 --> (N0 * N1) + N2
13802	TargetLowering::NegatibleCost CostN0 =
13803	TargetLowering::NegatibleCost::Expensive;
13804	TargetLowering::NegatibleCost CostN1 =
13805	TargetLowering::NegatibleCost::Expensive;
13806	SDValue NegN0 =
13807	TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
13808	SDValue NegN1 =
13809	TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
13810	if (NegN0 && NegN1 &&
13811	(CostN0 == TargetLowering::NegatibleCost::Cheaper \|\|
13812	CostN1 == TargetLowering::NegatibleCost::Cheaper))
13813	return DAG.getNode(ISD::FMA, DL, VT, NegN0, NegN1, N2);
13814
13815	if (UnsafeFPMath) {
13816	if (N0CFP && N0CFP->isZero())
13817	return N2;
13818	if (N1CFP && N1CFP->isZero())
13819	return N2;
13820	}
13821
13822	if (N0CFP && N0CFP->isExactlyValue(1.0))
13823	return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N2);
13824	if (N1CFP && N1CFP->isExactlyValue(1.0))
13825	return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N2);
13826
13827	// Canonicalize (fma c, x, y) -> (fma x, c, y)
13828	if (DAG.isConstantFPBuildVectorOrConstantFP(N0) &&
13829	!DAG.isConstantFPBuildVectorOrConstantFP(N1))
13830	return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2);
13831
13832	if (UnsafeFPMath) {
13833	// (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
13834	if (N2.getOpcode() == ISD::FMUL && N0 == N2.getOperand(0) &&
13835	DAG.isConstantFPBuildVectorOrConstantFP(N1) &&
13836	DAG.isConstantFPBuildVectorOrConstantFP(N2.getOperand(1))) {
13837	return DAG.getNode(ISD::FMUL, DL, VT, N0,
13838	DAG.getNode(ISD::FADD, DL, VT, N1, N2.getOperand(1)));
13839	}
13840
13841	// (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
13842	if (N0.getOpcode() == ISD::FMUL &&
13843	DAG.isConstantFPBuildVectorOrConstantFP(N1) &&
13844	DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
13845	return DAG.getNode(ISD::FMA, DL, VT, N0.getOperand(0),
13846	DAG.getNode(ISD::FMUL, DL, VT, N1, N0.getOperand(1)),
13847	N2);
13848	}
13849	}
13850
13851	// (fma x, -1, y) -> (fadd (fneg x), y)
13852	if (N1CFP) {
13853	if (N1CFP->isExactlyValue(1.0))
13854	return DAG.getNode(ISD::FADD, DL, VT, N0, N2);
13855
13856	if (N1CFP->isExactlyValue(-1.0) &&
13857	(!LegalOperations \|\| TLI.isOperationLegal(ISD::FNEG, VT))) {
13858	SDValue RHSNeg = DAG.getNode(ISD::FNEG, DL, VT, N0);
13859	AddToWorklist(RHSNeg.getNode());
13860	return DAG.getNode(ISD::FADD, DL, VT, N2, RHSNeg);
13861	}
13862
13863	// fma (fneg x), K, y -> fma x -K, y
13864	if (N0.getOpcode() == ISD::FNEG &&
13865	(TLI.isOperationLegal(ISD::ConstantFP, VT) \|\|
13866	(N1.hasOneUse() && !TLI.isFPImmLegal(N1CFP->getValueAPF(), VT,
13867	ForCodeSize)))) {
13868	return DAG.getNode(ISD::FMA, DL, VT, N0.getOperand(0),
13869	DAG.getNode(ISD::FNEG, DL, VT, N1), N2);
13870	}
13871	}
13872
13873	if (UnsafeFPMath) {
13874	// (fma x, c, x) -> (fmul x, (c+1))
13875	if (N1CFP && N0 == N2) {
13876	return DAG.getNode(
13877	ISD::FMUL, DL, VT, N0,
13878	DAG.getNode(ISD::FADD, DL, VT, N1, DAG.getConstantFP(1.0, DL, VT)));
13879	}
13880
13881	// (fma x, c, (fneg x)) -> (fmul x, (c-1))
13882	if (N1CFP && N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) {
13883	return DAG.getNode(
13884	ISD::FMUL, DL, VT, N0,
13885	DAG.getNode(ISD::FADD, DL, VT, N1, DAG.getConstantFP(-1.0, DL, VT)));
13886	}
13887	}
13888
13889	// fold ((fma (fneg X), Y, (fneg Z)) -> fneg (fma X, Y, Z))
13890	// fold ((fma X, (fneg Y), (fneg Z)) -> fneg (fma X, Y, Z))
13891	if (!TLI.isFNegFree(VT))
13892	if (SDValue Neg = TLI.getCheaperNegatedExpression(
13893	SDValue(N, 0), DAG, LegalOperations, ForCodeSize))
13894	return DAG.getNode(ISD::FNEG, DL, VT, Neg);
13895	return SDValue();
13896	}
13897
13898	// Combine multiple FDIVs with the same divisor into multiple FMULs by the
13899	// reciprocal.
13900	// E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip)
13901	// Notice that this is not always beneficial. One reason is different targets
13902	// may have different costs for FDIV and FMUL, so sometimes the cost of two
13903	// FDIVs may be lower than the cost of one FDIV and two FMULs. Another reason
13904	// is the critical path is increased from "one FDIV" to "one FDIV + one FMUL".
13905	SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
13906	// TODO: Limit this transform based on optsize/minsize - it always creates at
13907	// least 1 extra instruction. But the perf win may be substantial enough
13908	// that only minsize should restrict this.
13909	bool UnsafeMath = DAG.getTarget().Options.UnsafeFPMath;
13910	const SDNodeFlags Flags = N->getFlags();
13911	if (LegalDAG \|\| (!UnsafeMath && !Flags.hasAllowReciprocal()))
13912	return SDValue();
13913
13914	// Skip if current node is a reciprocal/fneg-reciprocal.
13915	SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
13916	ConstantFPSDNode N0CFP = isConstOrConstSplatFP(N0, / AllowUndefs */ true);
13917	if (N0CFP && (N0CFP->isExactlyValue(1.0) \|\| N0CFP->isExactlyValue(-1.0)))
13918	return SDValue();
13919
13920	// Exit early if the target does not want this transform or if there can't
13921	// possibly be enough uses of the divisor to make the transform worthwhile.
13922	unsigned MinUses = TLI.combineRepeatedFPDivisors();
13923
13924	// For splat vectors, scale the number of uses by the splat factor. If we can
13925	// convert the division into a scalar op, that will likely be much faster.
13926	unsigned NumElts = 1;
13927	EVT VT = N->getValueType(0);
13928	if (VT.isVector() && DAG.isSplatValue(N1))
13929	NumElts = VT.getVectorNumElements();
13930
13931	if (!MinUses \|\| (N1->use_size() * NumElts) < MinUses)
13932	return SDValue();
13933
13934	// Find all FDIV users of the same divisor.
13935	// Use a set because duplicates may be present in the user list.
13936	SetVector<SDNode *> Users;
13937	for (auto *U : N1->uses()) {
13938	if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1) {
13939	// Skip X/sqrt(X) that has not been simplified to sqrt(X) yet.
13940	if (U->getOperand(1).getOpcode() == ISD::FSQRT &&
13941	U->getOperand(0) == U->getOperand(1).getOperand(0) &&
13942	U->getFlags().hasAllowReassociation() &&
13943	U->getFlags().hasNoSignedZeros())
13944	continue;
13945
13946	// This division is eligible for optimization only if global unsafe math
13947	// is enabled or if this division allows reciprocal formation.
13948	if (UnsafeMath \|\| U->getFlags().hasAllowReciprocal())
13949	Users.insert(U);
13950	}
13951	}
13952
13953	// Now that we have the actual number of divisor uses, make sure it meets
13954	// the minimum threshold specified by the target.
13955	if ((Users.size() * NumElts) < MinUses)
13956	return SDValue();
13957
13958	SDLoc DL(N);
13959	SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
13960	SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1, Flags);
13961
13962	// Dividend / Divisor -> Dividend * Reciprocal
13963	for (auto *U : Users) {
13964	SDValue Dividend = U->getOperand(0);
13965	if (Dividend != FPOne) {
13966	SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(U), VT, Dividend,
13967	Reciprocal, Flags);
13968	CombineTo(U, NewNode);
13969	} else if (U != Reciprocal.getNode()) {
13970	// In the absence of fast-math-flags, this user node is always the
13971	// same node as Reciprocal, but with FMF they may be different nodes.
13972	CombineTo(U, Reciprocal);
13973	}
13974	}
13975	return SDValue(N, 0); // N was replaced.
13976	}
13977
13978	SDValue DAGCombiner::visitFDIV(SDNode *N) {
13979	SDValue N0 = N->getOperand(0);
13980	SDValue N1 = N->getOperand(1);
13981	ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
13982	ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
13983	EVT VT = N->getValueType(0);
13984	SDLoc DL(N);
13985	const TargetOptions &Options = DAG.getTarget().Options;
13986	SDNodeFlags Flags = N->getFlags();
13987	SelectionDAG::FlagInserter FlagsInserter(DAG, N);
13988
13989	if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
13990	return R;
13991
13992	// fold vector ops
13993	if (VT.isVector())
13994	if (SDValue FoldedVOp = SimplifyVBinOp(N))
13995	return FoldedVOp;
13996
13997	// fold (fdiv c1, c2) -> c1/c2
13998	if (N0CFP && N1CFP)
13999	return DAG.getNode(ISD::FDIV, SDLoc(N), VT, N0, N1);
14000
14001	if (SDValue NewSel = foldBinOpIntoSelect(N))
14002	return NewSel;
14003
14004	if (SDValue V = combineRepeatedFPDivisors(N))
14005	return V;
14006
14007	if (Options.UnsafeFPMath \|\| Flags.hasAllowReciprocal()) {
14008	// fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable.
14009	if (N1CFP) {
14010	// Compute the reciprocal 1.0 / c2.
14011	const APFloat &N1APF = N1CFP->getValueAPF();
14012	APFloat Recip(N1APF.getSemantics(), 1); // 1.0
14013	APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven);
14014	// Only do the transform if the reciprocal is a legal fp immediate that
14015	// isn't too nasty (eg NaN, denormal, ...).
14016	if ((st == APFloat::opOK \|\| st == APFloat::opInexact) && // Not too nasty
14017	(!LegalOperations \|\|
14018	// FIXME: custom lowering of ConstantFP might fail (see e.g. ARM
14019	// backend)... we should handle this gracefully after Legalize.
14020	// TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT) \|\|
14021	TLI.isOperationLegal(ISD::ConstantFP, VT) \|\|
14022	TLI.isFPImmLegal(Recip, VT, ForCodeSize)))
14023	return DAG.getNode(ISD::FMUL, DL, VT, N0,
14024	DAG.getConstantFP(Recip, DL, VT));
14025	}
14026
14027	// If this FDIV is part of a reciprocal square root, it may be folded
14028	// into a target-specific square root estimate instruction.
14029	if (N1.getOpcode() == ISD::FSQRT) {
14030	if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0), Flags))
14031	return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
14032	} else if (N1.getOpcode() == ISD::FP_EXTEND &&
14033	N1.getOperand(0).getOpcode() == ISD::FSQRT) {
14034	if (SDValue RV =
14035	buildRsqrtEstimate(N1.getOperand(0).getOperand(0), Flags)) {
14036	RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV);
14037	AddToWorklist(RV.getNode());
14038	return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
14039	}
14040	} else if (N1.getOpcode() == ISD::FP_ROUND &&
14041	N1.getOperand(0).getOpcode() == ISD::FSQRT) {
14042	if (SDValue RV =
14043	buildRsqrtEstimate(N1.getOperand(0).getOperand(0), Flags)) {
14044	RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1));
14045	AddToWorklist(RV.getNode());
14046	return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
14047	}
14048	} else if (N1.getOpcode() == ISD::FMUL) {
14049	// Look through an FMUL. Even though this won't remove the FDIV directly,
14050	// it's still worthwhile to get rid of the FSQRT if possible.
14051	SDValue Sqrt, Y;
14052	if (N1.getOperand(0).getOpcode() == ISD::FSQRT) {
14053	Sqrt = N1.getOperand(0);
14054	Y = N1.getOperand(1);
14055	} else if (N1.getOperand(1).getOpcode() == ISD::FSQRT) {
14056	Sqrt = N1.getOperand(1);
14057	Y = N1.getOperand(0);
14058	}
14059	if (Sqrt.getNode()) {
14060	// If the other multiply operand is known positive, pull it into the
14061	// sqrt. That will eliminate the division if we convert to an estimate.
14062	if (Flags.hasAllowReassociation() && N1.hasOneUse() &&
14063	N1->getFlags().hasAllowReassociation() && Sqrt.hasOneUse()) {
14064	SDValue A;
14065	if (Y.getOpcode() == ISD::FABS && Y.hasOneUse())
14066	A = Y.getOperand(0);
14067	else if (Y == Sqrt.getOperand(0))
14068	A = Y;
14069	if (A) {
14070	// X / (fabs(A) * sqrt(Z)) --> X / sqrt(AAZ) --> X * rsqrt(AAZ)
14071	// X / (A * sqrt(A)) --> X / sqrt(AAA) --> X * rsqrt(AAA)
14072	SDValue AA = DAG.getNode(ISD::FMUL, DL, VT, A, A);
14073	SDValue AAZ =
14074	DAG.getNode(ISD::FMUL, DL, VT, AA, Sqrt.getOperand(0));
14075	if (SDValue Rsqrt = buildRsqrtEstimate(AAZ, Flags))
14076	return DAG.getNode(ISD::FMUL, DL, VT, N0, Rsqrt);
14077
14078	// Estimate creation failed. Clean up speculatively created nodes.
14079	recursivelyDeleteUnusedNodes(AAZ.getNode());
14080	}
14081	}
14082
14083	// We found a FSQRT, so try to make this fold:
14084	// X / (Y * sqrt(Z)) -> X * (rsqrt(Z) / Y)
14085	if (SDValue Rsqrt = buildRsqrtEstimate(Sqrt.getOperand(0), Flags)) {
14086	SDValue Div = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, Rsqrt, Y);
14087	AddToWorklist(Div.getNode());
14088	return DAG.getNode(ISD::FMUL, DL, VT, N0, Div);
14089	}
14090	}
14091	}
14092
14093	// Fold into a reciprocal estimate and multiply instead of a real divide.
14094	if (Options.NoInfsFPMath \|\| Flags.hasNoInfs())
14095	if (SDValue RV = BuildDivEstimate(N0, N1, Flags))
14096	return RV;
14097	}
14098
14099	// Fold X/Sqrt(X) -> Sqrt(X)
14100	if ((Options.NoSignedZerosFPMath \|\| Flags.hasNoSignedZeros()) &&
14101	(Options.UnsafeFPMath \|\| Flags.hasAllowReassociation()))
14102	if (N1.getOpcode() == ISD::FSQRT && N0 == N1.getOperand(0))
14103	return N1;
14104
14105	// (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
14106	TargetLowering::NegatibleCost CostN0 =
14107	TargetLowering::NegatibleCost::Expensive;
14108	TargetLowering::NegatibleCost CostN1 =
14109	TargetLowering::NegatibleCost::Expensive;
14110	SDValue NegN0 =
14111	TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
14112	SDValue NegN1 =
14113	TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
14114	if (NegN0 && NegN1 &&
14115	(CostN0 == TargetLowering::NegatibleCost::Cheaper \|\|
14116	CostN1 == TargetLowering::NegatibleCost::Cheaper))
14117	return DAG.getNode(ISD::FDIV, SDLoc(N), VT, NegN0, NegN1);
14118
14119	return SDValue();
14120	}
14121
14122	SDValue DAGCombiner::visitFREM(SDNode *N) {
14123	SDValue N0 = N->getOperand(0);
14124	SDValue N1 = N->getOperand(1);
14125	ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
14126	ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
14127	EVT VT = N->getValueType(0);
14128	SDNodeFlags Flags = N->getFlags();
14129	SelectionDAG::FlagInserter FlagsInserter(DAG, N);
14130
14131	if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
14132	return R;
14133
14134	// fold (frem c1, c2) -> fmod(c1,c2)
14135	if (N0CFP && N1CFP)
14136	return DAG.getNode(ISD::FREM, SDLoc(N), VT, N0, N1);
14137
14138	if (SDValue NewSel = foldBinOpIntoSelect(N))
14139	return NewSel;
14140
14141	return SDValue();
14142	}
14143
14144	SDValue DAGCombiner::visitFSQRT(SDNode *N) {
14145	SDNodeFlags Flags = N->getFlags();
14146	const TargetOptions &Options = DAG.getTarget().Options;
14147
14148	// Require 'ninf' flag since sqrt(+Inf) = +Inf, but the estimation goes as:
14149	// sqrt(+Inf) == rsqrt(+Inf) * +Inf = 0 * +Inf = NaN
14150	if (!Flags.hasApproximateFuncs() \|\|
14151	(!Options.NoInfsFPMath && !Flags.hasNoInfs()))
14152	return SDValue();
14153
14154	SDValue N0 = N->getOperand(0);
14155	if (TLI.isFsqrtCheap(N0, DAG))
14156	return SDValue();
14157
14158	// FSQRT nodes have flags that propagate to the created nodes.
14159	// TODO: If this is N0/sqrt(N0), and we reach this node before trying to
14160	// transform the fdiv, we may produce a sub-optimal estimate sequence
14161	// because the reciprocal calculation may not have to filter out a
14162	// 0.0 input.
14163	return buildSqrtEstimate(N0, Flags);
14164	}
14165
14166	/// copysign(x, fp_extend(y)) -> copysign(x, y)
14167	/// copysign(x, fp_round(y)) -> copysign(x, y)
14168	static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) {
14169	SDValue N1 = N->getOperand(1);
14170	if ((N1.getOpcode() == ISD::FP_EXTEND \|\|
14171	N1.getOpcode() == ISD::FP_ROUND)) {
14172	EVT N1VT = N1->getValueType(0);
14173	EVT N1Op0VT = N1->getOperand(0).getValueType();
14174
14175	// Always fold no-op FP casts.
14176	if (N1VT == N1Op0VT)
14177	return true;
14178
14179	// Do not optimize out type conversion of f128 type yet.
14180	// For some targets like x86_64, configuration is changed to keep one f128
14181	// value in one SSE register, but instruction selection cannot handle
14182	// FCOPYSIGN on SSE registers yet.
14183	if (N1Op0VT == MVT::f128)
14184	return false;
14185
14186	// Avoid mismatched vector operand types, for better instruction selection.
14187	if (N1Op0VT.isVector())
14188	return false;
14189
14190	return true;
14191	}
14192	return false;
14193	}
14194
14195	SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
14196	SDValue N0 = N->getOperand(0);
14197	SDValue N1 = N->getOperand(1);
14198	bool N0CFP = DAG.isConstantFPBuildVectorOrConstantFP(N0);
14199	bool N1CFP = DAG.isConstantFPBuildVectorOrConstantFP(N1);
14200	EVT VT = N->getValueType(0);
14201
14202	if (N0CFP && N1CFP) // Constant fold
14203	return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1);
14204
14205	if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N->getOperand(1))) {
14206	const APFloat &V = N1C->getValueAPF();
14207	// copysign(x, c1) -> fabs(x) iff ispos(c1)
14208	// copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1)
14209	if (!V.isNegative()) {
14210	if (!LegalOperations \|\| TLI.isOperationLegal(ISD::FABS, VT))
14211	return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
14212	} else {
14213	if (!LegalOperations \|\| TLI.isOperationLegal(ISD::FNEG, VT))
14214	return DAG.getNode(ISD::FNEG, SDLoc(N), VT,
14215	DAG.getNode(ISD::FABS, SDLoc(N0), VT, N0));
14216	}
14217	}
14218
14219	// copysign(fabs(x), y) -> copysign(x, y)
14220	// copysign(fneg(x), y) -> copysign(x, y)
14221	// copysign(copysign(x,z), y) -> copysign(x, y)
14222	if (N0.getOpcode() == ISD::FABS \|\| N0.getOpcode() == ISD::FNEG \|\|
14223	N0.getOpcode() == ISD::FCOPYSIGN)
14224	return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0.getOperand(0), N1);
14225
14226	// copysign(x, abs(y)) -> abs(x)
14227	if (N1.getOpcode() == ISD::FABS)
14228	return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
14229
14230	// copysign(x, copysign(y,z)) -> copysign(x, z)
14231	if (N1.getOpcode() == ISD::FCOPYSIGN)
14232	return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(1));
14233
14234	// copysign(x, fp_extend(y)) -> copysign(x, y)
14235	// copysign(x, fp_round(y)) -> copysign(x, y)
14236	if (CanCombineFCOPYSIGN_EXTEND_ROUND(N))
14237	return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(0));
14238
14239	return SDValue();
14240	}
14241
14242	SDValue DAGCombiner::visitFPOW(SDNode *N) {
14243	ConstantFPSDNode *ExponentC = isConstOrConstSplatFP(N->getOperand(1));
14244	if (!ExponentC)
14245	return SDValue();
14246	SelectionDAG::FlagInserter FlagsInserter(DAG, N);
14247
14248	// Try to convert x ** (1/3) into cube root.
14249	// TODO: Handle the various flavors of long double.
14250	// TODO: Since we're approximating, we don't need an exact 1/3 exponent.
14251	// Some range near 1/3 should be fine.
14252	EVT VT = N->getValueType(0);
14253	if ((VT == MVT::f32 && ExponentC->getValueAPF().isExactlyValue(1.0f/3.0f)) \|\|
14254	(VT == MVT::f64 && ExponentC->getValueAPF().isExactlyValue(1.0/3.0))) {
14255	// pow(-0.0, 1/3) = +0.0; cbrt(-0.0) = -0.0.
14256	// pow(-inf, 1/3) = +inf; cbrt(-inf) = -inf.
14257	// pow(-val, 1/3) = nan; cbrt(-val) = -num.
14258	// For regular numbers, rounding may cause the results to differ.
14259	// Therefore, we require { nsz ninf nnan afn } for this transform.
14260	// TODO: We could select out the special cases if we don't have nsz/ninf.
14261	SDNodeFlags Flags = N->getFlags();
14262	if (!Flags.hasNoSignedZeros() \|\| !Flags.hasNoInfs() \|\| !Flags.hasNoNaNs() \|\|
14263	!Flags.hasApproximateFuncs())
14264	return SDValue();
14265
14266	// Do not create a cbrt() libcall if the target does not have it, and do not
14267	// turn a pow that has lowering support into a cbrt() libcall.
14268	if (!DAG.getLibInfo().has(LibFunc_cbrt) \|\|
14269	(!DAG.getTargetLoweringInfo().isOperationExpand(ISD::FPOW, VT) &&
14270	DAG.getTargetLoweringInfo().isOperationExpand(ISD::FCBRT, VT)))
14271	return SDValue();
14272
14273	return DAG.getNode(ISD::FCBRT, SDLoc(N), VT, N->getOperand(0));
14274	}
14275
14276	// Try to convert x (1/4) and x (3/4) into square roots.
14277	// x ** (1/2) is canonicalized to sqrt, so we do not bother with that case.
14278	// TODO: This could be extended (using a target hook) to handle smaller
14279	// power-of-2 fractional exponents.
14280	bool ExponentIs025 = ExponentC->getValueAPF().isExactlyValue(0.25);
14281	bool ExponentIs075 = ExponentC->getValueAPF().isExactlyValue(0.75);
14282	if (ExponentIs025 \|\| ExponentIs075) {
14283	// pow(-0.0, 0.25) = +0.0; sqrt(sqrt(-0.0)) = -0.0.
14284	// pow(-inf, 0.25) = +inf; sqrt(sqrt(-inf)) = NaN.
14285	// pow(-0.0, 0.75) = +0.0; sqrt(-0.0) * sqrt(sqrt(-0.0)) = +0.0.
14286	// pow(-inf, 0.75) = +inf; sqrt(-inf) * sqrt(sqrt(-inf)) = NaN.
14287	// For regular numbers, rounding may cause the results to differ.
14288	// Therefore, we require { nsz ninf afn } for this transform.
14289	// TODO: We could select out the special cases if we don't have nsz/ninf.
14290	SDNodeFlags Flags = N->getFlags();
14291
14292	// We only need no signed zeros for the 0.25 case.
14293	if ((!Flags.hasNoSignedZeros() && ExponentIs025) \|\| !Flags.hasNoInfs() \|\|
14294	!Flags.hasApproximateFuncs())
14295	return SDValue();
14296
14297	// Don't double the number of libcalls. We are trying to inline fast code.
14298	if (!DAG.getTargetLoweringInfo().isOperationLegalOrCustom(ISD::FSQRT, VT))
14299	return SDValue();
14300
14301	// Assume that libcalls are the smallest code.
14302	// TODO: This restriction should probably be lifted for vectors.
14303	if (ForCodeSize)
14304	return SDValue();
14305
14306	// pow(X, 0.25) --> sqrt(sqrt(X))
14307	SDLoc DL(N);
14308	SDValue Sqrt = DAG.getNode(ISD::FSQRT, DL, VT, N->getOperand(0));
14309	SDValue SqrtSqrt = DAG.getNode(ISD::FSQRT, DL, VT, Sqrt);
14310	if (ExponentIs025)
14311	return SqrtSqrt;
14312	// pow(X, 0.75) --> sqrt(X) * sqrt(sqrt(X))
14313	return DAG.getNode(ISD::FMUL, DL, VT, Sqrt, SqrtSqrt);
14314	}
14315
14316	return SDValue();
14317	}
14318
14319	static SDValue foldFPToIntToFP(SDNode *N, SelectionDAG &DAG,
14320	const TargetLowering &TLI) {
14321	// This optimization is guarded by a function attribute because it may produce
14322	// unexpected results. Ie, programs may be relying on the platform-specific
14323	// undefined behavior when the float-to-int conversion overflows.
14324	const Function &F = DAG.getMachineFunction().getFunction();
14325	Attribute StrictOverflow = F.getFnAttribute("strict-float-cast-overflow");
14326	if (StrictOverflow.getValueAsString().equals("false"))
14327	return SDValue();
14328
14329	// We only do this if the target has legal ftrunc. Otherwise, we'd likely be
14330	// replacing casts with a libcall. We also must be allowed to ignore -0.0
14331	// because FTRUNC will return -0.0 for (-1.0, -0.0), but using integer
14332	// conversions would return +0.0.
14333	// FIXME: We should be able to use node-level FMF here.
14334	// TODO: If strict math, should we use FABS (+ range check for signed cast)?
14335	EVT VT = N->getValueType(0);
14336	if (!TLI.isOperationLegal(ISD::FTRUNC, VT) \|\|
14337	!DAG.getTarget().Options.NoSignedZerosFPMath)
14338	return SDValue();
14339
14340	// fptosi/fptoui round towards zero, so converting from FP to integer and
14341	// back is the same as an 'ftrunc': [us]itofp (fpto[us]i X) --> ftrunc X
14342	SDValue N0 = N->getOperand(0);
14343	if (N->getOpcode() == ISD::SINT_TO_FP && N0.getOpcode() == ISD::FP_TO_SINT &&
14344	N0.getOperand(0).getValueType() == VT)
14345	return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0));
14346
14347	if (N->getOpcode() == ISD::UINT_TO_FP && N0.getOpcode() == ISD::FP_TO_UINT &&
14348	N0.getOperand(0).getValueType() == VT)
14349	return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0));
14350
14351	return SDValue();
14352	}
14353
14354	SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
14355	SDValue N0 = N->getOperand(0);
14356	EVT VT = N->getValueType(0);
14357	EVT OpVT = N0.getValueType();
14358
14359	// [us]itofp(undef) = 0, because the result value is bounded.
14360	if (N0.isUndef())
14361	return DAG.getConstantFP(0.0, SDLoc(N), VT);
14362
14363	// fold (sint_to_fp c1) -> c1fp
14364	if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
14365	// ...but only if the target supports immediate floating-point values
14366	(!LegalOperations \|\|
14367	TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
14368	return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
14369
14370	// If the input is a legal type, and SINT_TO_FP is not legal on this target,
14371	// but UINT_TO_FP is legal on this target, try to convert.
14372	if (!hasOperation(ISD::SINT_TO_FP, OpVT) &&
14373	hasOperation(ISD::UINT_TO_FP, OpVT)) {
14374	// If the sign bit is known to be zero, we can change this to UINT_TO_FP.
14375	if (DAG.SignBitIsZero(N0))
14376	return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
14377	}
14378
14379	// The next optimizations are desirable only if SELECT_CC can be lowered.
14380	// fold (sint_to_fp (setcc x, y, cc)) -> (select (setcc x, y, cc), -1.0, 0.0)
14381	if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 &&
14382	!VT.isVector() &&
14383	(!LegalOperations \|\| TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
14384	SDLoc DL(N);
14385	return DAG.getSelect(DL, VT, N0, DAG.getConstantFP(-1.0, DL, VT),
14386	DAG.getConstantFP(0.0, DL, VT));
14387	}
14388
14389	// fold (sint_to_fp (zext (setcc x, y, cc))) ->
14390	// (select (setcc x, y, cc), 1.0, 0.0)
14391	if (N0.getOpcode() == ISD::ZERO_EXTEND &&
14392	N0.getOperand(0).getOpcode() == ISD::SETCC && !VT.isVector() &&
14393	(!LegalOperations \|\| TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
14394	SDLoc DL(N);
14395	return DAG.getSelect(DL, VT, N0.getOperand(0),
14396	DAG.getConstantFP(1.0, DL, VT),
14397	DAG.getConstantFP(0.0, DL, VT));
14398	}
14399
14400	if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI))
14401	return FTrunc;
14402
14403	return SDValue();
14404	}
14405
14406	SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
14407	SDValue N0 = N->getOperand(0);
14408	EVT VT = N->getValueType(0);
14409	EVT OpVT = N0.getValueType();
14410
14411	// [us]itofp(undef) = 0, because the result value is bounded.
14412	if (N0.isUndef())
14413	return DAG.getConstantFP(0.0, SDLoc(N), VT);
14414
14415	// fold (uint_to_fp c1) -> c1fp
14416	if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
14417	// ...but only if the target supports immediate floating-point values
14418	(!LegalOperations \|\|
14419	TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
14420	return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
14421
14422	// If the input is a legal type, and UINT_TO_FP is not legal on this target,
14423	// but SINT_TO_FP is legal on this target, try to convert.
14424	if (!hasOperation(ISD::UINT_TO_FP, OpVT) &&
14425	hasOperation(ISD::SINT_TO_FP, OpVT)) {
14426	// If the sign bit is known to be zero, we can change this to SINT_TO_FP.
14427	if (DAG.SignBitIsZero(N0))
14428	return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
14429	}
14430
14431	// fold (uint_to_fp (setcc x, y, cc)) -> (select (setcc x, y, cc), 1.0, 0.0)
14432	if (N0.getOpcode() == ISD::SETCC && !VT.isVector() &&
14433	(!LegalOperations \|\| TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
14434	SDLoc DL(N);
14435	return DAG.getSelect(DL, VT, N0, DAG.getConstantFP(1.0, DL, VT),
14436	DAG.getConstantFP(0.0, DL, VT));
14437	}
14438
14439	if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI))
14440	return FTrunc;
14441
14442	return SDValue();
14443	}
14444
14445	// Fold (fp_to_{s/u}int ({s/u}int_to_fpx)) -> zext x, sext x, trunc x, or x
14446	static SDValue FoldIntToFPToInt(SDNode *N, SelectionDAG &DAG) {
14447	SDValue N0 = N->getOperand(0);
14448	EVT VT = N->getValueType(0);
14449
14450	if (N0.getOpcode() != ISD::UINT_TO_FP && N0.getOpcode() != ISD::SINT_TO_FP)
14451	return SDValue();
14452
14453	SDValue Src = N0.getOperand(0);
14454	EVT SrcVT = Src.getValueType();
14455	bool IsInputSigned = N0.getOpcode() == ISD::SINT_TO_FP;
14456	bool IsOutputSigned = N->getOpcode() == ISD::FP_TO_SINT;
14457
14458	// We can safely assume the conversion won't overflow the output range,
14459	// because (for example) (uint8_t)18293.f is undefined behavior.
14460
14461	// Since we can assume the conversion won't overflow, our decision as to
14462	// whether the input will fit in the float should depend on the minimum
14463	// of the input range and output range.
14464
14465	// This means this is also safe for a signed input and unsigned output, since
14466	// a negative input would lead to undefined behavior.
14467	unsigned InputSize = (int)SrcVT.getScalarSizeInBits() - IsInputSigned;
14468	unsigned OutputSize = (int)VT.getScalarSizeInBits() - IsOutputSigned;
14469	unsigned ActualSize = std::min(InputSize, OutputSize);
14470	const fltSemantics &sem = DAG.EVTToAPFloatSemantics(N0.getValueType());
14471
14472	// We can only fold away the float conversion if the input range can be
14473	// represented exactly in the float range.
14474	if (APFloat::semanticsPrecision(sem) >= ActualSize) {
14475	if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits()) {
14476	unsigned ExtOp = IsInputSigned && IsOutputSigned ? ISD::SIGN_EXTEND
14477	: ISD::ZERO_EXTEND;
14478	return DAG.getNode(ExtOp, SDLoc(N), VT, Src);
14479	}
14480	if (VT.getScalarSizeInBits() < SrcVT.getScalarSizeInBits())
14481	return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Src);
14482	return DAG.getBitcast(VT, Src);
14483	}
14484	return SDValue();
14485	}
14486
14487	SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
14488	SDValue N0 = N->getOperand(0);
14489	EVT VT = N->getValueType(0);
14490
14491	// fold (fp_to_sint undef) -> undef
14492	if (N0.isUndef())
14493	return DAG.getUNDEF(VT);
14494
14495	// fold (fp_to_sint c1fp) -> c1
14496	if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
14497	return DAG.getNode(ISD::FP_TO_SINT, SDLoc(N), VT, N0);
14498
14499	return FoldIntToFPToInt(N, DAG);
14500	}
14501
14502	SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
14503	SDValue N0 = N->getOperand(0);
14504	EVT VT = N->getValueType(0);
14505
14506	// fold (fp_to_uint undef) -> undef
14507	if (N0.isUndef())
14508	return DAG.getUNDEF(VT);
14509
14510	// fold (fp_to_uint c1fp) -> c1
14511	if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
14512	return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), VT, N0);
14513
14514	return FoldIntToFPToInt(N, DAG);
14515	}
14516
14517	SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
14518	SDValue N0 = N->getOperand(0);
14519	SDValue N1 = N->getOperand(1);
14520	ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
14521	EVT VT = N->getValueType(0);
14522
14523	// fold (fp_round c1fp) -> c1fp
14524	if (N0CFP)
14525	return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT, N0, N1);
14526
14527	// fold (fp_round (fp_extend x)) -> x
14528	if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType())
14529	return N0.getOperand(0);
14530
14531	// fold (fp_round (fp_round x)) -> (fp_round x)
14532	if (N0.getOpcode() == ISD::FP_ROUND) {
14533	const bool NIsTrunc = N->getConstantOperandVal(1) == 1;
14534	const bool N0IsTrunc = N0.getConstantOperandVal(1) == 1;
14535
14536	// Skip this folding if it results in an fp_round from f80 to f16.
14537	//
14538	// f80 to f16 always generates an expensive (and as yet, unimplemented)
14539	// libcall to __truncxfhf2 instead of selecting native f16 conversion
14540	// instructions from f32 or f64. Moreover, the first (value-preserving)
14541	// fp_round from f80 to either f32 or f64 may become a NOP in platforms like
14542	// x86.
14543	if (N0.getOperand(0).getValueType() == MVT::f80 && VT == MVT::f16)
14544	return SDValue();
14545
14546	// If the first fp_round isn't a value preserving truncation, it might
14547	// introduce a tie in the second fp_round, that wouldn't occur in the
14548	// single-step fp_round we want to fold to.
14549	// In other words, double rounding isn't the same as rounding.
14550	// Also, this is a value preserving truncation iff both fp_round's are.
14551	if (DAG.getTarget().Options.UnsafeFPMath \|\| N0IsTrunc) {
14552	SDLoc DL(N);
14553	return DAG.getNode(ISD::FP_ROUND, DL, VT, N0.getOperand(0),
14554	DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc, DL));
14555	}
14556	}
14557
14558	// fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
14559	if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse()) {
14560	SDValue Tmp = DAG.getNode(ISD::FP_ROUND, SDLoc(N0), VT,
14561	N0.getOperand(0), N1);
14562	AddToWorklist(Tmp.getNode());
14563	return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
14564	Tmp, N0.getOperand(1));
14565	}
14566
14567	if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
14568	return NewVSel;
14569
14570	return SDValue();
14571	}
14572
14573	SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
14574	SDValue N0 = N->getOperand(0);
14575	EVT VT = N->getValueType(0);
14576
14577	// If this is fp_round(fpextend), don't fold it, allow ourselves to be folded.
14578	if (N->hasOneUse() &&
14579	N->use_begin()->getOpcode() == ISD::FP_ROUND)
14580	return SDValue();
14581
14582	// fold (fp_extend c1fp) -> c1fp
14583	if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
14584	return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, N0);
14585
14586	// fold (fp_extend (fp16_to_fp op)) -> (fp16_to_fp op)
14587	if (N0.getOpcode() == ISD::FP16_TO_FP &&
14588	TLI.getOperationAction(ISD::FP16_TO_FP, VT) == TargetLowering::Legal)
14589	return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), VT, N0.getOperand(0));
14590
14591	// Turn fp_extend(fp_round(X, 1)) -> x since the fp_round doesn't affect the
14592	// value of X.
14593	if (N0.getOpcode() == ISD::FP_ROUND
14594	&& N0.getConstantOperandVal(1) == 1) {
14595	SDValue In = N0.getOperand(0);
14596	if (In.getValueType() == VT) return In;
14597	if (VT.bitsLT(In.getValueType()))
14598	return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT,
14599	In, N0.getOperand(1));
14600	return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, In);
14601	}
14602
14603	// fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
14604	if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
14605	TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
14606	LoadSDNode *LN0 = cast<LoadSDNode>(N0);
14607	SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
14608	LN0->getChain(),
14609	LN0->getBasePtr(), N0.getValueType(),
14610	LN0->getMemOperand());
14611	CombineTo(N, ExtLoad);
14612	CombineTo(N0.getNode(),
14613	DAG.getNode(ISD::FP_ROUND, SDLoc(N0),
14614	N0.getValueType(), ExtLoad,
14615	DAG.getIntPtrConstant(1, SDLoc(N0))),
14616	ExtLoad.getValue(1));
14617	return SDValue(N, 0); // Return N so it doesn't get rechecked!
14618	}
14619
14620	if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
14621	return NewVSel;
14622
14623	return SDValue();
14624	}
14625
14626	SDValue DAGCombiner::visitFCEIL(SDNode *N) {
14627	SDValue N0 = N->getOperand(0);
14628	EVT VT = N->getValueType(0);
14629
14630	// fold (fceil c1) -> fceil(c1)
14631	if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
14632	return DAG.getNode(ISD::FCEIL, SDLoc(N), VT, N0);
14633
14634	return SDValue();
14635	}
14636
14637	SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
14638	SDValue N0 = N->getOperand(0);
14639	EVT VT = N->getValueType(0);
14640
14641	// fold (ftrunc c1) -> ftrunc(c1)
14642	if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
14643	return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0);
14644
14645	// fold ftrunc (known rounded int x) -> x
14646	// ftrunc is a part of fptosi/fptoui expansion on some targets, so this is
14647	// likely to be generated to extract integer from a rounded floating value.
14648	switch (N0.getOpcode()) {
14649	default: break;
14650	case ISD::FRINT:
14651	case ISD::FTRUNC:
14652	case ISD::FNEARBYINT:
14653	case ISD::FFLOOR:
14654	case ISD::FCEIL:
14655	return N0;
14656	}
14657
14658	return SDValue();
14659	}
14660
14661	SDValue DAGCombiner::visitFFLOOR(SDNode *N) {
14662	SDValue N0 = N->getOperand(0);
14663	EVT VT = N->getValueType(0);
14664
14665	// fold (ffloor c1) -> ffloor(c1)
14666	if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
14667	return DAG.getNode(ISD::FFLOOR, SDLoc(N), VT, N0);
14668
14669	return SDValue();
14670	}
14671
14672	SDValue DAGCombiner::visitFNEG(SDNode *N) {
14673	SDValue N0 = N->getOperand(0);
14674	EVT VT = N->getValueType(0);
14675	SelectionDAG::FlagInserter FlagsInserter(DAG, N);
14676
14677	// Constant fold FNEG.
14678	if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
14679	return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0);
14680
14681	if (SDValue NegN0 =
14682	TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize))
14683	return NegN0;
14684
14685	// -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0
14686	// FIXME: This is duplicated in getNegatibleCost, but getNegatibleCost doesn't
14687	// know it was called from a context with a nsz flag if the input fsub does
14688	// not.
14689	if (N0.getOpcode() == ISD::FSUB &&
14690	(DAG.getTarget().Options.NoSignedZerosFPMath \|\|
14691	N->getFlags().hasNoSignedZeros()) && N0.hasOneUse()) {
14692	return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N0.getOperand(1),
14693	N0.getOperand(0));
14694	}
14695
14696	if (SDValue Cast = foldSignChangeInBitcast(N))
14697	return Cast;
14698
14699	return SDValue();
14700	}
14701
14702	static SDValue visitFMinMax(SelectionDAG &DAG, SDNode *N,
14703	APFloat (*Op)(const APFloat &, const APFloat &)) {
14704	SDValue N0 = N->getOperand(0);
14705	SDValue N1 = N->getOperand(1);
14706	EVT VT = N->getValueType(0);
14707	const ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
14708	const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
14709	const SDNodeFlags Flags = N->getFlags();
14710	unsigned Opc = N->getOpcode();
14711	bool PropagatesNaN = Opc == ISD::FMINIMUM \|\| Opc == ISD::FMAXIMUM;
14712	bool IsMin = Opc == ISD::FMINNUM \|\| Opc == ISD::FMINIMUM;
14713	SelectionDAG::FlagInserter FlagsInserter(DAG, N);
14714
14715	if (N0CFP && N1CFP) {
14716	const APFloat &C0 = N0CFP->getValueAPF();
14717	const APFloat &C1 = N1CFP->getValueAPF();
14718	return DAG.getConstantFP(Op(C0, C1), SDLoc(N), VT);
14719	}
14720
14721	// Canonicalize to constant on RHS.
14722	if (DAG.isConstantFPBuildVectorOrConstantFP(N0) &&
14723	!DAG.isConstantFPBuildVectorOrConstantFP(N1))
14724	return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
14725
14726	if (N1CFP) {
14727	const APFloat &AF = N1CFP->getValueAPF();
14728
14729	// minnum(X, nan) -> X
14730	// maxnum(X, nan) -> X
14731	// minimum(X, nan) -> nan
14732	// maximum(X, nan) -> nan
14733	if (AF.isNaN())
14734	return PropagatesNaN ? N->getOperand(1) : N->getOperand(0);
14735
14736	// In the following folds, inf can be replaced with the largest finite
14737	// float, if the ninf flag is set.
14738	if (AF.isInfinity() \|\| (Flags.hasNoInfs() && AF.isLargest())) {
14739	// minnum(X, -inf) -> -inf
14740	// maxnum(X, +inf) -> +inf
14741	// minimum(X, -inf) -> -inf if nnan
14742	// maximum(X, +inf) -> +inf if nnan
14743	if (IsMin == AF.isNegative() && (!PropagatesNaN \|\| Flags.hasNoNaNs()))
14744	return N->getOperand(1);
14745
14746	// minnum(X, +inf) -> X if nnan
14747	// maxnum(X, -inf) -> X if nnan
14748	// minimum(X, +inf) -> X
14749	// maximum(X, -inf) -> X
14750	if (IsMin != AF.isNegative() && (PropagatesNaN \|\| Flags.hasNoNaNs()))
14751	return N->getOperand(0);
14752	}
14753	}
14754
14755	return SDValue();
14756	}
14757
14758	SDValue DAGCombiner::visitFMINNUM(SDNode *N) {
14759	return visitFMinMax(DAG, N, minnum);
14760	}
14761
14762	SDValue DAGCombiner::visitFMAXNUM(SDNode *N) {
14763	return visitFMinMax(DAG, N, maxnum);
14764	}
14765
14766	SDValue DAGCombiner::visitFMINIMUM(SDNode *N) {
14767	return visitFMinMax(DAG, N, minimum);
14768	}
14769
14770	SDValue DAGCombiner::visitFMAXIMUM(SDNode *N) {
14771	return visitFMinMax(DAG, N, maximum);
14772	}
14773
14774	SDValue DAGCombiner::visitFABS(SDNode *N) {
14775	SDValue N0 = N->getOperand(0);
14776	EVT VT = N->getValueType(0);
14777
14778	// fold (fabs c1) -> fabs(c1)
14779	if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
14780	return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
14781
14782	// fold (fabs (fabs x)) -> (fabs x)
14783	if (N0.getOpcode() == ISD::FABS)
14784	return N->getOperand(0);
14785
14786	// fold (fabs (fneg x)) -> (fabs x)
14787	// fold (fabs (fcopysign x, y)) -> (fabs x)
14788	if (N0.getOpcode() == ISD::FNEG \|\| N0.getOpcode() == ISD::FCOPYSIGN)
14789	return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0.getOperand(0));
14790
14791	if (SDValue Cast = foldSignChangeInBitcast(N))
14792	return Cast;
14793
14794	return SDValue();
14795	}
14796
14797	SDValue DAGCombiner::visitBRCOND(SDNode *N) {
14798	SDValue Chain = N->getOperand(0);
14799	SDValue N1 = N->getOperand(1);
14800	SDValue N2 = N->getOperand(2);
14801
14802	// BRCOND(FREEZE(cond)) is equivalent to BRCOND(cond) (both are
14803	// nondeterministic jumps).
14804	if (N1->getOpcode() == ISD::FREEZE && N1.hasOneUse()) {
14805	return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other, Chain,
14806	N1->getOperand(0), N2);
14807	}
14808
14809	// If N is a constant we could fold this into a fallthrough or unconditional
14810	// branch. However that doesn't happen very often in normal code, because
14811	// Instcombine/SimplifyCFG should have handled the available opportunities.
14812	// If we did this folding here, it would be necessary to update the
14813	// MachineBasicBlock CFG, which is awkward.
14814
14815	// fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal
14816	// on the target.
14817	if (N1.getOpcode() == ISD::SETCC &&
14818	TLI.isOperationLegalOrCustom(ISD::BR_CC,
14819	N1.getOperand(0).getValueType())) {
14820	return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
14821	Chain, N1.getOperand(2),
14822	N1.getOperand(0), N1.getOperand(1), N2);
14823	}
14824
14825	if (N1.hasOneUse()) {
14826	// rebuildSetCC calls visitXor which may change the Chain when there is a
14827	// STRICT_FSETCC/STRICT_FSETCCS involved. Use a handle to track changes.
14828	HandleSDNode ChainHandle(Chain);
14829	if (SDValue NewN1 = rebuildSetCC(N1))
14830	return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other,
14831	ChainHandle.getValue(), NewN1, N2);
14832	}
14833
14834	return SDValue();
14835	}
14836
14837	SDValue DAGCombiner::rebuildSetCC(SDValue N) {
14838	if (N.getOpcode() == ISD::SRL \|\|
14839	(N.getOpcode() == ISD::TRUNCATE &&
14840	(N.getOperand(0).hasOneUse() &&
14841	N.getOperand(0).getOpcode() == ISD::SRL))) {
14842	// Look pass the truncate.
14843	if (N.getOpcode() == ISD::TRUNCATE)
14844	N = N.getOperand(0);
14845
14846	// Match this pattern so that we can generate simpler code:
14847	//
14848	// %a = ...
14849	// %b = and i32 %a, 2
14850	// %c = srl i32 %b, 1
14851	// brcond i32 %c ...
14852	//
14853	// into
14854	//
14855	// %a = ...
14856	// %b = and i32 %a, 2
14857	// %c = setcc eq %b, 0
14858	// brcond %c ...
14859	//
14860	// This applies only when the AND constant value has one bit set and the
14861	// SRL constant is equal to the log2 of the AND constant. The back-end is
14862	// smart enough to convert the result into a TEST/JMP sequence.
14863	SDValue Op0 = N.getOperand(0);
14864	SDValue Op1 = N.getOperand(1);
14865
14866	if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::Constant) {
14867	SDValue AndOp1 = Op0.getOperand(1);
14868
14869	if (AndOp1.getOpcode() == ISD::Constant) {
14870	const APInt &AndConst = cast<ConstantSDNode>(AndOp1)->getAPIntValue();
14871
14872	if (AndConst.isPowerOf2() &&
14873	cast<ConstantSDNode>(Op1)->getAPIntValue() == AndConst.logBase2()) {
14874	SDLoc DL(N);
14875	return DAG.getSetCC(DL, getSetCCResultType(Op0.getValueType()),
14876	Op0, DAG.getConstant(0, DL, Op0.getValueType()),
14877	ISD::SETNE);
14878	}
14879	}
14880	}
14881	}
14882
14883	// Transform (brcond (xor x, y)) -> (brcond (setcc, x, y, ne))
14884	// Transform (brcond (xor (xor x, y), -1)) -> (brcond (setcc, x, y, eq))
14885	if (N.getOpcode() == ISD::XOR) {
14886	// Because we may call this on a speculatively constructed
14887	// SimplifiedSetCC Node, we need to simplify this node first.
14888	// Ideally this should be folded into SimplifySetCC and not
14889	// here. For now, grab a handle to N so we don't lose it from
14890	// replacements interal to the visit.
14891	HandleSDNode XORHandle(N);
14892	while (N.getOpcode() == ISD::XOR) {
14893	SDValue Tmp = visitXOR(N.getNode());
14894	// No simplification done.
14895	if (!Tmp.getNode())
14896	break;
14897	// Returning N is form in-visit replacement that may invalidated
14898	// N. Grab value from Handle.
14899	if (Tmp.getNode() == N.getNode())
14900	N = XORHandle.getValue();
14901	else // Node simplified. Try simplifying again.
14902	N = Tmp;
14903	}
14904
14905	if (N.getOpcode() != ISD::XOR)
14906	return N;
14907
14908	SDValue Op0 = N->getOperand(0);
14909	SDValue Op1 = N->getOperand(1);
14910
14911	if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) {
14912	bool Equal = false;
14913	// (brcond (xor (xor x, y), -1)) -> (brcond (setcc x, y, eq))
14914	if (isBitwiseNot(N) && Op0.hasOneUse() && Op0.getOpcode() == ISD::XOR &&
14915	Op0.getValueType() == MVT::i1) {
14916	N = Op0;
14917	Op0 = N->getOperand(0);
14918	Op1 = N->getOperand(1);
14919	Equal = true;
14920	}
14921
14922	EVT SetCCVT = N.getValueType();
14923	if (LegalTypes)
14924	SetCCVT = getSetCCResultType(SetCCVT);
14925	// Replace the uses of XOR with SETCC
14926	return DAG.getSetCC(SDLoc(N), SetCCVT, Op0, Op1,
14927	Equal ? ISD::SETEQ : ISD::SETNE);
14928	}
14929	}
14930
14931	return SDValue();
14932	}
14933
14934	// Operand List for BR_CC: Chain, CondCC, CondLHS, CondRHS, DestBB.
14935	//
14936	SDValue DAGCombiner::visitBR_CC(SDNode *N) {
14937	CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1));
14938	SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3);
14939
14940	// If N is a constant we could fold this into a fallthrough or unconditional
14941	// branch. However that doesn't happen very often in normal code, because
14942	// Instcombine/SimplifyCFG should have handled the available opportunities.
14943	// If we did this folding here, it would be necessary to update the
14944	// MachineBasicBlock CFG, which is awkward.
14945
14946	// Use SimplifySetCC to simplify SETCC's.
14947	SDValue Simp = SimplifySetCC(getSetCCResultType(CondLHS.getValueType()),
14948	CondLHS, CondRHS, CC->get(), SDLoc(N),
14949	false);
14950	if (Simp.getNode()) AddToWorklist(Simp.getNode());
14951
14952	// fold to a simpler setcc
14953	if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC)
14954	return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
14955	N->getOperand(0), Simp.getOperand(2),
14956	Simp.getOperand(0), Simp.getOperand(1),
14957	N->getOperand(4));
14958
14959	return SDValue();
14960	}
14961
14962	static bool getCombineLoadStoreParts(SDNode *N, unsigned Inc, unsigned Dec,
14963	bool &IsLoad, bool &IsMasked, SDValue &Ptr,
14964	const TargetLowering &TLI) {
14965	if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
14966	if (LD->isIndexed())
14967	return false;
14968	EVT VT = LD->getMemoryVT();
14969	if (!TLI.isIndexedLoadLegal(Inc, VT) && !TLI.isIndexedLoadLegal(Dec, VT))
14970	return false;
14971	Ptr = LD->getBasePtr();
14972	} else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
14973	if (ST->isIndexed())
14974	return false;
14975	EVT VT = ST->getMemoryVT();
14976	if (!TLI.isIndexedStoreLegal(Inc, VT) && !TLI.isIndexedStoreLegal(Dec, VT))
14977	return false;
14978	Ptr = ST->getBasePtr();
14979	IsLoad = false;
14980	} else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(N)) {
14981	if (LD->isIndexed())
14982	return false;
14983	EVT VT = LD->getMemoryVT();
14984	if (!TLI.isIndexedMaskedLoadLegal(Inc, VT) &&
14985	!TLI.isIndexedMaskedLoadLegal(Dec, VT))
14986	return false;
14987	Ptr = LD->getBasePtr();
14988	IsMasked = true;
14989	} else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(N)) {
14990	if (ST->isIndexed())
14991	return false;
14992	EVT VT = ST->getMemoryVT();
14993	if (!TLI.isIndexedMaskedStoreLegal(Inc, VT) &&
14994	!TLI.isIndexedMaskedStoreLegal(Dec, VT))
14995	return false;
14996	Ptr = ST->getBasePtr();
14997	IsLoad = false;
14998	IsMasked = true;
14999	} else {
15000	return false;
15001	}
15002	return true;
15003	}
15004
15005	/// Try turning a load/store into a pre-indexed load/store when the base
15006	/// pointer is an add or subtract and it has other uses besides the load/store.
15007	/// After the transformation, the new indexed load/store has effectively folded
15008	/// the add/subtract in and all of its other uses are redirected to the
15009	/// new load/store.
15010	bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
15011	if (Level < AfterLegalizeDAG)
15012	return false;
15013
15014	bool IsLoad = true;
15015	bool IsMasked = false;
15016	SDValue Ptr;
15017	if (!getCombineLoadStoreParts(N, ISD::PRE_INC, ISD::PRE_DEC, IsLoad, IsMasked,
15018	Ptr, TLI))
15019	return false;
15020
15021	// If the pointer is not an add/sub, or if it doesn't have multiple uses, bail
15022	// out. There is no reason to make this a preinc/predec.
15023	if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) \|\|
15024	Ptr.getNode()->hasOneUse())
15025	return false;
15026
15027	// Ask the target to do addressing mode selection.
15028	SDValue BasePtr;
15029	SDValue Offset;
15030	ISD::MemIndexedMode AM = ISD::UNINDEXED;
15031	if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG))
15032	return false;
15033
15034	// Backends without true r+i pre-indexed forms may need to pass a
15035	// constant base with a variable offset so that constant coercion
15036	// will work with the patterns in canonical form.
15037	bool Swapped = false;
15038	if (isa<ConstantSDNode>(BasePtr)) {
15039	std::swap(BasePtr, Offset);
15040	Swapped = true;
15041	}
15042
15043	// Don't create a indexed load / store with zero offset.
15044	if (isNullConstant(Offset))
15045	return false;
15046
15047	// Try turning it into a pre-indexed load / store except when:
15048	// 1) The new base ptr is a frame index.
15049	// 2) If N is a store and the new base ptr is either the same as or is a
15050	// predecessor of the value being stored.
15051	// 3) Another use of old base ptr is a predecessor of N. If ptr is folded
15052	// that would create a cycle.
15053	// 4) All uses are load / store ops that use it as old base ptr.
15054
15055	// Check #1. Preinc'ing a frame index would require copying the stack pointer
15056	// (plus the implicit offset) to a register to preinc anyway.
15057	if (isa<FrameIndexSDNode>(BasePtr) \|\| isa<RegisterSDNode>(BasePtr))
15058	return false;
15059
15060	// Check #2.
15061	if (!IsLoad) {
15062	SDValue Val = IsMasked ? cast<MaskedStoreSDNode>(N)->getValue()
15063	: cast<StoreSDNode>(N)->getValue();
15064
15065	// Would require a copy.
15066	if (Val == BasePtr)
15067	return false;
15068
15069	// Would create a cycle.
15070	if (Val == Ptr \|\| Ptr->isPredecessorOf(Val.getNode()))
15071	return false;
15072	}
15073
15074	// Caches for hasPredecessorHelper.
15075	SmallPtrSet<const SDNode *, 32> Visited;
15076	SmallVector<const SDNode *, 16> Worklist;
15077	Worklist.push_back(N);
15078
15079	// If the offset is a constant, there may be other adds of constants that
15080	// can be folded with this one. We should do this to avoid having to keep
15081	// a copy of the original base pointer.
15082	SmallVector<SDNode *, 16> OtherUses;
15083	if (isa<ConstantSDNode>(Offset))
15084	for (SDNode::use_iterator UI = BasePtr.getNode()->use_begin(),
15085	UE = BasePtr.getNode()->use_end();
15086	UI != UE; ++UI) {
15087	SDUse &Use = UI.getUse();
15088	// Skip the use that is Ptr and uses of other results from BasePtr's
15089	// node (important for nodes that return multiple results).
15090	if (Use.getUser() == Ptr.getNode() \|\| Use != BasePtr)
15091	continue;
15092
15093	if (SDNode::hasPredecessorHelper(Use.getUser(), Visited, Worklist))
15094	continue;
15095
15096	if (Use.getUser()->getOpcode() != ISD::ADD &&
15097	Use.getUser()->getOpcode() != ISD::SUB) {
15098	OtherUses.clear();
15099	break;
15100	}
15101
15102	SDValue Op1 = Use.getUser()->getOperand((UI.getOperandNo() + 1) & 1);
15103	if (!isa<ConstantSDNode>(Op1)) {
15104	OtherUses.clear();
15105	break;
15106	}
15107
15108	// FIXME: In some cases, we can be smarter about this.
15109	if (Op1.getValueType() != Offset.getValueType()) {
15110	OtherUses.clear();
15111	break;
15112	}
15113
15114	OtherUses.push_back(Use.getUser());
15115	}
15116
15117	if (Swapped)
15118	std::swap(BasePtr, Offset);
15119
15120	// Now check for #3 and #4.
15121	bool RealUse = false;
15122
15123	for (SDNode *Use : Ptr.getNode()->uses()) {
15124	if (Use == N)
15125	continue;
15126	if (SDNode::hasPredecessorHelper(Use, Visited, Worklist))
15127	return false;
15128
15129	// If Ptr may be folded in addressing mode of other use, then it's
15130	// not profitable to do this transformation.
15131	if (!canFoldInAddressingMode(Ptr.getNode(), Use, DAG, TLI))
15132	RealUse = true;
15133	}
15134
15135	if (!RealUse)
15136	return false;
15137
15138	SDValue Result;
15139	if (!IsMasked) {
15140	if (IsLoad)
15141	Result = DAG.getIndexedLoad(SDValue(N, 0), SDLoc(N), BasePtr, Offset, AM);
15142	else
15143	Result =
15144	DAG.getIndexedStore(SDValue(N, 0), SDLoc(N), BasePtr, Offset, AM);
15145	} else {
15146	if (IsLoad)
15147	Result = DAG.getIndexedMaskedLoad(SDValue(N, 0), SDLoc(N), BasePtr,
15148	Offset, AM);
15149	else
15150	Result = DAG.getIndexedMaskedStore(SDValue(N, 0), SDLoc(N), BasePtr,
15151	Offset, AM);
15152	}
15153	++PreIndexedNodes;
15154	++NodesCombined;
15155	LLVM_DEBUG(dbgs() << "\nReplacing.4 "; N->dump(&DAG); dbgs() << "\nWith: ";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("dagcombine")) { dbgs() << "\nReplacing.4 "; N->dump (&DAG); dbgs() << "\nWith: "; Result.getNode()-> dump(&DAG); dbgs() << '\n'; } } while (false)
15156	Result.getNode()->dump(&DAG); dbgs() << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("dagcombine")) { dbgs() << "\nReplacing.4 "; N->dump (&DAG); dbgs() << "\nWith: "; Result.getNode()-> dump(&DAG); dbgs() << '\n'; } } while (false);
15157	WorklistRemover DeadNodes(*this);
15158	if (IsLoad) {
15159	DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
15160	DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
15161	} else {
15162	DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
15163	}
15164
15165	// Finally, since the node is now dead, remove it from the graph.
15166	deleteAndRecombine(N);
15167
15168	if (Swapped)
15169	std::swap(BasePtr, Offset);
15170
15171	// Replace other uses of BasePtr that can be updated to use Ptr
15172	for (unsigned i = 0, e = OtherUses.size(); i != e; ++i) {
15173	unsigned OffsetIdx = 1;
15174	if (OtherUses[i]->getOperand(OffsetIdx).getNode() == BasePtr.getNode())
15175	OffsetIdx = 0;
15176	assert(OtherUses[i]->getOperand(!OffsetIdx).getNode() ==((OtherUses[i]->getOperand(!OffsetIdx).getNode() == BasePtr .getNode() && "Expected BasePtr operand") ? static_cast <void> (0) : __assert_fail ("OtherUses[i]->getOperand(!OffsetIdx).getNode() == BasePtr.getNode() && \"Expected BasePtr operand\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 15177, __PRETTY_FUNCTION__))
15177	BasePtr.getNode() && "Expected BasePtr operand")((OtherUses[i]->getOperand(!OffsetIdx).getNode() == BasePtr .getNode() && "Expected BasePtr operand") ? static_cast <void> (0) : __assert_fail ("OtherUses[i]->getOperand(!OffsetIdx).getNode() == BasePtr.getNode() && \"Expected BasePtr operand\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 15177, __PRETTY_FUNCTION__));
15178
15179	// We need to replace ptr0 in the following expression:
15180	// x0 * offset0 + y0 * ptr0 = t0
15181	// knowing that
15182	// x1 * offset1 + y1 * ptr0 = t1 (the indexed load/store)
15183	//
15184	// where x0, x1, y0 and y1 in {-1, 1} are given by the types of the
15185	// indexed load/store and the expression that needs to be re-written.
15186	//
15187	// Therefore, we have:
15188	// t0 = (x0 * offset0 - x1 * y0 * y1 offset1) + (y0 y1) * t1
15189
15190	auto *CN = cast<ConstantSDNode>(OtherUses[i]->getOperand(OffsetIdx));
15191	const APInt &Offset0 = CN->getAPIntValue();
15192	const APInt &Offset1 = cast<ConstantSDNode>(Offset)->getAPIntValue();
15193	int X0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1) ? -1 : 1;
15194	int Y0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 0) ? -1 : 1;
15195	int X1 = (AM == ISD::PRE_DEC && !Swapped) ? -1 : 1;
15196	int Y1 = (AM == ISD::PRE_DEC && Swapped) ? -1 : 1;
15197
15198	unsigned Opcode = (Y0 * Y1 < 0) ? ISD::SUB : ISD::ADD;
15199
15200	APInt CNV = Offset0;
15201	if (X0 < 0) CNV = -CNV;
15202	if (X1 * Y0 * Y1 < 0) CNV = CNV + Offset1;
15203	else CNV = CNV - Offset1;
15204
15205	SDLoc DL(OtherUses[i]);
15206
15207	// We can now generate the new expression.
15208	SDValue NewOp1 = DAG.getConstant(CNV, DL, CN->getValueType(0));
15209	SDValue NewOp2 = Result.getValue(IsLoad ? 1 : 0);
15210
15211	SDValue NewUse = DAG.getNode(Opcode,
15212	DL,
15213	OtherUses[i]->getValueType(0), NewOp1, NewOp2);
15214	DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUses[i], 0), NewUse);
15215	deleteAndRecombine(OtherUses[i]);
15216	}
15217
15218	// Replace the uses of Ptr with uses of the updated base value.
15219	DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(IsLoad ? 1 : 0));
15220	deleteAndRecombine(Ptr.getNode());
15221	AddToWorklist(Result.getNode());
15222
15223	return true;
15224	}
15225
15226	static bool shouldCombineToPostInc(SDNode N, SDValue Ptr, SDNode PtrUse,
15227	SDValue &BasePtr, SDValue &Offset,
15228	ISD::MemIndexedMode &AM,
15229	SelectionDAG &DAG,
15230	const TargetLowering &TLI) {
15231	if (PtrUse == N \|\|
15232	(PtrUse->getOpcode() != ISD::ADD && PtrUse->getOpcode() != ISD::SUB))
15233	return false;
15234
15235	if (!TLI.getPostIndexedAddressParts(N, PtrUse, BasePtr, Offset, AM, DAG))
15236	return false;
15237
15238	// Don't create a indexed load / store with zero offset.
15239	if (isNullConstant(Offset))
15240	return false;
15241
15242	if (isa<FrameIndexSDNode>(BasePtr) \|\| isa<RegisterSDNode>(BasePtr))
15243	return false;
15244
15245	SmallPtrSet<const SDNode *, 32> Visited;
15246	for (SDNode *Use : BasePtr.getNode()->uses()) {
15247	if (Use == Ptr.getNode())
15248	continue;
15249
15250	// No if there's a later user which could perform the index instead.
15251	if (isa<MemSDNode>(Use)) {
15252	bool IsLoad = true;
15253	bool IsMasked = false;
15254	SDValue OtherPtr;
15255	if (getCombineLoadStoreParts(Use, ISD::POST_INC, ISD::POST_DEC, IsLoad,
15256	IsMasked, OtherPtr, TLI)) {
15257	SmallVector<const SDNode *, 2> Worklist;
15258	Worklist.push_back(Use);
15259	if (SDNode::hasPredecessorHelper(N, Visited, Worklist))
15260	return false;
15261	}
15262	}
15263
15264	// If all the uses are load / store addresses, then don't do the
15265	// transformation.
15266	if (Use->getOpcode() == ISD::ADD \|\| Use->getOpcode() == ISD::SUB) {
15267	for (SDNode *UseUse : Use->uses())
15268	if (canFoldInAddressingMode(Use, UseUse, DAG, TLI))
15269	return false;
15270	}
15271	}
15272	return true;
15273	}
15274
15275	static SDNode getPostIndexedLoadStoreOp(SDNode N, bool &IsLoad,
15276	bool &IsMasked, SDValue &Ptr,
15277	SDValue &BasePtr, SDValue &Offset,
15278	ISD::MemIndexedMode &AM,
15279	SelectionDAG &DAG,
15280	const TargetLowering &TLI) {
15281	if (!getCombineLoadStoreParts(N, ISD::POST_INC, ISD::POST_DEC, IsLoad,
15282	IsMasked, Ptr, TLI) \|\|
15283	Ptr.getNode()->hasOneUse())
15284	return nullptr;
15285
15286	// Try turning it into a post-indexed load / store except when
15287	// 1) All uses are load / store ops that use it as base ptr (and
15288	// it may be folded as addressing mmode).
15289	// 2) Op must be independent of N, i.e. Op is neither a predecessor
15290	// nor a successor of N. Otherwise, if Op is folded that would
15291	// create a cycle.
15292	for (SDNode *Op : Ptr->uses()) {
15293	// Check for #1.
15294	if (!shouldCombineToPostInc(N, Ptr, Op, BasePtr, Offset, AM, DAG, TLI))
15295	continue;
15296
15297	// Check for #2.
15298	SmallPtrSet<const SDNode *, 32> Visited;
15299	SmallVector<const SDNode *, 8> Worklist;
15300	// Ptr is predecessor to both N and Op.
15301	Visited.insert(Ptr.getNode());
15302	Worklist.push_back(N);
15303	Worklist.push_back(Op);
15304	if (!SDNode::hasPredecessorHelper(N, Visited, Worklist) &&
15305	!SDNode::hasPredecessorHelper(Op, Visited, Worklist))
15306	return Op;
15307	}
15308	return nullptr;
15309	}
15310
15311	/// Try to combine a load/store with a add/sub of the base pointer node into a
15312	/// post-indexed load/store. The transformation folded the add/subtract into the
15313	/// new indexed load/store effectively and all of its uses are redirected to the
15314	/// new load/store.
15315	bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
15316	if (Level < AfterLegalizeDAG)
15317	return false;
15318
15319	bool IsLoad = true;
15320	bool IsMasked = false;
15321	SDValue Ptr;
15322	SDValue BasePtr;
15323	SDValue Offset;
15324	ISD::MemIndexedMode AM = ISD::UNINDEXED;
15325	SDNode *Op = getPostIndexedLoadStoreOp(N, IsLoad, IsMasked, Ptr, BasePtr,
15326	Offset, AM, DAG, TLI);
15327	if (!Op)
15328	return false;
15329
15330	SDValue Result;
15331	if (!IsMasked)
15332	Result = IsLoad ? DAG.getIndexedLoad(SDValue(N, 0), SDLoc(N), BasePtr,
15333	Offset, AM)
15334	: DAG.getIndexedStore(SDValue(N, 0), SDLoc(N),
15335	BasePtr, Offset, AM);
15336	else
15337	Result = IsLoad ? DAG.getIndexedMaskedLoad(SDValue(N, 0), SDLoc(N),
15338	BasePtr, Offset, AM)
15339	: DAG.getIndexedMaskedStore(SDValue(N, 0), SDLoc(N),
15340	BasePtr, Offset, AM);
15341	++PostIndexedNodes;
15342	++NodesCombined;
15343	LLVM_DEBUG(dbgs() << "\nReplacing.5 "; N->dump(&DAG);do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("dagcombine")) { dbgs() << "\nReplacing.5 "; N->dump (&DAG); dbgs() << "\nWith: "; Result.getNode()-> dump(&DAG); dbgs() << '\n'; } } while (false)
15344	dbgs() << "\nWith: "; Result.getNode()->dump(&DAG);do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("dagcombine")) { dbgs() << "\nReplacing.5 "; N->dump (&DAG); dbgs() << "\nWith: "; Result.getNode()-> dump(&DAG); dbgs() << '\n'; } } while (false)
15345	dbgs() << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("dagcombine")) { dbgs() << "\nReplacing.5 "; N->dump (&DAG); dbgs() << "\nWith: "; Result.getNode()-> dump(&DAG); dbgs() << '\n'; } } while (false);
15346	WorklistRemover DeadNodes(*this);
15347	if (IsLoad) {
15348	DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
15349	DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
15350	} else {
15351	DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
15352	}
15353
15354	// Finally, since the node is now dead, remove it from the graph.
15355	deleteAndRecombine(N);
15356
15357	// Replace the uses of Use with uses of the updated base value.
15358	DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0),
15359	Result.getValue(IsLoad ? 1 : 0));
15360	deleteAndRecombine(Op);
15361	return true;
15362	}
15363
15364	/// Return the base-pointer arithmetic from an indexed \p LD.
15365	SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) {
15366	ISD::MemIndexedMode AM = LD->getAddressingMode();
15367	assert(AM != ISD::UNINDEXED)((AM != ISD::UNINDEXED) ? static_cast<void> (0) : __assert_fail ("AM != ISD::UNINDEXED", "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 15367, __PRETTY_FUNCTION__));
15368	SDValue BP = LD->getOperand(1);
15369	SDValue Inc = LD->getOperand(2);
15370
15371	// Some backends use TargetConstants for load offsets, but don't expect
15372	// TargetConstants in general ADD nodes. We can convert these constants into
15373	// regular Constants (if the constant is not opaque).
15374	assert((Inc.getOpcode() != ISD::TargetConstant \|\|(((Inc.getOpcode() != ISD::TargetConstant \|\| !cast<ConstantSDNode >(Inc)->isOpaque()) && "Cannot split out indexing using opaque target constants" ) ? static_cast<void> (0) : __assert_fail ("(Inc.getOpcode() != ISD::TargetConstant \|\| !cast<ConstantSDNode>(Inc)->isOpaque()) && \"Cannot split out indexing using opaque target constants\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 15376, __PRETTY_FUNCTION__))
15375	!cast<ConstantSDNode>(Inc)->isOpaque()) &&(((Inc.getOpcode() != ISD::TargetConstant \|\| !cast<ConstantSDNode >(Inc)->isOpaque()) && "Cannot split out indexing using opaque target constants" ) ? static_cast<void> (0) : __assert_fail ("(Inc.getOpcode() != ISD::TargetConstant \|\| !cast<ConstantSDNode>(Inc)->isOpaque()) && \"Cannot split out indexing using opaque target constants\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 15376, __PRETTY_FUNCTION__))
15376	"Cannot split out indexing using opaque target constants")(((Inc.getOpcode() != ISD::TargetConstant \|\| !cast<ConstantSDNode >(Inc)->isOpaque()) && "Cannot split out indexing using opaque target constants" ) ? static_cast<void> (0) : __assert_fail ("(Inc.getOpcode() != ISD::TargetConstant \|\| !cast<ConstantSDNode>(Inc)->isOpaque()) && \"Cannot split out indexing using opaque target constants\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 15376, __PRETTY_FUNCTION__));
15377	if (Inc.getOpcode() == ISD::TargetConstant) {
15378	ConstantSDNode *ConstInc = cast<ConstantSDNode>(Inc);
15379	Inc = DAG.getConstant(*ConstInc->getConstantIntValue(), SDLoc(Inc),
15380	ConstInc->getValueType(0));
15381	}
15382
15383	unsigned Opc =
15384	(AM == ISD::PRE_INC \|\| AM == ISD::POST_INC ? ISD::ADD : ISD::SUB);
15385	return DAG.getNode(Opc, SDLoc(LD), BP.getSimpleValueType(), BP, Inc);
15386	}
15387
15388	static inline ElementCount numVectorEltsOrZero(EVT T) {
15389	return T.isVector() ? T.getVectorElementCount() : ElementCount::getFixed(0);
15390	}
15391
15392	bool DAGCombiner::getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val) {
15393	Val = ST->getValue();
15394	EVT STType = Val.getValueType();
15395	EVT STMemType = ST->getMemoryVT();
15396	if (STType == STMemType)
15397	return true;
15398	if (isTypeLegal(STMemType))
15399	return false; // fail.
15400	if (STType.isFloatingPoint() && STMemType.isFloatingPoint() &&
15401	TLI.isOperationLegal(ISD::FTRUNC, STMemType)) {
15402	Val = DAG.getNode(ISD::FTRUNC, SDLoc(ST), STMemType, Val);
15403	return true;
15404	}
15405	if (numVectorEltsOrZero(STType) == numVectorEltsOrZero(STMemType) &&
15406	STType.isInteger() && STMemType.isInteger()) {
15407	Val = DAG.getNode(ISD::TRUNCATE, SDLoc(ST), STMemType, Val);
15408	return true;
15409	}
15410	if (STType.getSizeInBits() == STMemType.getSizeInBits()) {
15411	Val = DAG.getBitcast(STMemType, Val);
15412	return true;
15413	}
15414	return false; // fail.
15415	}
15416
15417	bool DAGCombiner::extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val) {
15418	EVT LDMemType = LD->getMemoryVT();
15419	EVT LDType = LD->getValueType(0);
15420	assert(Val.getValueType() == LDMemType &&((Val.getValueType() == LDMemType && "Attempting to extend value of non-matching type" ) ? static_cast<void> (0) : __assert_fail ("Val.getValueType() == LDMemType && \"Attempting to extend value of non-matching type\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 15421, __PRETTY_FUNCTION__))
15421	"Attempting to extend value of non-matching type")((Val.getValueType() == LDMemType && "Attempting to extend value of non-matching type" ) ? static_cast<void> (0) : __assert_fail ("Val.getValueType() == LDMemType && \"Attempting to extend value of non-matching type\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 15421, __PRETTY_FUNCTION__));
15422	if (LDType == LDMemType)
15423	return true;
15424	if (LDMemType.isInteger() && LDType.isInteger()) {
15425	switch (LD->getExtensionType()) {
15426	case ISD::NON_EXTLOAD:
15427	Val = DAG.getBitcast(LDType, Val);
15428	return true;
15429	case ISD::EXTLOAD:
15430	Val = DAG.getNode(ISD::ANY_EXTEND, SDLoc(LD), LDType, Val);
15431	return true;
15432	case ISD::SEXTLOAD:
15433	Val = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(LD), LDType, Val);
15434	return true;
15435	case ISD::ZEXTLOAD:
15436	Val = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(LD), LDType, Val);
15437	return true;
15438	}
15439	}
15440	return false;
15441	}
15442
15443	SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) {
15444	if (OptLevel == CodeGenOpt::None \|\| !LD->isSimple())
15445	return SDValue();
15446	SDValue Chain = LD->getOperand(0);
15447	StoreSDNode *ST = dyn_cast<StoreSDNode>(Chain.getNode());
15448	// TODO: Relax this restriction for unordered atomics (see D66309)
15449	if (!ST \|\| !ST->isSimple())
15450	return SDValue();
15451
15452	EVT LDType = LD->getValueType(0);
15453	EVT LDMemType = LD->getMemoryVT();
15454	EVT STMemType = ST->getMemoryVT();
15455	EVT STType = ST->getValue().getValueType();
15456
15457	// There are two cases to consider here:
15458	// 1. The store is fixed width and the load is scalable. In this case we
15459	// don't know at compile time if the store completely envelops the load
15460	// so we abandon the optimisation.
15461	// 2. The store is scalable and the load is fixed width. We could
15462	// potentially support a limited number of cases here, but there has been
15463	// no cost-benefit analysis to prove it's worth it.
15464	bool LdStScalable = LDMemType.isScalableVector();
15465	if (LdStScalable != STMemType.isScalableVector())
15466	return SDValue();
15467
15468	// If we are dealing with scalable vectors on a big endian platform the
15469	// calculation of offsets below becomes trickier, since we do not know at
15470	// compile time the absolute size of the vector. Until we've done more
15471	// analysis on big-endian platforms it seems better to bail out for now.
15472	if (LdStScalable && DAG.getDataLayout().isBigEndian())
15473	return SDValue();
15474
15475	BaseIndexOffset BasePtrLD = BaseIndexOffset::match(LD, DAG);
15476	BaseIndexOffset BasePtrST = BaseIndexOffset::match(ST, DAG);
15477	int64_t Offset;
15478	if (!BasePtrST.equalBaseIndex(BasePtrLD, DAG, Offset))
15479	return SDValue();
15480
15481	// Normalize for Endianness. After this Offset=0 will denote that the least
15482	// significant bit in the loaded value maps to the least significant bit in
15483	// the stored value). With Offset=n (for n > 0) the loaded value starts at the
15484	// n:th least significant byte of the stored value.
15485	if (DAG.getDataLayout().isBigEndian())
15486	Offset = ((int64_t)STMemType.getStoreSizeInBits().getFixedSize() -
15487	(int64_t)LDMemType.getStoreSizeInBits().getFixedSize()) /
15488	8 -
15489	Offset;
15490
15491	// Check that the stored value cover all bits that are loaded.
15492	bool STCoversLD;
15493
15494	TypeSize LdMemSize = LDMemType.getSizeInBits();
15495	TypeSize StMemSize = STMemType.getSizeInBits();
15496	if (LdStScalable)
15497	STCoversLD = (Offset == 0) && LdMemSize == StMemSize;
15498	else
15499	STCoversLD = (Offset >= 0) && (Offset * 8 + LdMemSize.getFixedSize() <=
15500	StMemSize.getFixedSize());
15501
15502	auto ReplaceLd = [&](LoadSDNode *LD, SDValue Val, SDValue Chain) -> SDValue {
15503	if (LD->isIndexed()) {
15504	// Cannot handle opaque target constants and we must respect the user's
15505	// request not to split indexes from loads.
15506	if (!canSplitIdx(LD))
15507	return SDValue();
15508	SDValue Idx = SplitIndexingFromLoad(LD);
15509	SDValue Ops[] = {Val, Idx, Chain};
15510	return CombineTo(LD, Ops, 3);
15511	}
15512	return CombineTo(LD, Val, Chain);
15513	};
15514
15515	if (!STCoversLD)
15516	return SDValue();
15517
15518	// Memory as copy space (potentially masked).
15519	if (Offset == 0 && LDType == STType && STMemType == LDMemType) {
15520	// Simple case: Direct non-truncating forwarding
15521	if (LDType.getSizeInBits() == LdMemSize)
15522	return ReplaceLd(LD, ST->getValue(), Chain);
15523	// Can we model the truncate and extension with an and mask?
15524	if (STType.isInteger() && LDMemType.isInteger() && !STType.isVector() &&
15525	!LDMemType.isVector() && LD->getExtensionType() != ISD::SEXTLOAD) {
15526	// Mask to size of LDMemType
15527	auto Mask =
15528	DAG.getConstant(APInt::getLowBitsSet(STType.getFixedSizeInBits(),
15529	StMemSize.getFixedSize()),
15530	SDLoc(ST), STType);
15531	auto Val = DAG.getNode(ISD::AND, SDLoc(LD), LDType, ST->getValue(), Mask);
15532	return ReplaceLd(LD, Val, Chain);
15533	}
15534	}
15535
15536	// TODO: Deal with nonzero offset.
15537	if (LD->getBasePtr().isUndef() \|\| Offset != 0)
15538	return SDValue();
15539	// Model necessary truncations / extenstions.
15540	SDValue Val;
15541	// Truncate Value To Stored Memory Size.
15542	do {
15543	if (!getTruncatedStoreValue(ST, Val))
15544	continue;
15545	if (!isTypeLegal(LDMemType))
15546	continue;
15547	if (STMemType != LDMemType) {
15548	// TODO: Support vectors? This requires extract_subvector/bitcast.
15549	if (!STMemType.isVector() && !LDMemType.isVector() &&
15550	STMemType.isInteger() && LDMemType.isInteger())
15551	Val = DAG.getNode(ISD::TRUNCATE, SDLoc(LD), LDMemType, Val);
15552	else
15553	continue;
15554	}
15555	if (!extendLoadedValueToExtension(LD, Val))
15556	continue;
15557	return ReplaceLd(LD, Val, Chain);
15558	} while (false);
15559
15560	// On failure, cleanup dead nodes we may have created.
15561	if (Val->use_empty())
15562	deleteAndRecombine(Val.getNode());
15563	return SDValue();
15564	}
15565
15566	SDValue DAGCombiner::visitLOAD(SDNode *N) {
15567	LoadSDNode *LD = cast<LoadSDNode>(N);
15568	SDValue Chain = LD->getChain();
15569	SDValue Ptr = LD->getBasePtr();
15570
15571	// If load is not volatile and there are no uses of the loaded value (and
15572	// the updated indexed value in case of indexed loads), change uses of the
15573	// chain value into uses of the chain input (i.e. delete the dead load).
15574	// TODO: Allow this for unordered atomics (see D66309)
15575	if (LD->isSimple()) {
15576	if (N->getValueType(1) == MVT::Other) {
15577	// Unindexed loads.
15578	if (!N->hasAnyUseOfValue(0)) {
15579	// It's not safe to use the two value CombineTo variant here. e.g.
15580	// v1, chain2 = load chain1, loc
15581	// v2, chain3 = load chain2, loc
15582	// v3 = add v2, c
15583	// Now we replace use of chain2 with chain1. This makes the second load
15584	// isomorphic to the one we are deleting, and thus makes this load live.
15585	LLVM_DEBUG(dbgs() << "\nReplacing.6 "; N->dump(&DAG);do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("dagcombine")) { dbgs() << "\nReplacing.6 "; N->dump (&DAG); dbgs() << "\nWith chain: "; Chain.getNode() ->dump(&DAG); dbgs() << "\n"; } } while (false)
15586	dbgs() << "\nWith chain: "; Chain.getNode()->dump(&DAG);do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("dagcombine")) { dbgs() << "\nReplacing.6 "; N->dump (&DAG); dbgs() << "\nWith chain: "; Chain.getNode() ->dump(&DAG); dbgs() << "\n"; } } while (false)
15587	dbgs() << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("dagcombine")) { dbgs() << "\nReplacing.6 "; N->dump (&DAG); dbgs() << "\nWith chain: "; Chain.getNode() ->dump(&DAG); dbgs() << "\n"; } } while (false);
15588	WorklistRemover DeadNodes(*this);
15589	DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
15590	AddUsersToWorklist(Chain.getNode());
15591	if (N->use_empty())
15592	deleteAndRecombine(N);
15593
15594	return SDValue(N, 0); // Return N so it doesn't get rechecked!
15595	}
15596	} else {
15597	// Indexed loads.
15598	assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?")((N->getValueType(2) == MVT::Other && "Malformed indexed loads?" ) ? static_cast<void> (0) : __assert_fail ("N->getValueType(2) == MVT::Other && \"Malformed indexed loads?\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 15598, __PRETTY_FUNCTION__));
15599
15600	// If this load has an opaque TargetConstant offset, then we cannot split
15601	// the indexing into an add/sub directly (that TargetConstant may not be
15602	// valid for a different type of node, and we cannot convert an opaque
15603	// target constant into a regular constant).
15604	bool CanSplitIdx = canSplitIdx(LD);
15605
15606	if (!N->hasAnyUseOfValue(0) && (CanSplitIdx \|\| !N->hasAnyUseOfValue(1))) {
15607	SDValue Undef = DAG.getUNDEF(N->getValueType(0));
15608	SDValue Index;
15609	if (N->hasAnyUseOfValue(1) && CanSplitIdx) {
15610	Index = SplitIndexingFromLoad(LD);
15611	// Try to fold the base pointer arithmetic into subsequent loads and
15612	// stores.
15613	AddUsersToWorklist(N);
15614	} else
15615	Index = DAG.getUNDEF(N->getValueType(1));
15616	LLVM_DEBUG(dbgs() << "\nReplacing.7 "; N->dump(&DAG);do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("dagcombine")) { dbgs() << "\nReplacing.7 "; N->dump (&DAG); dbgs() << "\nWith: "; Undef.getNode()->dump (&DAG); dbgs() << " and 2 other values\n"; } } while (false)
15617	dbgs() << "\nWith: "; Undef.getNode()->dump(&DAG);do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("dagcombine")) { dbgs() << "\nReplacing.7 "; N->dump (&DAG); dbgs() << "\nWith: "; Undef.getNode()->dump (&DAG); dbgs() << " and 2 other values\n"; } } while (false)
15618	dbgs() << " and 2 other values\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("dagcombine")) { dbgs() << "\nReplacing.7 "; N->dump (&DAG); dbgs() << "\nWith: "; Undef.getNode()->dump (&DAG); dbgs() << " and 2 other values\n"; } } while (false);
15619	WorklistRemover DeadNodes(*this);
15620	DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef);
15621	DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Index);
15622	DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain);
15623	deleteAndRecombine(N);
15624	return SDValue(N, 0); // Return N so it doesn't get rechecked!
15625	}
15626	}
15627	}
15628
15629	// If this load is directly stored, replace the load value with the stored
15630	// value.
15631	if (auto V = ForwardStoreValueToDirectLoad(LD))
15632	return V;
15633
15634	// Try to infer better alignment information than the load already has.
15635	if (OptLevel != CodeGenOpt::None && LD->isUnindexed() && !LD->isAtomic()) {
15636	if (MaybeAlign Alignment = DAG.InferPtrAlign(Ptr)) {
15637	if (*Alignment > LD->getAlign() &&
15638	isAligned(*Alignment, LD->getSrcValueOffset())) {
15639	SDValue NewLoad = DAG.getExtLoad(
15640	LD->getExtensionType(), SDLoc(N), LD->getValueType(0), Chain, Ptr,
15641	LD->getPointerInfo(), LD->getMemoryVT(), *Alignment,
15642	LD->getMemOperand()->getFlags(), LD->getAAInfo());
15643	// NewLoad will always be N as we are only refining the alignment
15644	assert(NewLoad.getNode() == N)((NewLoad.getNode() == N) ? static_cast<void> (0) : __assert_fail ("NewLoad.getNode() == N", "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 15644, __PRETTY_FUNCTION__));
15645	(void)NewLoad;
15646	}
15647	}
15648	}
15649
15650	if (LD->isUnindexed()) {
15651	// Walk up chain skipping non-aliasing memory nodes.
15652	SDValue BetterChain = FindBetterChain(LD, Chain);
15653
15654	// If there is a better chain.
15655	if (Chain != BetterChain) {
15656	SDValue ReplLoad;
15657
15658	// Replace the chain to void dependency.
15659	if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
15660	ReplLoad = DAG.getLoad(N->getValueType(0), SDLoc(LD),
15661	BetterChain, Ptr, LD->getMemOperand());
15662	} else {
15663	ReplLoad = DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD),
15664	LD->getValueType(0),
15665	BetterChain, Ptr, LD->getMemoryVT(),
15666	LD->getMemOperand());
15667	}
15668
15669	// Create token factor to keep old chain connected.
15670	SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N),
15671	MVT::Other, Chain, ReplLoad.getValue(1));
15672
15673	// Replace uses with load result and token factor
15674	return CombineTo(N, ReplLoad.getValue(0), Token);
15675	}
15676	}
15677
15678	// Try transforming N to an indexed load.
15679	if (CombineToPreIndexedLoadStore(N) \|\| CombineToPostIndexedLoadStore(N))
15680	return SDValue(N, 0);
15681
15682	// Try to slice up N to more direct loads if the slices are mapped to
15683	// different register banks or pairing can take place.
15684	if (SliceUpLoad(N))
15685	return SDValue(N, 0);
15686
15687	return SDValue();
15688	}
15689
15690	namespace {
15691
15692	/// Helper structure used to slice a load in smaller loads.
15693	/// Basically a slice is obtained from the following sequence:
15694	/// Origin = load Ty1, Base
15695	/// Shift = srl Ty1 Origin, CstTy Amount
15696	/// Inst = trunc Shift to Ty2
15697	///
15698	/// Then, it will be rewritten into:
15699	/// Slice = load SliceTy, Base + SliceOffset
15700	/// [Inst = zext Slice to Ty2], only if SliceTy <> Ty2
15701	///
15702	/// SliceTy is deduced from the number of bits that are actually used to
15703	/// build Inst.
15704	struct LoadedSlice {
15705	/// Helper structure used to compute the cost of a slice.
15706	struct Cost {
15707	/// Are we optimizing for code size.
15708	bool ForCodeSize = false;
15709
15710	/// Various cost.
15711	unsigned Loads = 0;
15712	unsigned Truncates = 0;
15713	unsigned CrossRegisterBanksCopies = 0;
15714	unsigned ZExts = 0;
15715	unsigned Shift = 0;
15716
15717	explicit Cost(bool ForCodeSize) : ForCodeSize(ForCodeSize) {}
15718
15719	/// Get the cost of one isolated slice.
15720	Cost(const LoadedSlice &LS, bool ForCodeSize)
15721	: ForCodeSize(ForCodeSize), Loads(1) {
15722	EVT TruncType = LS.Inst->getValueType(0);
15723	EVT LoadedType = LS.getLoadedType();
15724	if (TruncType != LoadedType &&
15725	!LS.DAG->getTargetLoweringInfo().isZExtFree(LoadedType, TruncType))
15726	ZExts = 1;
15727	}
15728
15729	/// Account for slicing gain in the current cost.
15730	/// Slicing provide a few gains like removing a shift or a
15731	/// truncate. This method allows to grow the cost of the original
15732	/// load with the gain from this slice.
15733	void addSliceGain(const LoadedSlice &LS) {
15734	// Each slice saves a truncate.
15735	const TargetLowering &TLI = LS.DAG->getTargetLoweringInfo();
15736	if (!TLI.isTruncateFree(LS.Inst->getOperand(0).getValueType(),
15737	LS.Inst->getValueType(0)))
15738	++Truncates;
15739	// If there is a shift amount, this slice gets rid of it.
15740	if (LS.Shift)
15741	++Shift;
15742	// If this slice can merge a cross register bank copy, account for it.
15743	if (LS.canMergeExpensiveCrossRegisterBankCopy())
15744	++CrossRegisterBanksCopies;
15745	}
15746
15747	Cost &operator+=(const Cost &RHS) {
15748	Loads += RHS.Loads;
15749	Truncates += RHS.Truncates;
15750	CrossRegisterBanksCopies += RHS.CrossRegisterBanksCopies;
15751	ZExts += RHS.ZExts;
15752	Shift += RHS.Shift;
15753	return *this;
15754	}
15755
15756	bool operator==(const Cost &RHS) const {
15757	return Loads == RHS.Loads && Truncates == RHS.Truncates &&
15758	CrossRegisterBanksCopies == RHS.CrossRegisterBanksCopies &&
15759	ZExts == RHS.ZExts && Shift == RHS.Shift;
15760	}
15761
15762	bool operator!=(const Cost &RHS) const { return !(*this == RHS); }
15763
15764	bool operator<(const Cost &RHS) const {
15765	// Assume cross register banks copies are as expensive as loads.
15766	// FIXME: Do we want some more target hooks?
15767	unsigned ExpensiveOpsLHS = Loads + CrossRegisterBanksCopies;
15768	unsigned ExpensiveOpsRHS = RHS.Loads + RHS.CrossRegisterBanksCopies;
15769	// Unless we are optimizing for code size, consider the
15770	// expensive operation first.
15771	if (!ForCodeSize && ExpensiveOpsLHS != ExpensiveOpsRHS)
15772	return ExpensiveOpsLHS < ExpensiveOpsRHS;
15773	return (Truncates + ZExts + Shift + ExpensiveOpsLHS) <
15774	(RHS.Truncates + RHS.ZExts + RHS.Shift + ExpensiveOpsRHS);
15775	}
15776
15777	bool operator>(const Cost &RHS) const { return RHS < *this; }
15778
15779	bool operator<=(const Cost &RHS) const { return !(RHS < *this); }
15780
15781	bool operator>=(const Cost &RHS) const { return !(*this < RHS); }
15782	};
15783
15784	// The last instruction that represent the slice. This should be a
15785	// truncate instruction.
15786	SDNode *Inst;
15787
15788	// The original load instruction.
15789	LoadSDNode *Origin;
15790
15791	// The right shift amount in bits from the original load.
15792	unsigned Shift;
15793
15794	// The DAG from which Origin came from.
15795	// This is used to get some contextual information about legal types, etc.
15796	SelectionDAG *DAG;
15797
15798	LoadedSlice(SDNode Inst = nullptr, LoadSDNode Origin = nullptr,
15799	unsigned Shift = 0, SelectionDAG *DAG = nullptr)
15800	: Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {}
15801
15802	/// Get the bits used in a chunk of bits \p BitWidth large.
15803	/// \return Result is \p BitWidth and has used bits set to 1 and
15804	/// not used bits set to 0.
15805	APInt getUsedBits() const {
15806	// Reproduce the trunc(lshr) sequence:
15807	// - Start from the truncated value.
15808	// - Zero extend to the desired bit width.
15809	// - Shift left.
15810	assert(Origin && "No original load to compare against.")((Origin && "No original load to compare against.") ? static_cast<void> (0) : __assert_fail ("Origin && \"No original load to compare against.\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 15810, __PRETTY_FUNCTION__));
15811	unsigned BitWidth = Origin->getValueSizeInBits(0);
15812	assert(Inst && "This slice is not bound to an instruction")((Inst && "This slice is not bound to an instruction" ) ? static_cast<void> (0) : __assert_fail ("Inst && \"This slice is not bound to an instruction\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 15812, __PRETTY_FUNCTION__));
15813	assert(Inst->getValueSizeInBits(0) <= BitWidth &&((Inst->getValueSizeInBits(0) <= BitWidth && "Extracted slice is bigger than the whole type!" ) ? static_cast<void> (0) : __assert_fail ("Inst->getValueSizeInBits(0) <= BitWidth && \"Extracted slice is bigger than the whole type!\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 15814, __PRETTY_FUNCTION__))
15814	"Extracted slice is bigger than the whole type!")((Inst->getValueSizeInBits(0) <= BitWidth && "Extracted slice is bigger than the whole type!" ) ? static_cast<void> (0) : __assert_fail ("Inst->getValueSizeInBits(0) <= BitWidth && \"Extracted slice is bigger than the whole type!\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 15814, __PRETTY_FUNCTION__));
15815	APInt UsedBits(Inst->getValueSizeInBits(0), 0);
15816	UsedBits.setAllBits();
15817	UsedBits = UsedBits.zext(BitWidth);
15818	UsedBits <<= Shift;
15819	return UsedBits;
15820	}
15821
15822	/// Get the size of the slice to be loaded in bytes.
15823	unsigned getLoadedSize() const {
15824	unsigned SliceSize = getUsedBits().countPopulation();
15825	assert(!(SliceSize & 0x7) && "Size is not a multiple of a byte.")((!(SliceSize & 0x7) && "Size is not a multiple of a byte." ) ? static_cast<void> (0) : __assert_fail ("!(SliceSize & 0x7) && \"Size is not a multiple of a byte.\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 15825, __PRETTY_FUNCTION__));
15826	return SliceSize / 8;
15827	}
15828
15829	/// Get the type that will be loaded for this slice.
15830	/// Note: This may not be the final type for the slice.
15831	EVT getLoadedType() const {
15832	assert(DAG && "Missing context")((DAG && "Missing context") ? static_cast<void> (0) : __assert_fail ("DAG && \"Missing context\"", "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 15832, __PRETTY_FUNCTION__));
15833	LLVMContext &Ctxt = *DAG->getContext();
15834	return EVT::getIntegerVT(Ctxt, getLoadedSize() * 8);
15835	}
15836
15837	/// Get the alignment of the load used for this slice.
15838	Align getAlign() const {
15839	Align Alignment = Origin->getAlign();
15840	uint64_t Offset = getOffsetFromBase();
15841	if (Offset != 0)
15842	Alignment = commonAlignment(Alignment, Alignment.value() + Offset);
15843	return Alignment;
15844	}
15845
15846	/// Check if this slice can be rewritten with legal operations.
15847	bool isLegal() const {
15848	// An invalid slice is not legal.
15849	if (!Origin \|\| !Inst \|\| !DAG)
15850	return false;
15851
15852	// Offsets are for indexed load only, we do not handle that.
15853	if (!Origin->getOffset().isUndef())
15854	return false;
15855
15856	const TargetLowering &TLI = DAG->getTargetLoweringInfo();
15857
15858	// Check that the type is legal.
15859	EVT SliceType = getLoadedType();
15860	if (!TLI.isTypeLegal(SliceType))
15861	return false;
15862
15863	// Check that the load is legal for this type.
15864	if (!TLI.isOperationLegal(ISD::LOAD, SliceType))
15865	return false;
15866
15867	// Check that the offset can be computed.
15868	// 1. Check its type.
15869	EVT PtrType = Origin->getBasePtr().getValueType();
15870	if (PtrType == MVT::Untyped \|\| PtrType.isExtended())
15871	return false;
15872
15873	// 2. Check that it fits in the immediate.
15874	if (!TLI.isLegalAddImmediate(getOffsetFromBase()))
15875	return false;
15876
15877	// 3. Check that the computation is legal.
15878	if (!TLI.isOperationLegal(ISD::ADD, PtrType))
15879	return false;
15880
15881	// Check that the zext is legal if it needs one.
15882	EVT TruncateType = Inst->getValueType(0);
15883	if (TruncateType != SliceType &&
15884	!TLI.isOperationLegal(ISD::ZERO_EXTEND, TruncateType))
15885	return false;
15886
15887	return true;
15888	}
15889
15890	/// Get the offset in bytes of this slice in the original chunk of
15891	/// bits.
15892	/// \pre DAG != nullptr.
15893	uint64_t getOffsetFromBase() const {
15894	assert(DAG && "Missing context.")((DAG && "Missing context.") ? static_cast<void> (0) : __assert_fail ("DAG && \"Missing context.\"", "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 15894, __PRETTY_FUNCTION__));
15895	bool IsBigEndian = DAG->getDataLayout().isBigEndian();
15896	assert(!(Shift & 0x7) && "Shifts not aligned on Bytes are not supported.")((!(Shift & 0x7) && "Shifts not aligned on Bytes are not supported." ) ? static_cast<void> (0) : __assert_fail ("!(Shift & 0x7) && \"Shifts not aligned on Bytes are not supported.\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 15896, __PRETTY_FUNCTION__));
15897	uint64_t Offset = Shift / 8;
15898	unsigned TySizeInBytes = Origin->getValueSizeInBits(0) / 8;
15899	assert(!(Origin->getValueSizeInBits(0) & 0x7) &&((!(Origin->getValueSizeInBits(0) & 0x7) && "The size of the original loaded type is not a multiple of a" " byte.") ? static_cast<void> (0) : __assert_fail ("!(Origin->getValueSizeInBits(0) & 0x7) && \"The size of the original loaded type is not a multiple of a\" \" byte.\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 15901, __PRETTY_FUNCTION__))
15900	"The size of the original loaded type is not a multiple of a"((!(Origin->getValueSizeInBits(0) & 0x7) && "The size of the original loaded type is not a multiple of a" " byte.") ? static_cast<void> (0) : __assert_fail ("!(Origin->getValueSizeInBits(0) & 0x7) && \"The size of the original loaded type is not a multiple of a\" \" byte.\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 15901, __PRETTY_FUNCTION__))
15901	" byte.")((!(Origin->getValueSizeInBits(0) & 0x7) && "The size of the original loaded type is not a multiple of a" " byte.") ? static_cast<void> (0) : __assert_fail ("!(Origin->getValueSizeInBits(0) & 0x7) && \"The size of the original loaded type is not a multiple of a\" \" byte.\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 15901, __PRETTY_FUNCTION__));
15902	// If Offset is bigger than TySizeInBytes, it means we are loading all
15903	// zeros. This should have been optimized before in the process.
15904	assert(TySizeInBytes > Offset &&((TySizeInBytes > Offset && "Invalid shift amount for given loaded size" ) ? static_cast<void> (0) : __assert_fail ("TySizeInBytes > Offset && \"Invalid shift amount for given loaded size\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 15905, __PRETTY_FUNCTION__))
15905	"Invalid shift amount for given loaded size")((TySizeInBytes > Offset && "Invalid shift amount for given loaded size" ) ? static_cast<void> (0) : __assert_fail ("TySizeInBytes > Offset && \"Invalid shift amount for given loaded size\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 15905, __PRETTY_FUNCTION__));
15906	if (IsBigEndian)
15907	Offset = TySizeInBytes - Offset - getLoadedSize();
15908	return Offset;
15909	}
15910
15911	/// Generate the sequence of instructions to load the slice
15912	/// represented by this object and redirect the uses of this slice to
15913	/// this new sequence of instructions.
15914	/// \pre this->Inst && this->Origin are valid Instructions and this
15915	/// object passed the legal check: LoadedSlice::isLegal returned true.
15916	/// \return The last instruction of the sequence used to load the slice.
15917	SDValue loadSlice() const {
15918	assert(Inst && Origin && "Unable to replace a non-existing slice.")((Inst && Origin && "Unable to replace a non-existing slice." ) ? static_cast<void> (0) : __assert_fail ("Inst && Origin && \"Unable to replace a non-existing slice.\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 15918, __PRETTY_FUNCTION__));
15919	const SDValue &OldBaseAddr = Origin->getBasePtr();
15920	SDValue BaseAddr = OldBaseAddr;
15921	// Get the offset in that chunk of bytes w.r.t. the endianness.
15922	int64_t Offset = static_cast<int64_t>(getOffsetFromBase());
15923	assert(Offset >= 0 && "Offset too big to fit in int64_t!")((Offset >= 0 && "Offset too big to fit in int64_t!" ) ? static_cast<void> (0) : __assert_fail ("Offset >= 0 && \"Offset too big to fit in int64_t!\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 15923, __PRETTY_FUNCTION__));
15924	if (Offset) {
15925	// BaseAddr = BaseAddr + Offset.
15926	EVT ArithType = BaseAddr.getValueType();
15927	SDLoc DL(Origin);
15928	BaseAddr = DAG->getNode(ISD::ADD, DL, ArithType, BaseAddr,
15929	DAG->getConstant(Offset, DL, ArithType));
15930	}
15931
15932	// Create the type of the loaded slice according to its size.
15933	EVT SliceType = getLoadedType();
15934
15935	// Create the load for the slice.
15936	SDValue LastInst =
15937	DAG->getLoad(SliceType, SDLoc(Origin), Origin->getChain(), BaseAddr,
15938	Origin->getPointerInfo().getWithOffset(Offset), getAlign(),
15939	Origin->getMemOperand()->getFlags());
15940	// If the final type is not the same as the loaded type, this means that
15941	// we have to pad with zero. Create a zero extend for that.
15942	EVT FinalType = Inst->getValueType(0);
15943	if (SliceType != FinalType)
15944	LastInst =
15945	DAG->getNode(ISD::ZERO_EXTEND, SDLoc(LastInst), FinalType, LastInst);
15946	return LastInst;
15947	}
15948
15949	/// Check if this slice can be merged with an expensive cross register
15950	/// bank copy. E.g.,
15951	/// i = load i32
15952	/// f = bitcast i32 i to float
15953	bool canMergeExpensiveCrossRegisterBankCopy() const {
15954	if (!Inst \|\| !Inst->hasOneUse())
15955	return false;
15956	SDNode Use = Inst->use_begin();
15957	if (Use->getOpcode() != ISD::BITCAST)
15958	return false;
15959	assert(DAG && "Missing context")((DAG && "Missing context") ? static_cast<void> (0) : __assert_fail ("DAG && \"Missing context\"", "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 15959, __PRETTY_FUNCTION__));
15960	const TargetLowering &TLI = DAG->getTargetLoweringInfo();
15961	EVT ResVT = Use->getValueType(0);
15962	const TargetRegisterClass *ResRC =
15963	TLI.getRegClassFor(ResVT.getSimpleVT(), Use->isDivergent());
15964	const TargetRegisterClass *ArgRC =
15965	TLI.getRegClassFor(Use->getOperand(0).getValueType().getSimpleVT(),
15966	Use->getOperand(0)->isDivergent());
15967	if (ArgRC == ResRC \|\| !TLI.isOperationLegal(ISD::LOAD, ResVT))
15968	return false;
15969
15970	// At this point, we know that we perform a cross-register-bank copy.
15971	// Check if it is expensive.
15972	const TargetRegisterInfo *TRI = DAG->getSubtarget().getRegisterInfo();
15973	// Assume bitcasts are cheap, unless both register classes do not
15974	// explicitly share a common sub class.
15975	if (!TRI \|\| TRI->getCommonSubClass(ArgRC, ResRC))
15976	return false;
15977
15978	// Check if it will be merged with the load.
15979	// 1. Check the alignment constraint.
15980	Align RequiredAlignment = DAG->getDataLayout().getABITypeAlign(
15981	ResVT.getTypeForEVT(*DAG->getContext()));
15982
15983	if (RequiredAlignment > getAlign())
15984	return false;
15985
15986	// 2. Check that the load is a legal operation for that type.
15987	if (!TLI.isOperationLegal(ISD::LOAD, ResVT))
15988	return false;
15989
15990	// 3. Check that we do not have a zext in the way.
15991	if (Inst->getValueType(0) != getLoadedType())
15992	return false;
15993
15994	return true;
15995	}
15996	};
15997
15998	} // end anonymous namespace
15999
16000	/// Check that all bits set in \p UsedBits form a dense region, i.e.,
16001	/// \p UsedBits looks like 0..0 1..1 0..0.
16002	static bool areUsedBitsDense(const APInt &UsedBits) {
16003	// If all the bits are one, this is dense!
16004	if (UsedBits.isAllOnesValue())
16005	return true;
16006
16007	// Get rid of the unused bits on the right.
16008	APInt NarrowedUsedBits = UsedBits.lshr(UsedBits.countTrailingZeros());
16009	// Get rid of the unused bits on the left.
16010	if (NarrowedUsedBits.countLeadingZeros())
16011	NarrowedUsedBits = NarrowedUsedBits.trunc(NarrowedUsedBits.getActiveBits());
16012	// Check that the chunk of bits is completely used.
16013	return NarrowedUsedBits.isAllOnesValue();
16014	}
16015
16016	/// Check whether or not \p First and \p Second are next to each other
16017	/// in memory. This means that there is no hole between the bits loaded
16018	/// by \p First and the bits loaded by \p Second.
16019	static bool areSlicesNextToEachOther(const LoadedSlice &First,
16020	const LoadedSlice &Second) {
16021	assert(First.Origin == Second.Origin && First.Origin &&((First.Origin == Second.Origin && First.Origin && "Unable to match different memory origins.") ? static_cast< void> (0) : __assert_fail ("First.Origin == Second.Origin && First.Origin && \"Unable to match different memory origins.\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 16022, __PRETTY_FUNCTION__))
16022	"Unable to match different memory origins.")((First.Origin == Second.Origin && First.Origin && "Unable to match different memory origins.") ? static_cast< void> (0) : __assert_fail ("First.Origin == Second.Origin && First.Origin && \"Unable to match different memory origins.\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 16022, __PRETTY_FUNCTION__));
16023	APInt UsedBits = First.getUsedBits();
16024	assert((UsedBits & Second.getUsedBits()) == 0 &&(((UsedBits & Second.getUsedBits()) == 0 && "Slices are not supposed to overlap." ) ? static_cast<void> (0) : __assert_fail ("(UsedBits & Second.getUsedBits()) == 0 && \"Slices are not supposed to overlap.\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 16025, __PRETTY_FUNCTION__))
16025	"Slices are not supposed to overlap.")(((UsedBits & Second.getUsedBits()) == 0 && "Slices are not supposed to overlap." ) ? static_cast<void> (0) : __assert_fail ("(UsedBits & Second.getUsedBits()) == 0 && \"Slices are not supposed to overlap.\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 16025, __PRETTY_FUNCTION__));
16026	UsedBits \|= Second.getUsedBits();
16027	return areUsedBitsDense(UsedBits);
16028	}
16029
16030	/// Adjust the \p GlobalLSCost according to the target
16031	/// paring capabilities and the layout of the slices.
16032	/// \pre \p GlobalLSCost should account for at least as many loads as
16033	/// there is in the slices in \p LoadedSlices.
16034	static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices,
16035	LoadedSlice::Cost &GlobalLSCost) {
16036	unsigned NumberOfSlices = LoadedSlices.size();
16037	// If there is less than 2 elements, no pairing is possible.
16038	if (NumberOfSlices < 2)
16039	return;
16040
16041	// Sort the slices so that elements that are likely to be next to each
16042	// other in memory are next to each other in the list.
16043	llvm::sort(LoadedSlices, [](const LoadedSlice &LHS, const LoadedSlice &RHS) {
16044	assert(LHS.Origin == RHS.Origin && "Different bases not implemented.")((LHS.Origin == RHS.Origin && "Different bases not implemented." ) ? static_cast<void> (0) : __assert_fail ("LHS.Origin == RHS.Origin && \"Different bases not implemented.\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 16044, __PRETTY_FUNCTION__));
16045	return LHS.getOffsetFromBase() < RHS.getOffsetFromBase();
16046	});
16047	const TargetLowering &TLI = LoadedSlices[0].DAG->getTargetLoweringInfo();
16048	// First (resp. Second) is the first (resp. Second) potentially candidate
16049	// to be placed in a paired load.
16050	const LoadedSlice *First = nullptr;
16051	const LoadedSlice *Second = nullptr;
16052	for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice,
16053	// Set the beginning of the pair.
16054	First = Second) {
16055	Second = &LoadedSlices[CurrSlice];
16056
16057	// If First is NULL, it means we start a new pair.
16058	// Get to the next slice.
16059	if (!First)
16060	continue;
16061
16062	EVT LoadedType = First->getLoadedType();
16063
16064	// If the types of the slices are different, we cannot pair them.
16065	if (LoadedType != Second->getLoadedType())
16066	continue;
16067
16068	// Check if the target supplies paired loads for this type.
16069	Align RequiredAlignment;
16070	if (!TLI.hasPairedLoad(LoadedType, RequiredAlignment)) {
16071	// move to the next pair, this type is hopeless.
16072	Second = nullptr;
16073	continue;
16074	}
16075	// Check if we meet the alignment requirement.
16076	if (First->getAlign() < RequiredAlignment)
16077	continue;
16078
16079	// Check that both loads are next to each other in memory.
16080	if (!areSlicesNextToEachOther(First, Second))
16081	continue;
16082
16083	assert(GlobalLSCost.Loads > 0 && "We save more loads than we created!")((GlobalLSCost.Loads > 0 && "We save more loads than we created!" ) ? static_cast<void> (0) : __assert_fail ("GlobalLSCost.Loads > 0 && \"We save more loads than we created!\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 16083, __PRETTY_FUNCTION__));
16084	--GlobalLSCost.Loads;
16085	// Move to the next pair.
16086	Second = nullptr;
16087	}
16088	}
16089
16090	/// Check the profitability of all involved LoadedSlice.
16091	/// Currently, it is considered profitable if there is exactly two
16092	/// involved slices (1) which are (2) next to each other in memory, and
16093	/// whose cost (\see LoadedSlice::Cost) is smaller than the original load (3).
16094	///
16095	/// Note: The order of the elements in \p LoadedSlices may be modified, but not
16096	/// the elements themselves.
16097	///
16098	/// FIXME: When the cost model will be mature enough, we can relax
16099	/// constraints (1) and (2).
16100	static bool isSlicingProfitable(SmallVectorImpl<LoadedSlice> &LoadedSlices,
16101	const APInt &UsedBits, bool ForCodeSize) {
16102	unsigned NumberOfSlices = LoadedSlices.size();
16103	if (StressLoadSlicing)
16104	return NumberOfSlices > 1;
16105
16106	// Check (1).
16107	if (NumberOfSlices != 2)
16108	return false;
16109
16110	// Check (2).
16111	if (!areUsedBitsDense(UsedBits))
16112	return false;
16113
16114	// Check (3).
16115	LoadedSlice::Cost OrigCost(ForCodeSize), GlobalSlicingCost(ForCodeSize);
16116	// The original code has one big load.
16117	OrigCost.Loads = 1;
16118	for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice) {
16119	const LoadedSlice &LS = LoadedSlices[CurrSlice];
16120	// Accumulate the cost of all the slices.
16121	LoadedSlice::Cost SliceCost(LS, ForCodeSize);
16122	GlobalSlicingCost += SliceCost;
16123
16124	// Account as cost in the original configuration the gain obtained
16125	// with the current slices.
16126	OrigCost.addSliceGain(LS);
16127	}
16128
16129	// If the target supports paired load, adjust the cost accordingly.
16130	adjustCostForPairing(LoadedSlices, GlobalSlicingCost);
16131	return OrigCost > GlobalSlicingCost;
16132	}
16133
16134	/// If the given load, \p LI, is used only by trunc or trunc(lshr)
16135	/// operations, split it in the various pieces being extracted.
16136	///
16137	/// This sort of thing is introduced by SROA.
16138	/// This slicing takes care not to insert overlapping loads.
16139	/// \pre LI is a simple load (i.e., not an atomic or volatile load).
16140	bool DAGCombiner::SliceUpLoad(SDNode *N) {
16141	if (Level < AfterLegalizeDAG)
16142	return false;
16143
16144	LoadSDNode *LD = cast<LoadSDNode>(N);
16145	if (!LD->isSimple() \|\| !ISD::isNormalLoad(LD) \|\|
16146	!LD->getValueType(0).isInteger())
16147	return false;
16148
16149	// The algorithm to split up a load of a scalable vector into individual
16150	// elements currently requires knowing the length of the loaded type,
16151	// so will need adjusting to work on scalable vectors.
16152	if (LD->getValueType(0).isScalableVector())
16153	return false;
16154
16155	// Keep track of already used bits to detect overlapping values.
16156	// In that case, we will just abort the transformation.
16157	APInt UsedBits(LD->getValueSizeInBits(0), 0);
16158
16159	SmallVector<LoadedSlice, 4> LoadedSlices;
16160
16161	// Check if this load is used as several smaller chunks of bits.
16162	// Basically, look for uses in trunc or trunc(lshr) and record a new chain
16163	// of computation for each trunc.
16164	for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end();
16165	UI != UIEnd; ++UI) {
16166	// Skip the uses of the chain.
16167	if (UI.getUse().getResNo() != 0)
16168	continue;
16169
16170	SDNode User = UI;
16171	unsigned Shift = 0;
16172
16173	// Check if this is a trunc(lshr).
16174	if (User->getOpcode() == ISD::SRL && User->hasOneUse() &&
16175	isa<ConstantSDNode>(User->getOperand(1))) {
16176	Shift = User->getConstantOperandVal(1);
16177	User = *User->use_begin();
16178	}
16179
16180	// At this point, User is a Truncate, iff we encountered, trunc or
16181	// trunc(lshr).
16182	if (User->getOpcode() != ISD::TRUNCATE)
16183	return false;
16184
16185	// The width of the type must be a power of 2 and greater than 8-bits.
16186	// Otherwise the load cannot be represented in LLVM IR.
16187	// Moreover, if we shifted with a non-8-bits multiple, the slice
16188	// will be across several bytes. We do not support that.
16189	unsigned Width = User->getValueSizeInBits(0);
16190	if (Width < 8 \|\| !isPowerOf2_32(Width) \|\| (Shift & 0x7))
16191	return false;
16192
16193	// Build the slice for this chain of computations.
16194	LoadedSlice LS(User, LD, Shift, &DAG);
16195	APInt CurrentUsedBits = LS.getUsedBits();
16196
16197	// Check if this slice overlaps with another.
16198	if ((CurrentUsedBits & UsedBits) != 0)
16199	return false;
16200	// Update the bits used globally.
16201	UsedBits \|= CurrentUsedBits;
16202
16203	// Check if the new slice would be legal.
16204	if (!LS.isLegal())
16205	return false;
16206
16207	// Record the slice.
16208	LoadedSlices.push_back(LS);
16209	}
16210
16211	// Abort slicing if it does not seem to be profitable.
16212	if (!isSlicingProfitable(LoadedSlices, UsedBits, ForCodeSize))
16213	return false;
16214
16215	++SlicedLoads;
16216
16217	// Rewrite each chain to use an independent load.
16218	// By construction, each chain can be represented by a unique load.
16219
16220	// Prepare the argument for the new token factor for all the slices.
16221	SmallVector<SDValue, 8> ArgChains;
16222	for (const LoadedSlice &LS : LoadedSlices) {
16223	SDValue SliceInst = LS.loadSlice();
16224	CombineTo(LS.Inst, SliceInst, true);
16225	if (SliceInst.getOpcode() != ISD::LOAD)
16226	SliceInst = SliceInst.getOperand(0);
16227	assert(SliceInst->getOpcode() == ISD::LOAD &&((SliceInst->getOpcode() == ISD::LOAD && "It takes more than a zext to get to the loaded slice!!" ) ? static_cast<void> (0) : __assert_fail ("SliceInst->getOpcode() == ISD::LOAD && \"It takes more than a zext to get to the loaded slice!!\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 16228, __PRETTY_FUNCTION__))
16228	"It takes more than a zext to get to the loaded slice!!")((SliceInst->getOpcode() == ISD::LOAD && "It takes more than a zext to get to the loaded slice!!" ) ? static_cast<void> (0) : __assert_fail ("SliceInst->getOpcode() == ISD::LOAD && \"It takes more than a zext to get to the loaded slice!!\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 16228, __PRETTY_FUNCTION__));
16229	ArgChains.push_back(SliceInst.getValue(1));
16230	}
16231
16232	SDValue Chain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other,
16233	ArgChains);
16234	DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
16235	AddToWorklist(Chain.getNode());
16236	return true;
16237	}
16238
16239	/// Check to see if V is (and load (ptr), imm), where the load is having
16240	/// specific bytes cleared out. If so, return the byte size being masked out
16241	/// and the shift amount.
16242	static std::pair<unsigned, unsigned>
16243	CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
16244	std::pair<unsigned, unsigned> Result(0, 0);
16245
16246	// Check for the structure we're looking for.
16247	if (V->getOpcode() != ISD::AND \|\|
16248	!isa<ConstantSDNode>(V->getOperand(1)) \|\|
16249	!ISD::isNormalLoad(V->getOperand(0).getNode()))
16250	return Result;
16251
16252	// Check the chain and pointer.
16253	LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0));
16254	if (LD->getBasePtr() != Ptr) return Result; // Not from same pointer.
16255
16256	// This only handles simple types.
16257	if (V.getValueType() != MVT::i16 &&
16258	V.getValueType() != MVT::i32 &&
16259	V.getValueType() != MVT::i64)
16260	return Result;
16261
16262	// Check the constant mask. Invert it so that the bits being masked out are
16263	// 0 and the bits being kept are 1. Use getSExtValue so that leading bits
16264	// follow the sign bit for uniformity.
16265	uint64_t NotMask = ~cast<ConstantSDNode>(V->getOperand(1))->getSExtValue();
16266	unsigned NotMaskLZ = countLeadingZeros(NotMask);
16267	if (NotMaskLZ & 7) return Result; // Must be multiple of a byte.
16268	unsigned NotMaskTZ = countTrailingZeros(NotMask);
16269	if (NotMaskTZ & 7) return Result; // Must be multiple of a byte.
16270	if (NotMaskLZ == 64) return Result; // All zero mask.
16271
16272	// See if we have a continuous run of bits. If so, we have 01+0
16273	if (countTrailingOnes(NotMask >> NotMaskTZ) + NotMaskTZ + NotMaskLZ != 64)
16274	return Result;
16275
16276	// Adjust NotMaskLZ down to be from the actual size of the int instead of i64.
16277	if (V.getValueType() != MVT::i64 && NotMaskLZ)
16278	NotMaskLZ -= 64-V.getValueSizeInBits();
16279
16280	unsigned MaskedBytes = (V.getValueSizeInBits()-NotMaskLZ-NotMaskTZ)/8;
16281	switch (MaskedBytes) {
16282	case 1:
16283	case 2:
16284	case 4: break;
16285	default: return Result; // All one mask, or 5-byte mask.
16286	}
16287
16288	// Verify that the first bit starts at a multiple of mask so that the access
16289	// is aligned the same as the access width.
16290	if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result;
16291
16292	// For narrowing to be valid, it must be the case that the load the
16293	// immediately preceding memory operation before the store.
16294	if (LD == Chain.getNode())
16295	; // ok.
16296	else if (Chain->getOpcode() == ISD::TokenFactor &&
16297	SDValue(LD, 1).hasOneUse()) {
16298	// LD has only 1 chain use so they are no indirect dependencies.
16299	if (!LD->isOperandOf(Chain.getNode()))
16300	return Result;
16301	} else
16302	return Result; // Fail.
16303
16304	Result.first = MaskedBytes;
16305	Result.second = NotMaskTZ/8;
16306	return Result;
16307	}
16308
16309	/// Check to see if IVal is something that provides a value as specified by
16310	/// MaskInfo. If so, replace the specified store with a narrower store of
16311	/// truncated IVal.
16312	static SDValue
16313	ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
16314	SDValue IVal, StoreSDNode *St,
16315	DAGCombiner *DC) {
16316	unsigned NumBytes = MaskInfo.first;
16317	unsigned ByteShift = MaskInfo.second;
16318	SelectionDAG &DAG = DC->getDAG();
16319
16320	// Check to see if IVal is all zeros in the part being masked in by the 'or'
16321	// that uses this. If not, this is not a replacement.
16322	APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(),
16323	ByteShift8, (ByteShift+NumBytes)8);
16324	if (!DAG.MaskedValueIsZero(IVal, Mask)) return SDValue();
16325
16326	// Check that it is legal on the target to do this. It is legal if the new
16327	// VT we're shrinking to (i8/i16/i32) is legal or we're still before type
16328	// legalization (and the target doesn't explicitly think this is a bad idea).
16329	MVT VT = MVT::getIntegerVT(NumBytes * 8);
16330	const TargetLowering &TLI = DAG.getTargetLoweringInfo();
16331	if (!DC->isTypeLegal(VT))
16332	return SDValue();
16333	if (St->getMemOperand() &&
16334	!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
16335	*St->getMemOperand()))
16336	return SDValue();
16337
16338	// Okay, we can do this! Replace the 'St' store with a store of IVal that is
16339	// shifted by ByteShift and truncated down to NumBytes.
16340	if (ByteShift) {
16341	SDLoc DL(IVal);
16342	IVal = DAG.getNode(ISD::SRL, DL, IVal.getValueType(), IVal,
16343	DAG.getConstant(ByteShift*8, DL,
16344	DC->getShiftAmountTy(IVal.getValueType())));
16345	}
16346
16347	// Figure out the offset for the store and the alignment of the access.
16348	unsigned StOffset;
16349	if (DAG.getDataLayout().isLittleEndian())
16350	StOffset = ByteShift;
16351	else
16352	StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes;
16353
16354	SDValue Ptr = St->getBasePtr();
16355	if (StOffset) {
16356	SDLoc DL(IVal);
16357	Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(StOffset), DL);
16358	}
16359
16360	// Truncate down to the new size.
16361	IVal = DAG.getNode(ISD::TRUNCATE, SDLoc(IVal), VT, IVal);
16362
16363	++OpsNarrowed;
16364	return DAG
16365	.getStore(St->getChain(), SDLoc(St), IVal, Ptr,
16366	St->getPointerInfo().getWithOffset(StOffset),
16367	St->getOriginalAlign());
16368	}
16369
16370	/// Look for sequence of load / op / store where op is one of 'or', 'xor', and
16371	/// 'and' of immediates. If 'op' is only touching some of the loaded bits, try
16372	/// narrowing the load and store if it would end up being a win for performance
16373	/// or code size.
16374	SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
16375	StoreSDNode *ST = cast<StoreSDNode>(N);
16376	if (!ST->isSimple())
16377	return SDValue();
16378
16379	SDValue Chain = ST->getChain();
16380	SDValue Value = ST->getValue();
16381	SDValue Ptr = ST->getBasePtr();
16382	EVT VT = Value.getValueType();
16383
16384	if (ST->isTruncatingStore() \|\| VT.isVector() \|\| !Value.hasOneUse())
16385	return SDValue();
16386
16387	unsigned Opc = Value.getOpcode();
16388
16389	// If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst
16390	// is a byte mask indicating a consecutive number of bytes, check to see if
16391	// Y is known to provide just those bytes. If so, we try to replace the
16392	// load + replace + store sequence with a single (narrower) store, which makes
16393	// the load dead.
16394	if (Opc == ISD::OR && EnableShrinkLoadReplaceStoreWithStore) {
16395	std::pair<unsigned, unsigned> MaskedLoad;
16396	MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain);
16397	if (MaskedLoad.first)
16398	if (SDValue NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
16399	Value.getOperand(1), ST,this))
16400	return NewST;
16401
16402	// Or is commutative, so try swapping X and Y.
16403	MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain);
16404	if (MaskedLoad.first)
16405	if (SDValue NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
16406	Value.getOperand(0), ST,this))
16407	return NewST;
16408	}
16409
16410	if (!EnableReduceLoadOpStoreWidth)
16411	return SDValue();
16412
16413	if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) \|\|
16414	Value.getOperand(1).getOpcode() != ISD::Constant)
16415	return SDValue();
16416
16417	SDValue N0 = Value.getOperand(0);
16418	if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
16419	Chain == SDValue(N0.getNode(), 1)) {
16420	LoadSDNode *LD = cast<LoadSDNode>(N0);
16421	if (LD->getBasePtr() != Ptr \|\|
16422	LD->getPointerInfo().getAddrSpace() !=
16423	ST->getPointerInfo().getAddrSpace())
16424	return SDValue();
16425
16426	// Find the type to narrow it the load / op / store to.
16427	SDValue N1 = Value.getOperand(1);
16428	unsigned BitWidth = N1.getValueSizeInBits();
16429	APInt Imm = cast<ConstantSDNode>(N1)->getAPIntValue();
16430	if (Opc == ISD::AND)
16431	Imm ^= APInt::getAllOnesValue(BitWidth);
16432	if (Imm == 0 \|\| Imm.isAllOnesValue())
16433	return SDValue();
16434	unsigned ShAmt = Imm.countTrailingZeros();
16435	unsigned MSB = BitWidth - Imm.countLeadingZeros() - 1;
16436	unsigned NewBW = NextPowerOf2(MSB - ShAmt);
16437	EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
16438	// The narrowing should be profitable, the load/store operation should be
16439	// legal (or custom) and the store size should be equal to the NewVT width.
16440	while (NewBW < BitWidth &&
16441	(NewVT.getStoreSizeInBits() != NewBW \|\|
16442	!TLI.isOperationLegalOrCustom(Opc, NewVT) \|\|
16443	!TLI.isNarrowingProfitable(VT, NewVT))) {
16444	NewBW = NextPowerOf2(NewBW);
16445	NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
16446	}
16447	if (NewBW >= BitWidth)
16448	return SDValue();
16449
16450	// If the lsb changed does not start at the type bitwidth boundary,
16451	// start at the previous one.
16452	if (ShAmt % NewBW)
16453	ShAmt = (((ShAmt + NewBW - 1) / NewBW) * NewBW) - NewBW;
16454	APInt Mask = APInt::getBitsSet(BitWidth, ShAmt,
16455	std::min(BitWidth, ShAmt + NewBW));
16456	if ((Imm & Mask) == Imm) {
16457	APInt NewImm = (Imm & Mask).lshr(ShAmt).trunc(NewBW);
16458	if (Opc == ISD::AND)
16459	NewImm ^= APInt::getAllOnesValue(NewBW);
16460	uint64_t PtrOff = ShAmt / 8;
16461	// For big endian targets, we need to adjust the offset to the pointer to
16462	// load the correct bytes.
16463	if (DAG.getDataLayout().isBigEndian())
16464	PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff;
16465
16466	Align NewAlign = commonAlignment(LD->getAlign(), PtrOff);
16467	Type NewVTTy = NewVT.getTypeForEVT(DAG.getContext());
16468	if (NewAlign < DAG.getDataLayout().getABITypeAlign(NewVTTy))
16469	return SDValue();
16470
16471	SDValue NewPtr =
16472	DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(PtrOff), SDLoc(LD));
16473	SDValue NewLD =
16474	DAG.getLoad(NewVT, SDLoc(N0), LD->getChain(), NewPtr,
16475	LD->getPointerInfo().getWithOffset(PtrOff), NewAlign,
16476	LD->getMemOperand()->getFlags(), LD->getAAInfo());
16477	SDValue NewVal = DAG.getNode(Opc, SDLoc(Value), NewVT, NewLD,
16478	DAG.getConstant(NewImm, SDLoc(Value),
16479	NewVT));
16480	SDValue NewST =
16481	DAG.getStore(Chain, SDLoc(N), NewVal, NewPtr,
16482	ST->getPointerInfo().getWithOffset(PtrOff), NewAlign);
16483
16484	AddToWorklist(NewPtr.getNode());
16485	AddToWorklist(NewLD.getNode());
16486	AddToWorklist(NewVal.getNode());
16487	WorklistRemover DeadNodes(*this);
16488	DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1));
16489	++OpsNarrowed;
16490	return NewST;
16491	}
16492	}
16493
16494	return SDValue();
16495	}
16496
16497	/// For a given floating point load / store pair, if the load value isn't used
16498	/// by any other operations, then consider transforming the pair to integer
16499	/// load / store operations if the target deems the transformation profitable.
16500	SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
16501	StoreSDNode *ST = cast<StoreSDNode>(N);
16502	SDValue Value = ST->getValue();
16503	if (ISD::isNormalStore(ST) && ISD::isNormalLoad(Value.getNode()) &&
16504	Value.hasOneUse()) {
16505	LoadSDNode *LD = cast<LoadSDNode>(Value);
16506	EVT VT = LD->getMemoryVT();
16507	if (!VT.isFloatingPoint() \|\|
16508	VT != ST->getMemoryVT() \|\|
16509	LD->isNonTemporal() \|\|
16510	ST->isNonTemporal() \|\|
16511	LD->getPointerInfo().getAddrSpace() != 0 \|\|
16512	ST->getPointerInfo().getAddrSpace() != 0)
16513	return SDValue();
16514
16515	TypeSize VTSize = VT.getSizeInBits();
16516
16517	// We don't know the size of scalable types at compile time so we cannot
16518	// create an integer of the equivalent size.
16519	if (VTSize.isScalable())
16520	return SDValue();
16521
16522	EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VTSize.getFixedSize());
16523	if (!TLI.isOperationLegal(ISD::LOAD, IntVT) \|\|
16524	!TLI.isOperationLegal(ISD::STORE, IntVT) \|\|
16525	!TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) \|\|
16526	!TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT))
16527	return SDValue();
16528
16529	Align LDAlign = LD->getAlign();
16530	Align STAlign = ST->getAlign();
16531	Type IntVTTy = IntVT.getTypeForEVT(DAG.getContext());
16532	Align ABIAlign = DAG.getDataLayout().getABITypeAlign(IntVTTy);
16533	if (LDAlign < ABIAlign \|\| STAlign < ABIAlign)
16534	return SDValue();
16535
16536	SDValue NewLD =
16537	DAG.getLoad(IntVT, SDLoc(Value), LD->getChain(), LD->getBasePtr(),
16538	LD->getPointerInfo(), LDAlign);
16539
16540	SDValue NewST =
16541	DAG.getStore(ST->getChain(), SDLoc(N), NewLD, ST->getBasePtr(),
16542	ST->getPointerInfo(), STAlign);
16543
16544	AddToWorklist(NewLD.getNode());
16545	AddToWorklist(NewST.getNode());
16546	WorklistRemover DeadNodes(*this);
16547	DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1));
16548	++LdStFP2Int;
16549	return NewST;
16550	}
16551
16552	return SDValue();
16553	}
16554
16555	// This is a helper function for visitMUL to check the profitability
16556	// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
16557	// MulNode is the original multiply, AddNode is (add x, c1),
16558	// and ConstNode is c2.
16559	//
16560	// If the (add x, c1) has multiple uses, we could increase
16561	// the number of adds if we make this transformation.
16562	// It would only be worth doing this if we can remove a
16563	// multiply in the process. Check for that here.
16564	// To illustrate:
16565	// (A + c1) * c3
16566	// (A + c2) * c3
16567	// We're checking for cases where we have common "c3 * A" expressions.
16568	bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode,
16569	SDValue &AddNode,
16570	SDValue &ConstNode) {
16571	APInt Val;
16572
16573	// If the add only has one use, this would be OK to do.
16574	if (AddNode.getNode()->hasOneUse())
16575	return true;
16576
16577	// Walk all the users of the constant with which we're multiplying.
16578	for (SDNode *Use : ConstNode->uses()) {
16579	if (Use == MulNode) // This use is the one we're on right now. Skip it.
16580	continue;
16581
16582	if (Use->getOpcode() == ISD::MUL) { // We have another multiply use.
16583	SDNode *OtherOp;
16584	SDNode *MulVar = AddNode.getOperand(0).getNode();
16585
16586	// OtherOp is what we're multiplying against the constant.
16587	if (Use->getOperand(0) == ConstNode)
16588	OtherOp = Use->getOperand(1).getNode();
16589	else
16590	OtherOp = Use->getOperand(0).getNode();
16591
16592	// Check to see if multiply is with the same operand of our "add".
16593	//
16594	// ConstNode = CONST
16595	// Use = ConstNode * A <-- visiting Use. OtherOp is A.
16596	// ...
16597	// AddNode = (A + c1) <-- MulVar is A.
16598	// = AddNode * ConstNode <-- current visiting instruction.
16599	//
16600	// If we make this transformation, we will have a common
16601	// multiply (ConstNode * A) that we can save.
16602	if (OtherOp == MulVar)
16603	return true;
16604
16605	// Now check to see if a future expansion will give us a common
16606	// multiply.
16607	//
16608	// ConstNode = CONST
16609	// AddNode = (A + c1)
16610	// ... = AddNode * ConstNode <-- current visiting instruction.
16611	// ...
16612	// OtherOp = (A + c2)
16613	// Use = OtherOp * ConstNode <-- visiting Use.
16614	//
16615	// If we make this transformation, we will have a common
16616	// multiply (CONST * A) after we also do the same transformation
16617	// to the "t2" instruction.
16618	if (OtherOp->getOpcode() == ISD::ADD &&
16619	DAG.isConstantIntBuildVectorOrConstantInt(OtherOp->getOperand(1)) &&
16620	OtherOp->getOperand(0).getNode() == MulVar)
16621	return true;
16622	}
16623	}
16624
16625	// Didn't find a case where this would be profitable.
16626	return false;
16627	}
16628
16629	SDValue DAGCombiner::getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
16630	unsigned NumStores) {
16631	SmallVector<SDValue, 8> Chains;
16632	SmallPtrSet<const SDNode *, 8> Visited;
16633	SDLoc StoreDL(StoreNodes[0].MemNode);
16634
16635	for (unsigned i = 0; i < NumStores; ++i) {
16636	Visited.insert(StoreNodes[i].MemNode);
16637	}
16638
16639	// don't include nodes that are children or repeated nodes.
16640	for (unsigned i = 0; i < NumStores; ++i) {
16641	if (Visited.insert(StoreNodes[i].MemNode->getChain().getNode()).second)
16642	Chains.push_back(StoreNodes[i].MemNode->getChain());
16643	}
16644
16645	assert(Chains.size() > 0 && "Chain should have generated a chain")((Chains.size() > 0 && "Chain should have generated a chain" ) ? static_cast<void> (0) : __assert_fail ("Chains.size() > 0 && \"Chain should have generated a chain\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 16645, __PRETTY_FUNCTION__));
16646	return DAG.getTokenFactor(StoreDL, Chains);
16647	}
16648
16649	bool DAGCombiner::mergeStoresOfConstantsOrVecElts(
16650	SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT, unsigned NumStores,
16651	bool IsConstantSrc, bool UseVector, bool UseTrunc) {
16652	// Make sure we have something to merge.
16653	if (NumStores < 2)
16654	return false;
16655
16656	// The latest Node in the DAG.
16657	SDLoc DL(StoreNodes[0].MemNode);
16658
16659	TypeSize ElementSizeBits = MemVT.getStoreSizeInBits();
16660	unsigned SizeInBits = NumStores * ElementSizeBits;
16661	unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
16662
16663	EVT StoreTy;
16664	if (UseVector) {
16665	unsigned Elts = NumStores * NumMemElts;
16666	// Get the type for the merged vector store.
16667	StoreTy = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
16668	} else
16669	StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits);
16670
16671	SDValue StoredVal;
16672	if (UseVector) {
16673	if (IsConstantSrc) {
16674	SmallVector<SDValue, 8> BuildVector;
16675	for (unsigned I = 0; I != NumStores; ++I) {
16676	StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode);
16677	SDValue Val = St->getValue();
16678	// If constant is of the wrong type, convert it now.
16679	if (MemVT != Val.getValueType()) {
16680	Val = peekThroughBitcasts(Val);
16681	// Deal with constants of wrong size.
16682	if (ElementSizeBits != Val.getValueSizeInBits()) {
16683	EVT IntMemVT =
16684	EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits());
16685	if (isa<ConstantFPSDNode>(Val)) {
16686	// Not clear how to truncate FP values.
16687	return false;
16688	} else if (auto *C = dyn_cast<ConstantSDNode>(Val))
16689	Val = DAG.getConstant(C->getAPIntValue()
16690	.zextOrTrunc(Val.getValueSizeInBits())
16691	.zextOrTrunc(ElementSizeBits),
16692	SDLoc(C), IntMemVT);
16693	}
16694	// Make sure correctly size type is the correct type.
16695	Val = DAG.getBitcast(MemVT, Val);
16696	}
16697	BuildVector.push_back(Val);
16698	}
16699	StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
16700	: ISD::BUILD_VECTOR,
16701	DL, StoreTy, BuildVector);
16702	} else {
16703	SmallVector<SDValue, 8> Ops;
16704	for (unsigned i = 0; i < NumStores; ++i) {
16705	StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
16706	SDValue Val = peekThroughBitcasts(St->getValue());
16707	// All operands of BUILD_VECTOR / CONCAT_VECTOR must be of
16708	// type MemVT. If the underlying value is not the correct
16709	// type, but it is an extraction of an appropriate vector we
16710	// can recast Val to be of the correct type. This may require
16711	// converting between EXTRACT_VECTOR_ELT and
16712	// EXTRACT_SUBVECTOR.
16713	if ((MemVT != Val.getValueType()) &&
16714	(Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT \|\|
16715	Val.getOpcode() == ISD::EXTRACT_SUBVECTOR)) {
16716	EVT MemVTScalarTy = MemVT.getScalarType();
16717	// We may need to add a bitcast here to get types to line up.
16718	if (MemVTScalarTy != Val.getValueType().getScalarType()) {
16719	Val = DAG.getBitcast(MemVT, Val);
16720	} else {
16721	unsigned OpC = MemVT.isVector() ? ISD::EXTRACT_SUBVECTOR
16722	: ISD::EXTRACT_VECTOR_ELT;
16723	SDValue Vec = Val.getOperand(0);
16724	SDValue Idx = Val.getOperand(1);
16725	Val = DAG.getNode(OpC, SDLoc(Val), MemVT, Vec, Idx);
16726	}
16727	}
16728	Ops.push_back(Val);
16729	}
16730
16731	// Build the extracted vector elements back into a vector.
16732	StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
16733	: ISD::BUILD_VECTOR,
16734	DL, StoreTy, Ops);
16735	}
16736	} else {
16737	// We should always use a vector store when merging extracted vector
16738	// elements, so this path implies a store of constants.
16739	assert(IsConstantSrc && "Merged vector elements should use vector store")((IsConstantSrc && "Merged vector elements should use vector store" ) ? static_cast<void> (0) : __assert_fail ("IsConstantSrc && \"Merged vector elements should use vector store\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 16739, __PRETTY_FUNCTION__));
16740
16741	APInt StoreInt(SizeInBits, 0);
16742
16743	// Construct a single integer constant which is made of the smaller
16744	// constant inputs.
16745	bool IsLE = DAG.getDataLayout().isLittleEndian();
16746	for (unsigned i = 0; i < NumStores; ++i) {
16747	unsigned Idx = IsLE ? (NumStores - 1 - i) : i;
16748	StoreSDNode *St = cast<StoreSDNode>(StoreNodes[Idx].MemNode);
16749
16750	SDValue Val = St->getValue();
16751	Val = peekThroughBitcasts(Val);
16752	StoreInt <<= ElementSizeBits;
16753	if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {
16754	StoreInt \|= C->getAPIntValue()
16755	.zextOrTrunc(ElementSizeBits)
16756	.zextOrTrunc(SizeInBits);
16757	} else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) {
16758	StoreInt \|= C->getValueAPF()
16759	.bitcastToAPInt()
16760	.zextOrTrunc(ElementSizeBits)
16761	.zextOrTrunc(SizeInBits);
16762	// If fp truncation is necessary give up for now.
16763	if (MemVT.getSizeInBits() != ElementSizeBits)
16764	return false;
16765	} else {
16766	llvm_unreachable("Invalid constant element type")::llvm::llvm_unreachable_internal("Invalid constant element type" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 16766);
16767	}
16768	}
16769
16770	// Create the new Load and Store operations.
16771	StoredVal = DAG.getConstant(StoreInt, DL, StoreTy);
16772	}
16773
16774	LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
16775	SDValue NewChain = getMergeStoreChains(StoreNodes, NumStores);
16776
16777	// make sure we use trunc store if it's necessary to be legal.
16778	SDValue NewStore;
16779	if (!UseTrunc) {
16780	NewStore =
16781	DAG.getStore(NewChain, DL, StoredVal, FirstInChain->getBasePtr(),
16782	FirstInChain->getPointerInfo(), FirstInChain->getAlign());
16783	} else { // Must be realized as a trunc store
16784	EVT LegalizedStoredValTy =
16785	TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType());
16786	unsigned LegalizedStoreSize = LegalizedStoredValTy.getSizeInBits();
16787	ConstantSDNode *C = cast<ConstantSDNode>(StoredVal);
16788	SDValue ExtendedStoreVal =
16789	DAG.getConstant(C->getAPIntValue().zextOrTrunc(LegalizedStoreSize), DL,
16790	LegalizedStoredValTy);
16791	NewStore = DAG.getTruncStore(
16792	NewChain, DL, ExtendedStoreVal, FirstInChain->getBasePtr(),
16793	FirstInChain->getPointerInfo(), StoredVal.getValueType() /TVT/,
16794	FirstInChain->getAlign(), FirstInChain->getMemOperand()->getFlags());
16795	}
16796
16797	// Replace all merged stores with the new store.
16798	for (unsigned i = 0; i < NumStores; ++i)
16799	CombineTo(StoreNodes[i].MemNode, NewStore);
16800
16801	AddToWorklist(NewChain.getNode());
16802	return true;
16803	}
16804
16805	void DAGCombiner::getStoreMergeCandidates(
16806	StoreSDNode *St, SmallVectorImpl<MemOpLink> &StoreNodes,
16807	SDNode *&RootNode) {
16808	// This holds the base pointer, index, and the offset in bytes from the base
16809	// pointer. We must have a base and an offset. Do not handle stores to undef
16810	// base pointers.
16811	BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
16812	if (!BasePtr.getBase().getNode() \|\| BasePtr.getBase().isUndef())
16813	return;
16814
16815	SDValue Val = peekThroughBitcasts(St->getValue());
16816	StoreSource StoreSrc = getStoreSource(Val);
16817	assert(StoreSrc != StoreSource::Unknown && "Expected known source for store")((StoreSrc != StoreSource::Unknown && "Expected known source for store" ) ? static_cast<void> (0) : __assert_fail ("StoreSrc != StoreSource::Unknown && \"Expected known source for store\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 16817, __PRETTY_FUNCTION__));
16818
16819	// Match on loadbaseptr if relevant.
16820	EVT MemVT = St->getMemoryVT();
16821	BaseIndexOffset LBasePtr;
16822	EVT LoadVT;
16823	if (StoreSrc == StoreSource::Load) {
16824	auto *Ld = cast<LoadSDNode>(Val);
16825	LBasePtr = BaseIndexOffset::match(Ld, DAG);
16826	LoadVT = Ld->getMemoryVT();
16827	// Load and store should be the same type.
16828	if (MemVT != LoadVT)
16829	return;
16830	// Loads must only have one use.
16831	if (!Ld->hasNUsesOfValue(1, 0))
16832	return;
16833	// The memory operands must not be volatile/indexed/atomic.
16834	// TODO: May be able to relax for unordered atomics (see D66309)
16835	if (!Ld->isSimple() \|\| Ld->isIndexed())
16836	return;
16837	}
16838	auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr,
16839	int64_t &Offset) -> bool {
16840	// The memory operands must not be volatile/indexed/atomic.
16841	// TODO: May be able to relax for unordered atomics (see D66309)
16842	if (!Other->isSimple() \|\| Other->isIndexed())
16843	return false;
16844	// Don't mix temporal stores with non-temporal stores.
16845	if (St->isNonTemporal() != Other->isNonTemporal())
16846	return false;
16847	SDValue OtherBC = peekThroughBitcasts(Other->getValue());
16848	// Allow merging constants of different types as integers.
16849	bool NoTypeMatch = (MemVT.isInteger()) ? !MemVT.bitsEq(Other->getMemoryVT())
16850	: Other->getMemoryVT() != MemVT;
16851	switch (StoreSrc) {
16852	case StoreSource::Load: {
16853	if (NoTypeMatch)
16854	return false;
16855	// The Load's Base Ptr must also match.
16856	auto *OtherLd = dyn_cast<LoadSDNode>(OtherBC);
16857	if (!OtherLd)
16858	return false;
16859	BaseIndexOffset LPtr = BaseIndexOffset::match(OtherLd, DAG);
16860	if (LoadVT != OtherLd->getMemoryVT())
16861	return false;
16862	// Loads must only have one use.
16863	if (!OtherLd->hasNUsesOfValue(1, 0))
16864	return false;
16865	// The memory operands must not be volatile/indexed/atomic.
16866	// TODO: May be able to relax for unordered atomics (see D66309)
16867	if (!OtherLd->isSimple() \|\| OtherLd->isIndexed())
16868	return false;
16869	// Don't mix temporal loads with non-temporal loads.
16870	if (cast<LoadSDNode>(Val)->isNonTemporal() != OtherLd->isNonTemporal())
16871	return false;
16872	if (!(LBasePtr.equalBaseIndex(LPtr, DAG)))
16873	return false;
16874	break;
16875	}
16876	case StoreSource::Constant:
16877	if (NoTypeMatch)
16878	return false;
16879	if (!(isa<ConstantSDNode>(OtherBC) \|\| isa<ConstantFPSDNode>(OtherBC)))
16880	return false;
16881	break;
16882	case StoreSource::Extract:
16883	// Do not merge truncated stores here.
16884	if (Other->isTruncatingStore())
16885	return false;
16886	if (!MemVT.bitsEq(OtherBC.getValueType()))
16887	return false;
16888	if (OtherBC.getOpcode() != ISD::EXTRACT_VECTOR_ELT &&
16889	OtherBC.getOpcode() != ISD::EXTRACT_SUBVECTOR)
16890	return false;
16891	break;
16892	default:
16893	llvm_unreachable("Unhandled store source for merging")::llvm::llvm_unreachable_internal("Unhandled store source for merging" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 16893);
16894	}
16895	Ptr = BaseIndexOffset::match(Other, DAG);
16896	return (BasePtr.equalBaseIndex(Ptr, DAG, Offset));
16897	};
16898
16899	// Check if the pair of StoreNode and the RootNode already bail out many
16900	// times which is over the limit in dependence check.
16901	auto OverLimitInDependenceCheck = [&](SDNode *StoreNode,
16902	SDNode *RootNode) -> bool {
16903	auto RootCount = StoreRootCountMap.find(StoreNode);
16904	return RootCount != StoreRootCountMap.end() &&
16905	RootCount->second.first == RootNode &&
16906	RootCount->second.second > StoreMergeDependenceLimit;
16907	};
16908
16909	auto TryToAddCandidate = [&](SDNode::use_iterator UseIter) {
16910	// This must be a chain use.
16911	if (UseIter.getOperandNo() != 0)
16912	return;
16913	if (auto OtherStore = dyn_cast<StoreSDNode>(UseIter)) {
16914	BaseIndexOffset Ptr;
16915	int64_t PtrDiff;
16916	if (CandidateMatch(OtherStore, Ptr, PtrDiff) &&
16917	!OverLimitInDependenceCheck(OtherStore, RootNode))
16918	StoreNodes.push_back(MemOpLink(OtherStore, PtrDiff));
16919	}
16920	};
16921
16922	// We looking for a root node which is an ancestor to all mergable
16923	// stores. We search up through a load, to our root and then down
16924	// through all children. For instance we will find Store{1,2,3} if
16925	// St is Store1, Store2. or Store3 where the root is not a load
16926	// which always true for nonvolatile ops. TODO: Expand
16927	// the search to find all valid candidates through multiple layers of loads.
16928	//
16929	// Root
16930	// \|-------\|-------\|
16931	// Load Load Store3
16932	// \| \|
16933	// Store1 Store2
16934	//
16935	// FIXME: We should be able to climb and
16936	// descend TokenFactors to find candidates as well.
16937
16938	RootNode = St->getChain().getNode();
16939
16940	unsigned NumNodesExplored = 0;
16941	const unsigned MaxSearchNodes = 1024;
16942	if (auto *Ldn = dyn_cast<LoadSDNode>(RootNode)) {
16943	RootNode = Ldn->getChain().getNode();
16944	for (auto I = RootNode->use_begin(), E = RootNode->use_end();
16945	I != E && NumNodesExplored < MaxSearchNodes; ++I, ++NumNodesExplored) {
16946	if (I.getOperandNo() == 0 && isa<LoadSDNode>(*I)) { // walk down chain
16947	for (auto I2 = (I)->use_begin(), E2 = (I)->use_end(); I2 != E2; ++I2)
16948	TryToAddCandidate(I2);
16949	}
16950	}
16951	} else {
16952	for (auto I = RootNode->use_begin(), E = RootNode->use_end();
16953	I != E && NumNodesExplored < MaxSearchNodes; ++I, ++NumNodesExplored)
16954	TryToAddCandidate(I);
16955	}
16956	}
16957
16958	// We need to check that merging these stores does not cause a loop in
16959	// the DAG. Any store candidate may depend on another candidate
16960	// indirectly through its operand (we already consider dependencies
16961	// through the chain). Check in parallel by searching up from
16962	// non-chain operands of candidates.
16963	bool DAGCombiner::checkMergeStoreCandidatesForDependencies(
16964	SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
16965	SDNode *RootNode) {
16966	// FIXME: We should be able to truncate a full search of
16967	// predecessors by doing a BFS and keeping tabs the originating
16968	// stores from which worklist nodes come from in a similar way to
16969	// TokenFactor simplfication.
16970
16971	SmallPtrSet<const SDNode *, 32> Visited;
16972	SmallVector<const SDNode *, 8> Worklist;
16973
16974	// RootNode is a predecessor to all candidates so we need not search
16975	// past it. Add RootNode (peeking through TokenFactors). Do not count
16976	// these towards size check.
16977
16978	Worklist.push_back(RootNode);
16979	while (!Worklist.empty()) {
16980	auto N = Worklist.pop_back_val();
16981	if (!Visited.insert(N).second)
16982	continue; // Already present in Visited.
16983	if (N->getOpcode() == ISD::TokenFactor) {
16984	for (SDValue Op : N->ops())
16985	Worklist.push_back(Op.getNode());
16986	}
16987	}
16988
16989	// Don't count pruning nodes towards max.
16990	unsigned int Max = 1024 + Visited.size();
16991	// Search Ops of store candidates.
16992	for (unsigned i = 0; i < NumStores; ++i) {
16993	SDNode *N = StoreNodes[i].MemNode;
16994	// Of the 4 Store Operands:
16995	// * Chain (Op 0) -> We have already considered these
16996	// in candidate selection and can be
16997	// safely ignored
16998	// * Value (Op 1) -> Cycles may happen (e.g. through load chains)
16999	// * Address (Op 2) -> Merged addresses may only vary by a fixed constant,
17000	// but aren't necessarily fromt the same base node, so
17001	// cycles possible (e.g. via indexed store).
17002	// * (Op 3) -> Represents the pre or post-indexing offset (or undef for
17003	// non-indexed stores). Not constant on all targets (e.g. ARM)
17004	// and so can participate in a cycle.
17005	for (unsigned j = 1; j < N->getNumOperands(); ++j)
17006	Worklist.push_back(N->getOperand(j).getNode());
17007	}
17008	// Search through DAG. We can stop early if we find a store node.
17009	for (unsigned i = 0; i < NumStores; ++i)
17010	if (SDNode::hasPredecessorHelper(StoreNodes[i].MemNode, Visited, Worklist,
17011	Max)) {
17012	// If the searching bail out, record the StoreNode and RootNode in the
17013	// StoreRootCountMap. If we have seen the pair many times over a limit,
17014	// we won't add the StoreNode into StoreNodes set again.
17015	if (Visited.size() >= Max) {
17016	auto &RootCount = StoreRootCountMap[StoreNodes[i].MemNode];
17017	if (RootCount.first == RootNode)
17018	RootCount.second++;
17019	else
17020	RootCount = {RootNode, 1};
17021	}
17022	return false;
17023	}
17024	return true;
17025	}
17026
17027	unsigned
17028	DAGCombiner::getConsecutiveStores(SmallVectorImpl<MemOpLink> &StoreNodes,
17029	int64_t ElementSizeBytes) const {
17030	while (true) {
17031	// Find a store past the width of the first store.
17032	size_t StartIdx = 0;
17033	while ((StartIdx + 1 < StoreNodes.size()) &&
17034	StoreNodes[StartIdx].OffsetFromBase + ElementSizeBytes !=
17035	StoreNodes[StartIdx + 1].OffsetFromBase)
17036	++StartIdx;
17037
17038	// Bail if we don't have enough candidates to merge.
17039	if (StartIdx + 1 >= StoreNodes.size())
17040	return 0;
17041
17042	// Trim stores that overlapped with the first store.
17043	if (StartIdx)
17044	StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + StartIdx);
17045
17046	// Scan the memory operations on the chain and find the first
17047	// non-consecutive store memory address.
17048	unsigned NumConsecutiveStores = 1;
17049	int64_t StartAddress = StoreNodes[0].OffsetFromBase;
17050	// Check that the addresses are consecutive starting from the second
17051	// element in the list of stores.
17052	for (unsigned i = 1, e = StoreNodes.size(); i < e; ++i) {
17053	int64_t CurrAddress = StoreNodes[i].OffsetFromBase;
17054	if (CurrAddress - StartAddress != (ElementSizeBytes * i))
17055	break;
17056	NumConsecutiveStores = i + 1;
17057	}
17058	if (NumConsecutiveStores > 1)
17059	return NumConsecutiveStores;
17060
17061	// There are no consecutive stores at the start of the list.
17062	// Remove the first store and try again.
17063	StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 1);
17064	}
17065	}
17066
17067	bool DAGCombiner::tryStoreMergeOfConstants(
17068	SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumConsecutiveStores,
17069	EVT MemVT, SDNode *RootNode, bool AllowVectors) {
17070	LLVMContext &Context = *DAG.getContext();
17071	const DataLayout &DL = DAG.getDataLayout();
17072	int64_t ElementSizeBytes = MemVT.getStoreSize();
17073	unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
17074	bool MadeChange = false;
17075
17076	// Store the constants into memory as one consecutive store.
17077	while (NumConsecutiveStores >= 2) {
17078	LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
17079	unsigned FirstStoreAS = FirstInChain->getAddressSpace();
17080	unsigned FirstStoreAlign = FirstInChain->getAlignment();
17081	unsigned LastLegalType = 1;
17082	unsigned LastLegalVectorType = 1;
17083	bool LastIntegerTrunc = false;
17084	bool NonZero = false;
17085	unsigned FirstZeroAfterNonZero = NumConsecutiveStores;
17086	for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
17087	StoreSDNode *ST = cast<StoreSDNode>(StoreNodes[i].MemNode);
17088	SDValue StoredVal = ST->getValue();
17089	bool IsElementZero = false;
17090	if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal))
17091	IsElementZero = C->isNullValue();
17092	else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal))
17093	IsElementZero = C->getConstantFPValue()->isNullValue();
17094	if (IsElementZero) {
17095	if (NonZero && FirstZeroAfterNonZero == NumConsecutiveStores)
17096	FirstZeroAfterNonZero = i;
17097	}
17098	NonZero \|= !IsElementZero;
17099
17100	// Find a legal type for the constant store.
17101	unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
17102	EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits);
17103	bool IsFast = false;
17104
17105	// Break early when size is too large to be legal.
17106	if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
17107	break;
17108
17109	if (TLI.isTypeLegal(StoreTy) &&
17110	TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
17111	TLI.allowsMemoryAccess(Context, DL, StoreTy,
17112	*FirstInChain->getMemOperand(), &IsFast) &&
17113	IsFast) {
17114	LastIntegerTrunc = false;
17115	LastLegalType = i + 1;
17116	// Or check whether a truncstore is legal.
17117	} else if (TLI.getTypeAction(Context, StoreTy) ==
17118	TargetLowering::TypePromoteInteger) {
17119	EVT LegalizedStoredValTy =
17120	TLI.getTypeToTransformTo(Context, StoredVal.getValueType());
17121	if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
17122	TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) &&
17123	TLI.allowsMemoryAccess(Context, DL, StoreTy,
17124	*FirstInChain->getMemOperand(), &IsFast) &&
17125	IsFast) {
17126	LastIntegerTrunc = true;
17127	LastLegalType = i + 1;
17128	}
17129	}
17130
17131	// We only use vectors if the constant is known to be zero or the
17132	// target allows it and the function is not marked with the
17133	// noimplicitfloat attribute.
17134	if ((!NonZero \|\|
17135	TLI.storeOfVectorConstantIsCheap(MemVT, i + 1, FirstStoreAS)) &&
17136	AllowVectors) {
17137	// Find a legal type for the vector store.
17138	unsigned Elts = (i + 1) * NumMemElts;
17139	EVT Ty = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
17140	if (TLI.isTypeLegal(Ty) && TLI.isTypeLegal(MemVT) &&
17141	TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) &&
17142	TLI.allowsMemoryAccess(Context, DL, Ty,
17143	*FirstInChain->getMemOperand(), &IsFast) &&
17144	IsFast)
17145	LastLegalVectorType = i + 1;
17146	}
17147	}
17148
17149	bool UseVector = (LastLegalVectorType > LastLegalType) && AllowVectors;
17150	unsigned NumElem = (UseVector) ? LastLegalVectorType : LastLegalType;
17151
17152	// Check if we found a legal integer type that creates a meaningful
17153	// merge.
17154	if (NumElem < 2) {
17155	// We know that candidate stores are in order and of correct
17156	// shape. While there is no mergeable sequence from the
17157	// beginning one may start later in the sequence. The only
17158	// reason a merge of size N could have failed where another of
17159	// the same size would not have, is if the alignment has
17160	// improved or we've dropped a non-zero value. Drop as many
17161	// candidates as we can here.
17162	unsigned NumSkip = 1;
17163	while ((NumSkip < NumConsecutiveStores) &&
17164	(NumSkip < FirstZeroAfterNonZero) &&
17165	(StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
17166	NumSkip++;
17167
17168	StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
17169	NumConsecutiveStores -= NumSkip;
17170	continue;
17171	}
17172
17173	// Check that we can merge these candidates without causing a cycle.
17174	if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
17175	RootNode)) {
17176	StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
17177	NumConsecutiveStores -= NumElem;
17178	continue;
17179	}
17180
17181	MadeChange \|= mergeStoresOfConstantsOrVecElts(
17182	StoreNodes, MemVT, NumElem, true, UseVector, LastIntegerTrunc);
17183
17184	// Remove merged stores for next iteration.
17185	StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
17186	NumConsecutiveStores -= NumElem;
17187	}
17188	return MadeChange;
17189	}
17190
17191	bool DAGCombiner::tryStoreMergeOfExtracts(
17192	SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumConsecutiveStores,
17193	EVT MemVT, SDNode *RootNode) {
17194	LLVMContext &Context = *DAG.getContext();
17195	const DataLayout &DL = DAG.getDataLayout();
17196	unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
17197	bool MadeChange = false;
17198
17199	// Loop on Consecutive Stores on success.
17200	while (NumConsecutiveStores >= 2) {
17201	LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
17202	unsigned FirstStoreAS = FirstInChain->getAddressSpace();
17203	unsigned FirstStoreAlign = FirstInChain->getAlignment();
17204	unsigned NumStoresToMerge = 1;
17205	for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
17206	// Find a legal type for the vector store.
17207	unsigned Elts = (i + 1) * NumMemElts;
17208	EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
17209	bool IsFast = false;
17210
17211	// Break early when size is too large to be legal.
17212	if (Ty.getSizeInBits() > MaximumLegalStoreInBits)
17213	break;
17214
17215	if (TLI.isTypeLegal(Ty) && TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) &&
17216	TLI.allowsMemoryAccess(Context, DL, Ty,
17217	*FirstInChain->getMemOperand(), &IsFast) &&
17218	IsFast)
17219	NumStoresToMerge = i + 1;
17220	}
17221
17222	// Check if we found a legal integer type creating a meaningful
17223	// merge.
17224	if (NumStoresToMerge < 2) {
17225	// We know that candidate stores are in order and of correct
17226	// shape. While there is no mergeable sequence from the
17227	// beginning one may start later in the sequence. The only
17228	// reason a merge of size N could have failed where another of
17229	// the same size would not have, is if the alignment has
17230	// improved. Drop as many candidates as we can here.
17231	unsigned NumSkip = 1;
17232	while ((NumSkip < NumConsecutiveStores) &&
17233	(StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
17234	NumSkip++;
17235
17236	StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
17237	NumConsecutiveStores -= NumSkip;
17238	continue;
17239	}
17240
17241	// Check that we can merge these candidates without causing a cycle.
17242	if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumStoresToMerge,
17243	RootNode)) {
17244	StoreNodes.erase(StoreNodes.begin(),
17245	StoreNodes.begin() + NumStoresToMerge);
17246	NumConsecutiveStores -= NumStoresToMerge;
17247	continue;
17248	}
17249
17250	MadeChange \|= mergeStoresOfConstantsOrVecElts(
17251	StoreNodes, MemVT, NumStoresToMerge, false, true, false);
17252
17253	StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumStoresToMerge);
17254	NumConsecutiveStores -= NumStoresToMerge;
17255	}
17256	return MadeChange;
17257	}
17258
17259	bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
17260	unsigned NumConsecutiveStores, EVT MemVT,
17261	SDNode *RootNode, bool AllowVectors,
17262	bool IsNonTemporalStore,
17263	bool IsNonTemporalLoad) {
17264	LLVMContext &Context = *DAG.getContext();
17265	const DataLayout &DL = DAG.getDataLayout();
17266	int64_t ElementSizeBytes = MemVT.getStoreSize();
17267	unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
17268	bool MadeChange = false;
17269
17270	int64_t StartAddress = StoreNodes[0].OffsetFromBase;
	Value stored to 'StartAddress' during its initialization is never read
17271
17272	// Look for load nodes which are used by the stored values.
17273	SmallVector<MemOpLink, 8> LoadNodes;
17274
17275	// Find acceptable loads. Loads need to have the same chain (token factor),
17276	// must not be zext, volatile, indexed, and they must be consecutive.
17277	BaseIndexOffset LdBasePtr;
17278
17279	for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
17280	StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
17281	SDValue Val = peekThroughBitcasts(St->getValue());
17282	LoadSDNode *Ld = cast<LoadSDNode>(Val);
17283
17284	BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld, DAG);
17285	// If this is not the first ptr that we check.
17286	int64_t LdOffset = 0;
17287	if (LdBasePtr.getBase().getNode()) {
17288	// The base ptr must be the same.
17289	if (!LdBasePtr.equalBaseIndex(LdPtr, DAG, LdOffset))
17290	break;
17291	} else {
17292	// Check that all other base pointers are the same as this one.
17293	LdBasePtr = LdPtr;
17294	}
17295
17296	// We found a potential memory operand to merge.
17297	LoadNodes.push_back(MemOpLink(Ld, LdOffset));
17298	}
17299
17300	while (NumConsecutiveStores >= 2 && LoadNodes.size() >= 2) {
17301	Align RequiredAlignment;
17302	bool NeedRotate = false;
17303	if (LoadNodes.size() == 2) {
17304	// If we have load/store pair instructions and we only have two values,
17305	// don't bother merging.
17306	if (TLI.hasPairedLoad(MemVT, RequiredAlignment) &&
17307	StoreNodes[0].MemNode->getAlign() >= RequiredAlignment) {
17308	StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 2);
17309	LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + 2);
17310	break;
17311	}
17312	// If the loads are reversed, see if we can rotate the halves into place.
17313	int64_t Offset0 = LoadNodes[0].OffsetFromBase;
17314	int64_t Offset1 = LoadNodes[1].OffsetFromBase;
17315	EVT PairVT = EVT::getIntegerVT(Context, ElementSizeBytes * 8 * 2);
17316	if (Offset0 - Offset1 == ElementSizeBytes &&
17317	(hasOperation(ISD::ROTL, PairVT) \|\|
17318	hasOperation(ISD::ROTR, PairVT))) {
17319	std::swap(LoadNodes[0], LoadNodes[1]);
17320	NeedRotate = true;
17321	}
17322	}
17323	LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
17324	unsigned FirstStoreAS = FirstInChain->getAddressSpace();
17325	Align FirstStoreAlign = FirstInChain->getAlign();
17326	LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);
17327
17328	// Scan the memory operations on the chain and find the first
17329	// non-consecutive load memory address. These variables hold the index in
17330	// the store node array.
17331
17332	unsigned LastConsecutiveLoad = 1;
17333
17334	// This variable refers to the size and not index in the array.
17335	unsigned LastLegalVectorType = 1;
17336	unsigned LastLegalIntegerType = 1;
17337	bool isDereferenceable = true;
17338	bool DoIntegerTruncate = false;
17339	StartAddress = LoadNodes[0].OffsetFromBase;
17340	SDValue LoadChain = FirstLoad->getChain();
17341	for (unsigned i = 1; i < LoadNodes.size(); ++i) {
17342	// All loads must share the same chain.
17343	if (LoadNodes[i].MemNode->getChain() != LoadChain)
17344	break;
17345
17346	int64_t CurrAddress = LoadNodes[i].OffsetFromBase;
17347	if (CurrAddress - StartAddress != (ElementSizeBytes * i))
17348	break;
17349	LastConsecutiveLoad = i;
17350
17351	if (isDereferenceable && !LoadNodes[i].MemNode->isDereferenceable())
17352	isDereferenceable = false;
17353
17354	// Find a legal type for the vector store.
17355	unsigned Elts = (i + 1) * NumMemElts;
17356	EVT StoreTy = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
17357
17358	// Break early when size is too large to be legal.
17359	if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
17360	break;
17361
17362	bool IsFastSt = false;
17363	bool IsFastLd = false;
17364	if (TLI.isTypeLegal(StoreTy) &&
17365	TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
17366	TLI.allowsMemoryAccess(Context, DL, StoreTy,
17367	*FirstInChain->getMemOperand(), &IsFastSt) &&
17368	IsFastSt &&
17369	TLI.allowsMemoryAccess(Context, DL, StoreTy,
17370	*FirstLoad->getMemOperand(), &IsFastLd) &&
17371	IsFastLd) {
17372	LastLegalVectorType = i + 1;
17373	}
17374
17375	// Find a legal type for the integer store.
17376	unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
17377	StoreTy = EVT::getIntegerVT(Context, SizeInBits);
17378	if (TLI.isTypeLegal(StoreTy) &&
17379	TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
17380	TLI.allowsMemoryAccess(Context, DL, StoreTy,
17381	*FirstInChain->getMemOperand(), &IsFastSt) &&
17382	IsFastSt &&
17383	TLI.allowsMemoryAccess(Context, DL, StoreTy,
17384	*FirstLoad->getMemOperand(), &IsFastLd) &&
17385	IsFastLd) {
17386	LastLegalIntegerType = i + 1;
17387	DoIntegerTruncate = false;
17388	// Or check whether a truncstore and extload is legal.
17389	} else if (TLI.getTypeAction(Context, StoreTy) ==
17390	TargetLowering::TypePromoteInteger) {
17391	EVT LegalizedStoredValTy = TLI.getTypeToTransformTo(Context, StoreTy);
17392	if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
17393	TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) &&
17394	TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValTy, StoreTy) &&
17395	TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValTy, StoreTy) &&
17396	TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValTy, StoreTy) &&
17397	TLI.allowsMemoryAccess(Context, DL, StoreTy,
17398	*FirstInChain->getMemOperand(), &IsFastSt) &&
17399	IsFastSt &&
17400	TLI.allowsMemoryAccess(Context, DL, StoreTy,
17401	*FirstLoad->getMemOperand(), &IsFastLd) &&
17402	IsFastLd) {
17403	LastLegalIntegerType = i + 1;
17404	DoIntegerTruncate = true;
17405	}
17406	}
17407	}
17408
17409	// Only use vector types if the vector type is larger than the integer
17410	// type. If they are the same, use integers.
17411	bool UseVectorTy =
17412	LastLegalVectorType > LastLegalIntegerType && AllowVectors;
17413	unsigned LastLegalType =
17414	std::max(LastLegalVectorType, LastLegalIntegerType);
17415
17416	// We add +1 here because the LastXXX variables refer to location while
17417	// the NumElem refers to array/index size.
17418	unsigned NumElem = std::min(NumConsecutiveStores, LastConsecutiveLoad + 1);
17419	NumElem = std::min(LastLegalType, NumElem);
17420	Align FirstLoadAlign = FirstLoad->getAlign();
17421
17422	if (NumElem < 2) {
17423	// We know that candidate stores are in order and of correct
17424	// shape. While there is no mergeable sequence from the
17425	// beginning one may start later in the sequence. The only
17426	// reason a merge of size N could have failed where another of
17427	// the same size would not have is if the alignment or either
17428	// the load or store has improved. Drop as many candidates as we
17429	// can here.
17430	unsigned NumSkip = 1;
17431	while ((NumSkip < LoadNodes.size()) &&
17432	(LoadNodes[NumSkip].MemNode->getAlign() <= FirstLoadAlign) &&
17433	(StoreNodes[NumSkip].MemNode->getAlign() <= FirstStoreAlign))
17434	NumSkip++;
17435	StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
17436	LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumSkip);
17437	NumConsecutiveStores -= NumSkip;
17438	continue;
17439	}
17440
17441	// Check that we can merge these candidates without causing a cycle.
17442	if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
17443	RootNode)) {
17444	StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
17445	LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
17446	NumConsecutiveStores -= NumElem;
17447	continue;
17448	}
17449
17450	// Find if it is better to use vectors or integers to load and store
17451	// to memory.
17452	EVT JointMemOpVT;
17453	if (UseVectorTy) {
17454	// Find a legal type for the vector store.
17455	unsigned Elts = NumElem * NumMemElts;
17456	JointMemOpVT = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
17457	} else {
17458	unsigned SizeInBits = NumElem * ElementSizeBytes * 8;
17459	JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits);
17460	}
17461
17462	SDLoc LoadDL(LoadNodes[0].MemNode);
17463	SDLoc StoreDL(StoreNodes[0].MemNode);
17464
17465	// The merged loads are required to have the same incoming chain, so
17466	// using the first's chain is acceptable.
17467
17468	SDValue NewStoreChain = getMergeStoreChains(StoreNodes, NumElem);
17469	AddToWorklist(NewStoreChain.getNode());
17470
17471	MachineMemOperand::Flags LdMMOFlags =
17472	isDereferenceable ? MachineMemOperand::MODereferenceable
17473	: MachineMemOperand::MONone;
17474	if (IsNonTemporalLoad)
17475	LdMMOFlags \|= MachineMemOperand::MONonTemporal;
17476
17477	MachineMemOperand::Flags StMMOFlags = IsNonTemporalStore
17478	? MachineMemOperand::MONonTemporal
17479	: MachineMemOperand::MONone;
17480
17481	SDValue NewLoad, NewStore;
17482	if (UseVectorTy \|\| !DoIntegerTruncate) {
17483	NewLoad = DAG.getLoad(
17484	JointMemOpVT, LoadDL, FirstLoad->getChain(), FirstLoad->getBasePtr(),
17485	FirstLoad->getPointerInfo(), FirstLoadAlign, LdMMOFlags);
17486	SDValue StoreOp = NewLoad;
17487	if (NeedRotate) {
17488	unsigned LoadWidth = ElementSizeBytes * 8 * 2;
17489	assert(JointMemOpVT == EVT::getIntegerVT(Context, LoadWidth) &&((JointMemOpVT == EVT::getIntegerVT(Context, LoadWidth) && "Unexpected type for rotate-able load pair") ? static_cast< void> (0) : __assert_fail ("JointMemOpVT == EVT::getIntegerVT(Context, LoadWidth) && \"Unexpected type for rotate-able load pair\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 17490, __PRETTY_FUNCTION__))
17490	"Unexpected type for rotate-able load pair")((JointMemOpVT == EVT::getIntegerVT(Context, LoadWidth) && "Unexpected type for rotate-able load pair") ? static_cast< void> (0) : __assert_fail ("JointMemOpVT == EVT::getIntegerVT(Context, LoadWidth) && \"Unexpected type for rotate-able load pair\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 17490, __PRETTY_FUNCTION__));
17491	SDValue RotAmt =
17492	DAG.getShiftAmountConstant(LoadWidth / 2, JointMemOpVT, LoadDL);
17493	// Target can convert to the identical ROTR if it does not have ROTL.
17494	StoreOp = DAG.getNode(ISD::ROTL, LoadDL, JointMemOpVT, NewLoad, RotAmt);
17495	}
17496	NewStore = DAG.getStore(
17497	NewStoreChain, StoreDL, StoreOp, FirstInChain->getBasePtr(),
17498	FirstInChain->getPointerInfo(), FirstStoreAlign, StMMOFlags);
17499	} else { // This must be the truncstore/extload case
17500	EVT ExtendedTy =
17501	TLI.getTypeToTransformTo(*DAG.getContext(), JointMemOpVT);
17502	NewLoad = DAG.getExtLoad(ISD::EXTLOAD, LoadDL, ExtendedTy,
17503	FirstLoad->getChain(), FirstLoad->getBasePtr(),
17504	FirstLoad->getPointerInfo(), JointMemOpVT,
17505	FirstLoadAlign, LdMMOFlags);
17506	NewStore = DAG.getTruncStore(
17507	NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(),
17508	FirstInChain->getPointerInfo(), JointMemOpVT,
17509	FirstInChain->getAlign(), FirstInChain->getMemOperand()->getFlags());
17510	}
17511
17512	// Transfer chain users from old loads to the new load.
17513	for (unsigned i = 0; i < NumElem; ++i) {
17514	LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode);
17515	DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1),
17516	SDValue(NewLoad.getNode(), 1));
17517	}
17518
17519	// Replace all stores with the new store. Recursively remove corresponding
17520	// values if they are no longer used.
17521	for (unsigned i = 0; i < NumElem; ++i) {
17522	SDValue Val = StoreNodes[i].MemNode->getOperand(1);
17523	CombineTo(StoreNodes[i].MemNode, NewStore);
17524	if (Val.getNode()->use_empty())
17525	recursivelyDeleteUnusedNodes(Val.getNode());
17526	}
17527
17528	MadeChange = true;
17529	StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
17530	LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
17531	NumConsecutiveStores -= NumElem;
17532	}
17533	return MadeChange;
17534	}
17535
/// Try to merge \p St and the candidate stores found alongside it into wider
/// stores.
///
/// Returns false without doing anything when optimization is off or store
/// merging is disabled, when the memory type is a scalable vector, is not
/// simple, is too wide for two of them to fit in MaximumLegalStoreInBits, or
/// is not byte-sized, and when the stored value's source kind is
/// StoreSource::Unknown. Otherwise the candidates collected by
/// getStoreMergeCandidates are sorted by OffsetFromBase and handed, one run of
/// consecutive stores at a time, to the helper matching the source kind
/// (constants, extracted elements or loads).
///
/// \returns true if any of the helpers reported a change.
17536	bool DAGCombiner::mergeConsecutiveStores(StoreSDNode *St) {
17537	if (OptLevel == CodeGenOpt::None \|\| !EnableStoreMerging)
17538	return false;
17539
17540	// TODO: Extend this function to merge stores of scalable vectors.
17541	// (i.e. two <vscale x 8 x i8> stores can be merged to one <vscale x 16 x i8>
17542	// store since we know <vscale x 16 x i8> is exactly twice as large as
17543	// <vscale x 8 x i8>). Until then, bail out for scalable vectors.
17544	EVT MemVT = St->getMemoryVT();
17545	if (MemVT.isScalableVector())
17546	return false;
// Two elements of this type must fit within MaximumLegalStoreInBits,
// otherwise not even the smallest merge (two stores) is possible.
17547	if (!MemVT.isSimple() \|\| MemVT.getSizeInBits() * 2 > MaximumLegalStoreInBits)
17548	return false;
17549
17550	// This function cannot currently deal with non-byte-sized memory sizes.
17551	int64_t ElementSizeBytes = MemVT.getStoreSize();
17552	if (ElementSizeBytes * 8 != (int64_t)MemVT.getSizeInBits())
17553	return false;
17554
17555	// Do not bother looking at stored values that are not constants, loads, or
17556	// extracted vector elements.
17557	SDValue StoredVal = peekThroughBitcasts(St->getValue());
17558	const StoreSource StoreSrc = getStoreSource(StoredVal);
17559	if (StoreSrc == StoreSource::Unknown)
17560	return false;
17561
17562	SmallVector<MemOpLink, 8> StoreNodes;
// RootNode is left uninitialized here; it is handed to
// getStoreMergeCandidates, which presumably sets it (signature not visible
// in this part of the file).
17563	SDNode *RootNode;
17564	// Find potential store merge candidates by searching through chain sub-DAG
17565	getStoreMergeCandidates(St, StoreNodes, RootNode);
17566
17567	// Check if there is anything to merge.
17568	if (StoreNodes.size() < 2)
17569	return false;
17570
17571	// Sort the memory operands according to their distance from the
17572	// base pointer.
17573	llvm::sort(StoreNodes, [](MemOpLink LHS, MemOpLink RHS) {
17574	return LHS.OffsetFromBase < RHS.OffsetFromBase;
17575	});
17576
// Vector types are only allowed when the function does not carry the
// NoImplicitFloat attribute.
17577	bool AllowVectors = !DAG.getMachineFunction().getFunction().hasFnAttribute(
17578	Attribute::NoImplicitFloat);
17579	bool IsNonTemporalStore = St->isNonTemporal();
17580	bool IsNonTemporalLoad = StoreSrc == StoreSource::Load &&
17581	cast<LoadSDNode>(StoredVal)->isNonTemporal();
17582
17583	// Store Merge attempts to merge the lowest stores. This generally
17584	// works out as if successful, as the remaining stores are checked
17585	// after the first collection of stores is merged. However, in the
17586	// case that a non-mergeable store is found first, e.g., {p[-2],
17587	// p[0], p[1], p[2], p[3]}, we would fail and miss the subsequent
17588	// mergeable cases. To prevent this, we prune such stores from the
17589	// front of StoreNodes here.
17590	bool MadeChange = false;
// NOTE(review): this loop only ends once StoreNodes holds fewer than two
// entries or getConsecutiveStores returns 0, so it relies on the callees
// trimming StoreNodes -- confirm against their definitions.
17591	while (StoreNodes.size() > 1) {
17592	unsigned NumConsecutiveStores =
17593	getConsecutiveStores(StoreNodes, ElementSizeBytes);
17594	// There are no more stores in the list to examine.
17595	if (NumConsecutiveStores == 0)
17596	return MadeChange;
17597
17598	// We have at least 2 consecutive stores. Try to merge them.
17599	assert(NumConsecutiveStores >= 2 && "Expected at least 2 stores")((NumConsecutiveStores >= 2 && "Expected at least 2 stores" ) ? static_cast<void> (0) : __assert_fail ("NumConsecutiveStores >= 2 && \"Expected at least 2 stores\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 17599, __PRETTY_FUNCTION__));
// Dispatch on the kind of value being stored; StoreSource::Unknown was
// rejected above, so it can only reach the default case by mistake.
17600	switch (StoreSrc) {
17601	case StoreSource::Constant:
17602	MadeChange \|= tryStoreMergeOfConstants(StoreNodes, NumConsecutiveStores,
17603	MemVT, RootNode, AllowVectors);
17604	break;
17605
17606	case StoreSource::Extract:
17607	MadeChange \|= tryStoreMergeOfExtracts(StoreNodes, NumConsecutiveStores,
17608	MemVT, RootNode);
17609	break;
17610
17611	case StoreSource::Load:
17612	MadeChange \|= tryStoreMergeOfLoads(StoreNodes, NumConsecutiveStores,
17613	MemVT, RootNode, AllowVectors,
17614	IsNonTemporalStore, IsNonTemporalLoad);
17615	break;
17616
17617	default:
17618	llvm_unreachable("Unhandled store source type")::llvm::llvm_unreachable_internal("Unhandled store source type" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 17618);
17619	}
17620	}
17621	return MadeChange;
17622	}
17623
/// Re-emit store \p ST on top of \p BetterChain and replace \p ST with a
/// TokenFactor of the original chain and the new store.
///
/// The new store reuses ST's value, base pointer and memory operand; it is
/// built as a truncating store (with ST's memory VT) when ST is one, and as a
/// plain store otherwise.
///
/// \returns the result of CombineTo(ST, Token, false).
17624	SDValue DAGCombiner::replaceStoreChain(StoreSDNode *ST, SDValue BetterChain) {
17625	SDLoc SL(ST);
17626	SDValue ReplStore;
17627
17628	// Replace the chain to avoid dependency.
17629	if (ST->isTruncatingStore()) {
17630	ReplStore = DAG.getTruncStore(BetterChain, SL, ST->getValue(),
17631	ST->getBasePtr(), ST->getMemoryVT(),
17632	ST->getMemOperand());
17633	} else {
17634	ReplStore = DAG.getStore(BetterChain, SL, ST->getValue(), ST->getBasePtr(),
17635	ST->getMemOperand());
17636	}
17637
17638	// Create token to keep both nodes around.
17639	SDValue Token = DAG.getNode(ISD::TokenFactor, SL,
17640	MVT::Other, ST->getChain(), ReplStore);
17641
17642	// Make sure the new and old chains are cleaned up.
17643	AddToWorklist(Token.getNode());
17644
17645	// Don't add users to work list.
17646	return CombineTo(ST, Token, false);
17647	}
17648
/// Rewrite a store of a floating-point constant as a store of the integer
/// holding the same bit pattern.
///
/// The stored value of \p ST must be a ConstantFPSDNode (it is cast<> to one
/// below). f32 constants become an i32 store; f64 constants become an i64
/// store or, failing that, two i32 stores joined by a TokenFactor.
///
/// \returns the replacement store (or TokenFactor), or an empty SDValue when
/// the value is a TargetConstantFP, \p ST is not a normal store, the FP type
/// is f16/f80/f128/ppcf128, or none of the legality checks pass.
17649	SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) {
17650	SDValue Value = ST->getValue();
17651	if (Value.getOpcode() == ISD::TargetConstantFP)
17652	return SDValue();
17653
17654	if (!ISD::isNormalStore(ST))
17655	return SDValue();
17656
17657	SDLoc DL(ST);
17658
17659	SDValue Chain = ST->getChain();
17660	SDValue Ptr = ST->getBasePtr();
17661
17662	const ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Value);
17663
17664	// NOTE: If the original store is volatile, this transform must not increase
17665	// the number of stores. For example, on x86-32 an f64 can be stored in one
17666	// processor operation but an i64 (which is not legal) requires two. So the
17667	// transform should not be done in this case.
17668
17669	SDValue Tmp;
17670	switch (CFP->getSimpleValueType(0).SimpleTy) {
17671	default:
17672	llvm_unreachable("Unknown FP type")::llvm::llvm_unreachable_internal("Unknown FP type", "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 17672);
17673	case MVT::f16: // We don't do this for these yet.
17674	case MVT::f80:
17675	case MVT::f128:
17676	case MVT::ppcf128:
17677	return SDValue();
17678	case MVT::f32:
// NOTE(review): this case calls the unqualified isTypeLegal while the f64
// case below calls TLI.isTypeLegal -- confirm the difference is intended.
17679	if ((isTypeLegal(MVT::i32) && !LegalOperations && ST->isSimple()) \|\|
17680	TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
// The lone ';' on the next line is an empty statement with no effect.
17681	;
17682	Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
17683	bitcastToAPInt().getZExtValue(), SDLoc(CFP),
17684	MVT::i32);
17685	return DAG.getStore(Chain, DL, Tmp, Ptr, ST->getMemOperand());
17686	}
17687
17688	return SDValue();
17689	case MVT::f64:
17690	if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations &&
17691	ST->isSimple()) \|\|
17692	TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) {
// The lone ';' on the next line is an empty statement with no effect.
17693	;
17694	Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
17695	getZExtValue(), SDLoc(CFP), MVT::i64);
17696	return DAG.getStore(Chain, DL, Tmp,
17697	Ptr, ST->getMemOperand());
17698	}
17699
17700	if (ST->isSimple() &&
17701	TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
17702	// Many FP stores are not made apparent until after legalize, e.g. for
17703	// argument passing. Since this is so common, custom legalize the
17704	// 64-bit integer store into two 32-bit stores.
17705	uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
17706	SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, SDLoc(CFP), MVT::i32);
17707	SDValue Hi = DAG.getConstant(Val >> 32, SDLoc(CFP), MVT::i32);
// On big-endian targets the high half is the one stored at the lower
// address, so the two halves trade places.
17708	if (DAG.getDataLayout().isBigEndian())
17709	std::swap(Lo, Hi);
17710
17711	MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
17712	AAMDNodes AAInfo = ST->getAAInfo();
17713
// Both halves use the original chain (they are not ordered against each
// other); the second one is stored 4 bytes past the first.
17714	SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
17715	ST->getOriginalAlign(), MMOFlags, AAInfo);
17716	Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(4), DL);
17717	SDValue St1 = DAG.getStore(Chain, DL, Hi, Ptr,
17718	ST->getPointerInfo().getWithOffset(4),
17719	ST->getOriginalAlign(), MMOFlags, AAInfo);
17720	return DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
17721	St0, St1);
17722	}
17723
17724	return SDValue();
17725	}
17726	}
17727
/// Combine a STORE node.
///
/// The transforms below are attempted in order and the first one that fires
/// determines the return value: store the source of a bitcast directly, drop
/// a store of undef, refine the alignment, turn an FP load/store pair into
/// integer ops, merge truncating stores, find better neighbor chains,
/// simplify the value of a truncating store through demanded bits, remove a
/// store that writes back a value just loaded from the same address, remove a
/// duplicate or fully covered preceding store, fold FP_ROUND / TRUNCATE into
/// a truncating store, merge consecutive stores, form an indexed store,
/// replace an FP constant with its integer bits, split a merged value store,
/// and finally ReduceLoadOpStoreWidth.
///
/// \param N the store node to combine; it is cast<> to StoreSDNode.
/// \returns the value produced by whichever transform fired (see the
/// individual return statements), otherwise the result of
/// ReduceLoadOpStoreWidth(N).
17728	SDValue DAGCombiner::visitSTORE(SDNode *N) {
17729	StoreSDNode *ST = cast<StoreSDNode>(N);
17730	SDValue Chain = ST->getChain();
17731	SDValue Value = ST->getValue();
17732	SDValue Ptr = ST->getBasePtr();
17733
17734	// If this is a store of a bit convert, store the input value if the
17735	// resultant store does not need a higher alignment than the original.
17736	if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() &&
17737	ST->isUnindexed()) {
17738	EVT SVT = Value.getOperand(0).getValueType();
17739	// If the store is volatile, we only want to change the store type if the
17740	// resulting store is legal. Otherwise we might increase the number of
17741	// memory accesses. We don't care if the original type was legal or not
17742	// as we assume software couldn't rely on the number of accesses of an
17743	// illegal type.
17744	// TODO: May be able to relax for unordered atomics (see D66309)
17745	if (((!LegalOperations && ST->isSimple()) \|\|
17746	TLI.isOperationLegal(ISD::STORE, SVT)) &&
17747	TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT,
17748	DAG, *ST->getMemOperand())) {
17749	return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
17750	ST->getMemOperand());
17751	}
17752	}
17753
17754	// Turn 'store undef, Ptr' -> nothing.
17755	if (Value.isUndef() && ST->isUnindexed())
17756	return Chain;
17757
17758	// Try to infer better alignment information than the store already has.
17759	if (OptLevel != CodeGenOpt::None && ST->isUnindexed() && !ST->isAtomic()) {
17760	if (MaybeAlign Alignment = DAG.InferPtrAlign(Ptr)) {
17761	if (*Alignment > ST->getAlign() &&
17762	isAligned(*Alignment, ST->getSrcValueOffset())) {
17763	SDValue NewStore =
17764	DAG.getTruncStore(Chain, SDLoc(N), Value, Ptr, ST->getPointerInfo(),
17765	ST->getMemoryVT(), *Alignment,
17766	ST->getMemOperand()->getFlags(), ST->getAAInfo());
17767	// NewStore will always be N as we are only refining the alignment
17768	assert(NewStore.getNode() == N)((NewStore.getNode() == N) ? static_cast<void> (0) : __assert_fail ("NewStore.getNode() == N", "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 17768, __PRETTY_FUNCTION__));
17769	(void)NewStore;
17770	}
17771	}
17772	}
17773
17774	// Try transforming a pair floating point load / store ops to integer
17775	// load / store ops.
17776	if (SDValue NewST = TransformFPLoadStorePair(N))
17777	return NewST;
17778
17779	// Try transforming several stores into STORE (BSWAP).
17780	if (SDValue Store = mergeTruncStores(ST))
17781	return Store;
17782
17783	if (ST->isUnindexed()) {
17784	// Walk up chain skipping non-aliasing memory nodes, on this store and any
17785	// adjacent stores.
17786	if (findBetterNeighborChains(ST)) {
17787	// replaceStoreChain uses CombineTo, which handled all of the worklist
17788	// manipulation. Return the original node to not do anything else.
17789	return SDValue(ST, 0);
17790	}
// Re-read the chain from the store before the combines below use it.
17791	Chain = ST->getChain();
17792	}
17793
17794	// FIXME: is there such a thing as a truncating indexed store?
17795	if (ST->isTruncatingStore() && ST->isUnindexed() &&
17796	Value.getValueType().isInteger() &&
17797	(!isa<ConstantSDNode>(Value) \|\|
17798	!cast<ConstantSDNode>(Value)->isOpaque())) {
// Only the low bits that survive the truncation to the memory type are
// demanded from the stored value.
17799	APInt TruncDemandedBits =
17800	APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
17801	ST->getMemoryVT().getScalarSizeInBits());
17802
17803	// See if we can simplify the input to this truncstore with knowledge that
17804	// only the low bits are being used. For example:
17805	// "truncstore (or (shl x, 8), y), i8" -> "truncstore y, i8"
17806	AddToWorklist(Value.getNode());
17807	if (SDValue Shorter = DAG.GetDemandedBits(Value, TruncDemandedBits))
17808	return DAG.getTruncStore(Chain, SDLoc(N), Shorter, Ptr, ST->getMemoryVT(),
17809	ST->getMemOperand());
17810
17811	// Otherwise, see if we can simplify the operation with
17812	// SimplifyDemandedBits, which only works if the value has a single use.
17813	if (SimplifyDemandedBits(Value, TruncDemandedBits)) {
17814	// Re-visit the store if anything changed and the store hasn't been merged
17815	// with another node (N is deleted) SimplifyDemandedBits will add Value's
17816	// node back to the worklist if necessary, but we also need to re-visit
17817	// the Store node itself.
17818	if (N->getOpcode() != ISD::DELETED_NODE)
17819	AddToWorklist(N);
17820	return SDValue(N, 0);
17821	}
17822	}
17823
17824	// If this is a load followed by a store to the same location, then the store
17825	// is dead/noop.
17826	// TODO: Can relax for unordered atomics (see D66309)
17827	if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Value)) {
17828	if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() &&
17829	ST->isUnindexed() && ST->isSimple() &&
17830	// There can't be any side effects between the load and store, such as
17831	// a call or store.
17832	Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) {
17833	// The store is dead, remove it.
17834	return Chain;
17835	}
17836	}
17837
17838	// TODO: Can relax for unordered atomics (see D66309)
17839	if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) {
17840	if (ST->isUnindexed() && ST->isSimple() &&
17841	ST1->isUnindexed() && ST1->isSimple()) {
17842	if (ST1->getBasePtr() == Ptr && ST1->getValue() == Value &&
17843	ST->getMemoryVT() == ST1->getMemoryVT()) {
17844	// If this is a store followed by a store with the same value to the
17845	// same location, then the store is dead/noop.
17846	return Chain;
17847	}
17848
17849	if (OptLevel != CodeGenOpt::None && ST1->hasOneUse() &&
17850	!ST1->getBasePtr().isUndef() &&
17851	// BaseIndexOffset and the code below requires knowing the size
17852	// of a vector, so bail out if MemoryVT is scalable.
17853	!ST->getMemoryVT().isScalableVector() &&
17854	!ST1->getMemoryVT().isScalableVector()) {
17855	const BaseIndexOffset STBase = BaseIndexOffset::match(ST, DAG);
17856	const BaseIndexOffset ChainBase = BaseIndexOffset::match(ST1, DAG);
17857	unsigned STBitSize = ST->getMemoryVT().getFixedSizeInBits();
17858	unsigned ChainBitSize = ST1->getMemoryVT().getFixedSizeInBits();
17859	// If the preceding store on the chain writes a subset of the location
17860	// written by this store, and no other node is chained to that preceding
17861	// store, it can be dropped. Do not remove stores to undef as they may
17862	// be used as data sinks.
17863	if (STBase.contains(DAG, STBitSize, ChainBase, ChainBitSize)) {
17864	CombineTo(ST1, ST1->getChain());
17865	return SDValue();
17866	}
17867	}
17868	}
17869	}
17870
17871	// If this is an FP_ROUND or TRUNC followed by a store, fold this into a
17872	// truncating store. We can do this even if this is already a truncstore.
17873	if ((Value.getOpcode() == ISD::FP_ROUND \|\| Value.getOpcode() == ISD::TRUNCATE)
17874	&& Value.getNode()->hasOneUse() && ST->isUnindexed() &&
17875	TLI.isTruncStoreLegal(Value.getOperand(0).getValueType(),
17876	ST->getMemoryVT())) {
17877	return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0),
17878	Ptr, ST->getMemoryVT(), ST->getMemOperand());
17879	}
17880
17881	// Always perform this optimization before types are legal. If the target
17882	// prefers, also try this after legalization to catch stores that were created
17883	// by intrinsics or other nodes.
17884	if (!LegalTypes \|\| (TLI.mergeStoresAfterLegalization(ST->getMemoryVT()))) {
17885	while (true) {
17886	// There can be multiple store sequences on the same chain.
17887	// Keep trying to merge store sequences until we are unable to do so
17888	// or until we merge the last store on the chain.
17889	bool Changed = mergeConsecutiveStores(ST);
17890	if (!Changed) break;
17891	// Return N as merge only uses CombineTo and no worklist clean
17892	// up is necessary.
17893	if (N->getOpcode() == ISD::DELETED_NODE \|\| !isa<StoreSDNode>(N))
17894	return SDValue(N, 0);
17895	}
17896	}
17897
17898	// Try transforming N to an indexed store.
17899	if (CombineToPreIndexedLoadStore(N) \|\| CombineToPostIndexedLoadStore(N))
17900	return SDValue(N, 0);
17901
17902	// Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
17903	//
17904	// Make sure to do this only after attempting to merge stores in order to
17905	// avoid changing the types of some subset of stores due to visit order,
17906	// preventing their merging.
17907	if (isa<ConstantFPSDNode>(ST->getValue())) {
17908	if (SDValue NewSt = replaceStoreOfFPConstant(ST))
17909	return NewSt;
17910	}
17911
17912	if (SDValue NewSt = splitMergedValStore(ST))
17913	return NewSt;
17914
17915	return ReduceLoadOpStoreWidth(N);
17916	}
17917
/// Combine a LIFETIME_END node by removing a store that is made dead by it.
///
/// Starting from the chain operand of \p N, the walk goes up through
/// single-use chain values: TokenFactor operands are all explored, and
/// LIFETIME_START / LIFETIME_END nodes are stepped over when isAlias reports
/// that they do not alias \p N. The walk does not continue past a store. The
/// first simple, unindexed, fixed-size store found that lies entirely within
/// the object whose lifetime ends is replaced by its own chain operand.
///
/// \returns SDValue(N, 0) if a store was removed; an empty SDValue if \p N
/// has no offset or no such store was found.
17918	SDValue DAGCombiner::visitLIFETIME_END(SDNode *N) {
17919	const auto *LifetimeEnd = cast<LifetimeSDNode>(N);
17920	if (!LifetimeEnd->hasOffset())
17921	return SDValue();
17922
17923	const BaseIndexOffset LifetimeEndBase(N->getOperand(1), SDValue(),
17924	LifetimeEnd->getOffset(), false);
17925
17926	// We walk up the chains to find stores.
17927	SmallVector<SDValue, 8> Chains = {N->getOperand(0)};
17928	while (!Chains.empty()) {
17929	SDValue Chain = Chains.pop_back_val();
// Chain values with more than one use are not looked at or walked through.
17930	if (!Chain.hasOneUse())
17931	continue;
17932	switch (Chain.getOpcode()) {
17933	case ISD::TokenFactor:
// Push the operands in reverse so that operand 0 is popped first.
17934	for (unsigned Nops = Chain.getNumOperands(); Nops;)
17935	Chains.push_back(Chain.getOperand(--Nops));
17936	break;
17937	case ISD::LIFETIME_START:
17938	case ISD::LIFETIME_END:
17939	// We can forward past any lifetime start/end that can be proven not to
17940	// alias the node.
17941	if (!isAlias(Chain.getNode(), N))
17942	Chains.push_back(Chain.getOperand(0));
17943	break;
17944	case ISD::STORE: {
// The opcode is ISD::STORE, so the node is known to be a store. Use the
// asserting cast<> rather than dyn_cast<>, whose possibly-null result
// would be dereferenced unchecked on the next statement.
17945	StoreSDNode *ST = cast<StoreSDNode>(Chain);
17946	// TODO: Can relax for unordered atomics (see D66309)
17947	if (!ST->isSimple() \|\| ST->isIndexed())
17948	continue;
17949	const TypeSize StoreSize = ST->getMemoryVT().getStoreSize();
17950	// The bounds of a scalable store are not known until runtime, so this
17951	// store cannot be elided.
17952	if (StoreSize.isScalable())
17953	continue;
17954	const BaseIndexOffset StoreBase = BaseIndexOffset::match(ST, DAG);
17955	// If we store purely within object bounds just before its lifetime ends,
17956	// we can remove the store.
17957	if (LifetimeEndBase.contains(DAG, LifetimeEnd->getSize() * 8, StoreBase,
17958	StoreSize.getFixedSize() * 8)) {
17959	LLVM_DEBUG(dbgs() << "\nRemoving store:"; StoreBase.dump();do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("dagcombine")) { dbgs() << "\nRemoving store:"; StoreBase .dump(); dbgs() << "\nwithin LIFETIME_END of : "; LifetimeEndBase .dump(); dbgs() << "\n"; } } while (false)
17960	dbgs() << "\nwithin LIFETIME_END of : ";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("dagcombine")) { dbgs() << "\nRemoving store:"; StoreBase .dump(); dbgs() << "\nwithin LIFETIME_END of : "; LifetimeEndBase .dump(); dbgs() << "\n"; } } while (false)
17961	LifetimeEndBase.dump(); dbgs() << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("dagcombine")) { dbgs() << "\nRemoving store:"; StoreBase .dump(); dbgs() << "\nwithin LIFETIME_END of : "; LifetimeEndBase .dump(); dbgs() << "\n"; } } while (false);
// Replacing the store with its chain operand removes it from the chain.
17962	CombineTo(ST, ST->getChain());
17963	return SDValue(N, 0);
17964	}
17965	}
17966	}
17967	}
17968	return SDValue();
17969	}
17970
17971	/// For the instruction sequence of store below, F and I values
17972	/// are bundled together as an i64 value before being stored into memory.
17973	/// Sometimes it is more efficient to generate separate stores for F and I,
17974	/// which can remove the bitwise instructions or sink them to colder places.
17975	///
17976	/// (store (or (zext (bitcast F to i32) to i64),
17977	/// (shl (zext I to i64), 32)), addr) -->
17978	/// (store F, addr) and (store I, addr+4)
17979	///
17980	/// Similarly, splitting for other merged store can also be beneficial, like:
17981	/// For pair of {i32, i32}, i64 store --> two i32 stores.
17982	/// For pair of {i32, i16}, i64 store --> two i32 stores.
17983	/// For pair of {i16, i16}, i32 store --> two i16 stores.
17984	/// For pair of {i16, i8}, i32 store --> two i16 stores.
17985	/// For pair of {i8, i8}, i16 store --> two i8 stores.
17986	///
17987	/// We allow each target to determine specifically which kind of splitting is
17988	/// supported.
17989	///
17990	/// The store patterns are commonly seen from the simple code snippet below
17991	/// if only std::make_pair(...) is sroa transformed before inlined into hoo.
17992	/// void goo(const std::pair<int, float> &);
17993	/// hoo() {
17994	/// ...
17995	/// goo(std::make_pair(tmp, ftmp));
17996	/// ...
17997	/// }
17998	///
/// \returns the store of the high half, which is chained on the store of the
/// low half, or an empty SDValue when optimization is off, \p ST is not a
/// simple store, the stored value does not match the pattern, or the target
/// reports that splitting is not cheaper.
17999	SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) {
18000	if (OptLevel == CodeGenOpt::None)
18001	return SDValue();
18002
18003	// Can't change the number of memory accesses for a volatile store or break
18004	// atomicity for an atomic one.
18005	if (!ST->isSimple())
18006	return SDValue();
18007
18008	SDValue Val = ST->getValue();
18009	SDLoc DL(ST);
18010
18011	// Match OR operand.
18012	if (!Val.getValueType().isScalarInteger() \|\| Val.getOpcode() != ISD::OR)
18013	return SDValue();
18014
18015	// Match SHL operand and get Lower and Higher parts of Val.
18016	SDValue Op1 = Val.getOperand(0);
18017	SDValue Op2 = Val.getOperand(1);
18018	SDValue Lo, Hi;
// Normalize so that Op1 is the SHL operand, whichever side of the OR it
// appeared on.
18019	if (Op1.getOpcode() != ISD::SHL) {
18020	std::swap(Op1, Op2);
18021	if (Op1.getOpcode() != ISD::SHL)
18022	return SDValue();
18023	}
18024	Lo = Op2;
18025	Hi = Op1.getOperand(0);
18026	if (!Op1.hasOneUse())
18027	return SDValue();
18028
18029	// Match shift amount to HalfValBitSize.
18030	unsigned HalfValBitSize = Val.getValueSizeInBits() / 2;
18031	ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(Op1.getOperand(1));
18032	if (!ShAmt \|\| ShAmt->getAPIntValue() != HalfValBitSize)
18033	return SDValue();
18034
18035	// Lo and Hi must each be a single-use zero-extend of a scalar integer that
18036	// is no wider than HalfValBitSize.
18037	if (Lo.getOpcode() != ISD::ZERO_EXTEND \|\| !Lo.hasOneUse() \|\|
18038	!Lo.getOperand(0).getValueType().isScalarInteger() \|\|
18039	Lo.getOperand(0).getValueSizeInBits() > HalfValBitSize \|\|
18040	Hi.getOpcode() != ISD::ZERO_EXTEND \|\| !Hi.hasOneUse() \|\|
18041	!Hi.getOperand(0).getValueType().isScalarInteger() \|\|
18042	Hi.getOperand(0).getValueSizeInBits() > HalfValBitSize)
18043	return SDValue();
18044
18045	// Use the EVT of low and high parts before bitcast as the input
18046	// of target query.
// NOTE(review): when the zext operand is a BITCAST, the code below takes the
// value type of that BITCAST node itself (its result type), not the type of
// the bitcast's own operand -- confirm this matches the "before bitcast"
// intent stated above.
18047	EVT LowTy = (Lo.getOperand(0).getOpcode() == ISD::BITCAST)
18048	? Lo.getOperand(0).getValueType()
18049	: Lo.getValueType();
18050	EVT HighTy = (Hi.getOperand(0).getOpcode() == ISD::BITCAST)
18051	? Hi.getOperand(0).getValueType()
18052	: Hi.getValueType();
18053	if (!TLI.isMultiStoresCheaperThanBitsMerge(LowTy, HighTy))
18054	return SDValue();
18055
18056	// Start to split store.
18057	MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
18058	AAMDNodes AAInfo = ST->getAAInfo();
18059
18060	// Change the sizes of Lo and Hi's value types to HalfValBitSize.
18061	EVT VT = EVT::getIntegerVT(*DAG.getContext(), HalfValBitSize);
18062	Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Lo.getOperand(0));
18063	Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Hi.getOperand(0));
18064
18065	SDValue Chain = ST->getChain();
18066	SDValue Ptr = ST->getBasePtr();
18067	// Lower value store.
18068	SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
18069	ST->getOriginalAlign(), MMOFlags, AAInfo);
18070	Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(HalfValBitSize / 8), DL);
18071	// Higher value store.
// The high-half store takes St0 as its chain, so returning St1 alone keeps
// both stores reachable.
18072	SDValue St1 = DAG.getStore(
18073	St0, DL, Hi, Ptr, ST->getPointerInfo().getWithOffset(HalfValBitSize / 8),
18074	ST->getOriginalAlign(), MMOFlags, AAInfo);
18075	return St1;
18076	}
18077
18078	/// Convert a disguised subvector insertion into a shuffle:
18079	SDValue DAGCombiner::combineInsertEltToShuffle(SDNode *N, unsigned InsIndex) {
18080	assert(N->getOpcode() == ISD::INSERT_VECTOR_ELT &&((N->getOpcode() == ISD::INSERT_VECTOR_ELT && "Expected extract_vector_elt" ) ? static_cast<void> (0) : __assert_fail ("N->getOpcode() == ISD::INSERT_VECTOR_ELT && \"Expected extract_vector_elt\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 18081, __PRETTY_FUNCTION__))
18081	"Expected extract_vector_elt")((N->getOpcode() == ISD::INSERT_VECTOR_ELT && "Expected extract_vector_elt" ) ? static_cast<void> (0) : __assert_fail ("N->getOpcode() == ISD::INSERT_VECTOR_ELT && \"Expected extract_vector_elt\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 18081, __PRETTY_FUNCTION__));
18082	SDValue InsertVal = N->getOperand(1);
18083	SDValue Vec = N->getOperand(0);
18084
18085	// (insert_vector_elt (vector_shuffle X, Y), (extract_vector_elt X, N),
18086	// InsIndex)
18087	// --> (vector_shuffle X, Y) and variations where shuffle operands may be
18088	// CONCAT_VECTORS.
18089	if (Vec.getOpcode() == ISD::VECTOR_SHUFFLE && Vec.hasOneUse() &&
18090	InsertVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
18091	isa<ConstantSDNode>(InsertVal.getOperand(1))) {
18092	ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Vec.getNode());
18093	ArrayRef<int> Mask = SVN->getMask();
18094
18095	SDValue X = Vec.getOperand(0);
18096	SDValue Y = Vec.getOperand(1);
18097
18098	// Vec's operand 0 is using indices from 0 to N-1 and
18099	// operand 1 from N to 2N - 1, where N is the number of
18100	// elements in the vectors.
18101	SDValue InsertVal0 = InsertVal.getOperand(0);
18102	int ElementOffset = -1;
18103
18104	// We explore the inputs of the shuffle in order to see if we find the
18105	// source of the extract_vector_elt. If so, we can use it to modify the
18106	// shuffle rather than perform an insert_vector_elt.
18107	SmallVector<std::pair<int, SDValue>, 8> ArgWorkList;
18108	ArgWorkList.emplace_back(Mask.size(), Y);
18109	ArgWorkList.emplace_back(0, X);
18110
18111	while (!ArgWorkList.empty()) {
18112	int ArgOffset;
18113	SDValue ArgVal;
18114	std::tie(ArgOffset, ArgVal) = ArgWorkList.pop_back_val();
18115
18116	if (ArgVal == InsertVal0) {
18117	ElementOffset = ArgOffset;
18118	break;
18119	}
18120
18121	// Peek through concat_vector.
18122	if (ArgVal.getOpcode() == ISD::CONCAT_VECTORS) {
18123	int CurrentArgOffset =
18124	ArgOffset + ArgVal.getValueType().getVectorNumElements();
18125	int Step = ArgVal.getOperand(0).getValueType().getVectorNumElements();
18126	for (SDValue Op : reverse(ArgVal->ops())) {
18127	CurrentArgOffset -= Step;
18128	ArgWorkList.emplace_back(CurrentArgOffset, Op);
18129	}
18130
18131	// Make sure we went through all the elements and did not screw up index
18132	// computation.
18133	assert(CurrentArgOffset == ArgOffset)((CurrentArgOffset == ArgOffset) ? static_cast<void> (0 ) : __assert_fail ("CurrentArgOffset == ArgOffset", "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 18133, __PRETTY_FUNCTION__));
18134	}
18135	}
18136
18137	if (ElementOffset != -1) {
18138	SmallVector<int, 16> NewMask(Mask.begin(), Mask.end());
18139
18140	auto *ExtrIndex = cast<ConstantSDNode>(InsertVal.getOperand(1));
18141	NewMask[InsIndex] = ElementOffset + ExtrIndex->getZExtValue();
18142	assert(NewMask[InsIndex] <((NewMask[InsIndex] < (int)(2 * Vec.getValueType().getVectorNumElements ()) && NewMask[InsIndex] >= 0 && "NewMask[InsIndex] is out of bound" ) ? static_cast<void> (0) : __assert_fail ("NewMask[InsIndex] < (int)(2 * Vec.getValueType().getVectorNumElements()) && NewMask[InsIndex] >= 0 && \"NewMask[InsIndex] is out of bound\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 18144, __PRETTY_FUNCTION__))
18143	(int)(2 * Vec.getValueType().getVectorNumElements()) &&((NewMask[InsIndex] < (int)(2 * Vec.getValueType().getVectorNumElements ()) && NewMask[InsIndex] >= 0 && "NewMask[InsIndex] is out of bound" ) ? static_cast<void> (0) : __assert_fail ("NewMask[InsIndex] < (int)(2 * Vec.getValueType().getVectorNumElements()) && NewMask[InsIndex] >= 0 && \"NewMask[InsIndex] is out of bound\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 18144, __PRETTY_FUNCTION__))
18144	NewMask[InsIndex] >= 0 && "NewMask[InsIndex] is out of bound")((NewMask[InsIndex] < (int)(2 * Vec.getValueType().getVectorNumElements ()) && NewMask[InsIndex] >= 0 && "NewMask[InsIndex] is out of bound" ) ? static_cast<void> (0) : __assert_fail ("NewMask[InsIndex] < (int)(2 * Vec.getValueType().getVectorNumElements()) && NewMask[InsIndex] >= 0 && \"NewMask[InsIndex] is out of bound\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 18144, __PRETTY_FUNCTION__));
18145
18146	SDValue LegalShuffle =
18147	TLI.buildLegalVectorShuffle(Vec.getValueType(), SDLoc(N), X,
18148	Y, NewMask, DAG);
18149	if (LegalShuffle)
18150	return LegalShuffle;
18151	}
18152	}
18153
18154	// insert_vector_elt V, (bitcast X from vector type), IdxC -->
18155	// bitcast(shuffle (bitcast V), (extended X), Mask)
18156	// Note: We do not use an insert_subvector node because that requires a
18157	// legal subvector type.
18158	if (InsertVal.getOpcode() != ISD::BITCAST \|\| !InsertVal.hasOneUse() \|\|
18159	!InsertVal.getOperand(0).getValueType().isVector())
18160	return SDValue();
18161
18162	SDValue SubVec = InsertVal.getOperand(0);
18163	SDValue DestVec = N->getOperand(0);
18164	EVT SubVecVT = SubVec.getValueType();
18165	EVT VT = DestVec.getValueType();
18166	unsigned NumSrcElts = SubVecVT.getVectorNumElements();
18167	// If the source only has a single vector element, the cost of creating adding
18168	// it to a vector is likely to exceed the cost of a insert_vector_elt.
18169	if (NumSrcElts == 1)
18170	return SDValue();
18171	unsigned ExtendRatio = VT.getSizeInBits() / SubVecVT.getSizeInBits();
18172	unsigned NumMaskVals = ExtendRatio * NumSrcElts;
18173
18174	// Step 1: Create a shuffle mask that implements this insert operation. The
18175	// vector that we are inserting into will be operand 0 of the shuffle, so
18176	// those elements are just 'i'. The inserted subvector is in the first
18177	// positions of operand 1 of the shuffle. Example:
18178	// insert v4i32 V, (v2i16 X), 2 --> shuffle v8i16 V', X', {0,1,2,3,8,9,6,7}
18179	SmallVector<int, 16> Mask(NumMaskVals);
18180	for (unsigned i = 0; i != NumMaskVals; ++i) {
18181	if (i / NumSrcElts == InsIndex)
18182	Mask[i] = (i % NumSrcElts) + NumMaskVals;
18183	else
18184	Mask[i] = i;
18185	}
18186
18187	// Bail out if the target can not handle the shuffle we want to create.
18188	EVT SubVecEltVT = SubVecVT.getVectorElementType();
18189	EVT ShufVT = EVT::getVectorVT(*DAG.getContext(), SubVecEltVT, NumMaskVals);
18190	if (!TLI.isShuffleMaskLegal(Mask, ShufVT))
18191	return SDValue();
18192
18193	// Step 2: Create a wide vector from the inserted source vector by appending
18194	// undefined elements. This is the same size as our destination vector.
18195	SDLoc DL(N);
18196	SmallVector<SDValue, 8> ConcatOps(ExtendRatio, DAG.getUNDEF(SubVecVT));
18197	ConcatOps[0] = SubVec;
18198	SDValue PaddedSubV = DAG.getNode(ISD::CONCAT_VECTORS, DL, ShufVT, ConcatOps);
18199
18200	// Step 3: Shuffle in the padded subvector.
18201	SDValue DestVecBC = DAG.getBitcast(ShufVT, DestVec);
18202	SDValue Shuf = DAG.getVectorShuffle(ShufVT, DL, DestVecBC, PaddedSubV, Mask);
18203	AddToWorklist(PaddedSubV.getNode());
18204	AddToWorklist(DestVecBC.getNode());
18205	AddToWorklist(Shuf.getNode());
18206	return DAG.getBitcast(VT, Shuf);
18207	}
18208
18209	SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
18210	SDValue InVec = N->getOperand(0);
18211	SDValue InVal = N->getOperand(1);
18212	SDValue EltNo = N->getOperand(2);
18213	SDLoc DL(N);
18214
18215	EVT VT = InVec.getValueType();
18216	auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
18217
18218	// Insert into out-of-bounds element is undefined.
18219	if (IndexC && VT.isFixedLengthVector() &&
18220	IndexC->getZExtValue() >= VT.getVectorNumElements())
18221	return DAG.getUNDEF(VT);
18222
18223	// Remove redundant insertions:
18224	// (insert_vector_elt x (extract_vector_elt x idx) idx) -> x
18225	if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
18226	InVec == InVal.getOperand(0) && EltNo == InVal.getOperand(1))
18227	return InVec;
18228
18229	if (!IndexC) {
18230	// If this is variable insert to undef vector, it might be better to splat:
18231	// inselt undef, InVal, EltNo --> build_vector < InVal, InVal, ... >
18232	if (InVec.isUndef() && TLI.shouldSplatInsEltVarIndex(VT)) {
18233	if (VT.isScalableVector())
18234	return DAG.getSplatVector(VT, DL, InVal);
18235	else {
18236	SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), InVal);
18237	return DAG.getBuildVector(VT, DL, Ops);
18238	}
18239	}
18240	return SDValue();
18241	}
18242
18243	if (VT.isScalableVector())
18244	return SDValue();
18245
18246	unsigned NumElts = VT.getVectorNumElements();
18247
18248	// We must know which element is being inserted for folds below here.
18249	unsigned Elt = IndexC->getZExtValue();
18250	if (SDValue Shuf = combineInsertEltToShuffle(N, Elt))
18251	return Shuf;
18252
18253	// Canonicalize insert_vector_elt dag nodes.
18254	// Example:
18255	// (insert_vector_elt (insert_vector_elt A, Idx0), Idx1)
18256	// -> (insert_vector_elt (insert_vector_elt A, Idx1), Idx0)
18257	//
18258	// Do this only if the child insert_vector node has one use; also
18259	// do this only if indices are both constants and Idx1 < Idx0.
18260	if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT && InVec.hasOneUse()
18261	&& isa<ConstantSDNode>(InVec.getOperand(2))) {
18262	unsigned OtherElt = InVec.getConstantOperandVal(2);
18263	if (Elt < OtherElt) {
18264	// Swap nodes.
18265	SDValue NewOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
18266	InVec.getOperand(0), InVal, EltNo);
18267	AddToWorklist(NewOp.getNode());
18268	return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(InVec.getNode()),
18269	VT, NewOp, InVec.getOperand(1), InVec.getOperand(2));
18270	}
18271	}
18272
18273	// If we can't generate a legal BUILD_VECTOR, exit
18274	if (LegalOperations && !TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
18275	return SDValue();
18276
18277	// Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
18278	// be converted to a BUILD_VECTOR). Fill in the Ops vector with the
18279	// vector elements.
18280	SmallVector<SDValue, 8> Ops;
18281	// Do not combine these two vectors if the output vector will not replace
18282	// the input vector.
18283	if (InVec.getOpcode() == ISD::BUILD_VECTOR && InVec.hasOneUse()) {
18284	Ops.append(InVec.getNode()->op_begin(),
18285	InVec.getNode()->op_end());
18286	} else if (InVec.isUndef()) {
18287	Ops.append(NumElts, DAG.getUNDEF(InVal.getValueType()));
18288	} else {
18289	return SDValue();
18290	}
18291	assert(Ops.size() == NumElts && "Unexpected vector size")((Ops.size() == NumElts && "Unexpected vector size") ? static_cast<void> (0) : __assert_fail ("Ops.size() == NumElts && \"Unexpected vector size\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 18291, __PRETTY_FUNCTION__));
18292
18293	// Insert the element
18294	if (Elt < Ops.size()) {
18295	// All the operands of BUILD_VECTOR must have the same type;
18296	// we enforce that here.
18297	EVT OpVT = Ops[0].getValueType();
18298	Ops[Elt] = OpVT.isInteger() ? DAG.getAnyExtOrTrunc(InVal, DL, OpVT) : InVal;
18299	}
18300
18301	// Return the new vector
18302	return DAG.getBuildVector(VT, DL, Ops);
18303	}
18304
18305	SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
18306	SDValue EltNo,
18307	LoadSDNode *OriginalLoad) {
18308	assert(OriginalLoad->isSimple())((OriginalLoad->isSimple()) ? static_cast<void> (0) : __assert_fail ("OriginalLoad->isSimple()", "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 18308, __PRETTY_FUNCTION__));
18309
18310	EVT ResultVT = EVE->getValueType(0);
18311	EVT VecEltVT = InVecVT.getVectorElementType();
18312
18313	// If the vector element type is not a multiple of a byte then we are unable
18314	// to correctly compute an address to load only the extracted element as a
18315	// scalar.
18316	if (!VecEltVT.isByteSized())
18317	return SDValue();
18318
18319	Align Alignment = OriginalLoad->getAlign();
18320	Align NewAlign = DAG.getDataLayout().getABITypeAlign(
18321	VecEltVT.getTypeForEVT(*DAG.getContext()));
18322
18323	if (NewAlign > Alignment \|\|
18324	!TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT))
18325	return SDValue();
18326
18327	ISD::LoadExtType ExtTy = ResultVT.bitsGT(VecEltVT) ?
18328	ISD::NON_EXTLOAD : ISD::EXTLOAD;
18329	if (!TLI.shouldReduceLoadWidth(OriginalLoad, ExtTy, VecEltVT))
18330	return SDValue();
18331
18332	Alignment = NewAlign;
18333
18334	SDValue NewPtr = OriginalLoad->getBasePtr();
18335	SDValue Offset;
18336	EVT PtrType = NewPtr.getValueType();
18337	MachinePointerInfo MPI;
18338	SDLoc DL(EVE);
18339	if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {
18340	int Elt = ConstEltNo->getZExtValue();
18341	unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8;
18342	Offset = DAG.getConstant(PtrOff, DL, PtrType);
18343	MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff);
18344	} else {
18345	Offset = DAG.getZExtOrTrunc(EltNo, DL, PtrType);
18346	Offset = DAG.getNode(
18347	ISD::MUL, DL, PtrType, Offset,
18348	DAG.getConstant(VecEltVT.getStoreSize(), DL, PtrType));
18349	// Discard the pointer info except the address space because the memory
18350	// operand can't represent this new access since the offset is variable.
18351	MPI = MachinePointerInfo(OriginalLoad->getPointerInfo().getAddrSpace());
18352	}
18353	NewPtr = DAG.getMemBasePlusOffset(NewPtr, Offset, DL);
18354
18355	// The replacement we need to do here is a little tricky: we need to
18356	// replace an extractelement of a load with a load.
18357	// Use ReplaceAllUsesOfValuesWith to do the replacement.
18358	// Note that this replacement assumes that the extractvalue is the only
18359	// use of the load; that's okay because we don't want to perform this
18360	// transformation in other cases anyway.
18361	SDValue Load;
18362	SDValue Chain;
18363	if (ResultVT.bitsGT(VecEltVT)) {
18364	// If the result type of vextract is wider than the load, then issue an
18365	// extending load instead.
18366	ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, ResultVT,
18367	VecEltVT)
18368	? ISD::ZEXTLOAD
18369	: ISD::EXTLOAD;
18370	Load = DAG.getExtLoad(ExtType, SDLoc(EVE), ResultVT,
18371	OriginalLoad->getChain(), NewPtr, MPI, VecEltVT,
18372	Alignment, OriginalLoad->getMemOperand()->getFlags(),
18373	OriginalLoad->getAAInfo());
18374	Chain = Load.getValue(1);
18375	} else {
18376	Load = DAG.getLoad(
18377	VecEltVT, SDLoc(EVE), OriginalLoad->getChain(), NewPtr, MPI, Alignment,
18378	OriginalLoad->getMemOperand()->getFlags(), OriginalLoad->getAAInfo());
18379	Chain = Load.getValue(1);
18380	if (ResultVT.bitsLT(VecEltVT))
18381	Load = DAG.getNode(ISD::TRUNCATE, SDLoc(EVE), ResultVT, Load);
18382	else
18383	Load = DAG.getBitcast(ResultVT, Load);
18384	}
18385	WorklistRemover DeadNodes(*this);
18386	SDValue From[] = { SDValue(EVE, 0), SDValue(OriginalLoad, 1) };
18387	SDValue To[] = { Load, Chain };
18388	DAG.ReplaceAllUsesOfValuesWith(From, To, 2);
18389	// Make sure to revisit this node to clean it up; it will usually be dead.
18390	AddToWorklist(EVE);
18391	// Since we're explicitly calling ReplaceAllUses, add the new node to the
18392	// worklist explicitly as well.
18393	AddToWorklistWithUsers(Load.getNode());
18394	++OpsNarrowed;
18395	return SDValue(EVE, 0);
18396	}
18397
18398	/// Transform a vector binary operation into a scalar binary operation by moving
18399	/// the math/logic after an extract element of a vector.
18400	static SDValue scalarizeExtractedBinop(SDNode *ExtElt, SelectionDAG &DAG,
18401	bool LegalOperations) {
18402	const TargetLowering &TLI = DAG.getTargetLoweringInfo();
18403	SDValue Vec = ExtElt->getOperand(0);
18404	SDValue Index = ExtElt->getOperand(1);
18405	auto *IndexC = dyn_cast<ConstantSDNode>(Index);
18406	if (!IndexC \|\| !TLI.isBinOp(Vec.getOpcode()) \|\| !Vec.hasOneUse() \|\|
18407	Vec.getNode()->getNumValues() != 1)
18408	return SDValue();
18409
18410	// Targets may want to avoid this to prevent an expensive register transfer.
18411	if (!TLI.shouldScalarizeBinop(Vec))
18412	return SDValue();
18413
18414	// Extracting an element of a vector constant is constant-folded, so this
18415	// transform is just replacing a vector op with a scalar op while moving the
18416	// extract.
18417	SDValue Op0 = Vec.getOperand(0);
18418	SDValue Op1 = Vec.getOperand(1);
18419	if (isAnyConstantBuildVector(Op0, true) \|\|
18420	isAnyConstantBuildVector(Op1, true)) {
18421	// extractelt (binop X, C), IndexC --> binop (extractelt X, IndexC), C'
18422	// extractelt (binop C, X), IndexC --> binop C', (extractelt X, IndexC)
18423	SDLoc DL(ExtElt);
18424	EVT VT = ExtElt->getValueType(0);
18425	SDValue Ext0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op0, Index);
18426	SDValue Ext1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op1, Index);
18427	return DAG.getNode(Vec.getOpcode(), DL, VT, Ext0, Ext1);
18428	}
18429
18430	return SDValue();
18431	}
18432
18433	SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
18434	SDValue VecOp = N->getOperand(0);
18435	SDValue Index = N->getOperand(1);
18436	EVT ScalarVT = N->getValueType(0);
18437	EVT VecVT = VecOp.getValueType();
18438	if (VecOp.isUndef())
18439	return DAG.getUNDEF(ScalarVT);
18440
18441	// extract_vector_elt (insert_vector_elt vec, val, idx), idx) -> val
18442	//
18443	// This only really matters if the index is non-constant since other combines
18444	// on the constant elements already work.
18445	SDLoc DL(N);
18446	if (VecOp.getOpcode() == ISD::INSERT_VECTOR_ELT &&
18447	Index == VecOp.getOperand(2)) {
18448	SDValue Elt = VecOp.getOperand(1);
18449	return VecVT.isInteger() ? DAG.getAnyExtOrTrunc(Elt, DL, ScalarVT) : Elt;
18450	}
18451
18452	// (vextract (scalar_to_vector val, 0) -> val
18453	if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR) {
18454	// Only 0'th element of SCALAR_TO_VECTOR is defined.
18455	if (DAG.isKnownNeverZero(Index))
18456	return DAG.getUNDEF(ScalarVT);
18457
18458	// Check if the result type doesn't match the inserted element type. A
18459	// SCALAR_TO_VECTOR may truncate the inserted element and the
18460	// EXTRACT_VECTOR_ELT may widen the extracted vector.
18461	SDValue InOp = VecOp.getOperand(0);
18462	if (InOp.getValueType() != ScalarVT) {
18463	assert(InOp.getValueType().isInteger() && ScalarVT.isInteger())((InOp.getValueType().isInteger() && ScalarVT.isInteger ()) ? static_cast<void> (0) : __assert_fail ("InOp.getValueType().isInteger() && ScalarVT.isInteger()" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 18463, __PRETTY_FUNCTION__));
18464	return DAG.getSExtOrTrunc(InOp, DL, ScalarVT);
18465	}
18466	return InOp;
18467	}
18468
18469	// extract_vector_elt of out-of-bounds element -> UNDEF
18470	auto *IndexC = dyn_cast<ConstantSDNode>(Index);
18471	if (IndexC && VecVT.isFixedLengthVector() &&
18472	IndexC->getAPIntValue().uge(VecVT.getVectorNumElements()))
18473	return DAG.getUNDEF(ScalarVT);
18474
18475	// extract_vector_elt (build_vector x, y), 1 -> y
18476	if (((IndexC && VecOp.getOpcode() == ISD::BUILD_VECTOR) \|\|
18477	VecOp.getOpcode() == ISD::SPLAT_VECTOR) &&
18478	TLI.isTypeLegal(VecVT) &&
18479	(VecOp.hasOneUse() \|\| TLI.aggressivelyPreferBuildVectorSources(VecVT))) {
18480	assert((VecOp.getOpcode() != ISD::BUILD_VECTOR \|\|(((VecOp.getOpcode() != ISD::BUILD_VECTOR \|\| VecVT.isFixedLengthVector ()) && "BUILD_VECTOR used for scalable vectors") ? static_cast <void> (0) : __assert_fail ("(VecOp.getOpcode() != ISD::BUILD_VECTOR \|\| VecVT.isFixedLengthVector()) && \"BUILD_VECTOR used for scalable vectors\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 18482, __PRETTY_FUNCTION__))
18481	VecVT.isFixedLengthVector()) &&(((VecOp.getOpcode() != ISD::BUILD_VECTOR \|\| VecVT.isFixedLengthVector ()) && "BUILD_VECTOR used for scalable vectors") ? static_cast <void> (0) : __assert_fail ("(VecOp.getOpcode() != ISD::BUILD_VECTOR \|\| VecVT.isFixedLengthVector()) && \"BUILD_VECTOR used for scalable vectors\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 18482, __PRETTY_FUNCTION__))
18482	"BUILD_VECTOR used for scalable vectors")(((VecOp.getOpcode() != ISD::BUILD_VECTOR \|\| VecVT.isFixedLengthVector ()) && "BUILD_VECTOR used for scalable vectors") ? static_cast <void> (0) : __assert_fail ("(VecOp.getOpcode() != ISD::BUILD_VECTOR \|\| VecVT.isFixedLengthVector()) && \"BUILD_VECTOR used for scalable vectors\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 18482, __PRETTY_FUNCTION__));
18483	unsigned IndexVal =
18484	VecOp.getOpcode() == ISD::BUILD_VECTOR ? IndexC->getZExtValue() : 0;
18485	SDValue Elt = VecOp.getOperand(IndexVal);
18486	EVT InEltVT = Elt.getValueType();
18487
18488	// Sometimes build_vector's scalar input types do not match result type.
18489	if (ScalarVT == InEltVT)
18490	return Elt;
18491
18492	// TODO: It may be useful to truncate if free if the build_vector implicitly
18493	// converts.
18494	}
18495
18496	if (VecVT.isScalableVector())
18497	return SDValue();
18498
18499	// All the code from this point onwards assumes fixed width vectors, but it's
18500	// possible that some of the combinations could be made to work for scalable
18501	// vectors too.
18502	unsigned NumElts = VecVT.getVectorNumElements();
18503	unsigned VecEltBitWidth = VecVT.getScalarSizeInBits();
18504
18505	// TODO: These transforms should not require the 'hasOneUse' restriction, but
18506	// there are regressions on multiple targets without it. We can end up with a
18507	// mess of scalar and vector code if we reduce only part of the DAG to scalar.
18508	if (IndexC && VecOp.getOpcode() == ISD::BITCAST && VecVT.isInteger() &&
18509	VecOp.hasOneUse()) {
18510	// The vector index of the LSBs of the source depend on the endian-ness.
18511	bool IsLE = DAG.getDataLayout().isLittleEndian();
18512	unsigned ExtractIndex = IndexC->getZExtValue();
18513	// extract_elt (v2i32 (bitcast i64:x)), BCTruncElt -> i32 (trunc i64:x)
18514	unsigned BCTruncElt = IsLE ? 0 : NumElts - 1;
18515	SDValue BCSrc = VecOp.getOperand(0);
18516	if (ExtractIndex == BCTruncElt && BCSrc.getValueType().isScalarInteger())
18517	return DAG.getNode(ISD::TRUNCATE, DL, ScalarVT, BCSrc);
18518
18519	if (LegalTypes && BCSrc.getValueType().isInteger() &&
18520	BCSrc.getOpcode() == ISD::SCALAR_TO_VECTOR) {
18521	// ext_elt (bitcast (scalar_to_vec i64 X to v2i64) to v4i32), TruncElt -->
18522	// trunc i64 X to i32
18523	SDValue X = BCSrc.getOperand(0);
18524	assert(X.getValueType().isScalarInteger() && ScalarVT.isScalarInteger() &&((X.getValueType().isScalarInteger() && ScalarVT.isScalarInteger () && "Extract element and scalar to vector can't change element type " "from FP to integer.") ? static_cast<void> (0) : __assert_fail ("X.getValueType().isScalarInteger() && ScalarVT.isScalarInteger() && \"Extract element and scalar to vector can't change element type \" \"from FP to integer.\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 18526, __PRETTY_FUNCTION__))
18525	"Extract element and scalar to vector can't change element type "((X.getValueType().isScalarInteger() && ScalarVT.isScalarInteger () && "Extract element and scalar to vector can't change element type " "from FP to integer.") ? static_cast<void> (0) : __assert_fail ("X.getValueType().isScalarInteger() && ScalarVT.isScalarInteger() && \"Extract element and scalar to vector can't change element type \" \"from FP to integer.\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 18526, __PRETTY_FUNCTION__))
18526	"from FP to integer.")((X.getValueType().isScalarInteger() && ScalarVT.isScalarInteger () && "Extract element and scalar to vector can't change element type " "from FP to integer.") ? static_cast<void> (0) : __assert_fail ("X.getValueType().isScalarInteger() && ScalarVT.isScalarInteger() && \"Extract element and scalar to vector can't change element type \" \"from FP to integer.\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 18526, __PRETTY_FUNCTION__));
18527	unsigned XBitWidth = X.getValueSizeInBits();
18528	BCTruncElt = IsLE ? 0 : XBitWidth / VecEltBitWidth - 1;
18529
18530	// An extract element return value type can be wider than its vector
18531	// operand element type. In that case, the high bits are undefined, so
18532	// it's possible that we may need to extend rather than truncate.
18533	if (ExtractIndex == BCTruncElt && XBitWidth > VecEltBitWidth) {
18534	assert(XBitWidth % VecEltBitWidth == 0 &&((XBitWidth % VecEltBitWidth == 0 && "Scalar bitwidth must be a multiple of vector element bitwidth" ) ? static_cast<void> (0) : __assert_fail ("XBitWidth % VecEltBitWidth == 0 && \"Scalar bitwidth must be a multiple of vector element bitwidth\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 18535, __PRETTY_FUNCTION__))
18535	"Scalar bitwidth must be a multiple of vector element bitwidth")((XBitWidth % VecEltBitWidth == 0 && "Scalar bitwidth must be a multiple of vector element bitwidth" ) ? static_cast<void> (0) : __assert_fail ("XBitWidth % VecEltBitWidth == 0 && \"Scalar bitwidth must be a multiple of vector element bitwidth\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 18535, __PRETTY_FUNCTION__));
18536	return DAG.getAnyExtOrTrunc(X, DL, ScalarVT);
18537	}
18538	}
18539	}
18540
18541	if (SDValue BO = scalarizeExtractedBinop(N, DAG, LegalOperations))
18542	return BO;
18543
18544	// Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT.
18545	// We only perform this optimization before the op legalization phase because
18546	// we may introduce new vector instructions which are not backed by TD
18547	// patterns. For example on AVX, extracting elements from a wide vector
18548	// without using extract_subvector. However, if we can find an underlying
18549	// scalar value, then we can always use that.
18550	if (IndexC && VecOp.getOpcode() == ISD::VECTOR_SHUFFLE) {
18551	auto *Shuf = cast<ShuffleVectorSDNode>(VecOp);
18552	// Find the new index to extract from.
18553	int OrigElt = Shuf->getMaskElt(IndexC->getZExtValue());
18554
18555	// Extracting an undef index is undef.
18556	if (OrigElt == -1)
18557	return DAG.getUNDEF(ScalarVT);
18558
18559	// Select the right vector half to extract from.
18560	SDValue SVInVec;
18561	if (OrigElt < (int)NumElts) {
18562	SVInVec = VecOp.getOperand(0);
18563	} else {
18564	SVInVec = VecOp.getOperand(1);
18565	OrigElt -= NumElts;
18566	}
18567
18568	if (SVInVec.getOpcode() == ISD::BUILD_VECTOR) {
18569	SDValue InOp = SVInVec.getOperand(OrigElt);
18570	if (InOp.getValueType() != ScalarVT) {
18571	assert(InOp.getValueType().isInteger() && ScalarVT.isInteger())((InOp.getValueType().isInteger() && ScalarVT.isInteger ()) ? static_cast<void> (0) : __assert_fail ("InOp.getValueType().isInteger() && ScalarVT.isInteger()" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 18571, __PRETTY_FUNCTION__));
18572	InOp = DAG.getSExtOrTrunc(InOp, DL, ScalarVT);
18573	}
18574
18575	return InOp;
18576	}
18577
18578	// FIXME: We should handle recursing on other vector shuffles and
18579	// scalar_to_vector here as well.
18580
18581	if (!LegalOperations \|\|
18582	// FIXME: Should really be just isOperationLegalOrCustom.
18583	TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VecVT) \|\|
18584	TLI.isOperationExpand(ISD::VECTOR_SHUFFLE, VecVT)) {
18585	return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, SVInVec,
18586	DAG.getVectorIdxConstant(OrigElt, DL));
18587	}
18588	}
18589
18590	// If only EXTRACT_VECTOR_ELT nodes use the source vector we can
18591	// simplify it based on the (valid) extraction indices.
18592	if (llvm::all_of(VecOp->uses(), [&](SDNode *Use) {
18593	return Use->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
18594	Use->getOperand(0) == VecOp &&
18595	isa<ConstantSDNode>(Use->getOperand(1));
18596	})) {
18597	APInt DemandedElts = APInt::getNullValue(NumElts);
18598	for (SDNode *Use : VecOp->uses()) {
18599	auto *CstElt = cast<ConstantSDNode>(Use->getOperand(1));
18600	if (CstElt->getAPIntValue().ult(NumElts))
18601	DemandedElts.setBit(CstElt->getZExtValue());
18602	}
18603	if (SimplifyDemandedVectorElts(VecOp, DemandedElts, true)) {
18604	// We simplified the vector operand of this extract element. If this
18605	// extract is not dead, visit it again so it is folded properly.
18606	if (N->getOpcode() != ISD::DELETED_NODE)
18607	AddToWorklist(N);
18608	return SDValue(N, 0);
18609	}
18610	APInt DemandedBits = APInt::getAllOnesValue(VecEltBitWidth);
18611	if (SimplifyDemandedBits(VecOp, DemandedBits, DemandedElts, true)) {
18612	// We simplified the vector operand of this extract element. If this
18613	// extract is not dead, visit it again so it is folded properly.
18614	if (N->getOpcode() != ISD::DELETED_NODE)
18615	AddToWorklist(N);
18616	return SDValue(N, 0);
18617	}
18618	}
18619
18620	// Everything under here is trying to match an extract of a loaded value.
18621	// If the result of load has to be truncated, then it's not necessarily
18622	// profitable.
18623	bool BCNumEltsChanged = false;
18624	EVT ExtVT = VecVT.getVectorElementType();
18625	EVT LVT = ExtVT;
18626	if (ScalarVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, ScalarVT))
18627	return SDValue();
18628
18629	if (VecOp.getOpcode() == ISD::BITCAST) {
18630	// Don't duplicate a load with other uses.
18631	if (!VecOp.hasOneUse())
18632	return SDValue();
18633
18634	EVT BCVT = VecOp.getOperand(0).getValueType();
18635	if (!BCVT.isVector() \|\| ExtVT.bitsGT(BCVT.getVectorElementType()))
18636	return SDValue();
18637	if (NumElts != BCVT.getVectorNumElements())
18638	BCNumEltsChanged = true;
18639	VecOp = VecOp.getOperand(0);
18640	ExtVT = BCVT.getVectorElementType();
18641	}
18642
18643	// extract (vector load $addr), i --> load $addr + i * size
18644	if (!LegalOperations && !IndexC && VecOp.hasOneUse() &&
18645	ISD::isNormalLoad(VecOp.getNode()) &&
18646	!Index->hasPredecessor(VecOp.getNode())) {
18647	auto *VecLoad = dyn_cast<LoadSDNode>(VecOp);
18648	if (VecLoad && VecLoad->isSimple())
18649	return scalarizeExtractedVectorLoad(N, VecVT, Index, VecLoad);
18650	}
18651
18652	// Perform only after legalization to ensure build_vector / vector_shuffle
18653	// optimizations have already been done.
18654	if (!LegalOperations \|\| !IndexC)
18655	return SDValue();
18656
18657	// (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size)
18658	// (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size)
18659	// (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr)
18660	int Elt = IndexC->getZExtValue();
18661	LoadSDNode *LN0 = nullptr;
18662	if (ISD::isNormalLoad(VecOp.getNode())) {
18663	LN0 = cast<LoadSDNode>(VecOp);
18664	} else if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR &&
18665	VecOp.getOperand(0).getValueType() == ExtVT &&
18666	ISD::isNormalLoad(VecOp.getOperand(0).getNode())) {
18667	// Don't duplicate a load with other uses.
18668	if (!VecOp.hasOneUse())
18669	return SDValue();
18670
18671	LN0 = cast<LoadSDNode>(VecOp.getOperand(0));
18672	}
18673	if (auto *Shuf = dyn_cast<ShuffleVectorSDNode>(VecOp)) {
18674	// (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1)
18675	// =>
18676	// (load $addr+1*size)
18677
18678	// Don't duplicate a load with other uses.
18679	if (!VecOp.hasOneUse())
18680	return SDValue();
18681
18682	// If the bit convert changed the number of elements, it is unsafe
18683	// to examine the mask.
18684	if (BCNumEltsChanged)
18685	return SDValue();
18686
18687	// Select the input vector, guarding against out of range extract vector.
18688	int Idx = (Elt > (int)NumElts) ? -1 : Shuf->getMaskElt(Elt);
18689	VecOp = (Idx < (int)NumElts) ? VecOp.getOperand(0) : VecOp.getOperand(1);
18690
18691	if (VecOp.getOpcode() == ISD::BITCAST) {
18692	// Don't duplicate a load with other uses.
18693	if (!VecOp.hasOneUse())
18694	return SDValue();
18695
18696	VecOp = VecOp.getOperand(0);
18697	}
18698	if (ISD::isNormalLoad(VecOp.getNode())) {
18699	LN0 = cast<LoadSDNode>(VecOp);
18700	Elt = (Idx < (int)NumElts) ? Idx : Idx - (int)NumElts;
18701	Index = DAG.getConstant(Elt, DL, Index.getValueType());
18702	}
18703	} else if (VecOp.getOpcode() == ISD::CONCAT_VECTORS && !BCNumEltsChanged &&
18704	VecVT.getVectorElementType() == ScalarVT &&
18705	(!LegalTypes \|\|
18706	TLI.isTypeLegal(
18707	VecOp.getOperand(0).getValueType().getVectorElementType()))) {
18708	// extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 0
18709	// -> extract_vector_elt a, 0
18710	// extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 1
18711	// -> extract_vector_elt a, 1
18712	// extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 2
18713	// -> extract_vector_elt b, 0
18714	// extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 3
18715	// -> extract_vector_elt b, 1
18716	SDLoc SL(N);
18717	EVT ConcatVT = VecOp.getOperand(0).getValueType();
18718	unsigned ConcatNumElts = ConcatVT.getVectorNumElements();
18719	SDValue NewIdx = DAG.getConstant(Elt % ConcatNumElts, SL,
18720	Index.getValueType());
18721
18722	SDValue ConcatOp = VecOp.getOperand(Elt / ConcatNumElts);
18723	SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL,
18724	ConcatVT.getVectorElementType(),
18725	ConcatOp, NewIdx);
18726	return DAG.getNode(ISD::BITCAST, SL, ScalarVT, Elt);
18727	}
18728
18729	// Make sure we found a non-volatile load and the extractelement is
18730	// the only use.
18731	if (!LN0 \|\| !LN0->hasNUsesOfValue(1,0) \|\| !LN0->isSimple())
18732	return SDValue();
18733
18734	// If Idx was -1 above, Elt is going to be -1, so just return undef.
18735	if (Elt == -1)
18736	return DAG.getUNDEF(LVT);
18737
18738	return scalarizeExtractedVectorLoad(N, VecVT, Index, LN0);
18739	}
18740
18741	// Simplify (build_vec (ext )) to (bitcast (build_vec ))
18742	SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
18743	// We perform this optimization post type-legalization because
18744	// the type-legalizer often scalarizes integer-promoted vectors.
18745	// Performing this optimization before may create bit-casts which
18746	// will be type-legalized to complex code sequences.
18747	// We perform this optimization only before the operation legalizer because we
18748	// may introduce illegal operations.
18749	if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes)
18750	return SDValue();
18751
18752	unsigned NumInScalars = N->getNumOperands();
18753	SDLoc DL(N);
18754	EVT VT = N->getValueType(0);
18755
18756	// Check to see if this is a BUILD_VECTOR of a bunch of values
18757	// which come from any_extend or zero_extend nodes. If so, we can create
18758	// a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR
18759	// optimizations. We do not handle sign-extend because we can't fill the sign
18760	// using shuffles.
18761	EVT SourceType = MVT::Other;
18762	bool AllAnyExt = true;
18763
18764	for (unsigned i = 0; i != NumInScalars; ++i) {
18765	SDValue In = N->getOperand(i);
18766	// Ignore undef inputs.
18767	if (In.isUndef()) continue;
18768
18769	bool AnyExt = In.getOpcode() == ISD::ANY_EXTEND;
18770	bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND;
18771
18772	// Abort if the element is not an extension.
18773	if (!ZeroExt && !AnyExt) {
18774	SourceType = MVT::Other;
18775	break;
18776	}
18777
18778	// The input is a ZeroExt or AnyExt. Check the original type.
18779	EVT InTy = In.getOperand(0).getValueType();
18780
18781	// Check that all of the widened source types are the same.
18782	if (SourceType == MVT::Other)
18783	// First time.
18784	SourceType = InTy;
18785	else if (InTy != SourceType) {
18786	// Multiple income types. Abort.
18787	SourceType = MVT::Other;
18788	break;
18789	}
18790
18791	// Check if all of the extends are ANY_EXTENDs.
18792	AllAnyExt &= AnyExt;
18793	}
18794
18795	// In order to have valid types, all of the inputs must be extended from the
18796	// same source type and all of the inputs must be any or zero extend.
18797	// Scalar sizes must be a power of two.
18798	EVT OutScalarTy = VT.getScalarType();
18799	bool ValidTypes = SourceType != MVT::Other &&
18800	isPowerOf2_32(OutScalarTy.getSizeInBits()) &&
18801	isPowerOf2_32(SourceType.getSizeInBits());
18802
18803	// Create a new simpler BUILD_VECTOR sequence which other optimizations can
18804	// turn into a single shuffle instruction.
18805	if (!ValidTypes)
18806	return SDValue();
18807
18808	// If we already have a splat buildvector, then don't fold it if it means
18809	// introducing zeros.
18810	if (!AllAnyExt && DAG.isSplatValue(SDValue(N, 0), /AllowUndefs/ true))
18811	return SDValue();
18812
18813	bool isLE = DAG.getDataLayout().isLittleEndian();
18814	unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits();
18815	assert(ElemRatio > 1 && "Invalid element size ratio")((ElemRatio > 1 && "Invalid element size ratio") ? static_cast<void> (0) : __assert_fail ("ElemRatio > 1 && \"Invalid element size ratio\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 18815, __PRETTY_FUNCTION__));
18816	SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType):
18817	DAG.getConstant(0, DL, SourceType);
18818
18819	unsigned NewBVElems = ElemRatio * VT.getVectorNumElements();
18820	SmallVector<SDValue, 8> Ops(NewBVElems, Filler);
18821
18822	// Populate the new build_vector
18823	for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
18824	SDValue Cast = N->getOperand(i);
18825	assert((Cast.getOpcode() == ISD::ANY_EXTEND \|\|(((Cast.getOpcode() == ISD::ANY_EXTEND \|\| Cast.getOpcode() == ISD::ZERO_EXTEND \|\| Cast.isUndef()) && "Invalid cast opcode" ) ? static_cast<void> (0) : __assert_fail ("(Cast.getOpcode() == ISD::ANY_EXTEND \|\| Cast.getOpcode() == ISD::ZERO_EXTEND \|\| Cast.isUndef()) && \"Invalid cast opcode\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 18827, __PRETTY_FUNCTION__))
18826	Cast.getOpcode() == ISD::ZERO_EXTEND \|\|(((Cast.getOpcode() == ISD::ANY_EXTEND \|\| Cast.getOpcode() == ISD::ZERO_EXTEND \|\| Cast.isUndef()) && "Invalid cast opcode" ) ? static_cast<void> (0) : __assert_fail ("(Cast.getOpcode() == ISD::ANY_EXTEND \|\| Cast.getOpcode() == ISD::ZERO_EXTEND \|\| Cast.isUndef()) && \"Invalid cast opcode\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 18827, __PRETTY_FUNCTION__))
18827	Cast.isUndef()) && "Invalid cast opcode")(((Cast.getOpcode() == ISD::ANY_EXTEND \|\| Cast.getOpcode() == ISD::ZERO_EXTEND \|\| Cast.isUndef()) && "Invalid cast opcode" ) ? static_cast<void> (0) : __assert_fail ("(Cast.getOpcode() == ISD::ANY_EXTEND \|\| Cast.getOpcode() == ISD::ZERO_EXTEND \|\| Cast.isUndef()) && \"Invalid cast opcode\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 18827, __PRETTY_FUNCTION__));
18828	SDValue In;
18829	if (Cast.isUndef())
18830	In = DAG.getUNDEF(SourceType);
18831	else
18832	In = Cast->getOperand(0);
18833	unsigned Index = isLE ? (i * ElemRatio) :
18834	(i * ElemRatio + (ElemRatio - 1));
18835
18836	assert(Index < Ops.size() && "Invalid index")((Index < Ops.size() && "Invalid index") ? static_cast <void> (0) : __assert_fail ("Index < Ops.size() && \"Invalid index\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 18836, __PRETTY_FUNCTION__));
18837	Ops[Index] = In;
18838	}
18839
18840	// The type of the new BUILD_VECTOR node.
18841	EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SourceType, NewBVElems);
18842	assert(VecVT.getSizeInBits() == VT.getSizeInBits() &&((VecVT.getSizeInBits() == VT.getSizeInBits() && "Invalid vector size" ) ? static_cast<void> (0) : __assert_fail ("VecVT.getSizeInBits() == VT.getSizeInBits() && \"Invalid vector size\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 18843, __PRETTY_FUNCTION__))
18843	"Invalid vector size")((VecVT.getSizeInBits() == VT.getSizeInBits() && "Invalid vector size" ) ? static_cast<void> (0) : __assert_fail ("VecVT.getSizeInBits() == VT.getSizeInBits() && \"Invalid vector size\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 18843, __PRETTY_FUNCTION__));
18844	// Check if the new vector type is legal.
18845	if (!isTypeLegal(VecVT) \|\|
18846	(!TLI.isOperationLegal(ISD::BUILD_VECTOR, VecVT) &&
18847	TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)))
18848	return SDValue();
18849
18850	// Make the new BUILD_VECTOR.
18851	SDValue BV = DAG.getBuildVector(VecVT, DL, Ops);
18852
18853	// The new BUILD_VECTOR node has the potential to be further optimized.
18854	AddToWorklist(BV.getNode());
18855	// Bitcast to the desired type.
18856	return DAG.getBitcast(VT, BV);
18857	}
18858
18859	// Simplify (build_vec (trunc $1)
18860	// (trunc (srl $1 half-width))
18861	// (trunc (srl $1 (2 * half-width))) …)
18862	// to (bitcast $1)
18863	SDValue DAGCombiner::reduceBuildVecTruncToBitCast(SDNode *N) {
18864	assert(N->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector")((N->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector" ) ? static_cast<void> (0) : __assert_fail ("N->getOpcode() == ISD::BUILD_VECTOR && \"Expected build vector\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 18864, __PRETTY_FUNCTION__));
18865
18866	// Only for little endian
18867	if (!DAG.getDataLayout().isLittleEndian())
18868	return SDValue();
18869
18870	SDLoc DL(N);
18871	EVT VT = N->getValueType(0);
18872	EVT OutScalarTy = VT.getScalarType();
18873	uint64_t ScalarTypeBitsize = OutScalarTy.getSizeInBits();
18874
18875	// Only for power of two types to be sure that bitcast works well
18876	if (!isPowerOf2_64(ScalarTypeBitsize))
18877	return SDValue();
18878
18879	unsigned NumInScalars = N->getNumOperands();
18880
18881	// Look through bitcasts
18882	auto PeekThroughBitcast = [](SDValue Op) {
18883	if (Op.getOpcode() == ISD::BITCAST)
18884	return Op.getOperand(0);
18885	return Op;
18886	};
18887
18888	// The source value where all the parts are extracted.
18889	SDValue Src;
18890	for (unsigned i = 0; i != NumInScalars; ++i) {
18891	SDValue In = PeekThroughBitcast(N->getOperand(i));
18892	// Ignore undef inputs.
18893	if (In.isUndef()) continue;
18894
18895	if (In.getOpcode() != ISD::TRUNCATE)
18896	return SDValue();
18897
18898	In = PeekThroughBitcast(In.getOperand(0));
18899
18900	if (In.getOpcode() != ISD::SRL) {
18901	// For now only build_vec without shuffling, handle shifts here in the
18902	// future.
18903	if (i != 0)
18904	return SDValue();
18905
18906	Src = In;
18907	} else {
18908	// In is SRL
18909	SDValue part = PeekThroughBitcast(In.getOperand(0));
18910
18911	if (!Src) {
18912	Src = part;
18913	} else if (Src != part) {
18914	// Vector parts do not stem from the same variable
18915	return SDValue();
18916	}
18917
18918	SDValue ShiftAmtVal = In.getOperand(1);
18919	if (!isa<ConstantSDNode>(ShiftAmtVal))
18920	return SDValue();
18921
18922	uint64_t ShiftAmt = In.getNode()->getConstantOperandVal(1);
18923
18924	// The extracted value is not extracted at the right position
18925	if (ShiftAmt != i * ScalarTypeBitsize)
18926	return SDValue();
18927	}
18928	}
18929
18930	// Only cast if the size is the same
18931	if (Src.getValueType().getSizeInBits() != VT.getSizeInBits())
18932	return SDValue();
18933
18934	return DAG.getBitcast(VT, Src);
18935	}
18936
18937	SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N,
18938	ArrayRef<int> VectorMask,
18939	SDValue VecIn1, SDValue VecIn2,
18940	unsigned LeftIdx, bool DidSplitVec) {
18941	SDValue ZeroIdx = DAG.getVectorIdxConstant(0, DL);
18942
18943	EVT VT = N->getValueType(0);
18944	EVT InVT1 = VecIn1.getValueType();
18945	EVT InVT2 = VecIn2.getNode() ? VecIn2.getValueType() : InVT1;
18946
18947	unsigned NumElems = VT.getVectorNumElements();
18948	unsigned ShuffleNumElems = NumElems;
18949
18950	// If we artificially split a vector in two already, then the offsets in the
18951	// operands will all be based off of VecIn1, even those in VecIn2.
18952	unsigned Vec2Offset = DidSplitVec ? 0 : InVT1.getVectorNumElements();
18953
18954	uint64_t VTSize = VT.getFixedSizeInBits();
18955	uint64_t InVT1Size = InVT1.getFixedSizeInBits();
18956	uint64_t InVT2Size = InVT2.getFixedSizeInBits();
18957
18958	// We can't generate a shuffle node with mismatched input and output types.
18959	// Try to make the types match the type of the output.
18960	if (InVT1 != VT \|\| InVT2 != VT) {
18961	if ((VTSize % InVT1Size == 0) && InVT1 == InVT2) {
18962	// If the output vector length is a multiple of both input lengths,
18963	// we can concatenate them and pad the rest with undefs.
18964	unsigned NumConcats = VTSize / InVT1Size;
18965	assert(NumConcats >= 2 && "Concat needs at least two inputs!")((NumConcats >= 2 && "Concat needs at least two inputs!" ) ? static_cast<void> (0) : __assert_fail ("NumConcats >= 2 && \"Concat needs at least two inputs!\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 18965, __PRETTY_FUNCTION__));
18966	SmallVector<SDValue, 2> ConcatOps(NumConcats, DAG.getUNDEF(InVT1));
18967	ConcatOps[0] = VecIn1;
18968	ConcatOps[1] = VecIn2 ? VecIn2 : DAG.getUNDEF(InVT1);
18969	VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
18970	VecIn2 = SDValue();
18971	} else if (InVT1Size == VTSize * 2) {
18972	if (!TLI.isExtractSubvectorCheap(VT, InVT1, NumElems))
18973	return SDValue();
18974
18975	if (!VecIn2.getNode()) {
18976	// If we only have one input vector, and it's twice the size of the
18977	// output, split it in two.
18978	VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1,
18979	DAG.getVectorIdxConstant(NumElems, DL));
18980	VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1, ZeroIdx);
18981	// Since we now have shorter input vectors, adjust the offset of the
18982	// second vector's start.
18983	Vec2Offset = NumElems;
18984	} else if (InVT2Size <= InVT1Size) {
18985	// VecIn1 is wider than the output, and we have another, possibly
18986	// smaller input. Pad the smaller input with undefs, shuffle at the
18987	// input vector width, and extract the output.
18988	// The shuffle type is different than VT, so check legality again.
18989	if (LegalOperations &&
18990	!TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, InVT1))
18991	return SDValue();
18992
18993	// Legalizing INSERT_SUBVECTOR is tricky - you basically have to
18994	// lower it back into a BUILD_VECTOR. So if the inserted type is
18995	// illegal, don't even try.
18996	if (InVT1 != InVT2) {
18997	if (!TLI.isTypeLegal(InVT2))
18998	return SDValue();
18999	VecIn2 = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InVT1,
19000	DAG.getUNDEF(InVT1), VecIn2, ZeroIdx);
19001	}
19002	ShuffleNumElems = NumElems * 2;
19003	} else {
19004	// Both VecIn1 and VecIn2 are wider than the output, and VecIn2 is wider
19005	// than VecIn1. We can't handle this for now - this case will disappear
19006	// when we start sorting the vectors by type.
19007	return SDValue();
19008	}
19009	} else if (InVT2Size * 2 == VTSize && InVT1Size == VTSize) {
19010	SmallVector<SDValue, 2> ConcatOps(2, DAG.getUNDEF(InVT2));
19011	ConcatOps[0] = VecIn2;
19012	VecIn2 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
19013	} else {
19014	// TODO: Support cases where the length mismatch isn't exactly by a
19015	// factor of 2.
19016	// TODO: Move this check upwards, so that if we have bad type
19017	// mismatches, we don't create any DAG nodes.
19018	return SDValue();
19019	}
19020	}
19021
19022	// Initialize mask to undef.
19023	SmallVector<int, 8> Mask(ShuffleNumElems, -1);
19024
19025	// Only need to run up to the number of elements actually used, not the
19026	// total number of elements in the shuffle - if we are shuffling a wider
19027	// vector, the high lanes should be set to undef.
19028	for (unsigned i = 0; i != NumElems; ++i) {
19029	if (VectorMask[i] <= 0)
19030	continue;
19031
19032	unsigned ExtIndex = N->getOperand(i).getConstantOperandVal(1);
19033	if (VectorMask[i] == (int)LeftIdx) {
19034	Mask[i] = ExtIndex;
19035	} else if (VectorMask[i] == (int)LeftIdx + 1) {
19036	Mask[i] = Vec2Offset + ExtIndex;
19037	}
19038	}
19039
19040	// The type the input vectors may have changed above.
19041	InVT1 = VecIn1.getValueType();
19042
19043	// If we already have a VecIn2, it should have the same type as VecIn1.
19044	// If we don't, get an undef/zero vector of the appropriate type.
19045	VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(InVT1);
19046	assert(InVT1 == VecIn2.getValueType() && "Unexpected second input type.")((InVT1 == VecIn2.getValueType() && "Unexpected second input type." ) ? static_cast<void> (0) : __assert_fail ("InVT1 == VecIn2.getValueType() && \"Unexpected second input type.\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 19046, __PRETTY_FUNCTION__));
19047
19048	SDValue Shuffle = DAG.getVectorShuffle(InVT1, DL, VecIn1, VecIn2, Mask);
19049	if (ShuffleNumElems > NumElems)
19050	Shuffle = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Shuffle, ZeroIdx);
19051
19052	return Shuffle;
19053	}
19054
19055	static SDValue reduceBuildVecToShuffleWithZero(SDNode *BV, SelectionDAG &DAG) {
19056	assert(BV->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector")((BV->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector" ) ? static_cast<void> (0) : __assert_fail ("BV->getOpcode() == ISD::BUILD_VECTOR && \"Expected build vector\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 19056, __PRETTY_FUNCTION__));
19057
19058	// First, determine where the build vector is not undef.
19059	// TODO: We could extend this to handle zero elements as well as undefs.
19060	int NumBVOps = BV->getNumOperands();
19061	int ZextElt = -1;
19062	for (int i = 0; i != NumBVOps; ++i) {
19063	SDValue Op = BV->getOperand(i);
19064	if (Op.isUndef())
19065	continue;
19066	if (ZextElt == -1)
19067	ZextElt = i;
19068	else
19069	return SDValue();
19070	}
19071	// Bail out if there's no non-undef element.
19072	if (ZextElt == -1)
19073	return SDValue();
19074
19075	// The build vector contains some number of undef elements and exactly
19076	// one other element. That other element must be a zero-extended scalar
19077	// extracted from a vector at a constant index to turn this into a shuffle.
19078	// Also, require that the build vector does not implicitly truncate/extend
19079	// its elements.
19080	// TODO: This could be enhanced to allow ANY_EXTEND as well as ZERO_EXTEND.
19081	EVT VT = BV->getValueType(0);
19082	SDValue Zext = BV->getOperand(ZextElt);
19083	if (Zext.getOpcode() != ISD::ZERO_EXTEND \|\| !Zext.hasOneUse() \|\|
19084	Zext.getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT \|\|
19085	!isa<ConstantSDNode>(Zext.getOperand(0).getOperand(1)) \|\|
19086	Zext.getValueSizeInBits() != VT.getScalarSizeInBits())
19087	return SDValue();
19088
19089	// The zero-extend must be a multiple of the source size, and we must be
19090	// building a vector of the same size as the source of the extract element.
19091	SDValue Extract = Zext.getOperand(0);
19092	unsigned DestSize = Zext.getValueSizeInBits();
19093	unsigned SrcSize = Extract.getValueSizeInBits();
19094	if (DestSize % SrcSize != 0 \|\|
19095	Extract.getOperand(0).getValueSizeInBits() != VT.getSizeInBits())
19096	return SDValue();
19097
19098	// Create a shuffle mask that will combine the extracted element with zeros
19099	// and undefs.
19100	int ZextRatio = DestSize / SrcSize;
19101	int NumMaskElts = NumBVOps * ZextRatio;
19102	SmallVector<int, 32> ShufMask(NumMaskElts, -1);
19103	for (int i = 0; i != NumMaskElts; ++i) {
19104	if (i / ZextRatio == ZextElt) {
19105	// The low bits of the (potentially translated) extracted element map to
19106	// the source vector. The high bits map to zero. We will use a zero vector
19107	// as the 2nd source operand of the shuffle, so use the 1st element of
19108	// that vector (mask value is number-of-elements) for the high bits.
19109	if (i % ZextRatio == 0)
19110	ShufMask[i] = Extract.getConstantOperandVal(1);
19111	else
19112	ShufMask[i] = NumMaskElts;
19113	}
19114
19115	// Undef elements of the build vector remain undef because we initialize
19116	// the shuffle mask with -1.
19117	}
19118
19119	// buildvec undef, ..., (zext (extractelt V, IndexC)), undef... -->
19120	// bitcast (shuffle V, ZeroVec, VectorMask)
19121	SDLoc DL(BV);
19122	EVT VecVT = Extract.getOperand(0).getValueType();
19123	SDValue ZeroVec = DAG.getConstant(0, DL, VecVT);
19124	const TargetLowering &TLI = DAG.getTargetLoweringInfo();
19125	SDValue Shuf = TLI.buildLegalVectorShuffle(VecVT, DL, Extract.getOperand(0),
19126	ZeroVec, ShufMask, DAG);
19127	if (!Shuf)
19128	return SDValue();
19129	return DAG.getBitcast(VT, Shuf);
19130	}
19131
19132	// Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
19133	// operations. If the types of the vectors we're extracting from allow it,
19134	// turn this into a vector_shuffle node.
19135	SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
19136	SDLoc DL(N);
19137	EVT VT = N->getValueType(0);
19138
19139	// Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes.
19140	if (!isTypeLegal(VT))
19141	return SDValue();
19142
19143	if (SDValue V = reduceBuildVecToShuffleWithZero(N, DAG))
19144	return V;
19145
19146	// May only combine to shuffle after legalize if shuffle is legal.
19147	if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, VT))
19148	return SDValue();
19149
19150	bool UsesZeroVector = false;
19151	unsigned NumElems = N->getNumOperands();
19152
19153	// Record, for each element of the newly built vector, which input vector
19154	// that element comes from. -1 stands for undef, 0 for the zero vector,
19155	// and positive values for the input vectors.
19156	// VectorMask maps each element to its vector number, and VecIn maps vector
19157	// numbers to their initial SDValues.
19158
19159	SmallVector<int, 8> VectorMask(NumElems, -1);
19160	SmallVector<SDValue, 8> VecIn;
19161	VecIn.push_back(SDValue());
19162
19163	for (unsigned i = 0; i != NumElems; ++i) {
19164	SDValue Op = N->getOperand(i);
19165
19166	if (Op.isUndef())
19167	continue;
19168
19169	// See if we can use a blend with a zero vector.
19170	// TODO: Should we generalize this to a blend with an arbitrary constant
19171	// vector?
19172	if (isNullConstant(Op) \|\| isNullFPConstant(Op)) {
19173	UsesZeroVector = true;
19174	VectorMask[i] = 0;
19175	continue;
19176	}
19177
19178	// Not an undef or zero. If the input is something other than an
19179	// EXTRACT_VECTOR_ELT with an in-range constant index, bail out.
19180	if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT \|\|
19181	!isa<ConstantSDNode>(Op.getOperand(1)))
19182	return SDValue();
19183	SDValue ExtractedFromVec = Op.getOperand(0);
19184
19185	if (ExtractedFromVec.getValueType().isScalableVector())
19186	return SDValue();
19187
19188	const APInt &ExtractIdx = Op.getConstantOperandAPInt(1);
19189	if (ExtractIdx.uge(ExtractedFromVec.getValueType().getVectorNumElements()))
19190	return SDValue();
19191
19192	// All inputs must have the same element type as the output.
19193	if (VT.getVectorElementType() !=
19194	ExtractedFromVec.getValueType().getVectorElementType())
19195	return SDValue();
19196
19197	// Have we seen this input vector before?
19198	// The vectors are expected to be tiny (usually 1 or 2 elements), so using
19199	// a map back from SDValues to numbers isn't worth it.
19200	unsigned Idx = std::distance(VecIn.begin(), find(VecIn, ExtractedFromVec));
19201	if (Idx == VecIn.size())
19202	VecIn.push_back(ExtractedFromVec);
19203
19204	VectorMask[i] = Idx;
19205	}
19206
19207	// If we didn't find at least one input vector, bail out.
19208	if (VecIn.size() < 2)
19209	return SDValue();
19210
19211	// If all the Operands of BUILD_VECTOR extract from same
19212	// vector, then split the vector efficiently based on the maximum
19213	// vector access index and adjust the VectorMask and
19214	// VecIn accordingly.
19215	bool DidSplitVec = false;
19216	if (VecIn.size() == 2) {
19217	unsigned MaxIndex = 0;
19218	unsigned NearestPow2 = 0;
19219	SDValue Vec = VecIn.back();
19220	EVT InVT = Vec.getValueType();
19221	SmallVector<unsigned, 8> IndexVec(NumElems, 0);
19222
19223	for (unsigned i = 0; i < NumElems; i++) {
19224	if (VectorMask[i] <= 0)
19225	continue;
19226	unsigned Index = N->getOperand(i).getConstantOperandVal(1);
19227	IndexVec[i] = Index;
19228	MaxIndex = std::max(MaxIndex, Index);
19229	}
19230
19231	NearestPow2 = PowerOf2Ceil(MaxIndex);
19232	if (InVT.isSimple() && NearestPow2 > 2 && MaxIndex < NearestPow2 &&
19233	NumElems * 2 < NearestPow2) {
19234	unsigned SplitSize = NearestPow2 / 2;
19235	EVT SplitVT = EVT::getVectorVT(*DAG.getContext(),
19236	InVT.getVectorElementType(), SplitSize);
19237	if (TLI.isTypeLegal(SplitVT)) {
19238	SDValue VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
19239	DAG.getVectorIdxConstant(SplitSize, DL));
19240	SDValue VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
19241	DAG.getVectorIdxConstant(0, DL));
19242	VecIn.pop_back();
19243	VecIn.push_back(VecIn1);
19244	VecIn.push_back(VecIn2);
19245	DidSplitVec = true;
19246
19247	for (unsigned i = 0; i < NumElems; i++) {
19248	if (VectorMask[i] <= 0)
19249	continue;
19250	VectorMask[i] = (IndexVec[i] < SplitSize) ? 1 : 2;
19251	}
19252	}
19253	}
19254	}
19255
19256	// TODO: We want to sort the vectors by descending length, so that adjacent
19257	// pairs have similar length, and the longer vector is always first in the
19258	// pair.
19259
19260	// TODO: Should this fire if some of the input vectors has illegal type (like
19261	// it does now), or should we let legalization run its course first?
19262
19263	// Shuffle phase:
19264	// Take pairs of vectors, and shuffle them so that the result has elements
19265	// from these vectors in the correct places.
19266	// For example, given:
19267	// t10: i32 = extract_vector_elt t1, Constant:i64<0>
19268	// t11: i32 = extract_vector_elt t2, Constant:i64<0>
19269	// t12: i32 = extract_vector_elt t3, Constant:i64<0>
19270	// t13: i32 = extract_vector_elt t1, Constant:i64<1>
19271	// t14: v4i32 = BUILD_VECTOR t10, t11, t12, t13
19272	// We will generate:
19273	// t20: v4i32 = vector_shuffle<0,4,u,1> t1, t2
19274	// t21: v4i32 = vector_shuffle<u,u,0,u> t3, undef
19275	SmallVector<SDValue, 4> Shuffles;
19276	for (unsigned In = 0, Len = (VecIn.size() / 2); In < Len; ++In) {
19277	unsigned LeftIdx = 2 * In + 1;
19278	SDValue VecLeft = VecIn[LeftIdx];
19279	SDValue VecRight =
19280	(LeftIdx + 1) < VecIn.size() ? VecIn[LeftIdx + 1] : SDValue();
19281
19282	if (SDValue Shuffle = createBuildVecShuffle(DL, N, VectorMask, VecLeft,
19283	VecRight, LeftIdx, DidSplitVec))
19284	Shuffles.push_back(Shuffle);
19285	else
19286	return SDValue();
19287	}
19288
19289	// If we need the zero vector as an "ingredient" in the blend tree, add it
19290	// to the list of shuffles.
19291	if (UsesZeroVector)
19292	Shuffles.push_back(VT.isInteger() ? DAG.getConstant(0, DL, VT)
19293	: DAG.getConstantFP(0.0, DL, VT));
19294
19295	// If we only have one shuffle, we're done.
19296	if (Shuffles.size() == 1)
19297	return Shuffles[0];
19298
19299	// Update the vector mask to point to the post-shuffle vectors.
19300	for (int &Vec : VectorMask)
19301	if (Vec == 0)
19302	Vec = Shuffles.size() - 1;
19303	else
19304	Vec = (Vec - 1) / 2;
19305
19306	// More than one shuffle. Generate a binary tree of blends, e.g. if from
19307	// the previous step we got the set of shuffles t10, t11, t12, t13, we will
19308	// generate:
19309	// t10: v8i32 = vector_shuffle<0,8,u,u,u,u,u,u> t1, t2
19310	// t11: v8i32 = vector_shuffle<u,u,0,8,u,u,u,u> t3, t4
19311	// t12: v8i32 = vector_shuffle<u,u,u,u,0,8,u,u> t5, t6
19312	// t13: v8i32 = vector_shuffle<u,u,u,u,u,u,0,8> t7, t8
19313	// t20: v8i32 = vector_shuffle<0,1,10,11,u,u,u,u> t10, t11
19314	// t21: v8i32 = vector_shuffle<u,u,u,u,4,5,14,15> t12, t13
19315	// t30: v8i32 = vector_shuffle<0,1,2,3,12,13,14,15> t20, t21
19316
19317	// Make sure the initial size of the shuffle list is even.
19318	if (Shuffles.size() % 2)
19319	Shuffles.push_back(DAG.getUNDEF(VT));
19320
19321	for (unsigned CurSize = Shuffles.size(); CurSize > 1; CurSize /= 2) {
19322	if (CurSize % 2) {
19323	Shuffles[CurSize] = DAG.getUNDEF(VT);
19324	CurSize++;
19325	}
19326	for (unsigned In = 0, Len = CurSize / 2; In < Len; ++In) {
19327	int Left = 2 * In;
19328	int Right = 2 * In + 1;
19329	SmallVector<int, 8> Mask(NumElems, -1);
19330	for (unsigned i = 0; i != NumElems; ++i) {
19331	if (VectorMask[i] == Left) {
19332	Mask[i] = i;
19333	VectorMask[i] = In;
19334	} else if (VectorMask[i] == Right) {
19335	Mask[i] = i + NumElems;
19336	VectorMask[i] = In;
19337	}
19338	}
19339
19340	Shuffles[In] =
19341	DAG.getVectorShuffle(VT, DL, Shuffles[Left], Shuffles[Right], Mask);
19342	}
19343	}
19344	return Shuffles[0];
19345	}
19346
19347	// Try to turn a build vector of zero extends of extract vector elts into a
19348	// a vector zero extend and possibly an extract subvector.
19349	// TODO: Support sign extend?
19350	// TODO: Allow undef elements?
19351	SDValue DAGCombiner::convertBuildVecZextToZext(SDNode *N) {
19352	if (LegalOperations)
19353	return SDValue();
19354
19355	EVT VT = N->getValueType(0);
19356
19357	bool FoundZeroExtend = false;
19358	SDValue Op0 = N->getOperand(0);
19359	auto checkElem = [&](SDValue Op) -> int64_t {
19360	unsigned Opc = Op.getOpcode();
19361	FoundZeroExtend \|= (Opc == ISD::ZERO_EXTEND);
19362	if ((Opc == ISD::ZERO_EXTEND \|\| Opc == ISD::ANY_EXTEND) &&
19363	Op.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
19364	Op0.getOperand(0).getOperand(0) == Op.getOperand(0).getOperand(0))
19365	if (auto *C = dyn_cast<ConstantSDNode>(Op.getOperand(0).getOperand(1)))
19366	return C->getZExtValue();
19367	return -1;
19368	};
19369
19370	// Make sure the first element matches
19371	// (zext (extract_vector_elt X, C))
19372	int64_t Offset = checkElem(Op0);
19373	if (Offset < 0)
19374	return SDValue();
19375
19376	unsigned NumElems = N->getNumOperands();
19377	SDValue In = Op0.getOperand(0).getOperand(0);
19378	EVT InSVT = In.getValueType().getScalarType();
19379	EVT InVT = EVT::getVectorVT(*DAG.getContext(), InSVT, NumElems);
19380
19381	// Don't create an illegal input type after type legalization.
19382	if (LegalTypes && !TLI.isTypeLegal(InVT))
19383	return SDValue();
19384
19385	// Ensure all the elements come from the same vector and are adjacent.
19386	for (unsigned i = 1; i != NumElems; ++i) {
19387	if ((Offset + i) != checkElem(N->getOperand(i)))
19388	return SDValue();
19389	}
19390
19391	SDLoc DL(N);
19392	In = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InVT, In,
19393	Op0.getOperand(0).getOperand(1));
19394	return DAG.getNode(FoundZeroExtend ? ISD::ZERO_EXTEND : ISD::ANY_EXTEND, DL,
19395	VT, In);
19396	}
19397
19398	SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
19399	EVT VT = N->getValueType(0);
19400
19401	// A vector built entirely of undefs is undef.
19402	if (ISD::allOperandsUndef(N))
19403	return DAG.getUNDEF(VT);
19404
19405	// If this is a splat of a bitcast from another vector, change to a
19406	// concat_vector.
19407	// For example:
19408	// (build_vector (i64 (bitcast (v2i32 X))), (i64 (bitcast (v2i32 X)))) ->
19409	// (v2i64 (bitcast (concat_vectors (v2i32 X), (v2i32 X))))
19410	//
19411	// If X is a build_vector itself, the concat can become a larger build_vector.
19412	// TODO: Maybe this is useful for non-splat too?
19413	if (!LegalOperations) {
19414	if (SDValue Splat = cast<BuildVectorSDNode>(N)->getSplatValue()) {
19415	Splat = peekThroughBitcasts(Splat);
19416	EVT SrcVT = Splat.getValueType();
19417	if (SrcVT.isVector()) {
19418	unsigned NumElts = N->getNumOperands() * SrcVT.getVectorNumElements();
19419	EVT NewVT = EVT::getVectorVT(*DAG.getContext(),
19420	SrcVT.getVectorElementType(), NumElts);
19421	if (!LegalTypes \|\| TLI.isTypeLegal(NewVT)) {
19422	SmallVector<SDValue, 8> Ops(N->getNumOperands(), Splat);
19423	SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N),
19424	NewVT, Ops);
19425	return DAG.getBitcast(VT, Concat);
19426	}
19427	}
19428	}
19429	}
19430
19431	// A splat of a single element is a SPLAT_VECTOR if supported on the target.
19432	if (TLI.getOperationAction(ISD::SPLAT_VECTOR, VT) != TargetLowering::Expand)
19433	if (SDValue V = cast<BuildVectorSDNode>(N)->getSplatValue()) {
19434	assert(!V.isUndef() && "Splat of undef should have been handled earlier")((!V.isUndef() && "Splat of undef should have been handled earlier" ) ? static_cast<void> (0) : __assert_fail ("!V.isUndef() && \"Splat of undef should have been handled earlier\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 19434, __PRETTY_FUNCTION__));
19435	return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, V);
19436	}
19437
19438	// Check if we can express BUILD VECTOR via subvector extract.
19439	if (!LegalTypes && (N->getNumOperands() > 1)) {
19440	SDValue Op0 = N->getOperand(0);
19441	auto checkElem = [&](SDValue Op) -> uint64_t {
19442	if ((Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT) &&
19443	(Op0.getOperand(0) == Op.getOperand(0)))
19444	if (auto CNode = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
19445	return CNode->getZExtValue();
19446	return -1;
19447	};
19448
19449	int Offset = checkElem(Op0);
19450	for (unsigned i = 0; i < N->getNumOperands(); ++i) {
19451	if (Offset + i != checkElem(N->getOperand(i))) {
19452	Offset = -1;
19453	break;
19454	}
19455	}
19456
19457	if ((Offset == 0) &&
19458	(Op0.getOperand(0).getValueType() == N->getValueType(0)))
19459	return Op0.getOperand(0);
19460	if ((Offset != -1) &&
19461	((Offset % N->getValueType(0).getVectorNumElements()) ==
19462	0)) // IDX must be multiple of output size.
19463	return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), N->getValueType(0),
19464	Op0.getOperand(0), Op0.getOperand(1));
19465	}
19466
19467	if (SDValue V = convertBuildVecZextToZext(N))
19468	return V;
19469
19470	if (SDValue V = reduceBuildVecExtToExtBuildVec(N))
19471	return V;
19472
19473	if (SDValue V = reduceBuildVecTruncToBitCast(N))
19474	return V;
19475
19476	if (SDValue V = reduceBuildVecToShuffle(N))
19477	return V;
19478
19479	return SDValue();
19480	}
19481
19482	static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) {
19483	const TargetLowering &TLI = DAG.getTargetLoweringInfo();
19484	EVT OpVT = N->getOperand(0).getValueType();
19485
19486	// If the operands are legal vectors, leave them alone.
19487	if (TLI.isTypeLegal(OpVT))
19488	return SDValue();
19489
19490	SDLoc DL(N);
19491	EVT VT = N->getValueType(0);
19492	SmallVector<SDValue, 8> Ops;
19493
19494	EVT SVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
19495	SDValue ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
19496
19497	// Keep track of what we encounter.
19498	bool AnyInteger = false;
19499	bool AnyFP = false;
19500	for (const SDValue &Op : N->ops()) {
19501	if (ISD::BITCAST == Op.getOpcode() &&
19502	!Op.getOperand(0).getValueType().isVector())
19503	Ops.push_back(Op.getOperand(0));
19504	else if (ISD::UNDEF == Op.getOpcode())
19505	Ops.push_back(ScalarUndef);
19506	else
19507	return SDValue();
19508
19509	// Note whether we encounter an integer or floating point scalar.
19510	// If it's neither, bail out, it could be something weird like x86mmx.
19511	EVT LastOpVT = Ops.back().getValueType();
19512	if (LastOpVT.isFloatingPoint())
19513	AnyFP = true;
19514	else if (LastOpVT.isInteger())
19515	AnyInteger = true;
19516	else
19517	return SDValue();
19518	}
19519
19520	// If any of the operands is a floating point scalar bitcast to a vector,
19521	// use floating point types throughout, and bitcast everything.
19522	// Replace UNDEFs by another scalar UNDEF node, of the final desired type.
19523	if (AnyFP) {
19524	SVT = EVT::getFloatingPointVT(OpVT.getSizeInBits());
19525	ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
19526	if (AnyInteger) {
19527	for (SDValue &Op : Ops) {
19528	if (Op.getValueType() == SVT)
19529	continue;
19530	if (Op.isUndef())
19531	Op = ScalarUndef;
19532	else
19533	Op = DAG.getBitcast(SVT, Op);
19534	}
19535	}
19536	}
19537
19538	EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SVT,
19539	VT.getSizeInBits() / SVT.getSizeInBits());
19540	return DAG.getBitcast(VT, DAG.getBuildVector(VecVT, DL, Ops));
19541	}
19542
19543	// Check to see if this is a CONCAT_VECTORS of a bunch of EXTRACT_SUBVECTOR
19544	// operations. If so, and if the EXTRACT_SUBVECTOR vector inputs come from at
19545	// most two distinct vectors the same size as the result, attempt to turn this
19546	// into a legal shuffle.
19547	static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) {
19548	EVT VT = N->getValueType(0);
19549	EVT OpVT = N->getOperand(0).getValueType();
19550
19551	// We currently can't generate an appropriate shuffle for a scalable vector.
19552	if (VT.isScalableVector())
19553	return SDValue();
19554
19555	int NumElts = VT.getVectorNumElements();
19556	int NumOpElts = OpVT.getVectorNumElements();
19557
19558	SDValue SV0 = DAG.getUNDEF(VT), SV1 = DAG.getUNDEF(VT);
19559	SmallVector<int, 8> Mask;
19560
19561	for (SDValue Op : N->ops()) {
19562	Op = peekThroughBitcasts(Op);
19563
19564	// UNDEF nodes convert to UNDEF shuffle mask values.
19565	if (Op.isUndef()) {
19566	Mask.append((unsigned)NumOpElts, -1);
19567	continue;
19568	}
19569
19570	if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
19571	return SDValue();
19572
19573	// What vector are we extracting the subvector from and at what index?
19574	SDValue ExtVec = Op.getOperand(0);
19575	int ExtIdx = Op.getConstantOperandVal(1);
19576
19577	// We want the EVT of the original extraction to correctly scale the
19578	// extraction index.
19579	EVT ExtVT = ExtVec.getValueType();
19580	ExtVec = peekThroughBitcasts(ExtVec);
19581
19582	// UNDEF nodes convert to UNDEF shuffle mask values.
19583	if (ExtVec.isUndef()) {
19584	Mask.append((unsigned)NumOpElts, -1);
19585	continue;
19586	}
19587
19588	// Ensure that we are extracting a subvector from a vector the same
19589	// size as the result.
19590	if (ExtVT.getSizeInBits() != VT.getSizeInBits())
19591	return SDValue();
19592
19593	// Scale the subvector index to account for any bitcast.
19594	int NumExtElts = ExtVT.getVectorNumElements();
19595	if (0 == (NumExtElts % NumElts))
19596	ExtIdx /= (NumExtElts / NumElts);
19597	else if (0 == (NumElts % NumExtElts))
19598	ExtIdx *= (NumElts / NumExtElts);
19599	else
19600	return SDValue();
19601
19602	// At most we can reference 2 inputs in the final shuffle.
19603	if (SV0.isUndef() \|\| SV0 == ExtVec) {
19604	SV0 = ExtVec;
19605	for (int i = 0; i != NumOpElts; ++i)
19606	Mask.push_back(i + ExtIdx);
19607	} else if (SV1.isUndef() \|\| SV1 == ExtVec) {
19608	SV1 = ExtVec;
19609	for (int i = 0; i != NumOpElts; ++i)
19610	Mask.push_back(i + ExtIdx + NumElts);
19611	} else {
19612	return SDValue();
19613	}
19614	}
19615
19616	const TargetLowering &TLI = DAG.getTargetLoweringInfo();
19617	return TLI.buildLegalVectorShuffle(VT, SDLoc(N), DAG.getBitcast(VT, SV0),
19618	DAG.getBitcast(VT, SV1), Mask, DAG);
19619	}
19620
19621	static SDValue combineConcatVectorOfCasts(SDNode *N, SelectionDAG &DAG) {
19622	unsigned CastOpcode = N->getOperand(0).getOpcode();
19623	switch (CastOpcode) {
19624	case ISD::SINT_TO_FP:
19625	case ISD::UINT_TO_FP:
19626	case ISD::FP_TO_SINT:
19627	case ISD::FP_TO_UINT:
19628	// TODO: Allow more opcodes?
19629	// case ISD::BITCAST:
19630	// case ISD::TRUNCATE:
19631	// case ISD::ZERO_EXTEND:
19632	// case ISD::SIGN_EXTEND:
19633	// case ISD::FP_EXTEND:
19634	break;
19635	default:
19636	return SDValue();
19637	}
19638
19639	EVT SrcVT = N->getOperand(0).getOperand(0).getValueType();
19640	if (!SrcVT.isVector())
19641	return SDValue();
19642
19643	// All operands of the concat must be the same kind of cast from the same
19644	// source type.
19645	SmallVector<SDValue, 4> SrcOps;
19646	for (SDValue Op : N->ops()) {
19647	if (Op.getOpcode() != CastOpcode \|\| !Op.hasOneUse() \|\|
19648	Op.getOperand(0).getValueType() != SrcVT)
19649	return SDValue();
19650	SrcOps.push_back(Op.getOperand(0));
19651	}
19652
19653	// The wider cast must be supported by the target. This is unusual because
19654	// the operation support type parameter depends on the opcode. In addition,
19655	// check the other type in the cast to make sure this is really legal.
19656	EVT VT = N->getValueType(0);
19657	EVT SrcEltVT = SrcVT.getVectorElementType();
19658	ElementCount NumElts = SrcVT.getVectorElementCount() * N->getNumOperands();
19659	EVT ConcatSrcVT = EVT::getVectorVT(*DAG.getContext(), SrcEltVT, NumElts);
19660	const TargetLowering &TLI = DAG.getTargetLoweringInfo();
19661	switch (CastOpcode) {
19662	case ISD::SINT_TO_FP:
19663	case ISD::UINT_TO_FP:
19664	if (!TLI.isOperationLegalOrCustom(CastOpcode, ConcatSrcVT) \|\|
19665	!TLI.isTypeLegal(VT))
19666	return SDValue();
19667	break;
19668	case ISD::FP_TO_SINT:
19669	case ISD::FP_TO_UINT:
19670	if (!TLI.isOperationLegalOrCustom(CastOpcode, VT) \|\|
19671	!TLI.isTypeLegal(ConcatSrcVT))
19672	return SDValue();
19673	break;
19674	default:
19675	llvm_unreachable("Unexpected cast opcode")::llvm::llvm_unreachable_internal("Unexpected cast opcode", "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 19675);
19676	}
19677
19678	// concat (cast X), (cast Y)... -> cast (concat X, Y...)
19679	SDLoc DL(N);
19680	SDValue NewConcat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatSrcVT, SrcOps);
19681	return DAG.getNode(CastOpcode, DL, VT, NewConcat);
19682	}
19683
19684	SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
19685	// If we only have one input vector, we don't need to do any concatenation.
19686	if (N->getNumOperands() == 1)
19687	return N->getOperand(0);
19688
19689	// Check if all of the operands are undefs.
19690	EVT VT = N->getValueType(0);
19691	if (ISD::allOperandsUndef(N))
19692	return DAG.getUNDEF(VT);
19693
19694	// Optimize concat_vectors where all but the first of the vectors are undef.
19695	if (all_of(drop_begin(N->ops()),
19696	[](const SDValue &Op) { return Op.isUndef(); })) {
19697	SDValue In = N->getOperand(0);
19698	assert(In.getValueType().isVector() && "Must concat vectors")((In.getValueType().isVector() && "Must concat vectors" ) ? static_cast<void> (0) : __assert_fail ("In.getValueType().isVector() && \"Must concat vectors\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 19698, __PRETTY_FUNCTION__));
19699
19700	// If the input is a concat_vectors, just make a larger concat by padding
19701	// with smaller undefs.
19702	if (In.getOpcode() == ISD::CONCAT_VECTORS && In.hasOneUse()) {
19703	unsigned NumOps = N->getNumOperands() * In.getNumOperands();
19704	SmallVector<SDValue, 4> Ops(In->op_begin(), In->op_end());
19705	Ops.resize(NumOps, DAG.getUNDEF(Ops[0].getValueType()));
19706	return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
19707	}
19708
19709	SDValue Scalar = peekThroughOneUseBitcasts(In);
19710
19711	// concat_vectors(scalar_to_vector(scalar), undef) ->
19712	// scalar_to_vector(scalar)
19713	if (!LegalOperations && Scalar.getOpcode() == ISD::SCALAR_TO_VECTOR &&
19714	Scalar.hasOneUse()) {
19715	EVT SVT = Scalar.getValueType().getVectorElementType();
19716	if (SVT == Scalar.getOperand(0).getValueType())
19717	Scalar = Scalar.getOperand(0);
19718	}
19719
19720	// concat_vectors(scalar, undef) -> scalar_to_vector(scalar)
19721	if (!Scalar.getValueType().isVector()) {
19722	// If the bitcast type isn't legal, it might be a trunc of a legal type;
19723	// look through the trunc so we can still do the transform:
19724	// concat_vectors(trunc(scalar), undef) -> scalar_to_vector(scalar)
19725	if (Scalar->getOpcode() == ISD::TRUNCATE &&
19726	!TLI.isTypeLegal(Scalar.getValueType()) &&
19727	TLI.isTypeLegal(Scalar->getOperand(0).getValueType()))
19728	Scalar = Scalar->getOperand(0);
19729
19730	EVT SclTy = Scalar.getValueType();
19731
19732	if (!SclTy.isFloatingPoint() && !SclTy.isInteger())
19733	return SDValue();
19734
19735	// Bail out if the vector size is not a multiple of the scalar size.
19736	if (VT.getSizeInBits() % SclTy.getSizeInBits())
19737	return SDValue();
19738
19739	unsigned VNTNumElms = VT.getSizeInBits() / SclTy.getSizeInBits();
19740	if (VNTNumElms < 2)
19741	return SDValue();
19742
19743	EVT NVT = EVT::getVectorVT(*DAG.getContext(), SclTy, VNTNumElms);
19744	if (!TLI.isTypeLegal(NVT) \|\| !TLI.isTypeLegal(Scalar.getValueType()))
19745	return SDValue();
19746
19747	SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), NVT, Scalar);
19748	return DAG.getBitcast(VT, Res);
19749	}
19750	}
19751
19752	// Fold any combination of BUILD_VECTOR or UNDEF nodes into one BUILD_VECTOR.
19753	// We have already tested above for an UNDEF only concatenation.
19754	// fold (concat_vectors (BUILD_VECTOR A, B, ...), (BUILD_VECTOR C, D, ...))
19755	// -> (BUILD_VECTOR A, B, ..., C, D, ...)
19756	auto IsBuildVectorOrUndef = [](const SDValue &Op) {
19757	return ISD::UNDEF == Op.getOpcode() \|\| ISD::BUILD_VECTOR == Op.getOpcode();
19758	};
19759	if (llvm::all_of(N->ops(), IsBuildVectorOrUndef)) {
19760	SmallVector<SDValue, 8> Opnds;
19761	EVT SVT = VT.getScalarType();
19762
19763	EVT MinVT = SVT;
19764	if (!SVT.isFloatingPoint()) {
19765	// If BUILD_VECTOR are from built from integer, they may have different
19766	// operand types. Get the smallest type and truncate all operands to it.
19767	bool FoundMinVT = false;
19768	for (const SDValue &Op : N->ops())
19769	if (ISD::BUILD_VECTOR == Op.getOpcode()) {
19770	EVT OpSVT = Op.getOperand(0).getValueType();
19771	MinVT = (!FoundMinVT \|\| OpSVT.bitsLE(MinVT)) ? OpSVT : MinVT;
19772	FoundMinVT = true;
19773	}
19774	assert(FoundMinVT && "Concat vector type mismatch")((FoundMinVT && "Concat vector type mismatch") ? static_cast <void> (0) : __assert_fail ("FoundMinVT && \"Concat vector type mismatch\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 19774, __PRETTY_FUNCTION__));
19775	}
19776
19777	for (const SDValue &Op : N->ops()) {
19778	EVT OpVT = Op.getValueType();
19779	unsigned NumElts = OpVT.getVectorNumElements();
19780
19781	if (ISD::UNDEF == Op.getOpcode())
19782	Opnds.append(NumElts, DAG.getUNDEF(MinVT));
19783
19784	if (ISD::BUILD_VECTOR == Op.getOpcode()) {
19785	if (SVT.isFloatingPoint()) {
19786	assert(SVT == OpVT.getScalarType() && "Concat vector type mismatch")((SVT == OpVT.getScalarType() && "Concat vector type mismatch" ) ? static_cast<void> (0) : __assert_fail ("SVT == OpVT.getScalarType() && \"Concat vector type mismatch\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 19786, __PRETTY_FUNCTION__));
19787	Opnds.append(Op->op_begin(), Op->op_begin() + NumElts);
19788	} else {
19789	for (unsigned i = 0; i != NumElts; ++i)
19790	Opnds.push_back(
19791	DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinVT, Op.getOperand(i)));
19792	}
19793	}
19794	}
19795
19796	assert(VT.getVectorNumElements() == Opnds.size() &&((VT.getVectorNumElements() == Opnds.size() && "Concat vector type mismatch" ) ? static_cast<void> (0) : __assert_fail ("VT.getVectorNumElements() == Opnds.size() && \"Concat vector type mismatch\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 19797, __PRETTY_FUNCTION__))
19797	"Concat vector type mismatch")((VT.getVectorNumElements() == Opnds.size() && "Concat vector type mismatch" ) ? static_cast<void> (0) : __assert_fail ("VT.getVectorNumElements() == Opnds.size() && \"Concat vector type mismatch\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 19797, __PRETTY_FUNCTION__));
19798	return DAG.getBuildVector(VT, SDLoc(N), Opnds);
19799	}
19800
19801	// Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR.
19802	if (SDValue V = combineConcatVectorOfScalars(N, DAG))
19803	return V;
19804
19805	// Fold CONCAT_VECTORS of EXTRACT_SUBVECTOR (or undef) to VECTOR_SHUFFLE.
19806	if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT))
19807	if (SDValue V = combineConcatVectorOfExtracts(N, DAG))
19808	return V;
19809
19810	if (SDValue V = combineConcatVectorOfCasts(N, DAG))
19811	return V;
19812
19813	// Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR
19814	// nodes often generate nop CONCAT_VECTOR nodes. Scan the CONCAT_VECTOR
19815	// operands and look for a CONCAT operations that place the incoming vectors
19816	// at the exact same location.
19817	//
19818	// For scalable vectors, EXTRACT_SUBVECTOR indexes are implicitly scaled.
19819	SDValue SingleSource = SDValue();
19820	unsigned PartNumElem =
19821	N->getOperand(0).getValueType().getVectorMinNumElements();
19822
19823	for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
19824	SDValue Op = N->getOperand(i);
19825
19826	if (Op.isUndef())
19827	continue;
19828
19829	// Check if this is the identity extract:
19830	if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
19831	return SDValue();
19832
19833	// Find the single incoming vector for the extract_subvector.
19834	if (SingleSource.getNode()) {
19835	if (Op.getOperand(0) != SingleSource)
19836	return SDValue();
19837	} else {
19838	SingleSource = Op.getOperand(0);
19839
19840	// Check the source type is the same as the type of the result.
19841	// If not, this concat may extend the vector, so we can not
19842	// optimize it away.
19843	if (SingleSource.getValueType() != N->getValueType(0))
19844	return SDValue();
19845	}
19846
19847	// Check that we are reading from the identity index.
19848	unsigned IdentityIndex = i * PartNumElem;
19849	if (Op.getConstantOperandAPInt(1) != IdentityIndex)
19850	return SDValue();
19851	}
19852
19853	if (SingleSource.getNode())
19854	return SingleSource;
19855
19856	return SDValue();
19857	}
19858
19859	// Helper that peeks through INSERT_SUBVECTOR/CONCAT_VECTORS to find
19860	// if the subvector can be sourced for free.
19861	static SDValue getSubVectorSrc(SDValue V, SDValue Index, EVT SubVT) {
19862	if (V.getOpcode() == ISD::INSERT_SUBVECTOR &&
19863	V.getOperand(1).getValueType() == SubVT && V.getOperand(2) == Index) {
19864	return V.getOperand(1);
19865	}
19866	auto *IndexC = dyn_cast<ConstantSDNode>(Index);
19867	if (IndexC && V.getOpcode() == ISD::CONCAT_VECTORS &&
19868	V.getOperand(0).getValueType() == SubVT &&
19869	(IndexC->getZExtValue() % SubVT.getVectorMinNumElements()) == 0) {
19870	uint64_t SubIdx = IndexC->getZExtValue() / SubVT.getVectorMinNumElements();
19871	return V.getOperand(SubIdx);
19872	}
19873	return SDValue();
19874	}
19875
19876	static SDValue narrowInsertExtractVectorBinOp(SDNode *Extract,
19877	SelectionDAG &DAG,
19878	bool LegalOperations) {
19879	const TargetLowering &TLI = DAG.getTargetLoweringInfo();
19880	SDValue BinOp = Extract->getOperand(0);
19881	unsigned BinOpcode = BinOp.getOpcode();
19882	if (!TLI.isBinOp(BinOpcode) \|\| BinOp.getNode()->getNumValues() != 1)
19883	return SDValue();
19884
19885	EVT VecVT = BinOp.getValueType();
19886	SDValue Bop0 = BinOp.getOperand(0), Bop1 = BinOp.getOperand(1);
19887	if (VecVT != Bop0.getValueType() \|\| VecVT != Bop1.getValueType())
19888	return SDValue();
19889
19890	SDValue Index = Extract->getOperand(1);
19891	EVT SubVT = Extract->getValueType(0);
19892	if (!TLI.isOperationLegalOrCustom(BinOpcode, SubVT, LegalOperations))
19893	return SDValue();
19894
19895	SDValue Sub0 = getSubVectorSrc(Bop0, Index, SubVT);
19896	SDValue Sub1 = getSubVectorSrc(Bop1, Index, SubVT);
19897
19898	// TODO: We could handle the case where only 1 operand is being inserted by
19899	// creating an extract of the other operand, but that requires checking
19900	// number of uses and/or costs.
19901	if (!Sub0 \|\| !Sub1)
19902	return SDValue();
19903
19904	// We are inserting both operands of the wide binop only to extract back
19905	// to the narrow vector size. Eliminate all of the insert/extract:
19906	// ext (binop (ins ?, X, Index), (ins ?, Y, Index)), Index --> binop X, Y
19907	return DAG.getNode(BinOpcode, SDLoc(Extract), SubVT, Sub0, Sub1,
19908	BinOp->getFlags());
19909	}
19910
19911	/// If we are extracting a subvector produced by a wide binary operator try
19912	/// to use a narrow binary operator and/or avoid concatenation and extraction.
19913	static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG,
19914	bool LegalOperations) {
19915	// TODO: Refactor with the caller (visitEXTRACT_SUBVECTOR), so we can share
19916	// some of these bailouts with other transforms.
19917
19918	if (SDValue V = narrowInsertExtractVectorBinOp(Extract, DAG, LegalOperations))
19919	return V;
19920
19921	// The extract index must be a constant, so we can map it to a concat operand.
19922	auto *ExtractIndexC = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
19923	if (!ExtractIndexC)
19924	return SDValue();
19925
19926	// We are looking for an optionally bitcasted wide vector binary operator
19927	// feeding an extract subvector.
19928	const TargetLowering &TLI = DAG.getTargetLoweringInfo();
19929	SDValue BinOp = peekThroughBitcasts(Extract->getOperand(0));
19930	unsigned BOpcode = BinOp.getOpcode();
19931	if (!TLI.isBinOp(BOpcode) \|\| BinOp.getNode()->getNumValues() != 1)
19932	return SDValue();
19933
19934	// Exclude the fake form of fneg (fsub -0.0, x) because that is likely to be
19935	// reduced to the unary fneg when it is visited, and we probably want to deal
19936	// with fneg in a target-specific way.
19937	if (BOpcode == ISD::FSUB) {
19938	auto C = isConstOrConstSplatFP(BinOp.getOperand(0), /AllowUndefs*/ true);
19939	if (C && C->getValueAPF().isNegZero())
19940	return SDValue();
19941	}
19942
19943	// The binop must be a vector type, so we can extract some fraction of it.
19944	EVT WideBVT = BinOp.getValueType();
19945	// The optimisations below currently assume we are dealing with fixed length
19946	// vectors. It is possible to add support for scalable vectors, but at the
19947	// moment we've done no analysis to prove whether they are profitable or not.
19948	if (!WideBVT.isFixedLengthVector())
19949	return SDValue();
19950
19951	EVT VT = Extract->getValueType(0);
19952	unsigned ExtractIndex = ExtractIndexC->getZExtValue();
19953	assert(ExtractIndex % VT.getVectorNumElements() == 0 &&((ExtractIndex % VT.getVectorNumElements() == 0 && "Extract index is not a multiple of the vector length." ) ? static_cast<void> (0) : __assert_fail ("ExtractIndex % VT.getVectorNumElements() == 0 && \"Extract index is not a multiple of the vector length.\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 19954, __PRETTY_FUNCTION__))
19954	"Extract index is not a multiple of the vector length.")((ExtractIndex % VT.getVectorNumElements() == 0 && "Extract index is not a multiple of the vector length." ) ? static_cast<void> (0) : __assert_fail ("ExtractIndex % VT.getVectorNumElements() == 0 && \"Extract index is not a multiple of the vector length.\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 19954, __PRETTY_FUNCTION__));
19955
19956	// Bail out if this is not a proper multiple width extraction.
19957	unsigned WideWidth = WideBVT.getSizeInBits();
19958	unsigned NarrowWidth = VT.getSizeInBits();
19959	if (WideWidth % NarrowWidth != 0)
19960	return SDValue();
19961
19962	// Bail out if we are extracting a fraction of a single operation. This can
19963	// occur because we potentially looked through a bitcast of the binop.
19964	unsigned NarrowingRatio = WideWidth / NarrowWidth;
19965	unsigned WideNumElts = WideBVT.getVectorNumElements();
19966	if (WideNumElts % NarrowingRatio != 0)
19967	return SDValue();
19968
19969	// Bail out if the target does not support a narrower version of the binop.
19970	EVT NarrowBVT = EVT::getVectorVT(*DAG.getContext(), WideBVT.getScalarType(),
19971	WideNumElts / NarrowingRatio);
19972	if (!TLI.isOperationLegalOrCustomOrPromote(BOpcode, NarrowBVT))
19973	return SDValue();
19974
19975	// If extraction is cheap, we don't need to look at the binop operands
19976	// for concat ops. The narrow binop alone makes this transform profitable.
19977	// We can't just reuse the original extract index operand because we may have
19978	// bitcasted.
19979	unsigned ConcatOpNum = ExtractIndex / VT.getVectorNumElements();
19980	unsigned ExtBOIdx = ConcatOpNum * NarrowBVT.getVectorNumElements();
19981	if (TLI.isExtractSubvectorCheap(NarrowBVT, WideBVT, ExtBOIdx) &&
19982	BinOp.hasOneUse() && Extract->getOperand(0)->hasOneUse()) {
19983	// extract (binop B0, B1), N --> binop (extract B0, N), (extract B1, N)
19984	SDLoc DL(Extract);
19985	SDValue NewExtIndex = DAG.getVectorIdxConstant(ExtBOIdx, DL);
19986	SDValue X = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
19987	BinOp.getOperand(0), NewExtIndex);
19988	SDValue Y = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
19989	BinOp.getOperand(1), NewExtIndex);
19990	SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y,
19991	BinOp.getNode()->getFlags());
19992	return DAG.getBitcast(VT, NarrowBinOp);
19993	}
19994
19995	// Only handle the case where we are doubling and then halving. A larger ratio
19996	// may require more than two narrow binops to replace the wide binop.
19997	if (NarrowingRatio != 2)
19998	return SDValue();
19999
20000	// TODO: The motivating case for this transform is an x86 AVX1 target. That
20001	// target has temptingly almost legal versions of bitwise logic ops in 256-bit
20002	// flavors, but no other 256-bit integer support. This could be extended to
20003	// handle any binop, but that may require fixing/adding other folds to avoid
20004	// codegen regressions.
20005	if (BOpcode != ISD::AND && BOpcode != ISD::OR && BOpcode != ISD::XOR)
20006	return SDValue();
20007
20008	// We need at least one concatenation operation of a binop operand to make
20009	// this transform worthwhile. The concat must double the input vector sizes.
20010	auto GetSubVector = [ConcatOpNum](SDValue V) -> SDValue {
20011	if (V.getOpcode() == ISD::CONCAT_VECTORS && V.getNumOperands() == 2)
20012	return V.getOperand(ConcatOpNum);
20013	return SDValue();
20014	};
20015	SDValue SubVecL = GetSubVector(peekThroughBitcasts(BinOp.getOperand(0)));
20016	SDValue SubVecR = GetSubVector(peekThroughBitcasts(BinOp.getOperand(1)));
20017
20018	if (SubVecL \|\| SubVecR) {
20019	// If a binop operand was not the result of a concat, we must extract a
20020	// half-sized operand for our new narrow binop:
20021	// extract (binop (concat X1, X2), (concat Y1, Y2)), N --> binop XN, YN
20022	// extract (binop (concat X1, X2), Y), N --> binop XN, (extract Y, IndexC)
20023	// extract (binop X, (concat Y1, Y2)), N --> binop (extract X, IndexC), YN
20024	SDLoc DL(Extract);
20025	SDValue IndexC = DAG.getVectorIdxConstant(ExtBOIdx, DL);
20026	SDValue X = SubVecL ? DAG.getBitcast(NarrowBVT, SubVecL)
20027	: DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
20028	BinOp.getOperand(0), IndexC);
20029
20030	SDValue Y = SubVecR ? DAG.getBitcast(NarrowBVT, SubVecR)
20031	: DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
20032	BinOp.getOperand(1), IndexC);
20033
20034	SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y);
20035	return DAG.getBitcast(VT, NarrowBinOp);
20036	}
20037
20038	return SDValue();
20039	}
20040
20041	/// If we are extracting a subvector from a wide vector load, convert to a
20042	/// narrow load to eliminate the extraction:
20043	/// (extract_subvector (load wide vector)) --> (load narrow vector)
20044	static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {
20045	// TODO: Add support for big-endian. The offset calculation must be adjusted.
20046	if (DAG.getDataLayout().isBigEndian())
20047	return SDValue();
20048
20049	auto *Ld = dyn_cast<LoadSDNode>(Extract->getOperand(0));
20050	auto *ExtIdx = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
20051	if (!Ld \|\| Ld->getExtensionType() \|\| !Ld->isSimple() \|\|
20052	!ExtIdx)
20053	return SDValue();
20054
20055	// Allow targets to opt-out.
20056	EVT VT = Extract->getValueType(0);
20057
20058	// We can only create byte sized loads.
20059	if (!VT.isByteSized())
20060	return SDValue();
20061
20062	unsigned Index = ExtIdx->getZExtValue();
20063	unsigned NumElts = VT.getVectorMinNumElements();
20064
20065	// The definition of EXTRACT_SUBVECTOR states that the index must be a
20066	// multiple of the minimum number of elements in the result type.
20067	assert(Index % NumElts == 0 && "The extract subvector index is not a "((Index % NumElts == 0 && "The extract subvector index is not a " "multiple of the result's element count") ? static_cast<void > (0) : __assert_fail ("Index % NumElts == 0 && \"The extract subvector index is not a \" \"multiple of the result's element count\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 20068, __PRETTY_FUNCTION__))
20068	"multiple of the result's element count")((Index % NumElts == 0 && "The extract subvector index is not a " "multiple of the result's element count") ? static_cast<void > (0) : __assert_fail ("Index % NumElts == 0 && \"The extract subvector index is not a \" \"multiple of the result's element count\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 20068, __PRETTY_FUNCTION__));
20069
20070	// It's fine to use TypeSize here as we know the offset will not be negative.
20071	TypeSize Offset = VT.getStoreSize() * (Index / NumElts);
20072
20073	const TargetLowering &TLI = DAG.getTargetLoweringInfo();
20074	if (!TLI.shouldReduceLoadWidth(Ld, Ld->getExtensionType(), VT))
20075	return SDValue();
20076
20077	// The narrow load will be offset from the base address of the old load if
20078	// we are extracting from something besides index 0 (little-endian).
20079	SDLoc DL(Extract);
20080
20081	// TODO: Use "BaseIndexOffset" to make this more effective.
20082	SDValue NewAddr = DAG.getMemBasePlusOffset(Ld->getBasePtr(), Offset, DL);
20083
20084	uint64_t StoreSize = MemoryLocation::getSizeOrUnknown(VT.getStoreSize());
20085	MachineFunction &MF = DAG.getMachineFunction();
20086	MachineMemOperand *MMO;
20087	if (Offset.isScalable()) {
20088	MachinePointerInfo MPI =
20089	MachinePointerInfo(Ld->getPointerInfo().getAddrSpace());
20090	MMO = MF.getMachineMemOperand(Ld->getMemOperand(), MPI, StoreSize);
20091	} else
20092	MMO = MF.getMachineMemOperand(Ld->getMemOperand(), Offset.getFixedSize(),
20093	StoreSize);
20094
20095	SDValue NewLd = DAG.getLoad(VT, DL, Ld->getChain(), NewAddr, MMO);
20096	DAG.makeEquivalentMemoryOrdering(Ld, NewLd);
20097	return NewLd;
20098	}
20099
20100	SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {
20101	EVT NVT = N->getValueType(0);
20102	SDValue V = N->getOperand(0);
20103	uint64_t ExtIdx = N->getConstantOperandVal(1);
20104
20105	// Extract from UNDEF is UNDEF.
20106	if (V.isUndef())
20107	return DAG.getUNDEF(NVT);
20108
20109	if (TLI.isOperationLegalOrCustomOrPromote(ISD::LOAD, NVT))
20110	if (SDValue NarrowLoad = narrowExtractedVectorLoad(N, DAG))
20111	return NarrowLoad;
20112
20113	// Combine an extract of an extract into a single extract_subvector.
20114	// ext (ext X, C), 0 --> ext X, C
20115	if (ExtIdx == 0 && V.getOpcode() == ISD::EXTRACT_SUBVECTOR && V.hasOneUse()) {
20116	if (TLI.isExtractSubvectorCheap(NVT, V.getOperand(0).getValueType(),
20117	V.getConstantOperandVal(1)) &&
20118	TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NVT)) {
20119	return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT, V.getOperand(0),
20120	V.getOperand(1));
20121	}
20122	}
20123
20124	// Try to move vector bitcast after extract_subv by scaling extraction index:
20125	// extract_subv (bitcast X), Index --> bitcast (extract_subv X, Index')
20126	if (V.getOpcode() == ISD::BITCAST &&
20127	V.getOperand(0).getValueType().isVector()) {
20128	SDValue SrcOp = V.getOperand(0);
20129	EVT SrcVT = SrcOp.getValueType();
20130	unsigned SrcNumElts = SrcVT.getVectorMinNumElements();
20131	unsigned DestNumElts = V.getValueType().getVectorMinNumElements();
20132	if ((SrcNumElts % DestNumElts) == 0) {
20133	unsigned SrcDestRatio = SrcNumElts / DestNumElts;
20134	ElementCount NewExtEC = NVT.getVectorElementCount() * SrcDestRatio;
20135	EVT NewExtVT = EVT::getVectorVT(*DAG.getContext(), SrcVT.getScalarType(),
20136	NewExtEC);
20137	if (TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NewExtVT)) {
20138	SDLoc DL(N);
20139	SDValue NewIndex = DAG.getVectorIdxConstant(ExtIdx * SrcDestRatio, DL);
20140	SDValue NewExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT,
20141	V.getOperand(0), NewIndex);
20142	return DAG.getBitcast(NVT, NewExtract);
20143	}
20144	}
20145	if ((DestNumElts % SrcNumElts) == 0) {
20146	unsigned DestSrcRatio = DestNumElts / SrcNumElts;
20147	if (NVT.getVectorElementCount().isKnownMultipleOf(DestSrcRatio)) {
20148	ElementCount NewExtEC =
20149	NVT.getVectorElementCount().divideCoefficientBy(DestSrcRatio);
20150	EVT ScalarVT = SrcVT.getScalarType();
20151	if ((ExtIdx % DestSrcRatio) == 0) {
20152	SDLoc DL(N);
20153	unsigned IndexValScaled = ExtIdx / DestSrcRatio;
20154	EVT NewExtVT =
20155	EVT::getVectorVT(*DAG.getContext(), ScalarVT, NewExtEC);
20156	if (TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NewExtVT)) {
20157	SDValue NewIndex = DAG.getVectorIdxConstant(IndexValScaled, DL);
20158	SDValue NewExtract =
20159	DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT,
20160	V.getOperand(0), NewIndex);
20161	return DAG.getBitcast(NVT, NewExtract);
20162	}
20163	if (NewExtEC.isScalar() &&
20164	TLI.isOperationLegalOrCustom(ISD::EXTRACT_VECTOR_ELT, ScalarVT)) {
20165	SDValue NewIndex = DAG.getVectorIdxConstant(IndexValScaled, DL);
20166	SDValue NewExtract =
20167	DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT,
20168	V.getOperand(0), NewIndex);
20169	return DAG.getBitcast(NVT, NewExtract);
20170	}
20171	}
20172	}
20173	}
20174	}
20175
20176	if (V.getOpcode() == ISD::CONCAT_VECTORS) {
20177	unsigned ExtNumElts = NVT.getVectorMinNumElements();
20178	EVT ConcatSrcVT = V.getOperand(0).getValueType();
20179	assert(ConcatSrcVT.getVectorElementType() == NVT.getVectorElementType() &&((ConcatSrcVT.getVectorElementType() == NVT.getVectorElementType () && "Concat and extract subvector do not change element type" ) ? static_cast<void> (0) : __assert_fail ("ConcatSrcVT.getVectorElementType() == NVT.getVectorElementType() && \"Concat and extract subvector do not change element type\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 20180, __PRETTY_FUNCTION__))
20180	"Concat and extract subvector do not change element type")((ConcatSrcVT.getVectorElementType() == NVT.getVectorElementType () && "Concat and extract subvector do not change element type" ) ? static_cast<void> (0) : __assert_fail ("ConcatSrcVT.getVectorElementType() == NVT.getVectorElementType() && \"Concat and extract subvector do not change element type\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 20180, __PRETTY_FUNCTION__));
20181	assert((ExtIdx % ExtNumElts) == 0 &&(((ExtIdx % ExtNumElts) == 0 && "Extract index is not a multiple of the input vector length." ) ? static_cast<void> (0) : __assert_fail ("(ExtIdx % ExtNumElts) == 0 && \"Extract index is not a multiple of the input vector length.\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 20182, __PRETTY_FUNCTION__))
20182	"Extract index is not a multiple of the input vector length.")(((ExtIdx % ExtNumElts) == 0 && "Extract index is not a multiple of the input vector length." ) ? static_cast<void> (0) : __assert_fail ("(ExtIdx % ExtNumElts) == 0 && \"Extract index is not a multiple of the input vector length.\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 20182, __PRETTY_FUNCTION__));
20183
20184	unsigned ConcatSrcNumElts = ConcatSrcVT.getVectorMinNumElements();
20185	unsigned ConcatOpIdx = ExtIdx / ConcatSrcNumElts;
20186
20187	// If the concatenated source types match this extract, it's a direct
20188	// simplification:
20189	// extract_subvec (concat V1, V2, ...), i --> Vi
20190	if (ConcatSrcNumElts == ExtNumElts)
20191	return V.getOperand(ConcatOpIdx);
20192
20193	// If the concatenated source vectors are a multiple length of this extract,
20194	// then extract a fraction of one of those source vectors directly from a
20195	// concat operand. Example:
20196	// v2i8 extract_subvec (v16i8 concat (v8i8 X), (v8i8 Y), 14 -->
20197	// v2i8 extract_subvec v8i8 Y, 6
20198	if (NVT.isFixedLengthVector() && ConcatSrcNumElts % ExtNumElts == 0) {
20199	SDLoc DL(N);
20200	unsigned NewExtIdx = ExtIdx - ConcatOpIdx * ConcatSrcNumElts;
20201	assert(NewExtIdx + ExtNumElts <= ConcatSrcNumElts &&((NewExtIdx + ExtNumElts <= ConcatSrcNumElts && "Trying to extract from >1 concat operand?" ) ? static_cast<void> (0) : __assert_fail ("NewExtIdx + ExtNumElts <= ConcatSrcNumElts && \"Trying to extract from >1 concat operand?\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 20202, __PRETTY_FUNCTION__))
20202	"Trying to extract from >1 concat operand?")((NewExtIdx + ExtNumElts <= ConcatSrcNumElts && "Trying to extract from >1 concat operand?" ) ? static_cast<void> (0) : __assert_fail ("NewExtIdx + ExtNumElts <= ConcatSrcNumElts && \"Trying to extract from >1 concat operand?\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 20202, __PRETTY_FUNCTION__));
20203	assert(NewExtIdx % ExtNumElts == 0 &&((NewExtIdx % ExtNumElts == 0 && "Extract index is not a multiple of the input vector length." ) ? static_cast<void> (0) : __assert_fail ("NewExtIdx % ExtNumElts == 0 && \"Extract index is not a multiple of the input vector length.\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 20204, __PRETTY_FUNCTION__))
20204	"Extract index is not a multiple of the input vector length.")((NewExtIdx % ExtNumElts == 0 && "Extract index is not a multiple of the input vector length." ) ? static_cast<void> (0) : __assert_fail ("NewExtIdx % ExtNumElts == 0 && \"Extract index is not a multiple of the input vector length.\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 20204, __PRETTY_FUNCTION__));
20205	SDValue NewIndexC = DAG.getVectorIdxConstant(NewExtIdx, DL);
20206	return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NVT,
20207	V.getOperand(ConcatOpIdx), NewIndexC);
20208	}
20209	}
20210
20211	V = peekThroughBitcasts(V);
20212
20213	// If the input is a build vector. Try to make a smaller build vector.
20214	if (V.getOpcode() == ISD::BUILD_VECTOR) {
20215	EVT InVT = V.getValueType();
20216	unsigned ExtractSize = NVT.getSizeInBits();
20217	unsigned EltSize = InVT.getScalarSizeInBits();
20218	// Only do this if we won't split any elements.
20219	if (ExtractSize % EltSize == 0) {
20220	unsigned NumElems = ExtractSize / EltSize;
20221	EVT EltVT = InVT.getVectorElementType();
20222	EVT ExtractVT =
20223	NumElems == 1 ? EltVT
20224	: EVT::getVectorVT(*DAG.getContext(), EltVT, NumElems);
20225	if ((Level < AfterLegalizeDAG \|\|
20226	(NumElems == 1 \|\|
20227	TLI.isOperationLegal(ISD::BUILD_VECTOR, ExtractVT))) &&
20228	(!LegalTypes \|\| TLI.isTypeLegal(ExtractVT))) {
20229	unsigned IdxVal = (ExtIdx * NVT.getScalarSizeInBits()) / EltSize;
20230
20231	if (NumElems == 1) {
20232	SDValue Src = V->getOperand(IdxVal);
20233	if (EltVT != Src.getValueType())
20234	Src = DAG.getNode(ISD::TRUNCATE, SDLoc(N), InVT, Src);
20235	return DAG.getBitcast(NVT, Src);
20236	}
20237
20238	// Extract the pieces from the original build_vector.
20239	SDValue BuildVec = DAG.getBuildVector(ExtractVT, SDLoc(N),
20240	V->ops().slice(IdxVal, NumElems));
20241	return DAG.getBitcast(NVT, BuildVec);
20242	}
20243	}
20244	}
20245
20246	if (V.getOpcode() == ISD::INSERT_SUBVECTOR) {
20247	// Handle only simple case where vector being inserted and vector
20248	// being extracted are of same size.
20249	EVT SmallVT = V.getOperand(1).getValueType();
20250	if (!NVT.bitsEq(SmallVT))
20251	return SDValue();
20252
20253	// Combine:
20254	// (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx)
20255	// Into:
20256	// indices are equal or bit offsets are equal => V1
20257	// otherwise => (extract_subvec V1, ExtIdx)
20258	uint64_t InsIdx = V.getConstantOperandVal(2);
20259	if (InsIdx * SmallVT.getScalarSizeInBits() ==
20260	ExtIdx * NVT.getScalarSizeInBits())
20261	return DAG.getBitcast(NVT, V.getOperand(1));
20262	return DAG.getNode(
20263	ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT,
20264	DAG.getBitcast(N->getOperand(0).getValueType(), V.getOperand(0)),
20265	N->getOperand(1));
20266	}
20267
20268	if (SDValue NarrowBOp = narrowExtractedVectorBinOp(N, DAG, LegalOperations))
20269	return NarrowBOp;
20270
20271	if (SimplifyDemandedVectorElts(SDValue(N, 0)))
20272	return SDValue(N, 0);
20273
20274	return SDValue();
20275	}
20276
20277	/// Try to convert a wide shuffle of concatenated vectors into 2 narrow shuffles
20278	/// followed by concatenation. Narrow vector ops may have better performance
20279	/// than wide ops, and this can unlock further narrowing of other vector ops.
20280	/// Targets can invert this transform later if it is not profitable.
20281	static SDValue foldShuffleOfConcatUndefs(ShuffleVectorSDNode *Shuf,
20282	SelectionDAG &DAG) {
20283	SDValue N0 = Shuf->getOperand(0), N1 = Shuf->getOperand(1);
20284	if (N0.getOpcode() != ISD::CONCAT_VECTORS \|\| N0.getNumOperands() != 2 \|\|
20285	N1.getOpcode() != ISD::CONCAT_VECTORS \|\| N1.getNumOperands() != 2 \|\|
20286	!N0.getOperand(1).isUndef() \|\| !N1.getOperand(1).isUndef())
20287	return SDValue();
20288
20289	// Split the wide shuffle mask into halves. Any mask element that is accessing
20290	// operand 1 is offset down to account for narrowing of the vectors.
20291	ArrayRef<int> Mask = Shuf->getMask();
20292	EVT VT = Shuf->getValueType(0);
20293	unsigned NumElts = VT.getVectorNumElements();
20294	unsigned HalfNumElts = NumElts / 2;
20295	SmallVector<int, 16> Mask0(HalfNumElts, -1);
20296	SmallVector<int, 16> Mask1(HalfNumElts, -1);
20297	for (unsigned i = 0; i != NumElts; ++i) {
20298	if (Mask[i] == -1)
20299	continue;
20300	int M = Mask[i] < (int)NumElts ? Mask[i] : Mask[i] - (int)HalfNumElts;
20301	if (i < HalfNumElts)
20302	Mask0[i] = M;
20303	else
20304	Mask1[i - HalfNumElts] = M;
20305	}
20306
20307	// Ask the target if this is a valid transform.
20308	const TargetLowering &TLI = DAG.getTargetLoweringInfo();
20309	EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(),
20310	HalfNumElts);
20311	if (!TLI.isShuffleMaskLegal(Mask0, HalfVT) \|\|
20312	!TLI.isShuffleMaskLegal(Mask1, HalfVT))
20313	return SDValue();
20314
20315	// shuffle (concat X, undef), (concat Y, undef), Mask -->
20316	// concat (shuffle X, Y, Mask0), (shuffle X, Y, Mask1)
20317	SDValue X = N0.getOperand(0), Y = N1.getOperand(0);
20318	SDLoc DL(Shuf);
20319	SDValue Shuf0 = DAG.getVectorShuffle(HalfVT, DL, X, Y, Mask0);
20320	SDValue Shuf1 = DAG.getVectorShuffle(HalfVT, DL, X, Y, Mask1);
20321	return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Shuf0, Shuf1);
20322	}
20323
20324	// Tries to turn a shuffle of two CONCAT_VECTORS into a single concat,
20325	// or turn a shuffle of a single concat into simpler shuffle then concat.
20326	static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
20327	EVT VT = N->getValueType(0);
20328	unsigned NumElts = VT.getVectorNumElements();
20329
20330	SDValue N0 = N->getOperand(0);
20331	SDValue N1 = N->getOperand(1);
20332	ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
20333	ArrayRef<int> Mask = SVN->getMask();
20334
20335	SmallVector<SDValue, 4> Ops;
20336	EVT ConcatVT = N0.getOperand(0).getValueType();
20337	unsigned NumElemsPerConcat = ConcatVT.getVectorNumElements();
20338	unsigned NumConcats = NumElts / NumElemsPerConcat;
20339
20340	auto IsUndefMaskElt = [](int i) { return i == -1; };
20341
20342	// Special case: shuffle(concat(A,B)) can be more efficiently represented
20343	// as concat(shuffle(A,B),UNDEF) if the shuffle doesn't set any of the high
20344	// half vector elements.
20345	if (NumElemsPerConcat * 2 == NumElts && N1.isUndef() &&
20346	llvm::all_of(Mask.slice(NumElemsPerConcat, NumElemsPerConcat),
20347	IsUndefMaskElt)) {
20348	N0 = DAG.getVectorShuffle(ConcatVT, SDLoc(N), N0.getOperand(0),
20349	N0.getOperand(1),
20350	Mask.slice(0, NumElemsPerConcat));
20351	N1 = DAG.getUNDEF(ConcatVT);
20352	return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N0, N1);
20353	}
20354
20355	// Look at every vector that's inserted. We're looking for exact
20356	// subvector-sized copies from a concatenated vector
20357	for (unsigned I = 0; I != NumConcats; ++I) {
20358	unsigned Begin = I * NumElemsPerConcat;
20359	ArrayRef<int> SubMask = Mask.slice(Begin, NumElemsPerConcat);
20360
20361	// Make sure we're dealing with a copy.
20362	if (llvm::all_of(SubMask, IsUndefMaskElt)) {
20363	Ops.push_back(DAG.getUNDEF(ConcatVT));
20364	continue;
20365	}
20366
20367	int OpIdx = -1;
20368	for (int i = 0; i != (int)NumElemsPerConcat; ++i) {
20369	if (IsUndefMaskElt(SubMask[i]))
20370	continue;
20371	if ((SubMask[i] % (int)NumElemsPerConcat) != i)
20372	return SDValue();
20373	int EltOpIdx = SubMask[i] / NumElemsPerConcat;
20374	if (0 <= OpIdx && EltOpIdx != OpIdx)
20375	return SDValue();
20376	OpIdx = EltOpIdx;
20377	}
20378	assert(0 <= OpIdx && "Unknown concat_vectors op")((0 <= OpIdx && "Unknown concat_vectors op") ? static_cast <void> (0) : __assert_fail ("0 <= OpIdx && \"Unknown concat_vectors op\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 20378, __PRETTY_FUNCTION__));
20379
20380	if (OpIdx < (int)N0.getNumOperands())
20381	Ops.push_back(N0.getOperand(OpIdx));
20382	else
20383	Ops.push_back(N1.getOperand(OpIdx - N0.getNumOperands()));
20384	}
20385
20386	return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
20387	}
20388
20389	// Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
20390	// BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
20391	//
20392	// SHUFFLE(BUILD_VECTOR(), BUILD_VECTOR()) -> BUILD_VECTOR() is always
20393	// a simplification in some sense, but it isn't appropriate in general: some
20394	// BUILD_VECTORs are substantially cheaper than others. The general case
20395	// of a BUILD_VECTOR requires inserting each element individually (or
20396	// performing the equivalent in a temporary stack variable). A BUILD_VECTOR of
20397	// all constants is a single constant pool load. A BUILD_VECTOR where each
20398	// element is identical is a splat. A BUILD_VECTOR where most of the operands
20399	// are undef lowers to a small number of element insertions.
20400	//
20401	// To deal with this, we currently use a bunch of mostly arbitrary heuristics.
20402	// We don't fold shuffles where one side is a non-zero constant, and we don't
20403	// fold shuffles if the resulting (non-splat) BUILD_VECTOR would have duplicate
20404	// non-constant operands. This seems to work out reasonably well in practice.
20405	static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN,
20406	SelectionDAG &DAG,
20407	const TargetLowering &TLI) {
20408	EVT VT = SVN->getValueType(0);
20409	unsigned NumElts = VT.getVectorNumElements();
20410	SDValue N0 = SVN->getOperand(0);
20411	SDValue N1 = SVN->getOperand(1);
20412
20413	if (!N0->hasOneUse())
20414	return SDValue();
20415
20416	// If only one of N1,N2 is constant, bail out if it is not ALL_ZEROS as
20417	// discussed above.
20418	if (!N1.isUndef()) {
20419	if (!N1->hasOneUse())
20420	return SDValue();
20421
20422	bool N0AnyConst = isAnyConstantBuildVector(N0);
20423	bool N1AnyConst = isAnyConstantBuildVector(N1);
20424	if (N0AnyConst && !N1AnyConst && !ISD::isBuildVectorAllZeros(N0.getNode()))
20425	return SDValue();
20426	if (!N0AnyConst && N1AnyConst && !ISD::isBuildVectorAllZeros(N1.getNode()))
20427	return SDValue();
20428	}
20429
20430	// If both inputs are splats of the same value then we can safely merge this
20431	// to a single BUILD_VECTOR with undef elements based on the shuffle mask.
20432	bool IsSplat = false;
20433	auto *BV0 = dyn_cast<BuildVectorSDNode>(N0);
20434	auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
20435	if (BV0 && BV1)
20436	if (SDValue Splat0 = BV0->getSplatValue())
20437	IsSplat = (Splat0 == BV1->getSplatValue());
20438
20439	SmallVector<SDValue, 8> Ops;
20440	SmallSet<SDValue, 16> DuplicateOps;
20441	for (int M : SVN->getMask()) {
20442	SDValue Op = DAG.getUNDEF(VT.getScalarType());
20443	if (M >= 0) {
20444	int Idx = M < (int)NumElts ? M : M - NumElts;
20445	SDValue &S = (M < (int)NumElts ? N0 : N1);
20446	if (S.getOpcode() == ISD::BUILD_VECTOR) {
20447	Op = S.getOperand(Idx);
20448	} else if (S.getOpcode() == ISD::SCALAR_TO_VECTOR) {
20449	SDValue Op0 = S.getOperand(0);
20450	Op = Idx == 0 ? Op0 : DAG.getUNDEF(Op0.getValueType());
20451	} else {
20452	// Operand can't be combined - bail out.
20453	return SDValue();
20454	}
20455	}
20456
20457	// Don't duplicate a non-constant BUILD_VECTOR operand unless we're
20458	// generating a splat; semantically, this is fine, but it's likely to
20459	// generate low-quality code if the target can't reconstruct an appropriate
20460	// shuffle.
20461	if (!Op.isUndef() && !isa<ConstantSDNode>(Op) && !isa<ConstantFPSDNode>(Op))
20462	if (!IsSplat && !DuplicateOps.insert(Op).second)
20463	return SDValue();
20464
20465	Ops.push_back(Op);
20466	}
20467
20468	// BUILD_VECTOR requires all inputs to be of the same type, find the
20469	// maximum type and extend them all.
20470	EVT SVT = VT.getScalarType();
20471	if (SVT.isInteger())
20472	for (SDValue &Op : Ops)
20473	SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT);
20474	if (SVT != VT.getScalarType())
20475	for (SDValue &Op : Ops)
20476	Op = TLI.isZExtFree(Op.getValueType(), SVT)
20477	? DAG.getZExtOrTrunc(Op, SDLoc(SVN), SVT)
20478	: DAG.getSExtOrTrunc(Op, SDLoc(SVN), SVT);
20479	return DAG.getBuildVector(VT, SDLoc(SVN), Ops);
20480	}
20481
20482	// Match shuffles that can be converted to any_vector_extend_in_reg.
20483	// This is often generated during legalization.
20484	// e.g. v4i32 <0,u,1,u> -> (v2i64 any_vector_extend_in_reg(v4i32 src))
20485	// TODO Add support for ZERO_EXTEND_VECTOR_INREG when we have a test case.
20486	static SDValue combineShuffleToVectorExtend(ShuffleVectorSDNode *SVN,
20487	SelectionDAG &DAG,
20488	const TargetLowering &TLI,
20489	bool LegalOperations) {
20490	EVT VT = SVN->getValueType(0);
20491	bool IsBigEndian = DAG.getDataLayout().isBigEndian();
20492
20493	// TODO Add support for big-endian when we have a test case.
20494	if (!VT.isInteger() \|\| IsBigEndian)
20495	return SDValue();
20496
20497	unsigned NumElts = VT.getVectorNumElements();
20498	unsigned EltSizeInBits = VT.getScalarSizeInBits();
20499	ArrayRef<int> Mask = SVN->getMask();
20500	SDValue N0 = SVN->getOperand(0);
20501
20502	// shuffle<0,-1,1,-1> == (v2i64 anyextend_vector_inreg(v4i32))
20503	auto isAnyExtend = [&Mask, &NumElts](unsigned Scale) {
20504	for (unsigned i = 0; i != NumElts; ++i) {
20505	if (Mask[i] < 0)
20506	continue;
20507	if ((i % Scale) == 0 && Mask[i] == (int)(i / Scale))
20508	continue;
20509	return false;
20510	}
20511	return true;
20512	};
20513
20514	// Attempt to match a '*_extend_vector_inreg' shuffle, we just search for
20515	// power-of-2 extensions as they are the most likely.
20516	for (unsigned Scale = 2; Scale < NumElts; Scale *= 2) {
20517	// Check for non power of 2 vector sizes
20518	if (NumElts % Scale != 0)
20519	continue;
20520	if (!isAnyExtend(Scale))
20521	continue;
20522
20523	EVT OutSVT = EVT::getIntegerVT(DAG.getContext(), EltSizeInBits Scale);
20524	EVT OutVT = EVT::getVectorVT(*DAG.getContext(), OutSVT, NumElts / Scale);
20525	// Never create an illegal type. Only create unsupported operations if we
20526	// are pre-legalization.
20527	if (TLI.isTypeLegal(OutVT))
20528	if (!LegalOperations \|\|
20529	TLI.isOperationLegalOrCustom(ISD::ANY_EXTEND_VECTOR_INREG, OutVT))
20530	return DAG.getBitcast(VT,
20531	DAG.getNode(ISD::ANY_EXTEND_VECTOR_INREG,
20532	SDLoc(SVN), OutVT, N0));
20533	}
20534
20535	return SDValue();
20536	}
20537
20538	// Detect 'truncate_vector_inreg' style shuffles that pack the lower parts of
20539	// each source element of a large type into the lowest elements of a smaller
20540	// destination type. This is often generated during legalization.
20541	// If the source node itself was a '*_extend_vector_inreg' node then we should
20542	// then be able to remove it.
20543	static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN,
20544	SelectionDAG &DAG) {
20545	EVT VT = SVN->getValueType(0);
20546	bool IsBigEndian = DAG.getDataLayout().isBigEndian();
20547
20548	// TODO Add support for big-endian when we have a test case.
20549	if (!VT.isInteger() \|\| IsBigEndian)
20550	return SDValue();
20551
20552	SDValue N0 = peekThroughBitcasts(SVN->getOperand(0));
20553
20554	unsigned Opcode = N0.getOpcode();
20555	if (Opcode != ISD::ANY_EXTEND_VECTOR_INREG &&
20556	Opcode != ISD::SIGN_EXTEND_VECTOR_INREG &&
20557	Opcode != ISD::ZERO_EXTEND_VECTOR_INREG)
20558	return SDValue();
20559
20560	SDValue N00 = N0.getOperand(0);
20561	ArrayRef<int> Mask = SVN->getMask();
20562	unsigned NumElts = VT.getVectorNumElements();
20563	unsigned EltSizeInBits = VT.getScalarSizeInBits();
20564	unsigned ExtSrcSizeInBits = N00.getScalarValueSizeInBits();
20565	unsigned ExtDstSizeInBits = N0.getScalarValueSizeInBits();
20566
20567	if (ExtDstSizeInBits % ExtSrcSizeInBits != 0)
20568	return SDValue();
20569	unsigned ExtScale = ExtDstSizeInBits / ExtSrcSizeInBits;
20570
20571	// (v4i32 truncate_vector_inreg(v2i64)) == shuffle<0,2-1,-1>
20572	// (v8i16 truncate_vector_inreg(v4i32)) == shuffle<0,2,4,6,-1,-1,-1,-1>
20573	// (v8i16 truncate_vector_inreg(v2i64)) == shuffle<0,4,-1,-1,-1,-1,-1,-1>
20574	auto isTruncate = [&Mask, &NumElts](unsigned Scale) {
20575	for (unsigned i = 0; i != NumElts; ++i) {
20576	if (Mask[i] < 0)
20577	continue;
20578	if ((i * Scale) < NumElts && Mask[i] == (int)(i * Scale))
20579	continue;
20580	return false;
20581	}
20582	return true;
20583	};
20584
20585	// At the moment we just handle the case where we've truncated back to the
20586	// same size as before the extension.
20587	// TODO: handle more extension/truncation cases as cases arise.
20588	if (EltSizeInBits != ExtSrcSizeInBits)
20589	return SDValue();
20590
20591	// We can remove *extend_vector_inreg only if the truncation happens at
20592	// the same scale as the extension.
20593	if (isTruncate(ExtScale))
20594	return DAG.getBitcast(VT, N00);
20595
20596	return SDValue();
20597	}
20598
20599	// Combine shuffles of splat-shuffles of the form:
20600	// shuffle (shuffle V, undef, splat-mask), undef, M
20601	// If splat-mask contains undef elements, we need to be careful about
20602	// introducing undef's in the folded mask which are not the result of composing
20603	// the masks of the shuffles.
20604	static SDValue combineShuffleOfSplatVal(ShuffleVectorSDNode *Shuf,
20605	SelectionDAG &DAG) {
20606	if (!Shuf->getOperand(1).isUndef())
20607	return SDValue();
20608	auto *Splat = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0));
20609	if (!Splat \|\| !Splat->isSplat())
20610	return SDValue();
20611
20612	ArrayRef<int> ShufMask = Shuf->getMask();
20613	ArrayRef<int> SplatMask = Splat->getMask();
20614	assert(ShufMask.size() == SplatMask.size() && "Mask length mismatch")((ShufMask.size() == SplatMask.size() && "Mask length mismatch" ) ? static_cast<void> (0) : __assert_fail ("ShufMask.size() == SplatMask.size() && \"Mask length mismatch\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 20614, __PRETTY_FUNCTION__));
20615
20616	// Prefer simplifying to the splat-shuffle, if possible. This is legal if
20617	// every undef mask element in the splat-shuffle has a corresponding undef
20618	// element in the user-shuffle's mask or if the composition of mask elements
20619	// would result in undef.
20620	// Examples for (shuffle (shuffle v, undef, SplatMask), undef, UserMask):
20621	// * UserMask=[0,2,u,u], SplatMask=[2,u,2,u] -> [2,2,u,u]
20622	// In this case it is not legal to simplify to the splat-shuffle because we
20623	// may be exposing the users of the shuffle an undef element at index 1
20624	// which was not there before the combine.
20625	// * UserMask=[0,u,2,u], SplatMask=[2,u,2,u] -> [2,u,2,u]
20626	// In this case the composition of masks yields SplatMask, so it's ok to
20627	// simplify to the splat-shuffle.
20628	// * UserMask=[3,u,2,u], SplatMask=[2,u,2,u] -> [u,u,2,u]
20629	// In this case the composed mask includes all undef elements of SplatMask
20630	// and in addition sets element zero to undef. It is safe to simplify to
20631	// the splat-shuffle.
20632	auto CanSimplifyToExistingSplat = [](ArrayRef<int> UserMask,
20633	ArrayRef<int> SplatMask) {
20634	for (unsigned i = 0, e = UserMask.size(); i != e; ++i)
20635	if (UserMask[i] != -1 && SplatMask[i] == -1 &&
20636	SplatMask[UserMask[i]] != -1)
20637	return false;
20638	return true;
20639	};
20640	if (CanSimplifyToExistingSplat(ShufMask, SplatMask))
20641	return Shuf->getOperand(0);
20642
20643	// Create a new shuffle with a mask that is composed of the two shuffles'
20644	// masks.
20645	SmallVector<int, 32> NewMask;
20646	for (int Idx : ShufMask)
20647	NewMask.push_back(Idx == -1 ? -1 : SplatMask[Idx]);
20648
20649	return DAG.getVectorShuffle(Splat->getValueType(0), SDLoc(Splat),
20650	Splat->getOperand(0), Splat->getOperand(1),
20651	NewMask);
20652	}
20653
20654	/// Combine shuffle of shuffle of the form:
20655	/// shuf (shuf X, undef, InnerMask), undef, OuterMask --> splat X
20656	static SDValue formSplatFromShuffles(ShuffleVectorSDNode *OuterShuf,
20657	SelectionDAG &DAG) {
20658	if (!OuterShuf->getOperand(1).isUndef())
20659	return SDValue();
20660	auto *InnerShuf = dyn_cast<ShuffleVectorSDNode>(OuterShuf->getOperand(0));
20661	if (!InnerShuf \|\| !InnerShuf->getOperand(1).isUndef())
20662	return SDValue();
20663
20664	ArrayRef<int> OuterMask = OuterShuf->getMask();
20665	ArrayRef<int> InnerMask = InnerShuf->getMask();
20666	unsigned NumElts = OuterMask.size();
20667	assert(NumElts == InnerMask.size() && "Mask length mismatch")((NumElts == InnerMask.size() && "Mask length mismatch" ) ? static_cast<void> (0) : __assert_fail ("NumElts == InnerMask.size() && \"Mask length mismatch\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 20667, __PRETTY_FUNCTION__));
20668	SmallVector<int, 32> CombinedMask(NumElts, -1);
20669	int SplatIndex = -1;
20670	for (unsigned i = 0; i != NumElts; ++i) {
20671	// Undef lanes remain undef.
20672	int OuterMaskElt = OuterMask[i];
20673	if (OuterMaskElt == -1)
20674	continue;
20675
20676	// Peek through the shuffle masks to get the underlying source element.
20677	int InnerMaskElt = InnerMask[OuterMaskElt];
20678	if (InnerMaskElt == -1)
20679	continue;
20680
20681	// Initialize the splatted element.
20682	if (SplatIndex == -1)
20683	SplatIndex = InnerMaskElt;
20684
20685	// Non-matching index - this is not a splat.
20686	if (SplatIndex != InnerMaskElt)
20687	return SDValue();
20688
20689	CombinedMask[i] = InnerMaskElt;
20690	}
20691	assert((all_of(CombinedMask, [](int M) { return M == -1; }) \|\|(((all_of(CombinedMask, [](int M) { return M == -1; }) \|\| getSplatIndex (CombinedMask) != -1) && "Expected a splat mask") ? static_cast <void> (0) : __assert_fail ("(all_of(CombinedMask, [](int M) { return M == -1; }) \|\| getSplatIndex(CombinedMask) != -1) && \"Expected a splat mask\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 20693, __PRETTY_FUNCTION__))
20692	getSplatIndex(CombinedMask) != -1) &&(((all_of(CombinedMask, [](int M) { return M == -1; }) \|\| getSplatIndex (CombinedMask) != -1) && "Expected a splat mask") ? static_cast <void> (0) : __assert_fail ("(all_of(CombinedMask, [](int M) { return M == -1; }) \|\| getSplatIndex(CombinedMask) != -1) && \"Expected a splat mask\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 20693, __PRETTY_FUNCTION__))
20693	"Expected a splat mask")(((all_of(CombinedMask, [](int M) { return M == -1; }) \|\| getSplatIndex (CombinedMask) != -1) && "Expected a splat mask") ? static_cast <void> (0) : __assert_fail ("(all_of(CombinedMask, [](int M) { return M == -1; }) \|\| getSplatIndex(CombinedMask) != -1) && \"Expected a splat mask\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 20693, __PRETTY_FUNCTION__));
20694
20695	// TODO: The transform may be a win even if the mask is not legal.
20696	EVT VT = OuterShuf->getValueType(0);
20697	assert(VT == InnerShuf->getValueType(0) && "Expected matching shuffle types")((VT == InnerShuf->getValueType(0) && "Expected matching shuffle types" ) ? static_cast<void> (0) : __assert_fail ("VT == InnerShuf->getValueType(0) && \"Expected matching shuffle types\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 20697, __PRETTY_FUNCTION__));
20698	if (!DAG.getTargetLoweringInfo().isShuffleMaskLegal(CombinedMask, VT))
20699	return SDValue();
20700
20701	return DAG.getVectorShuffle(VT, SDLoc(OuterShuf), InnerShuf->getOperand(0),
20702	InnerShuf->getOperand(1), CombinedMask);
20703	}
20704
20705	/// If the shuffle mask is taking exactly one element from the first vector
20706	/// operand and passing through all other elements from the second vector
20707	/// operand, return the index of the mask element that is choosing an element
20708	/// from the first operand. Otherwise, return -1.
20709	static int getShuffleMaskIndexOfOneElementFromOp0IntoOp1(ArrayRef<int> Mask) {
20710	int MaskSize = Mask.size();
20711	int EltFromOp0 = -1;
20712	// TODO: This does not match if there are undef elements in the shuffle mask.
20713	// Should we ignore undefs in the shuffle mask instead? The trade-off is
20714	// removing an instruction (a shuffle), but losing the knowledge that some
20715	// vector lanes are not needed.
20716	for (int i = 0; i != MaskSize; ++i) {
20717	if (Mask[i] >= 0 && Mask[i] < MaskSize) {
20718	// We're looking for a shuffle of exactly one element from operand 0.
20719	if (EltFromOp0 != -1)
20720	return -1;
20721	EltFromOp0 = i;
20722	} else if (Mask[i] != i + MaskSize) {
20723	// Nothing from operand 1 can change lanes.
20724	return -1;
20725	}
20726	}
20727	return EltFromOp0;
20728	}
20729
20730	/// If a shuffle inserts exactly one element from a source vector operand into
20731	/// another vector operand and we can access the specified element as a scalar,
20732	/// then we can eliminate the shuffle.
20733	static SDValue replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf,
20734	SelectionDAG &DAG) {
20735	// First, check if we are taking one element of a vector and shuffling that
20736	// element into another vector.
20737	ArrayRef<int> Mask = Shuf->getMask();
20738	SmallVector<int, 16> CommutedMask(Mask.begin(), Mask.end());
20739	SDValue Op0 = Shuf->getOperand(0);
20740	SDValue Op1 = Shuf->getOperand(1);
20741	int ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(Mask);
20742	if (ShufOp0Index == -1) {
20743	// Commute mask and check again.
20744	ShuffleVectorSDNode::commuteMask(CommutedMask);
20745	ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(CommutedMask);
20746	if (ShufOp0Index == -1)
20747	return SDValue();
20748	// Commute operands to match the commuted shuffle mask.
20749	std::swap(Op0, Op1);
20750	Mask = CommutedMask;
20751	}
20752
20753	// The shuffle inserts exactly one element from operand 0 into operand 1.
20754	// Now see if we can access that element as a scalar via a real insert element
20755	// instruction.
20756	// TODO: We can try harder to locate the element as a scalar. Examples: it
20757	// could be an operand of SCALAR_TO_VECTOR, BUILD_VECTOR, or a constant.
20758	assert(Mask[ShufOp0Index] >= 0 && Mask[ShufOp0Index] < (int)Mask.size() &&((Mask[ShufOp0Index] >= 0 && Mask[ShufOp0Index] < (int)Mask.size() && "Shuffle mask value must be from operand 0" ) ? static_cast<void> (0) : __assert_fail ("Mask[ShufOp0Index] >= 0 && Mask[ShufOp0Index] < (int)Mask.size() && \"Shuffle mask value must be from operand 0\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 20759, __PRETTY_FUNCTION__))
20759	"Shuffle mask value must be from operand 0")((Mask[ShufOp0Index] >= 0 && Mask[ShufOp0Index] < (int)Mask.size() && "Shuffle mask value must be from operand 0" ) ? static_cast<void> (0) : __assert_fail ("Mask[ShufOp0Index] >= 0 && Mask[ShufOp0Index] < (int)Mask.size() && \"Shuffle mask value must be from operand 0\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 20759, __PRETTY_FUNCTION__));
20760	if (Op0.getOpcode() != ISD::INSERT_VECTOR_ELT)
20761	return SDValue();
20762
20763	auto *InsIndexC = dyn_cast<ConstantSDNode>(Op0.getOperand(2));
20764	if (!InsIndexC \|\| InsIndexC->getSExtValue() != Mask[ShufOp0Index])
20765	return SDValue();
20766
20767	// There's an existing insertelement with constant insertion index, so we
20768	// don't need to check the legality/profitability of a replacement operation
20769	// that differs at most in the constant value. The target should be able to
20770	// lower any of those in a similar way. If not, legalization will expand this
20771	// to a scalar-to-vector plus shuffle.
20772	//
20773	// Note that the shuffle may move the scalar from the position that the insert
20774	// element used. Therefore, our new insert element occurs at the shuffle's
20775	// mask index value, not the insert's index value.
20776	// shuffle (insertelt v1, x, C), v2, mask --> insertelt v2, x, C'
20777	SDValue NewInsIndex = DAG.getVectorIdxConstant(ShufOp0Index, SDLoc(Shuf));
20778	return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Shuf), Op0.getValueType(),
20779	Op1, Op0.getOperand(1), NewInsIndex);
20780	}
20781
20782	/// If we have a unary shuffle of a shuffle, see if it can be folded away
20783	/// completely. This has the potential to lose undef knowledge because the first
20784	/// shuffle may not have an undef mask element where the second one does. So
20785	/// only call this after doing simplifications based on demanded elements.
20786	static SDValue simplifyShuffleOfShuffle(ShuffleVectorSDNode *Shuf) {
20787	// shuf (shuf0 X, Y, Mask0), undef, Mask
20788	auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0));
20789	if (!Shuf0 \|\| !Shuf->getOperand(1).isUndef())
20790	return SDValue();
20791
20792	ArrayRef<int> Mask = Shuf->getMask();
20793	ArrayRef<int> Mask0 = Shuf0->getMask();
20794	for (int i = 0, e = (int)Mask.size(); i != e; ++i) {
20795	// Ignore undef elements.
20796	if (Mask[i] == -1)
20797	continue;
20798	assert(Mask[i] >= 0 && Mask[i] < e && "Unexpected shuffle mask value")((Mask[i] >= 0 && Mask[i] < e && "Unexpected shuffle mask value" ) ? static_cast<void> (0) : __assert_fail ("Mask[i] >= 0 && Mask[i] < e && \"Unexpected shuffle mask value\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 20798, __PRETTY_FUNCTION__));
20799
20800	// Is the element of the shuffle operand chosen by this shuffle the same as
20801	// the element chosen by the shuffle operand itself?
20802	if (Mask0[Mask[i]] != Mask0[i])
20803	return SDValue();
20804	}
20805	// Every element of this shuffle is identical to the result of the previous
20806	// shuffle, so we can replace this value.
20807	return Shuf->getOperand(0);
20808	}
20809
20810	SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
20811	EVT VT = N->getValueType(0);
20812	unsigned NumElts = VT.getVectorNumElements();
20813
20814	SDValue N0 = N->getOperand(0);
20815	SDValue N1 = N->getOperand(1);
20816
20817	assert(N0.getValueType() == VT && "Vector shuffle must be normalized in DAG")((N0.getValueType() == VT && "Vector shuffle must be normalized in DAG" ) ? static_cast<void> (0) : __assert_fail ("N0.getValueType() == VT && \"Vector shuffle must be normalized in DAG\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 20817, __PRETTY_FUNCTION__));
20818
20819	// Canonicalize shuffle undef, undef -> undef
20820	if (N0.isUndef() && N1.isUndef())
20821	return DAG.getUNDEF(VT);
20822
20823	ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
20824
20825	// Canonicalize shuffle v, v -> v, undef
20826	if (N0 == N1) {
20827	SmallVector<int, 8> NewMask;
20828	for (unsigned i = 0; i != NumElts; ++i) {
20829	int Idx = SVN->getMaskElt(i);
20830	if (Idx >= (int)NumElts) Idx -= NumElts;
20831	NewMask.push_back(Idx);
20832	}
20833	return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getUNDEF(VT), NewMask);
20834	}
20835
20836	// Canonicalize shuffle undef, v -> v, undef. Commute the shuffle mask.
20837	if (N0.isUndef())
20838	return DAG.getCommutedVectorShuffle(*SVN);
20839
20840	// Remove references to rhs if it is undef
20841	if (N1.isUndef()) {
20842	bool Changed = false;
20843	SmallVector<int, 8> NewMask;
20844	for (unsigned i = 0; i != NumElts; ++i) {
20845	int Idx = SVN->getMaskElt(i);
20846	if (Idx >= (int)NumElts) {
20847	Idx = -1;
20848	Changed = true;
20849	}
20850	NewMask.push_back(Idx);
20851	}
20852	if (Changed)
20853	return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, NewMask);
20854	}
20855
20856	if (SDValue InsElt = replaceShuffleOfInsert(SVN, DAG))
20857	return InsElt;
20858
20859	// A shuffle of a single vector that is a splatted value can always be folded.
20860	if (SDValue V = combineShuffleOfSplatVal(SVN, DAG))
20861	return V;
20862
20863	if (SDValue V = formSplatFromShuffles(SVN, DAG))
20864	return V;
20865
20866	// If it is a splat, check if the argument vector is another splat or a
20867	// build_vector.
20868	if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
20869	int SplatIndex = SVN->getSplatIndex();
20870	if (N0.hasOneUse() && TLI.isExtractVecEltCheap(VT, SplatIndex) &&
20871	TLI.isBinOp(N0.getOpcode()) && N0.getNode()->getNumValues() == 1) {
20872	// splat (vector_bo L, R), Index -->
20873	// splat (scalar_bo (extelt L, Index), (extelt R, Index))
20874	SDValue L = N0.getOperand(0), R = N0.getOperand(1);
20875	SDLoc DL(N);
20876	EVT EltVT = VT.getScalarType();
20877	SDValue Index = DAG.getVectorIdxConstant(SplatIndex, DL);
20878	SDValue ExtL = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, L, Index);
20879	SDValue ExtR = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, R, Index);
20880	SDValue NewBO = DAG.getNode(N0.getOpcode(), DL, EltVT, ExtL, ExtR,
20881	N0.getNode()->getFlags());
20882	SDValue Insert = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, NewBO);
20883	SmallVector<int, 16> ZeroMask(VT.getVectorNumElements(), 0);
20884	return DAG.getVectorShuffle(VT, DL, Insert, DAG.getUNDEF(VT), ZeroMask);
20885	}
20886
20887	// If this is a bit convert that changes the element type of the vector but
20888	// not the number of vector elements, look through it. Be careful not to
20889	// look though conversions that change things like v4f32 to v2f64.
20890	SDNode *V = N0.getNode();
20891	if (V->getOpcode() == ISD::BITCAST) {
20892	SDValue ConvInput = V->getOperand(0);
20893	if (ConvInput.getValueType().isVector() &&
20894	ConvInput.getValueType().getVectorNumElements() == NumElts)
20895	V = ConvInput.getNode();
20896	}
20897
20898	if (V->getOpcode() == ISD::BUILD_VECTOR) {
20899	assert(V->getNumOperands() == NumElts &&((V->getNumOperands() == NumElts && "BUILD_VECTOR has wrong number of operands" ) ? static_cast<void> (0) : __assert_fail ("V->getNumOperands() == NumElts && \"BUILD_VECTOR has wrong number of operands\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 20900, __PRETTY_FUNCTION__))
20900	"BUILD_VECTOR has wrong number of operands")((V->getNumOperands() == NumElts && "BUILD_VECTOR has wrong number of operands" ) ? static_cast<void> (0) : __assert_fail ("V->getNumOperands() == NumElts && \"BUILD_VECTOR has wrong number of operands\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 20900, __PRETTY_FUNCTION__));
20901	SDValue Base;
20902	bool AllSame = true;
20903	for (unsigned i = 0; i != NumElts; ++i) {
20904	if (!V->getOperand(i).isUndef()) {
20905	Base = V->getOperand(i);
20906	break;
20907	}
20908	}
20909	// Splat of <u, u, u, u>, return <u, u, u, u>
20910	if (!Base.getNode())
20911	return N0;
20912	for (unsigned i = 0; i != NumElts; ++i) {
20913	if (V->getOperand(i) != Base) {
20914	AllSame = false;
20915	break;
20916	}
20917	}
20918	// Splat of <x, x, x, x>, return <x, x, x, x>
20919	if (AllSame)
20920	return N0;
20921
20922	// Canonicalize any other splat as a build_vector.
20923	SDValue Splatted = V->getOperand(SplatIndex);
20924	SmallVector<SDValue, 8> Ops(NumElts, Splatted);
20925	SDValue NewBV = DAG.getBuildVector(V->getValueType(0), SDLoc(N), Ops);
20926
20927	// We may have jumped through bitcasts, so the type of the
20928	// BUILD_VECTOR may not match the type of the shuffle.
20929	if (V->getValueType(0) != VT)
20930	NewBV = DAG.getBitcast(VT, NewBV);
20931	return NewBV;
20932	}
20933	}
20934
20935	// Simplify source operands based on shuffle mask.
20936	if (SimplifyDemandedVectorElts(SDValue(N, 0)))
20937	return SDValue(N, 0);
20938
20939	// This is intentionally placed after demanded elements simplification because
20940	// it could eliminate knowledge of undef elements created by this shuffle.
20941	if (SDValue ShufOp = simplifyShuffleOfShuffle(SVN))
20942	return ShufOp;
20943
20944	// Match shuffles that can be converted to any_vector_extend_in_reg.
20945	if (SDValue V = combineShuffleToVectorExtend(SVN, DAG, TLI, LegalOperations))
20946	return V;
20947
20948	// Combine "truncate_vector_in_reg" style shuffles.
20949	if (SDValue V = combineTruncationShuffle(SVN, DAG))
20950	return V;
20951
20952	if (N0.getOpcode() == ISD::CONCAT_VECTORS &&
20953	Level < AfterLegalizeVectorOps &&
20954	(N1.isUndef() \|\|
20955	(N1.getOpcode() == ISD::CONCAT_VECTORS &&
20956	N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()))) {
20957	if (SDValue V = partitionShuffleOfConcats(N, DAG))
20958	return V;
20959	}
20960
20961	// A shuffle of a concat of the same narrow vector can be reduced to use
20962	// only low-half elements of a concat with undef:
20963	// shuf (concat X, X), undef, Mask --> shuf (concat X, undef), undef, Mask'
20964	if (N0.getOpcode() == ISD::CONCAT_VECTORS && N1.isUndef() &&
20965	N0.getNumOperands() == 2 &&
20966	N0.getOperand(0) == N0.getOperand(1)) {
20967	int HalfNumElts = (int)NumElts / 2;
20968	SmallVector<int, 8> NewMask;
20969	for (unsigned i = 0; i != NumElts; ++i) {
20970	int Idx = SVN->getMaskElt(i);
20971	if (Idx >= HalfNumElts) {
20972	assert(Idx < (int)NumElts && "Shuffle mask chooses undef op")((Idx < (int)NumElts && "Shuffle mask chooses undef op" ) ? static_cast<void> (0) : __assert_fail ("Idx < (int)NumElts && \"Shuffle mask chooses undef op\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 20972, __PRETTY_FUNCTION__));
20973	Idx -= HalfNumElts;
20974	}
20975	NewMask.push_back(Idx);
20976	}
20977	if (TLI.isShuffleMaskLegal(NewMask, VT)) {
20978	SDValue UndefVec = DAG.getUNDEF(N0.getOperand(0).getValueType());
20979	SDValue NewCat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT,
20980	N0.getOperand(0), UndefVec);
20981	return DAG.getVectorShuffle(VT, SDLoc(N), NewCat, N1, NewMask);
20982	}
20983	}
20984
20985	// Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
20986	// BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
20987	if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT))
20988	if (SDValue Res = combineShuffleOfScalars(SVN, DAG, TLI))
20989	return Res;
20990
20991	// If this shuffle only has a single input that is a bitcasted shuffle,
20992	// attempt to merge the 2 shuffles and suitably bitcast the inputs/output
20993	// back to their original types.
20994	if (N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
20995	N1.isUndef() && Level < AfterLegalizeVectorOps &&
20996	TLI.isTypeLegal(VT)) {
20997
20998	SDValue BC0 = peekThroughOneUseBitcasts(N0);
20999	if (BC0.getOpcode() == ISD::VECTOR_SHUFFLE && BC0.hasOneUse()) {
21000	EVT SVT = VT.getScalarType();
21001	EVT InnerVT = BC0->getValueType(0);
21002	EVT InnerSVT = InnerVT.getScalarType();
21003
21004	// Determine which shuffle works with the smaller scalar type.
21005	EVT ScaleVT = SVT.bitsLT(InnerSVT) ? VT : InnerVT;
21006	EVT ScaleSVT = ScaleVT.getScalarType();
21007
21008	if (TLI.isTypeLegal(ScaleVT) &&
21009	0 == (InnerSVT.getSizeInBits() % ScaleSVT.getSizeInBits()) &&
21010	0 == (SVT.getSizeInBits() % ScaleSVT.getSizeInBits())) {
21011	int InnerScale = InnerSVT.getSizeInBits() / ScaleSVT.getSizeInBits();
21012	int OuterScale = SVT.getSizeInBits() / ScaleSVT.getSizeInBits();
21013
21014	// Scale the shuffle masks to the smaller scalar type.
21015	ShuffleVectorSDNode *InnerSVN = cast<ShuffleVectorSDNode>(BC0);
21016	SmallVector<int, 8> InnerMask;
21017	SmallVector<int, 8> OuterMask;
21018	narrowShuffleMaskElts(InnerScale, InnerSVN->getMask(), InnerMask);
21019	narrowShuffleMaskElts(OuterScale, SVN->getMask(), OuterMask);
21020
21021	// Merge the shuffle masks.
21022	SmallVector<int, 8> NewMask;
21023	for (int M : OuterMask)
21024	NewMask.push_back(M < 0 ? -1 : InnerMask[M]);
21025
21026	// Test for shuffle mask legality over both commutations.
21027	SDValue SV0 = BC0->getOperand(0);
21028	SDValue SV1 = BC0->getOperand(1);
21029	bool LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
21030	if (!LegalMask) {
21031	std::swap(SV0, SV1);
21032	ShuffleVectorSDNode::commuteMask(NewMask);
21033	LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
21034	}
21035
21036	if (LegalMask) {
21037	SV0 = DAG.getBitcast(ScaleVT, SV0);
21038	SV1 = DAG.getBitcast(ScaleVT, SV1);
21039	return DAG.getBitcast(
21040	VT, DAG.getVectorShuffle(ScaleVT, SDLoc(N), SV0, SV1, NewMask));
21041	}
21042	}
21043	}
21044	}
21045
21046	// Compute the combined shuffle mask for a shuffle with SV0 as the first
21047	// operand, and SV1 as the second operand.
21048	// i.e. Merge SVN(OtherSVN, N1) -> shuffle(SV0, SV1, Mask) iff Commute = false
21049	// Merge SVN(N1, OtherSVN) -> shuffle(SV0, SV1, Mask') iff Commute = true
21050	auto MergeInnerShuffle =
21051	[NumElts, &VT](bool Commute, ShuffleVectorSDNode *SVN,
21052	ShuffleVectorSDNode *OtherSVN, SDValue N1,
21053	const TargetLowering &TLI, SDValue &SV0, SDValue &SV1,
21054	SmallVectorImpl<int> &Mask) -> bool {
21055	// Don't try to fold splats; they're likely to simplify somehow, or they
21056	// might be free.
21057	if (OtherSVN->isSplat())
21058	return false;
21059
21060	SV0 = SV1 = SDValue();
21061	Mask.clear();
21062
21063	for (unsigned i = 0; i != NumElts; ++i) {
21064	int Idx = SVN->getMaskElt(i);
21065	if (Idx < 0) {
21066	// Propagate Undef.
21067	Mask.push_back(Idx);
21068	continue;
21069	}
21070
21071	if (Commute)
21072	Idx = (Idx < (int)NumElts) ? (Idx + NumElts) : (Idx - NumElts);
21073
21074	SDValue CurrentVec;
21075	if (Idx < (int)NumElts) {
21076	// This shuffle index refers to the inner shuffle N0. Lookup the inner
21077	// shuffle mask to identify which vector is actually referenced.
21078	Idx = OtherSVN->getMaskElt(Idx);
21079	if (Idx < 0) {
21080	// Propagate Undef.
21081	Mask.push_back(Idx);
21082	continue;
21083	}
21084	CurrentVec = (Idx < (int)NumElts) ? OtherSVN->getOperand(0)
21085	: OtherSVN->getOperand(1);
21086	} else {
21087	// This shuffle index references an element within N1.
21088	CurrentVec = N1;
21089	}
21090
21091	// Simple case where 'CurrentVec' is UNDEF.
21092	if (CurrentVec.isUndef()) {
21093	Mask.push_back(-1);
21094	continue;
21095	}
21096
21097	// Canonicalize the shuffle index. We don't know yet if CurrentVec
21098	// will be the first or second operand of the combined shuffle.
21099	Idx = Idx % NumElts;
21100	if (!SV0.getNode() \|\| SV0 == CurrentVec) {
21101	// Ok. CurrentVec is the left hand side.
21102	// Update the mask accordingly.
21103	SV0 = CurrentVec;
21104	Mask.push_back(Idx);
21105	continue;
21106	}
21107	if (!SV1.getNode() \|\| SV1 == CurrentVec) {
21108	// Ok. CurrentVec is the right hand side.
21109	// Update the mask accordingly.
21110	SV1 = CurrentVec;
21111	Mask.push_back(Idx + NumElts);
21112	continue;
21113	}
21114
21115	// Last chance - see if the vector is another shuffle and if it
21116	// uses one of the existing candidate shuffle ops.
21117	if (auto *CurrentSVN = dyn_cast<ShuffleVectorSDNode>(CurrentVec)) {
21118	int InnerIdx = CurrentSVN->getMaskElt(Idx);
21119	if (InnerIdx < 0) {
21120	Mask.push_back(-1);
21121	continue;
21122	}
21123	SDValue InnerVec = (InnerIdx < (int)NumElts)
21124	? CurrentSVN->getOperand(0)
21125	: CurrentSVN->getOperand(1);
21126	if (InnerVec.isUndef()) {
21127	Mask.push_back(-1);
21128	continue;
21129	}
21130	InnerIdx %= NumElts;
21131	if (InnerVec == SV0) {
21132	Mask.push_back(InnerIdx);
21133	continue;
21134	}
21135	if (InnerVec == SV1) {
21136	Mask.push_back(InnerIdx + NumElts);
21137	continue;
21138	}
21139	}
21140
21141	// Bail out if we cannot convert the shuffle pair into a single shuffle.
21142	return false;
21143	}
21144
21145	if (llvm::all_of(Mask, [](int M) { return M < 0; }))
21146	return true;
21147
21148	// Avoid introducing shuffles with illegal mask.
21149	// shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
21150	// shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
21151	// shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
21152	// shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2)
21153	// shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2)
21154	// shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2)
21155	if (TLI.isShuffleMaskLegal(Mask, VT))
21156	return true;
21157
21158	std::swap(SV0, SV1);
21159	ShuffleVectorSDNode::commuteMask(Mask);
21160	return TLI.isShuffleMaskLegal(Mask, VT);
21161	};
21162
21163	if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
21164	// Canonicalize shuffles according to rules:
21165	// shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A)
21166	// shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B)
21167	// shuffle(B, shuffle(A, Undef)) -> shuffle(shuffle(A, Undef), B)
21168	if (N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
21169	N0.getOpcode() != ISD::VECTOR_SHUFFLE) {
21170	// The incoming shuffle must be of the same type as the result of the
21171	// current shuffle.
21172	assert(N1->getOperand(0).getValueType() == VT &&((N1->getOperand(0).getValueType() == VT && "Shuffle types don't match" ) ? static_cast<void> (0) : __assert_fail ("N1->getOperand(0).getValueType() == VT && \"Shuffle types don't match\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 21173, __PRETTY_FUNCTION__))
21173	"Shuffle types don't match")((N1->getOperand(0).getValueType() == VT && "Shuffle types don't match" ) ? static_cast<void> (0) : __assert_fail ("N1->getOperand(0).getValueType() == VT && \"Shuffle types don't match\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 21173, __PRETTY_FUNCTION__));
21174
21175	SDValue SV0 = N1->getOperand(0);
21176	SDValue SV1 = N1->getOperand(1);
21177	bool HasSameOp0 = N0 == SV0;
21178	bool IsSV1Undef = SV1.isUndef();
21179	if (HasSameOp0 \|\| IsSV1Undef \|\| N0 == SV1)
21180	// Commute the operands of this shuffle so merging below will trigger.
21181	return DAG.getCommutedVectorShuffle(*SVN);
21182	}
21183
21184	// Canonicalize splat shuffles to the RHS to improve merging below.
21185	// shuffle(splat(A,u), shuffle(C,D)) -> shuffle'(shuffle(C,D), splat(A,u))
21186	if (N0.getOpcode() == ISD::VECTOR_SHUFFLE &&
21187	N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
21188	cast<ShuffleVectorSDNode>(N0)->isSplat() &&
21189	!cast<ShuffleVectorSDNode>(N1)->isSplat()) {
21190	return DAG.getCommutedVectorShuffle(*SVN);
21191	}
21192
21193	// Try to fold according to rules:
21194	// shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
21195	// shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
21196	// shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
21197	// Don't try to fold shuffles with illegal type.
21198	// Only fold if this shuffle is the only user of the other shuffle.
21199	// Try matching shuffle(C,shuffle(A,B)) commutted patterns as well.
21200	for (int i = 0; i != 2; ++i) {
21201	if (N->getOperand(i).getOpcode() == ISD::VECTOR_SHUFFLE &&
21202	N->isOnlyUserOf(N->getOperand(i).getNode())) {
21203	// The incoming shuffle must be of the same type as the result of the
21204	// current shuffle.
21205	auto *OtherSV = cast<ShuffleVectorSDNode>(N->getOperand(i));
21206	assert(OtherSV->getOperand(0).getValueType() == VT &&((OtherSV->getOperand(0).getValueType() == VT && "Shuffle types don't match" ) ? static_cast<void> (0) : __assert_fail ("OtherSV->getOperand(0).getValueType() == VT && \"Shuffle types don't match\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 21207, __PRETTY_FUNCTION__))
21207	"Shuffle types don't match")((OtherSV->getOperand(0).getValueType() == VT && "Shuffle types don't match" ) ? static_cast<void> (0) : __assert_fail ("OtherSV->getOperand(0).getValueType() == VT && \"Shuffle types don't match\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 21207, __PRETTY_FUNCTION__));
21208
21209	SDValue SV0, SV1;
21210	SmallVector<int, 4> Mask;
21211	if (MergeInnerShuffle(i != 0, SVN, OtherSV, N->getOperand(1 - i), TLI,
21212	SV0, SV1, Mask)) {
21213	// Check if all indices in Mask are Undef. In case, propagate Undef.
21214	if (llvm::all_of(Mask, [](int M) { return M < 0; }))
21215	return DAG.getUNDEF(VT);
21216
21217	return DAG.getVectorShuffle(VT, SDLoc(N),
21218	SV0 ? SV0 : DAG.getUNDEF(VT),
21219	SV1 ? SV1 : DAG.getUNDEF(VT), Mask);
21220	}
21221	}
21222	}
21223
21224	// Merge shuffles through binops if we are able to merge it with at least
21225	// one other shuffles.
21226	// shuffle(bop(shuffle(x,y),shuffle(z,w)),bop(shuffle(a,b),shuffle(c,d)))
21227	unsigned SrcOpcode = N0.getOpcode();
21228	if (SrcOpcode == N1.getOpcode() && TLI.isBinOp(SrcOpcode) &&
21229	N->isOnlyUserOf(N0.getNode()) && N->isOnlyUserOf(N1.getNode())) {
21230	SDValue Op00 = N0.getOperand(0);
21231	SDValue Op10 = N1.getOperand(0);
21232	SDValue Op01 = N0.getOperand(1);
21233	SDValue Op11 = N1.getOperand(1);
21234	// TODO: We might be able to relax the VT check but we don't currently
21235	// have any isBinOp() that has different result/ops VTs so play safe until
21236	// we have test coverage.
21237	if (Op00.getValueType() == VT && Op10.getValueType() == VT &&
21238	Op01.getValueType() == VT && Op11.getValueType() == VT &&
21239	(Op00.getOpcode() == ISD::VECTOR_SHUFFLE \|\|
21240	Op10.getOpcode() == ISD::VECTOR_SHUFFLE \|\|
21241	Op01.getOpcode() == ISD::VECTOR_SHUFFLE \|\|
21242	Op11.getOpcode() == ISD::VECTOR_SHUFFLE)) {
21243	auto CanMergeInnerShuffle = [&](SDValue &SV0, SDValue &SV1,
21244	SmallVectorImpl<int> &Mask, bool LeftOp,
21245	bool Commute) {
21246	SDValue InnerN = Commute ? N1 : N0;
21247	SDValue Op0 = LeftOp ? Op00 : Op01;
21248	SDValue Op1 = LeftOp ? Op10 : Op11;
21249	if (Commute)
21250	std::swap(Op0, Op1);
21251	return Op0.getOpcode() == ISD::VECTOR_SHUFFLE &&
21252	InnerN->isOnlyUserOf(Op0.getNode()) &&
21253	MergeInnerShuffle(Commute, SVN, cast<ShuffleVectorSDNode>(Op0),
21254	Op1, TLI, SV0, SV1, Mask) &&
21255	llvm::none_of(Mask, [](int M) { return M < 0; });
21256	};
21257
21258	// Ensure we don't increase the number of shuffles - we must merge a
21259	// shuffle from at least one of the LHS and RHS ops.
21260	bool MergedLeft = false;
21261	SDValue LeftSV0, LeftSV1;
21262	SmallVector<int, 4> LeftMask;
21263	if (CanMergeInnerShuffle(LeftSV0, LeftSV1, LeftMask, true, false) \|\|
21264	CanMergeInnerShuffle(LeftSV0, LeftSV1, LeftMask, true, true)) {
21265	MergedLeft = true;
21266	} else {
21267	LeftMask.assign(SVN->getMask().begin(), SVN->getMask().end());
21268	LeftSV0 = Op00, LeftSV1 = Op10;
21269	}
21270
21271	bool MergedRight = false;
21272	SDValue RightSV0, RightSV1;
21273	SmallVector<int, 4> RightMask;
21274	if (CanMergeInnerShuffle(RightSV0, RightSV1, RightMask, false, false) \|\|
21275	CanMergeInnerShuffle(RightSV0, RightSV1, RightMask, false, true)) {
21276	MergedRight = true;
21277	} else {
21278	RightMask.assign(SVN->getMask().begin(), SVN->getMask().end());
21279	RightSV0 = Op01, RightSV1 = Op11;
21280	}
21281
21282	if (MergedLeft \|\| MergedRight) {
21283	SDLoc DL(N);
21284	SDValue LHS = DAG.getVectorShuffle(
21285	VT, DL, LeftSV0 ? LeftSV0 : DAG.getUNDEF(VT),
21286	LeftSV1 ? LeftSV1 : DAG.getUNDEF(VT), LeftMask);
21287	SDValue RHS = DAG.getVectorShuffle(
21288	VT, DL, RightSV0 ? RightSV0 : DAG.getUNDEF(VT),
21289	RightSV1 ? RightSV1 : DAG.getUNDEF(VT), RightMask);
21290	return DAG.getNode(SrcOpcode, DL, VT, LHS, RHS);
21291	}
21292	}
21293	}
21294	}
21295
21296	if (SDValue V = foldShuffleOfConcatUndefs(SVN, DAG))
21297	return V;
21298
21299	return SDValue();
21300	}
21301
21302	SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) {
21303	SDValue InVal = N->getOperand(0);
21304	EVT VT = N->getValueType(0);
21305
21306	// Replace a SCALAR_TO_VECTOR(EXTRACT_VECTOR_ELT(V,C0)) pattern
21307	// with a VECTOR_SHUFFLE and possible truncate.
21308	if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
21309	VT.isFixedLengthVector() &&
21310	InVal->getOperand(0).getValueType().isFixedLengthVector()) {
21311	SDValue InVec = InVal->getOperand(0);
21312	SDValue EltNo = InVal->getOperand(1);
21313	auto InVecT = InVec.getValueType();
21314	if (ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(EltNo)) {
21315	SmallVector<int, 8> NewMask(InVecT.getVectorNumElements(), -1);
21316	int Elt = C0->getZExtValue();
21317	NewMask[0] = Elt;
21318	// If we have an implict truncate do truncate here as long as it's legal.
21319	// if it's not legal, this should
21320	if (VT.getScalarType() != InVal.getValueType() &&
21321	InVal.getValueType().isScalarInteger() &&
21322	isTypeLegal(VT.getScalarType())) {
21323	SDValue Val =
21324	DAG.getNode(ISD::TRUNCATE, SDLoc(InVal), VT.getScalarType(), InVal);
21325	return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, Val);
21326	}
21327	if (VT.getScalarType() == InVecT.getScalarType() &&
21328	VT.getVectorNumElements() <= InVecT.getVectorNumElements()) {
21329	SDValue LegalShuffle =
21330	TLI.buildLegalVectorShuffle(InVecT, SDLoc(N), InVec,
21331	DAG.getUNDEF(InVecT), NewMask, DAG);
21332	if (LegalShuffle) {
21333	// If the initial vector is the correct size this shuffle is a
21334	// valid result.
21335	if (VT == InVecT)
21336	return LegalShuffle;
21337	// If not we must truncate the vector.
21338	if (VT.getVectorNumElements() != InVecT.getVectorNumElements()) {
21339	SDValue ZeroIdx = DAG.getVectorIdxConstant(0, SDLoc(N));
21340	EVT SubVT = EVT::getVectorVT(*DAG.getContext(),
21341	InVecT.getVectorElementType(),
21342	VT.getVectorNumElements());
21343	return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), SubVT,
21344	LegalShuffle, ZeroIdx);
21345	}
21346	}
21347	}
21348	}
21349	}
21350
21351	return SDValue();
21352	}
21353
21354	SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
21355	EVT VT = N->getValueType(0);
21356	SDValue N0 = N->getOperand(0);
21357	SDValue N1 = N->getOperand(1);
21358	SDValue N2 = N->getOperand(2);
21359	uint64_t InsIdx = N->getConstantOperandVal(2);
21360
21361	// If inserting an UNDEF, just return the original vector.
21362	if (N1.isUndef())
21363	return N0;
21364
21365	// If this is an insert of an extracted vector into an undef vector, we can
21366	// just use the input to the extract.
21367	if (N0.isUndef() && N1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
21368	N1.getOperand(1) == N2 && N1.getOperand(0).getValueType() == VT)
21369	return N1.getOperand(0);
21370
21371	// If we are inserting a bitcast value into an undef, with the same
21372	// number of elements, just use the bitcast input of the extract.
21373	// i.e. INSERT_SUBVECTOR UNDEF (BITCAST N1) N2 ->
21374	// BITCAST (INSERT_SUBVECTOR UNDEF N1 N2)
21375	if (N0.isUndef() && N1.getOpcode() == ISD::BITCAST &&
21376	N1.getOperand(0).getOpcode() == ISD::EXTRACT_SUBVECTOR &&
21377	N1.getOperand(0).getOperand(1) == N2 &&
21378	N1.getOperand(0).getOperand(0).getValueType().getVectorElementCount() ==
21379	VT.getVectorElementCount() &&
21380	N1.getOperand(0).getOperand(0).getValueType().getSizeInBits() ==
21381	VT.getSizeInBits()) {
21382	return DAG.getBitcast(VT, N1.getOperand(0).getOperand(0));
21383	}
21384
21385	// If both N1 and N2 are bitcast values on which insert_subvector
21386	// would makes sense, pull the bitcast through.
21387	// i.e. INSERT_SUBVECTOR (BITCAST N0) (BITCAST N1) N2 ->
21388	// BITCAST (INSERT_SUBVECTOR N0 N1 N2)
21389	if (N0.getOpcode() == ISD::BITCAST && N1.getOpcode() == ISD::BITCAST) {
21390	SDValue CN0 = N0.getOperand(0);
21391	SDValue CN1 = N1.getOperand(0);
21392	EVT CN0VT = CN0.getValueType();
21393	EVT CN1VT = CN1.getValueType();
21394	if (CN0VT.isVector() && CN1VT.isVector() &&
21395	CN0VT.getVectorElementType() == CN1VT.getVectorElementType() &&
21396	CN0VT.getVectorElementCount() == VT.getVectorElementCount()) {
21397	SDValue NewINSERT = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N),
21398	CN0.getValueType(), CN0, CN1, N2);
21399	return DAG.getBitcast(VT, NewINSERT);
21400	}
21401	}
21402
21403	// Combine INSERT_SUBVECTORs where we are inserting to the same index.
21404	// INSERT_SUBVECTOR( INSERT_SUBVECTOR( Vec, SubOld, Idx ), SubNew, Idx )
21405	// --> INSERT_SUBVECTOR( Vec, SubNew, Idx )
21406	if (N0.getOpcode() == ISD::INSERT_SUBVECTOR &&
21407	N0.getOperand(1).getValueType() == N1.getValueType() &&
21408	N0.getOperand(2) == N2)
21409	return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0.getOperand(0),
21410	N1, N2);
21411
21412	// Eliminate an intermediate insert into an undef vector:
21413	// insert_subvector undef, (insert_subvector undef, X, 0), N2 -->
21414	// insert_subvector undef, X, N2
21415	if (N0.isUndef() && N1.getOpcode() == ISD::INSERT_SUBVECTOR &&
21416	N1.getOperand(0).isUndef() && isNullConstant(N1.getOperand(2)))
21417	return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0,
21418	N1.getOperand(1), N2);
21419
21420	// Push subvector bitcasts to the output, adjusting the index as we go.
21421	// insert_subvector(bitcast(v), bitcast(s), c1)
21422	// -> bitcast(insert_subvector(v, s, c2))
21423	if ((N0.isUndef() \|\| N0.getOpcode() == ISD::BITCAST) &&
21424	N1.getOpcode() == ISD::BITCAST) {
21425	SDValue N0Src = peekThroughBitcasts(N0);
21426	SDValue N1Src = peekThroughBitcasts(N1);
21427	EVT N0SrcSVT = N0Src.getValueType().getScalarType();
21428	EVT N1SrcSVT = N1Src.getValueType().getScalarType();
21429	if ((N0.isUndef() \|\| N0SrcSVT == N1SrcSVT) &&
21430	N0Src.getValueType().isVector() && N1Src.getValueType().isVector()) {
21431	EVT NewVT;
21432	SDLoc DL(N);
21433	SDValue NewIdx;
21434	LLVMContext &Ctx = *DAG.getContext();
21435	ElementCount NumElts = VT.getVectorElementCount();
21436	unsigned EltSizeInBits = VT.getScalarSizeInBits();
21437	if ((EltSizeInBits % N1SrcSVT.getSizeInBits()) == 0) {
21438	unsigned Scale = EltSizeInBits / N1SrcSVT.getSizeInBits();
21439	NewVT = EVT::getVectorVT(Ctx, N1SrcSVT, NumElts * Scale);
21440	NewIdx = DAG.getVectorIdxConstant(InsIdx * Scale, DL);
21441	} else if ((N1SrcSVT.getSizeInBits() % EltSizeInBits) == 0) {
21442	unsigned Scale = N1SrcSVT.getSizeInBits() / EltSizeInBits;
21443	if (NumElts.isKnownMultipleOf(Scale) && (InsIdx % Scale) == 0) {
21444	NewVT = EVT::getVectorVT(Ctx, N1SrcSVT,
21445	NumElts.divideCoefficientBy(Scale));
21446	NewIdx = DAG.getVectorIdxConstant(InsIdx / Scale, DL);
21447	}
21448	}
21449	if (NewIdx && hasOperation(ISD::INSERT_SUBVECTOR, NewVT)) {
21450	SDValue Res = DAG.getBitcast(NewVT, N0Src);
21451	Res = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, NewVT, Res, N1Src, NewIdx);
21452	return DAG.getBitcast(VT, Res);
21453	}
21454	}
21455	}
21456
21457	// Canonicalize insert_subvector dag nodes.
21458	// Example:
21459	// (insert_subvector (insert_subvector A, Idx0), Idx1)
21460	// -> (insert_subvector (insert_subvector A, Idx1), Idx0)
21461	if (N0.getOpcode() == ISD::INSERT_SUBVECTOR && N0.hasOneUse() &&
21462	N1.getValueType() == N0.getOperand(1).getValueType()) {
21463	unsigned OtherIdx = N0.getConstantOperandVal(2);
21464	if (InsIdx < OtherIdx) {
21465	// Swap nodes.
21466	SDValue NewOp = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT,
21467	N0.getOperand(0), N1, N2);
21468	AddToWorklist(NewOp.getNode());
21469	return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N0.getNode()),
21470	VT, NewOp, N0.getOperand(1), N0.getOperand(2));
21471	}
21472	}
21473
21474	// If the input vector is a concatenation, and the insert replaces
21475	// one of the pieces, we can optimize into a single concat_vectors.
21476	if (N0.getOpcode() == ISD::CONCAT_VECTORS && N0.hasOneUse() &&
21477	N0.getOperand(0).getValueType() == N1.getValueType() &&
21478	N0.getOperand(0).getValueType().isScalableVector() ==
21479	N1.getValueType().isScalableVector()) {
21480	unsigned Factor = N1.getValueType().getVectorMinNumElements();
21481	SmallVector<SDValue, 8> Ops(N0->op_begin(), N0->op_end());
21482	Ops[InsIdx / Factor] = N1;
21483	return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
21484	}
21485
21486	// Simplify source operands based on insertion.
21487	if (SimplifyDemandedVectorElts(SDValue(N, 0)))
21488	return SDValue(N, 0);
21489
21490	return SDValue();
21491	}
21492
21493	SDValue DAGCombiner::visitFP_TO_FP16(SDNode *N) {
21494	SDValue N0 = N->getOperand(0);
21495
21496	// fold (fp_to_fp16 (fp16_to_fp op)) -> op
21497	if (N0->getOpcode() == ISD::FP16_TO_FP)
21498	return N0->getOperand(0);
21499
21500	return SDValue();
21501	}
21502
21503	SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) {
21504	SDValue N0 = N->getOperand(0);
21505
21506	// fold fp16_to_fp(op & 0xffff) -> fp16_to_fp(op)
21507	if (!TLI.shouldKeepZExtForFP16Conv() && N0->getOpcode() == ISD::AND) {
21508	ConstantSDNode *AndConst = getAsNonOpaqueConstant(N0.getOperand(1));
21509	if (AndConst && AndConst->getAPIntValue() == 0xffff) {
21510	return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), N->getValueType(0),
21511	N0.getOperand(0));
21512	}
21513	}
21514
21515	return SDValue();
21516	}
21517
21518	SDValue DAGCombiner::visitVECREDUCE(SDNode *N) {
21519	SDValue N0 = N->getOperand(0);
21520	EVT VT = N0.getValueType();
21521	unsigned Opcode = N->getOpcode();
21522
21523	// VECREDUCE over 1-element vector is just an extract.
21524	if (VT.getVectorElementCount().isScalar()) {
21525	SDLoc dl(N);
21526	SDValue Res =
21527	DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT.getVectorElementType(), N0,
21528	DAG.getVectorIdxConstant(0, dl));
21529	if (Res.getValueType() != N->getValueType(0))
21530	Res = DAG.getNode(ISD::ANY_EXTEND, dl, N->getValueType(0), Res);
21531	return Res;
21532	}
21533
21534	// On an boolean vector an and/or reduction is the same as a umin/umax
21535	// reduction. Convert them if the latter is legal while the former isn't.
21536	if (Opcode == ISD::VECREDUCE_AND \|\| Opcode == ISD::VECREDUCE_OR) {
21537	unsigned NewOpcode = Opcode == ISD::VECREDUCE_AND
21538	? ISD::VECREDUCE_UMIN : ISD::VECREDUCE_UMAX;
21539	if (!TLI.isOperationLegalOrCustom(Opcode, VT) &&
21540	TLI.isOperationLegalOrCustom(NewOpcode, VT) &&
21541	DAG.ComputeNumSignBits(N0) == VT.getScalarSizeInBits())
21542	return DAG.getNode(NewOpcode, SDLoc(N), N->getValueType(0), N0);
21543	}
21544
21545	return SDValue();
21546	}
21547
21548	/// Returns a vector_shuffle if it is able to transform an AND to a vector_shuffle
21549	/// with the destination vector and a zero vector.
21550	/// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
21551	/// vector_shuffle V, Zero, <0, 4, 2, 4>
///
/// \param N the ISD::AND node to transform (asserted below).
/// \returns the shuffle, bitcast back to N's value type, or an empty SDValue
/// when LegalOperations is set, when the second operand (looked at through
/// bitcasts) is not a BUILD_VECTOR, or when no split level produces a clear
/// mask that TLI.isVectorClearMaskLegal accepts.
21552	SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
21553	assert(N->getOpcode() == ISD::AND && "Unexpected opcode!")((N->getOpcode() == ISD::AND && "Unexpected opcode!" ) ? static_cast<void> (0) : __assert_fail ("N->getOpcode() == ISD::AND && \"Unexpected opcode!\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 21553, __PRETTY_FUNCTION__));
21554
21555	EVT VT = N->getValueType(0);
21556	SDValue LHS = N->getOperand(0);
21557	SDValue RHS = peekThroughBitcasts(N->getOperand(1));
21558	SDLoc DL(N);
21559
21560	// Make sure we're not running after operation legalization where it
21561	// may have custom lowered the vector shuffles.
21562	if (LegalOperations)
21563	return SDValue();
21564
21565	if (RHS.getOpcode() != ISD::BUILD_VECTOR)
21566	return SDValue();
21567
21568	EVT RVT = RHS.getValueType();
21569	unsigned NumElts = RHS.getNumOperands();
21570
21571	// Attempt to create a valid clear mask, splitting the mask into
21572	// sub elements and checking to see if each is
21573	// all zeros or all ones - suitable for shuffle masking.
// Split is the number of sub-elements each mask element is divided into.
// The lambda yields an empty SDValue if an element is neither undef nor a
// constant, if a sub-element is neither all-ones nor zero, or if the target
// rejects the resulting mask.
21574	auto BuildClearMask = [&](int Split) {
21575	int NumSubElts = NumElts * Split;
21576	int NumSubBits = RVT.getScalarSizeInBits() / Split;
21577
// Shuffle indices: i keeps sub-element i of LHS, i + NumSubElts takes the
// matching lane of the zero vector (the shuffle's second operand).
21578	SmallVector<int, 8> Indices;
21579	for (int i = 0; i != NumSubElts; ++i) {
21580	int EltIdx = i / Split;
21581	int SubIdx = i % Split;
21582	SDValue Elt = RHS.getOperand(EltIdx);
21583	// X & undef --> 0 (not undef). So this lane must be converted to choose
21584	// from the zero constant vector (same as if the element had all 0-bits).
21585	if (Elt.isUndef()) {
21586	Indices.push_back(i + NumSubElts);
21587	continue;
21588	}
21589
// Fetch the element's raw bit pattern; FP constants are reinterpreted as
// integers.
21590	APInt Bits;
21591	if (isa<ConstantSDNode>(Elt))
21592	Bits = cast<ConstantSDNode>(Elt)->getAPIntValue();
21593	else if (isa<ConstantFPSDNode>(Elt))
21594	Bits = cast<ConstantFPSDNode>(Elt)->getValueAPF().bitcastToAPInt();
21595	else
21596	return SDValue();
21597
21598	// Extract the sub element from the constant bit mask.
// For big-endian data layouts the bit offset is counted from the opposite
// end of the element, so sub-element 0 comes from the highest bits.
21599	if (DAG.getDataLayout().isBigEndian())
21600	Bits = Bits.extractBits(NumSubBits, (Split - SubIdx - 1) * NumSubBits);
21601	else
21602	Bits = Bits.extractBits(NumSubBits, SubIdx * NumSubBits);
21603
21604	if (Bits.isAllOnesValue())
21605	Indices.push_back(i);
21606	else if (Bits == 0)
21607	Indices.push_back(i + NumSubElts);
21608	else
21609	return SDValue();
21610	}
21611
21612	// Let's see if the target supports this vector_shuffle.
21613	EVT ClearSVT = EVT::getIntegerVT(*DAG.getContext(), NumSubBits);
21614	EVT ClearVT = EVT::getVectorVT(*DAG.getContext(), ClearSVT, NumSubElts);
21615	if (!TLI.isVectorClearMaskLegal(Indices, ClearVT))
21616	return SDValue();
21617
// Shuffle in the integer ClearVT domain, then cast back to the AND's type.
21618	SDValue Zero = DAG.getConstant(0, DL, ClearVT);
21619	return DAG.getBitcast(VT, DAG.getVectorShuffle(ClearVT, DL,
21620	DAG.getBitcast(ClearVT, LHS),
21621	Zero, Indices));
21622	};
21623
21624	// Determine maximum split level (byte level masking).
// Element sizes that are not a multiple of 8 bits are only tried unsplit.
21625	int MaxSplit = 1;
21626	if (RVT.getScalarSizeInBits() % 8 == 0)
21627	MaxSplit = RVT.getScalarSizeInBits() / 8;
21628
// Try the coarsest split first and return the first mask that works; split
// counts that do not divide the element size evenly are skipped.
21629	for (int Split = 1; Split <= MaxSplit; ++Split)
21630	if (RVT.getScalarSizeInBits() % Split == 0)
21631	if (SDValue S = BuildClearMask(Split))
21632	return S;
21633
21634	return SDValue();
21635	}
21636
21637	/// If a vector binop is performed on splat values, it may be profitable to
21638	/// extract, scalarize, and insert/splat.
21639	static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG) {
21640	SDValue N0 = N->getOperand(0);
21641	SDValue N1 = N->getOperand(1);
21642	unsigned Opcode = N->getOpcode();
21643	EVT VT = N->getValueType(0);
21644	EVT EltVT = VT.getVectorElementType();
21645	const TargetLowering &TLI = DAG.getTargetLoweringInfo();
21646
21647	// TODO: Remove/replace the extract cost check? If the elements are available
21648	// as scalars, then there may be no extract cost. Should we ask if
21649	// inserting a scalar back into a vector is cheap instead?
21650	int Index0, Index1;
21651	SDValue Src0 = DAG.getSplatSourceVector(N0, Index0);
21652	SDValue Src1 = DAG.getSplatSourceVector(N1, Index1);
21653	if (!Src0 \|\| !Src1 \|\| Index0 != Index1 \|\|
21654	Src0.getValueType().getVectorElementType() != EltVT \|\|
21655	Src1.getValueType().getVectorElementType() != EltVT \|\|
21656	!TLI.isExtractVecEltCheap(VT, Index0) \|\|
21657	!TLI.isOperationLegalOrCustom(Opcode, EltVT))
21658	return SDValue();
21659
21660	SDLoc DL(N);
21661	SDValue IndexC = DAG.getVectorIdxConstant(Index0, DL);
21662	SDValue X = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Src0, IndexC);
21663	SDValue Y = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Src1, IndexC);
21664	SDValue ScalarBO = DAG.getNode(Opcode, DL, EltVT, X, Y, N->getFlags());
21665
21666	// If all lanes but 1 are undefined, no need to splat the scalar result.
21667	// TODO: Keep track of undefs and use that info in the general case.
21668	if (N0.getOpcode() == ISD::BUILD_VECTOR && N0.getOpcode() == N1.getOpcode() &&
21669	count_if(N0->ops(), [](SDValue V) { return !V.isUndef(); }) == 1 &&
21670	count_if(N1->ops(), [](SDValue V) { return !V.isUndef(); }) == 1) {
21671	// bo (build_vec ..undef, X, undef...), (build_vec ..undef, Y, undef...) -->
21672	// build_vec ..undef, (bo X, Y), undef...
21673	SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), DAG.getUNDEF(EltVT));
21674	Ops[Index0] = ScalarBO;
21675	return DAG.getBuildVector(VT, DL, Ops);
21676	}
21677
21678	// bo (splat X, Index), (splat Y, Index) --> splat (bo X, Y), Index
21679	SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), ScalarBO);
21680	return DAG.getBuildVector(VT, DL, Ops);
21681	}
21682
21683	/// Visit a binary vector operation, like ADD.
///
/// Tries the folds below in order and returns the replacement of the first
/// one that applies, or an empty SDValue if none does:
///  1. constant folding through DAG.FoldConstantVectorArithmetic;
///  2. moving identical unary shuffles, or a splat shuffle paired with a
///     constant splat, to after the binop (skipped for div/rem opcodes);
///  3. narrowing a binop of two insert_subvector-into-undef nodes;
///  4. narrowing a binop of two concat_vectors whose trailing operands are
///     undef or constant build vectors;
///  5. scalarizeBinOpOfSplats.
21684	SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
21685	assert(N->getValueType(0).isVector() &&((N->getValueType(0).isVector() && "SimplifyVBinOp only works on vectors!" ) ? static_cast<void> (0) : __assert_fail ("N->getValueType(0).isVector() && \"SimplifyVBinOp only works on vectors!\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 21686, __PRETTY_FUNCTION__))
21686	"SimplifyVBinOp only works on vectors!")((N->getValueType(0).isVector() && "SimplifyVBinOp only works on vectors!" ) ? static_cast<void> (0) : __assert_fail ("N->getValueType(0).isVector() && \"SimplifyVBinOp only works on vectors!\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 21686, __PRETTY_FUNCTION__));
21687
21688	SDValue LHS = N->getOperand(0);
21689	SDValue RHS = N->getOperand(1);
21690	SDValue Ops[] = {LHS, RHS};
21691	EVT VT = N->getValueType(0);
21692	unsigned Opcode = N->getOpcode();
21693	SDNodeFlags Flags = N->getFlags();
21694
21695	// See if we can constant fold the vector operation.
21696	if (SDValue Fold = DAG.FoldConstantVectorArithmetic(
21697	Opcode, SDLoc(LHS), LHS.getValueType(), Ops, N->getFlags()))
21698	return Fold;
21699
21700	// Move unary shuffles with identical masks after a vector binop:
21701	// VBinOp (shuffle A, Undef, Mask), (shuffle B, Undef, Mask))
21702	// --> shuffle (VBinOp A, B), Undef, Mask
21703	// This does not require type legality checks because we are creating the
21704	// same types of operations that are in the original sequence. We do have to
21705	// restrict ops like integer div that have immediate UB (eg, div-by-zero)
21706	// though. This code is adapted from the identical transform in instcombine.
21707	if (Opcode != ISD::UDIV && Opcode != ISD::SDIV &&
21708	Opcode != ISD::UREM && Opcode != ISD::SREM &&
21709	Opcode != ISD::UDIVREM && Opcode != ISD::SDIVREM) {
// Either pointer is null when the corresponding operand is not a shuffle.
21710	auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(LHS);
21711	auto *Shuf1 = dyn_cast<ShuffleVectorSDNode>(RHS);
// At least one shuffle must have a single use, unless both operands are the
// very same node.
21712	if (Shuf0 && Shuf1 && Shuf0->getMask().equals(Shuf1->getMask()) &&
21713	LHS.getOperand(1).isUndef() && RHS.getOperand(1).isUndef() &&
21714	(LHS.hasOneUse() \|\| RHS.hasOneUse() \|\| LHS == RHS)) {
21715	SDLoc DL(N);
21716	SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS.getOperand(0),
21717	RHS.getOperand(0), Flags);
21718	SDValue UndefV = LHS.getOperand(1);
21719	return DAG.getVectorShuffle(VT, DL, NewBinOp, UndefV, Shuf0->getMask());
21720	}
21721
21722	// Try to sink a splat shuffle after a binop with a uniform constant.
21723	// This is limited to cases where neither the shuffle nor the constant have
21724	// undefined elements because that could be poison-unsafe or inhibit
21725	// demanded elements analysis. It is further limited to not change a splat
21726	// of an inserted scalar because that may be optimized better by
21727	// load-folding or other target-specific behaviors.
21728	if (isConstOrConstSplat(RHS) && Shuf0 && is_splat(Shuf0->getMask()) &&
21729	Shuf0->hasOneUse() && Shuf0->getOperand(1).isUndef() &&
21730	Shuf0->getOperand(0).getOpcode() != ISD::INSERT_VECTOR_ELT) {
21731	// binop (splat X), (splat C) --> splat (binop X, C)
21732	SDLoc DL(N);
21733	SDValue X = Shuf0->getOperand(0);
21734	SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, X, RHS, Flags);
21735	return DAG.getVectorShuffle(VT, DL, NewBinOp, DAG.getUNDEF(VT),
21736	Shuf0->getMask());
21737	}
// Mirror image of the fold above, with the constant on the left-hand side.
21738	if (isConstOrConstSplat(LHS) && Shuf1 && is_splat(Shuf1->getMask()) &&
21739	Shuf1->hasOneUse() && Shuf1->getOperand(1).isUndef() &&
21740	Shuf1->getOperand(0).getOpcode() != ISD::INSERT_VECTOR_ELT) {
21741	// binop (splat C), (splat X) --> splat (binop C, X)
21742	SDLoc DL(N);
21743	SDValue X = Shuf1->getOperand(0);
21744	SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS, X, Flags);
21745	return DAG.getVectorShuffle(VT, DL, NewBinOp, DAG.getUNDEF(VT),
21746	Shuf1->getMask());
21747	}
21748	}
21749
21750	// The following pattern is likely to emerge with vector reduction ops. Moving
21751	// the binary operation ahead of insertion may allow using a narrower vector
21752	// instruction that has better performance than the wide version of the op:
21753	// VBinOp (ins undef, X, Z), (ins undef, Y, Z) --> ins VecC, (VBinOp X, Y), Z
21754	if (LHS.getOpcode() == ISD::INSERT_SUBVECTOR && LHS.getOperand(0).isUndef() &&
21755	RHS.getOpcode() == ISD::INSERT_SUBVECTOR && RHS.getOperand(0).isUndef() &&
21756	LHS.getOperand(2) == RHS.getOperand(2) &&
21757	(LHS.hasOneUse() \|\| RHS.hasOneUse())) {
21758	SDValue X = LHS.getOperand(1);
21759	SDValue Y = RHS.getOperand(1);
21760	SDValue Z = LHS.getOperand(2);
21761	EVT NarrowVT = X.getValueType();
21762	if (NarrowVT == Y.getValueType() &&
21763	TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT,
21764	LegalOperations)) {
21765	// (binop undef, undef) may not return undef, so compute that result.
// NOTE(review): VecC and NarrowBO are created without Flags, unlike the
// shuffle folds earlier in this function - confirm that is intentional.
21766	SDLoc DL(N);
21767	SDValue VecC =
21768	DAG.getNode(Opcode, DL, VT, DAG.getUNDEF(VT), DAG.getUNDEF(VT));
21769	SDValue NarrowBO = DAG.getNode(Opcode, DL, NarrowVT, X, Y);
21770	return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, VecC, NarrowBO, Z);
21771	}
21772	}
21773
21774	// Make sure all but the first op are undef or constant.
21775	auto ConcatWithConstantOrUndef = [](SDValue Concat) {
21776	return Concat.getOpcode() == ISD::CONCAT_VECTORS &&
21777	all_of(drop_begin(Concat->ops()), [](const SDValue &Op) {
21778	return Op.isUndef() \|\|
21779	ISD::isBuildVectorOfConstantSDNodes(Op.getNode());
21780	});
21781	};
21782
21783	// The following pattern is likely to emerge with vector reduction ops. Moving
21784	// the binary operation ahead of the concat may allow using a narrower vector
21785	// instruction that has better performance than the wide version of the op:
21786	// VBinOp (concat X, undef/constant), (concat Y, undef/constant) -->
21787	// concat (VBinOp X, Y), VecC
21788	if (ConcatWithConstantOrUndef(LHS) && ConcatWithConstantOrUndef(RHS) &&
21789	(LHS.hasOneUse() \|\| RHS.hasOneUse())) {
21790	EVT NarrowVT = LHS.getOperand(0).getValueType();
// NOTE(review): this legality query does not pass LegalOperations, whereas
// the insert_subvector fold earlier in this function does - confirm intended.
21791	if (NarrowVT == RHS.getOperand(0).getValueType() &&
21792	TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT)) {
21793	SDLoc DL(N);
21794	unsigned NumOperands = LHS.getNumOperands();
21795	SmallVector<SDValue, 4> ConcatOps;
21796	for (unsigned i = 0; i != NumOperands; ++i) {
21797	// This constant folds for operands 1 and up.
21798	ConcatOps.push_back(DAG.getNode(Opcode, DL, NarrowVT, LHS.getOperand(i),
21799	RHS.getOperand(i)));
21800	}
21801
21802	return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
21803	}
21804	}
21805
// Last resort: binop of two splats, handled by scalarizeBinOpOfSplats.
21806	if (SDValue V = scalarizeBinOpOfSplats(N, DAG))
21807	return V;
21808
21809	return SDValue();
21810	}
21811
21812	SDValue DAGCombiner::SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1,
21813	SDValue N2) {
21814	assert(N0.getOpcode() ==ISD::SETCC && "First argument must be a SetCC node!")((N0.getOpcode() ==ISD::SETCC && "First argument must be a SetCC node!" ) ? static_cast<void> (0) : __assert_fail ("N0.getOpcode() ==ISD::SETCC && \"First argument must be a SetCC node!\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 21814, __PRETTY_FUNCTION__));
21815
21816	SDValue SCC = SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), N1, N2,
21817	cast<CondCodeSDNode>(N0.getOperand(2))->get());
21818
21819	// If we got a simplified select_cc node back from SimplifySelectCC, then
21820	// break it down into a new SETCC node, and a new SELECT node, and then return
21821	// the SELECT node, since we were called with a SELECT node.
21822	if (SCC.getNode()) {
21823	// Check to see if we got a select_cc back (to turn into setcc/select).
21824	// Otherwise, just return whatever node we got back, like fabs.
21825	if (SCC.getOpcode() == ISD::SELECT_CC) {
21826	const SDNodeFlags Flags = N0.getNode()->getFlags();
21827	SDValue SETCC = DAG.getNode(ISD::SETCC, SDLoc(N0),
21828	N0.getValueType(),
21829	SCC.getOperand(0), SCC.getOperand(1),
21830	SCC.getOperand(4), Flags);
21831	AddToWorklist(SETCC.getNode());
21832	SDValue SelectNode = DAG.getSelect(SDLoc(SCC), SCC.getValueType(), SETCC,
21833	SCC.getOperand(2), SCC.getOperand(3));
21834	SelectNode->setFlags(Flags);
21835	return SelectNode;
21836	}
21837
21838	return SCC;
21839	}
21840	return SDValue();
21841	}
21842
21843	/// Given a SELECT or a SELECT_CC node, where LHS and RHS are the two values
21844	/// being selected between, see if we can simplify the select. Callers of this
21845	/// should assume that TheSelect is deleted if this returns true. As such, they
21846	/// should return the appropriate thing (e.g. the node) back to the top-level of
21847	/// the DAG combiner loop to avoid it being looked at.
///
/// Two simplifications are attempted: dropping a select that guards an fsqrt
/// against negative input with a NaN constant, and turning a select of two
/// loads into a single load from a selected address.
/// \returns true if TheSelect was replaced through CombineTo, false if the
/// DAG users of TheSelect were left untouched.
21848	bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
21849	SDValue RHS) {
21850	// fold (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
21851	// The select + setcc is redundant, because fsqrt returns NaN for X < 0.
21852	if (const ConstantFPSDNode *NaN = isConstOrConstSplatFP(LHS)) {
21853	if (NaN->isNaN() && RHS.getOpcode() == ISD::FSQRT) {
21854	// We have: (select (setcc ?, ?, ?), NaN, (fsqrt ?))
21855	SDValue Sqrt = RHS;
// CC and CmpLHS are only assigned on the paths that also assign Zero; the
// check further down tests Zero first, so CC is never read uninitialized.
21856	ISD::CondCode CC;
21857	SDValue CmpLHS;
21858	const ConstantFPSDNode *Zero = nullptr;
21859
21860	if (TheSelect->getOpcode() == ISD::SELECT_CC) {
21861	CC = cast<CondCodeSDNode>(TheSelect->getOperand(4))->get();
21862	CmpLHS = TheSelect->getOperand(0);
21863	Zero = isConstOrConstSplatFP(TheSelect->getOperand(1));
21864	} else {
21865	// SELECT or VSELECT
21866	SDValue Cmp = TheSelect->getOperand(0);
21867	if (Cmp.getOpcode() == ISD::SETCC) {
21868	CC = cast<CondCodeSDNode>(Cmp.getOperand(2))->get();
21869	CmpLHS = Cmp.getOperand(0);
21870	Zero = isConstOrConstSplatFP(Cmp.getOperand(1));
21871	}
21872	}
21873	if (Zero && Zero->isZero() &&
21874	Sqrt.getOperand(0) == CmpLHS && (CC == ISD::SETOLT \|\|
21875	CC == ISD::SETULT \|\| CC == ISD::SETLT)) {
21876	// We have: (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
21877	CombineTo(TheSelect, Sqrt);
21878	return true;
21879	}
21880	}
21881	}
21882	// Cannot simplify select with vector condition
21883	if (TheSelect->getOperand(0).getValueType().isVector()) return false;
21884
21885	// If this is a select from two identical things, try to pull the operation
21886	// through the select.
21887	if (LHS.getOpcode() != RHS.getOpcode() \|\|
21888	!LHS.hasOneUse() \|\| !RHS.hasOneUse())
21889	return false;
21890
21891	// If this is a load and the token chain is identical, replace the select
21892	// of two loads with a load through a select of the address to load from.
21893	// This triggers in things like "select bool X, 10.0, 123.0" after the FP
21894	// constants have been dropped into the constant pool.
21895	if (LHS.getOpcode() == ISD::LOAD) {
// RHS has the same opcode as LHS (checked above), so both casts are valid.
21896	LoadSDNode *LLD = cast<LoadSDNode>(LHS);
21897	LoadSDNode *RLD = cast<LoadSDNode>(RHS);
21898
21899	// Token chains must be identical.
21900	if (LHS.getOperand(0) != RHS.getOperand(0) \|\|
21901	// Do not let this transformation reduce the number of volatile loads.
21902	// Be conservative for atomics for the moment
21903	// TODO: This does appear to be legal for unordered atomics (see D66309)
21904	!LLD->isSimple() \|\| !RLD->isSimple() \|\|
21905	// FIXME: If either is a pre/post inc/dec load,
21906	// we'd need to split out the address adjustment.
21907	LLD->isIndexed() \|\| RLD->isIndexed() \|\|
21908	// If this is an EXTLOAD, the VT's must match.
21909	LLD->getMemoryVT() != RLD->getMemoryVT() \|\|
21910	// If this is an EXTLOAD, the kind of extension must match.
21911	(LLD->getExtensionType() != RLD->getExtensionType() &&
21912	// The only exception is if one of the extensions is anyext.
21913	LLD->getExtensionType() != ISD::EXTLOAD &&
21914	RLD->getExtensionType() != ISD::EXTLOAD) \|\|
21915	// FIXME: this discards src value information. This is
21916	// over-conservative. It would be beneficial to be able to remember
21917	// both potential memory locations. Since we are discarding
21918	// src value info, don't do the transformation if the memory
21919	// locations are not in the default address space.
21920	LLD->getPointerInfo().getAddrSpace() != 0 \|\|
21921	RLD->getPointerInfo().getAddrSpace() != 0 \|\|
21922	// We can't produce a CMOV of a TargetFrameIndex since we won't
21923	// generate the address generation required.
21924	LLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex \|\|
21925	RLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex \|\|
21926	!TLI.isOperationLegalOrCustom(TheSelect->getOpcode(),
21927	LLD->getBasePtr().getValueType()))
21928	return false;
21929
21930	// The loads must not depend on one another.
21931	if (LLD->isPredecessorOf(RLD) \|\| RLD->isPredecessorOf(LLD))
21932	return false;
21933
21934	// Check that the select condition doesn't reach either load. If so,
21935	// folding this will induce a cycle into the DAG. If not, this is safe to
21936	// xform, so create a select of the addresses.
21937
// Visited and Worklist are shared by all hasPredecessorHelper calls below.
21938	SmallPtrSet<const SDNode *, 32> Visited;
21939	SmallVector<const SDNode *, 16> Worklist;
21940
21941	// Always fail if LLD and RLD are not independent. TheSelect is a
21942	// predecessor to all Nodes in question so we need not search past it.
21943
21944	Visited.insert(TheSelect);
21945	Worklist.push_back(LLD);
21946	Worklist.push_back(RLD);
21947
21948	if (SDNode::hasPredecessorHelper(LLD, Visited, Worklist) \|\|
21949	SDNode::hasPredecessorHelper(RLD, Visited, Worklist))
21950	return false;
21951
// Addr becomes a SELECT or SELECT_CC over the two base pointers, mirroring
// the opcode of TheSelect.
21952	SDValue Addr;
21953	if (TheSelect->getOpcode() == ISD::SELECT) {
21954	// We cannot do this optimization if any pair of {RLD, LLD} is a
21955	// predecessor to {RLD, LLD, CondNode}. As we've already compared the
21956	// Loads, we only need to check if CondNode is a successor to one of the
21957	// loads. We can further avoid this if there's no use of their chain
21958	// value.
21959	SDNode *CondNode = TheSelect->getOperand(0).getNode();
21960	Worklist.push_back(CondNode);
21961
21962	if ((LLD->hasAnyUseOfValue(1) &&
21963	SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) \|\|
21964	(RLD->hasAnyUseOfValue(1) &&
21965	SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
21966	return false;
21967
21968	Addr = DAG.getSelect(SDLoc(TheSelect),
21969	LLD->getBasePtr().getValueType(),
21970	TheSelect->getOperand(0), LLD->getBasePtr(),
21971	RLD->getBasePtr());
21972	} else { // Otherwise SELECT_CC
21973	// We cannot do this optimization if any pair of {RLD, LLD} is a
21974	// predecessor to {RLD, LLD, CondLHS, CondRHS}. As we've already compared
21975	// the Loads, we only need to check if CondLHS/CondRHS is a successor to
21976	// one of the loads. We can further avoid this if there's no use of their
21977	// chain value.
21978
21979	SDNode *CondLHS = TheSelect->getOperand(0).getNode();
21980	SDNode *CondRHS = TheSelect->getOperand(1).getNode();
21981	Worklist.push_back(CondLHS);
21982	Worklist.push_back(CondRHS);
21983
21984	if ((LLD->hasAnyUseOfValue(1) &&
21985	SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) \|\|
21986	(RLD->hasAnyUseOfValue(1) &&
21987	SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
21988	return false;
21989
21990	Addr = DAG.getNode(ISD::SELECT_CC, SDLoc(TheSelect),
21991	LLD->getBasePtr().getValueType(),
21992	TheSelect->getOperand(0),
21993	TheSelect->getOperand(1),
21994	LLD->getBasePtr(), RLD->getBasePtr(),
21995	TheSelect->getOperand(4));
21996	}
21997
21998	SDValue Load;
21999	// It is safe to replace the two loads if they have different alignments,
22000	// but the new load must be the minimum (most restrictive) alignment of the
22001	// inputs.
22002	Align Alignment = std::min(LLD->getAlign(), RLD->getAlign());
// Start from the left load's memory-operand flags and keep the invariant and
// dereferenceable bits only if the right load has them as well.
22003	MachineMemOperand::Flags MMOFlags = LLD->getMemOperand()->getFlags();
22004	if (!RLD->isInvariant())
22005	MMOFlags &= ~MachineMemOperand::MOInvariant;
22006	if (!RLD->isDereferenceable())
22007	MMOFlags &= ~MachineMemOperand::MODereferenceable;
22008	if (LLD->getExtensionType() == ISD::NON_EXTLOAD) {
22009	// FIXME: Discards pointer and AA info.
22010	Load = DAG.getLoad(TheSelect->getValueType(0), SDLoc(TheSelect),
22011	LLD->getChain(), Addr, MachinePointerInfo(), Alignment,
22012	MMOFlags);
22013	} else {
22014	// FIXME: Discards pointer and AA info.
// When the left load is an any-extend, use the right load's extension kind;
// otherwise use the left load's.
22015	Load = DAG.getExtLoad(
22016	LLD->getExtensionType() == ISD::EXTLOAD ? RLD->getExtensionType()
22017	: LLD->getExtensionType(),
22018	SDLoc(TheSelect), TheSelect->getValueType(0), LLD->getChain(), Addr,
22019	MachinePointerInfo(), LLD->getMemoryVT(), Alignment, MMOFlags);
22020	}
22021
22022	// Users of the select now use the result of the load.
22023	CombineTo(TheSelect, Load);
22024
22025	// Users of the old loads now use the new load's chain. We know the
22026	// old-load value is dead now.
22027	CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1));
22028	CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1));
22029	return true;
22030	}
22031
22032	return false;
22033	}
22034
22035	/// Try to fold an expression of the form (N0 cond N1) ? N2 : N3 to a shift and
22036	/// bitwise 'and'.
22037	SDValue DAGCombiner::foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0,
22038	SDValue N1, SDValue N2, SDValue N3,
22039	ISD::CondCode CC) {
22040	// If this is a select where the false operand is zero and the compare is a
22041	// check of the sign bit, see if we can perform the "gzip trick":
22042	// select_cc setlt X, 0, A, 0 -> and (sra X, size(X)-1), A
22043	// select_cc setgt X, 0, A, 0 -> and (not (sra X, size(X)-1)), A
22044	EVT XType = N0.getValueType();
22045	EVT AType = N2.getValueType();
22046	if (!isNullConstant(N3) \|\| !XType.bitsGE(AType))
22047	return SDValue();
22048
22049	// If the comparison is testing for a positive value, we have to invert
22050	// the sign bit mask, so only do that transform if the target has a bitwise
22051	// 'and not' instruction (the invert is free).
22052	if (CC == ISD::SETGT && TLI.hasAndNot(N2)) {
22053	// (X > -1) ? A : 0
22054	// (X > 0) ? X : 0 <-- This is canonical signed max.
22055	if (!(isAllOnesConstant(N1) \|\| (isNullConstant(N1) && N0 == N2)))
22056	return SDValue();
22057	} else if (CC == ISD::SETLT) {
22058	// (X < 0) ? A : 0
22059	// (X < 1) ? X : 0 <-- This is un-canonicalized signed min.
22060	if (!(isNullConstant(N1) \|\| (isOneConstant(N1) && N0 == N2)))
22061	return SDValue();
22062	} else {
22063	return SDValue();
22064	}
22065
22066	// and (sra X, size(X)-1), A -> "and (srl X, C2), A" iff A is a single-bit
22067	// constant.
22068	EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
22069	auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
22070	if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue() - 1)) == 0)) {
22071	unsigned ShCt = XType.getSizeInBits() - N2C->getAPIntValue().logBase2() - 1;
22072	if (!TLI.shouldAvoidTransformToShift(XType, ShCt)) {
22073	SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy);
22074	SDValue Shift = DAG.getNode(ISD::SRL, DL, XType, N0, ShiftAmt);
22075	AddToWorklist(Shift.getNode());
22076
22077	if (XType.bitsGT(AType)) {
22078	Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
22079	AddToWorklist(Shift.getNode());
22080	}
22081
22082	if (CC == ISD::SETGT)
22083	Shift = DAG.getNOT(DL, Shift, AType);
22084
22085	return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
22086	}
22087	}
22088
22089	unsigned ShCt = XType.getSizeInBits() - 1;
22090	if (TLI.shouldAvoidTransformToShift(XType, ShCt))
22091	return SDValue();
22092
22093	SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy);
22094	SDValue Shift = DAG.getNode(ISD::SRA, DL, XType, N0, ShiftAmt);
22095	AddToWorklist(Shift.getNode());
22096
22097	if (XType.bitsGT(AType)) {
22098	Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
22099	AddToWorklist(Shift.getNode());
22100	}
22101
22102	if (CC == ISD::SETGT)
22103	Shift = DAG.getNOT(DL, Shift, AType);
22104
22105	return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
22106	}
22107
22108	// Transform (fneg/fabs (bitconvert x)) to avoid loading constant pool values.
22109	SDValue DAGCombiner::foldSignChangeInBitcast(SDNode *N) {
22110	SDValue N0 = N->getOperand(0);
22111	EVT VT = N->getValueType(0);
22112	bool IsFabs = N->getOpcode() == ISD::FABS;
22113	bool IsFree = IsFabs ? TLI.isFAbsFree(VT) : TLI.isFNegFree(VT);
22114
22115	if (IsFree \|\| N0.getOpcode() != ISD::BITCAST \|\| !N0.hasOneUse())
22116	return SDValue();
22117
22118	SDValue Int = N0.getOperand(0);
22119	EVT IntVT = Int.getValueType();
22120
22121	// The operand to cast should be integer.
22122	if (!IntVT.isInteger() \|\| IntVT.isVector())
22123	return SDValue();
22124
22125	// (fneg (bitconvert x)) -> (bitconvert (xor x sign))
22126	// (fabs (bitconvert x)) -> (bitconvert (and x ~sign))
22127	APInt SignMask;
22128	if (N0.getValueType().isVector()) {
22129	// For vector, create a sign mask (0x80...) or its inverse (for fabs,
22130	// 0x7f...) per element and splat it.
22131	SignMask = APInt::getSignMask(N0.getScalarValueSizeInBits());
22132	if (IsFabs)
22133	SignMask = ~SignMask;
22134	SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
22135	} else {
22136	// For scalar, just use the sign mask (0x80... or the inverse, 0x7f...)
22137	SignMask = APInt::getSignMask(IntVT.getSizeInBits());
22138	if (IsFabs)
22139	SignMask = ~SignMask;
22140	}
22141	SDLoc DL(N0);
22142	Int = DAG.getNode(IsFabs ? ISD::AND : ISD::XOR, DL, IntVT, Int,
22143	DAG.getConstant(SignMask, DL, IntVT));
22144	AddToWorklist(Int.getNode());
22145	return DAG.getBitcast(VT, Int);
22146	}
22147
22148	/// Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4)"
22149	/// where "tmp" is a constant pool entry containing an array with 1.0 and 2.0
22150	/// in it. This may be a win when the constant is not otherwise available
22151	/// because it replaces two constant pool loads with one.
22152	SDValue DAGCombiner::convertSelectOfFPConstantsToLoadOffset(
22153	const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
22154	ISD::CondCode CC) {
22155	if (!TLI.reduceSelectOfFPConstantLoads(N0.getValueType()))
22156	return SDValue();
22157
22158	// If we are before legalize types, we want the other legalization to happen
22159	// first (for example, to avoid messing with soft float).
22160	auto *TV = dyn_cast<ConstantFPSDNode>(N2);
22161	auto *FV = dyn_cast<ConstantFPSDNode>(N3);
22162	EVT VT = N2.getValueType();
22163	if (!TV \|\| !FV \|\| !TLI.isTypeLegal(VT))
22164	return SDValue();
22165
22166	// If a constant can be materialized without loads, this does not make sense.
22167	if (TLI.getOperationAction(ISD::ConstantFP, VT) == TargetLowering::Legal \|\|
22168	TLI.isFPImmLegal(TV->getValueAPF(), TV->getValueType(0), ForCodeSize) \|\|
22169	TLI.isFPImmLegal(FV->getValueAPF(), FV->getValueType(0), ForCodeSize))
22170	return SDValue();
22171
22172	// If both constants have multiple uses, then we won't need to do an extra
22173	// load. The values are likely around in registers for other users.
22174	if (!TV->hasOneUse() && !FV->hasOneUse())
22175	return SDValue();
22176
22177	Constant Elts[] = { const_cast<ConstantFP>(FV->getConstantFPValue()),
22178	const_cast<ConstantFP*>(TV->getConstantFPValue()) };
22179	Type *FPTy = Elts[0]->getType();
22180	const DataLayout &TD = DAG.getDataLayout();
22181
22182	// Create a ConstantArray of the two constants.
22183	Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts);
22184	SDValue CPIdx = DAG.getConstantPool(CA, TLI.getPointerTy(DAG.getDataLayout()),
22185	TD.getPrefTypeAlign(FPTy));
22186	Align Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlign();
22187
22188	// Get offsets to the 0 and 1 elements of the array, so we can select between
22189	// them.
22190	SDValue Zero = DAG.getIntPtrConstant(0, DL);
22191	unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType());
22192	SDValue One = DAG.getIntPtrConstant(EltSize, SDLoc(FV));
22193	SDValue Cond =
22194	DAG.getSetCC(DL, getSetCCResultType(N0.getValueType()), N0, N1, CC);
22195	AddToWorklist(Cond.getNode());
22196	SDValue CstOffset = DAG.getSelect(DL, Zero.getValueType(), Cond, One, Zero);
22197	AddToWorklist(CstOffset.getNode());
22198	CPIdx = DAG.getNode(ISD::ADD, DL, CPIdx.getValueType(), CPIdx, CstOffset);
22199	AddToWorklist(CPIdx.getNode());
22200	return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
22201	MachinePointerInfo::getConstantPool(
22202	DAG.getMachineFunction()), Alignment);
22203	}
22204
22205	/// Simplify an expression of the form (N0 cond N1) ? N2 : N3
22206	/// where 'cond' is the comparison specified by CC.
22207	SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
22208	SDValue N2, SDValue N3, ISD::CondCode CC,
22209	bool NotExtCompare) {
22210	// (x ? y : y) -> y.
22211	if (N2 == N3) return N2;
22212
22213	EVT CmpOpVT = N0.getValueType();
22214	EVT CmpResVT = getSetCCResultType(CmpOpVT);
22215	EVT VT = N2.getValueType();
22216	auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
22217	auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
22218	auto *N3C = dyn_cast<ConstantSDNode>(N3.getNode());
22219
22220	// Determine if the condition we're dealing with is constant.
22221	if (SDValue SCC = DAG.FoldSetCC(CmpResVT, N0, N1, CC, DL)) {
22222	AddToWorklist(SCC.getNode());
22223	if (auto *SCCC = dyn_cast<ConstantSDNode>(SCC)) {
22224	// fold select_cc true, x, y -> x
22225	// fold select_cc false, x, y -> y
22226	return !(SCCC->isNullValue()) ? N2 : N3;
22227	}
22228	}
22229
22230	if (SDValue V =
22231	convertSelectOfFPConstantsToLoadOffset(DL, N0, N1, N2, N3, CC))
22232	return V;
22233
22234	if (SDValue V = foldSelectCCToShiftAnd(DL, N0, N1, N2, N3, CC))
22235	return V;
22236
22237	// fold (select_cc seteq (and x, y), 0, 0, A) -> (and (shr (shl x)) A)
22238	// where y is has a single bit set.
22239	// A plaintext description would be, we can turn the SELECT_CC into an AND
22240	// when the condition can be materialized as an all-ones register. Any
22241	// single bit-test can be materialized as an all-ones register with
22242	// shift-left and shift-right-arith.
22243	if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND &&
22244	N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2)) {
22245	SDValue AndLHS = N0->getOperand(0);
22246	auto *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1));
22247	if (ConstAndRHS && ConstAndRHS->getAPIntValue().countPopulation() == 1) {
22248	// Shift the tested bit over the sign bit.
22249	const APInt &AndMask = ConstAndRHS->getAPIntValue();
22250	unsigned ShCt = AndMask.getBitWidth() - 1;
22251	if (!TLI.shouldAvoidTransformToShift(VT, ShCt)) {
22252	SDValue ShlAmt =
22253	DAG.getConstant(AndMask.countLeadingZeros(), SDLoc(AndLHS),
22254	getShiftAmountTy(AndLHS.getValueType()));
22255	SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N0), VT, AndLHS, ShlAmt);
22256
22257	// Now arithmetic right shift it all the way over, so the result is
22258	// either all-ones, or zero.
22259	SDValue ShrAmt =
22260	DAG.getConstant(ShCt, SDLoc(Shl),
22261	getShiftAmountTy(Shl.getValueType()));
22262	SDValue Shr = DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, ShrAmt);
22263
22264	return DAG.getNode(ISD::AND, DL, VT, Shr, N3);
22265	}
22266	}
22267	}
22268
22269	// fold select C, 16, 0 -> shl C, 4
22270	bool Fold = N2C && isNullConstant(N3) && N2C->getAPIntValue().isPowerOf2();
22271	bool Swap = N3C && isNullConstant(N2) && N3C->getAPIntValue().isPowerOf2();
22272
22273	if ((Fold \|\| Swap) &&
22274	TLI.getBooleanContents(CmpOpVT) ==
22275	TargetLowering::ZeroOrOneBooleanContent &&
22276	(!LegalOperations \|\| TLI.isOperationLegal(ISD::SETCC, CmpOpVT))) {
22277
22278	if (Swap) {
22279	CC = ISD::getSetCCInverse(CC, CmpOpVT);
22280	std::swap(N2C, N3C);
22281	}
22282
22283	// If the caller doesn't want us to simplify this into a zext of a compare,
22284	// don't do it.
22285	if (NotExtCompare && N2C->isOne())
22286	return SDValue();
22287
22288	SDValue Temp, SCC;
22289	// zext (setcc n0, n1)
22290	if (LegalTypes) {
22291	SCC = DAG.getSetCC(DL, CmpResVT, N0, N1, CC);
22292	if (VT.bitsLT(SCC.getValueType()))
22293	Temp = DAG.getZeroExtendInReg(SCC, SDLoc(N2), VT);
22294	else
22295	Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), VT, SCC);
22296	} else {
22297	SCC = DAG.getSetCC(SDLoc(N0), MVT::i1, N0, N1, CC);
22298	Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), VT, SCC);
22299	}
22300
22301	AddToWorklist(SCC.getNode());
22302	AddToWorklist(Temp.getNode());
22303
22304	if (N2C->isOne())
22305	return Temp;
22306
22307	unsigned ShCt = N2C->getAPIntValue().logBase2();
22308	if (TLI.shouldAvoidTransformToShift(VT, ShCt))
22309	return SDValue();
22310
22311	// shl setcc result by log2 n2c
22312	return DAG.getNode(ISD::SHL, DL, N2.getValueType(), Temp,
22313	DAG.getConstant(ShCt, SDLoc(Temp),
22314	getShiftAmountTy(Temp.getValueType())));
22315	}
22316
22317	// select_cc seteq X, 0, sizeof(X), ctlz(X) -> ctlz(X)
22318	// select_cc seteq X, 0, sizeof(X), ctlz_zero_undef(X) -> ctlz(X)
22319	// select_cc seteq X, 0, sizeof(X), cttz(X) -> cttz(X)
22320	// select_cc seteq X, 0, sizeof(X), cttz_zero_undef(X) -> cttz(X)
22321	// select_cc setne X, 0, ctlz(X), sizeof(X) -> ctlz(X)
22322	// select_cc setne X, 0, ctlz_zero_undef(X), sizeof(X) -> ctlz(X)
22323	// select_cc setne X, 0, cttz(X), sizeof(X) -> cttz(X)
22324	// select_cc setne X, 0, cttz_zero_undef(X), sizeof(X) -> cttz(X)
22325	if (N1C && N1C->isNullValue() && (CC == ISD::SETEQ \|\| CC == ISD::SETNE)) {
22326	SDValue ValueOnZero = N2;
22327	SDValue Count = N3;
22328	// If the condition is NE instead of E, swap the operands.
22329	if (CC == ISD::SETNE)
22330	std::swap(ValueOnZero, Count);
22331	// Check if the value on zero is a constant equal to the bits in the type.
22332	if (auto *ValueOnZeroC = dyn_cast<ConstantSDNode>(ValueOnZero)) {
22333	if (ValueOnZeroC->getAPIntValue() == VT.getSizeInBits()) {
22334	// If the other operand is cttz/cttz_zero_undef of N0, and cttz is
22335	// legal, combine to just cttz.
22336	if ((Count.getOpcode() == ISD::CTTZ \|\|
22337	Count.getOpcode() == ISD::CTTZ_ZERO_UNDEF) &&
22338	N0 == Count.getOperand(0) &&
22339	(!LegalOperations \|\| TLI.isOperationLegal(ISD::CTTZ, VT)))
22340	return DAG.getNode(ISD::CTTZ, DL, VT, N0);
22341	// If the other operand is ctlz/ctlz_zero_undef of N0, and ctlz is
22342	// legal, combine to just ctlz.
22343	if ((Count.getOpcode() == ISD::CTLZ \|\|
22344	Count.getOpcode() == ISD::CTLZ_ZERO_UNDEF) &&
22345	N0 == Count.getOperand(0) &&
22346	(!LegalOperations \|\| TLI.isOperationLegal(ISD::CTLZ, VT)))
22347	return DAG.getNode(ISD::CTLZ, DL, VT, N0);
22348	}
22349	}
22350	}
22351
22352	return SDValue();
22353	}
22354
22355	/// This is a stub for TargetLowering::SimplifySetCC.
22356	SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
22357	ISD::CondCode Cond, const SDLoc &DL,
22358	bool foldBooleans) {
22359	TargetLowering::DAGCombinerInfo
22360	DagCombineInfo(DAG, Level, false, this);
22361	return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL);
22362	}
22363
22364	/// Given an ISD::SDIV node expressing a divide by constant, return
22365	/// a DAG expression to select that will generate the same value by multiplying
22366	/// by a magic number.
22367	/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
22368	SDValue DAGCombiner::BuildSDIV(SDNode *N) {
22369	// when optimising for minimum size, we don't want to expand a div to a mul
22370	// and a shift.
22371	if (DAG.getMachineFunction().getFunction().hasMinSize())
22372	return SDValue();
22373
22374	SmallVector<SDNode *, 8> Built;
22375	if (SDValue S = TLI.BuildSDIV(N, DAG, LegalOperations, Built)) {
22376	for (SDNode *N : Built)
22377	AddToWorklist(N);
22378	return S;
22379	}
22380
22381	return SDValue();
22382	}
22383
22384	/// Given an ISD::SDIV node expressing a divide by constant power of 2, return a
22385	/// DAG expression that will generate the same value by right shifting.
22386	SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) {
22387	ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
22388	if (!C)
22389	return SDValue();
22390
22391	// Avoid division by zero.
22392	if (C->isNullValue())
22393	return SDValue();
22394
22395	SmallVector<SDNode *, 8> Built;
22396	if (SDValue S = TLI.BuildSDIVPow2(N, C->getAPIntValue(), DAG, Built)) {
22397	for (SDNode *N : Built)
22398	AddToWorklist(N);
22399	return S;
22400	}
22401
22402	return SDValue();
22403	}
22404
22405	/// Given an ISD::UDIV node expressing a divide by constant, return a DAG
22406	/// expression that will generate the same value by multiplying by a magic
22407	/// number.
22408	/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
22409	SDValue DAGCombiner::BuildUDIV(SDNode *N) {
22410	// when optimising for minimum size, we don't want to expand a div to a mul
22411	// and a shift.
22412	if (DAG.getMachineFunction().getFunction().hasMinSize())
22413	return SDValue();
22414
22415	SmallVector<SDNode *, 8> Built;
22416	if (SDValue S = TLI.BuildUDIV(N, DAG, LegalOperations, Built)) {
22417	for (SDNode *N : Built)
22418	AddToWorklist(N);
22419	return S;
22420	}
22421
22422	return SDValue();
22423	}
22424
22425	/// Determines the LogBase2 value for a non-null input value using the
22426	/// transform: LogBase2(V) = (EltBits - 1) - ctlz(V).
22427	SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL) {
22428	EVT VT = V.getValueType();
22429	SDValue Ctlz = DAG.getNode(ISD::CTLZ, DL, VT, V);
22430	SDValue Base = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
22431	SDValue LogBase2 = DAG.getNode(ISD::SUB, DL, VT, Base, Ctlz);
22432	return LogBase2;
22433	}
22434
22435	/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
22436	/// For the reciprocal, we need to find the zero of the function:
22437	/// F(X) = A X - 1 [which has a zero at X = 1/A]
22438	/// =>
22439	/// X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form
22440	/// does not require additional intermediate precision]
22441	/// For the last iteration, put numerator N into it to gain more precision:
22442	/// Result = N X_i + X_i (N - N A X_i)
22443	SDValue DAGCombiner::BuildDivEstimate(SDValue N, SDValue Op,
22444	SDNodeFlags Flags) {
22445	if (LegalDAG)
22446	return SDValue();
22447
22448	// TODO: Handle half and/or extended types?
22449	EVT VT = Op.getValueType();
22450	if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64)
22451	return SDValue();
22452
22453	// If estimates are explicitly disabled for this function, we're done.
22454	MachineFunction &MF = DAG.getMachineFunction();
22455	int Enabled = TLI.getRecipEstimateDivEnabled(VT, MF);
22456	if (Enabled == TLI.ReciprocalEstimate::Disabled)
22457	return SDValue();
22458
22459	// Estimates may be explicitly enabled for this type with a custom number of
22460	// refinement steps.
22461	int Iterations = TLI.getDivRefinementSteps(VT, MF);
22462	if (SDValue Est = TLI.getRecipEstimate(Op, DAG, Enabled, Iterations)) {
22463	AddToWorklist(Est.getNode());
22464
22465	SDLoc DL(Op);
22466	if (Iterations) {
22467	SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
22468
22469	// Newton iterations: Est = Est + Est (N - Arg * Est)
22470	// If this is the last iteration, also multiply by the numerator.
22471	for (int i = 0; i < Iterations; ++i) {
22472	SDValue MulEst = Est;
22473
22474	if (i == Iterations - 1) {
22475	MulEst = DAG.getNode(ISD::FMUL, DL, VT, N, Est, Flags);
22476	AddToWorklist(MulEst.getNode());
22477	}
22478
22479	SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, MulEst, Flags);
22480	AddToWorklist(NewEst.getNode());
22481
22482	NewEst = DAG.getNode(ISD::FSUB, DL, VT,
22483	(i == Iterations - 1 ? N : FPOne), NewEst, Flags);
22484	AddToWorklist(NewEst.getNode());
22485
22486	NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
22487	AddToWorklist(NewEst.getNode());
22488
22489	Est = DAG.getNode(ISD::FADD, DL, VT, MulEst, NewEst, Flags);
22490	AddToWorklist(Est.getNode());
22491	}
22492	} else {
22493	// If no iterations are available, multiply with N.
22494	Est = DAG.getNode(ISD::FMUL, DL, VT, Est, N, Flags);
22495	AddToWorklist(Est.getNode());
22496	}
22497
22498	return Est;
22499	}
22500
22501	return SDValue();
22502	}
22503
22504	/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
22505	/// For the reciprocal sqrt, we need to find the zero of the function:
22506	/// F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
22507	/// =>
22508	/// X_{i+1} = X_i (1.5 - A X_i^2 / 2)
22509	/// As a result, we precompute A/2 prior to the iteration loop.
22510	SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est,
22511	unsigned Iterations,
22512	SDNodeFlags Flags, bool Reciprocal) {
22513	EVT VT = Arg.getValueType();
22514	SDLoc DL(Arg);
22515	SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT);
22516
22517	// We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that
22518	// this entire sequence requires only one FP constant.
22519	SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg, Flags);
22520	HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg, Flags);
22521
22522	// Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est)
22523	for (unsigned i = 0; i < Iterations; ++i) {
22524	SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags);
22525	NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst, Flags);
22526	NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst, Flags);
22527	Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
22528	}
22529
22530	// If non-reciprocal square root is requested, multiply the result by Arg.
22531	if (!Reciprocal)
22532	Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg, Flags);
22533
22534	return Est;
22535	}
22536
22537	/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
22538	/// For the reciprocal sqrt, we need to find the zero of the function:
22539	/// F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
22540	/// =>
22541	/// X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0))
22542	SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est,
22543	unsigned Iterations,
22544	SDNodeFlags Flags, bool Reciprocal) {
22545	EVT VT = Arg.getValueType();
22546	SDLoc DL(Arg);
22547	SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT);
22548	SDValue MinusHalf = DAG.getConstantFP(-0.5, DL, VT);
22549
22550	// This routine must enter the loop below to work correctly
22551	// when (Reciprocal == false).
22552	assert(Iterations > 0)((Iterations > 0) ? static_cast<void> (0) : __assert_fail ("Iterations > 0", "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp" , 22552, __PRETTY_FUNCTION__));
22553
22554	// Newton iterations for reciprocal square root:
22555	// E = (E * -0.5) * ((A * E) * E + -3.0)
22556	for (unsigned i = 0; i < Iterations; ++i) {
22557	SDValue AE = DAG.getNode(ISD::FMUL, DL, VT, Arg, Est, Flags);
22558	SDValue AEE = DAG.getNode(ISD::FMUL, DL, VT, AE, Est, Flags);
22559	SDValue RHS = DAG.getNode(ISD::FADD, DL, VT, AEE, MinusThree, Flags);
22560
22561	// When calculating a square root at the last iteration build:
22562	// S = ((A * E) * -0.5) * ((A * E) * E + -3.0)
22563	// (notice a common subexpression)
22564	SDValue LHS;
22565	if (Reciprocal \|\| (i + 1) < Iterations) {
22566	// RSQRT: LHS = (E * -0.5)
22567	LHS = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf, Flags);
22568	} else {
22569	// SQRT: LHS = (A * E) * -0.5
22570	LHS = DAG.getNode(ISD::FMUL, DL, VT, AE, MinusHalf, Flags);
22571	}
22572
22573	Est = DAG.getNode(ISD::FMUL, DL, VT, LHS, RHS, Flags);
22574	}
22575
22576	return Est;
22577	}
22578
22579	/// Build code to calculate either rsqrt(Op) or sqrt(Op). In the latter case
22580	/// Op*rsqrt(Op) is actually computed, so additional postprocessing is needed if
22581	/// Op can be zero.
22582	SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags,
22583	bool Reciprocal) {
22584	if (LegalDAG)
22585	return SDValue();
22586
22587	// TODO: Handle half and/or extended types?
22588	EVT VT = Op.getValueType();
22589	if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64)
22590	return SDValue();
22591
22592	// If estimates are explicitly disabled for this function, we're done.
22593	MachineFunction &MF = DAG.getMachineFunction();
22594	int Enabled = TLI.getRecipEstimateSqrtEnabled(VT, MF);
22595	if (Enabled == TLI.ReciprocalEstimate::Disabled)
22596	return SDValue();
22597
22598	// Estimates may be explicitly enabled for this type with a custom number of
22599	// refinement steps.
22600	int Iterations = TLI.getSqrtRefinementSteps(VT, MF);
22601
22602	bool UseOneConstNR = false;
22603	if (SDValue Est =
22604	TLI.getSqrtEstimate(Op, DAG, Enabled, Iterations, UseOneConstNR,
22605	Reciprocal)) {
22606	AddToWorklist(Est.getNode());
22607
22608	if (Iterations)
22609	Est = UseOneConstNR
22610	? buildSqrtNROneConst(Op, Est, Iterations, Flags, Reciprocal)
22611	: buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal);
22612	if (!Reciprocal) {
22613	SDLoc DL(Op);
22614	// Try the target specific test first.
22615	SDValue Test = TLI.getSqrtInputTest(Op, DAG, DAG.getDenormalMode(VT));
22616
22617	// The estimate is now completely wrong if the input was exactly 0.0 or
22618	// possibly a denormal. Force the answer to 0.0 or value provided by
22619	// target for those cases.
22620	Est = DAG.getNode(
22621	Test.getValueType().isVector() ? ISD::VSELECT : ISD::SELECT, DL, VT,
22622	Test, TLI.getSqrtResultForDenormInput(Op, DAG), Est);
22623	}
22624	return Est;
22625	}
22626
22627	return SDValue();
22628	}
22629
22630	SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags) {
22631	return buildSqrtEstimateImpl(Op, Flags, true);
22632	}
22633
22634	SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags Flags) {
22635	return buildSqrtEstimateImpl(Op, Flags, false);
22636	}
22637
22638	/// Return true if there is any possibility that the two addresses overlap.
22639	bool DAGCombiner::isAlias(SDNode Op0, SDNode Op1) const {
22640
22641	struct MemUseCharacteristics {
22642	bool IsVolatile;
22643	bool IsAtomic;
22644	SDValue BasePtr;
22645	int64_t Offset;
22646	Optional<int64_t> NumBytes;
22647	MachineMemOperand *MMO;
22648	};
22649
22650	auto getCharacteristics = [](SDNode *N) -> MemUseCharacteristics {
22651	if (const auto *LSN = dyn_cast<LSBaseSDNode>(N)) {
22652	int64_t Offset = 0;
22653	if (auto *C = dyn_cast<ConstantSDNode>(LSN->getOffset()))
22654	Offset = (LSN->getAddressingMode() == ISD::PRE_INC)
22655	? C->getSExtValue()
22656	: (LSN->getAddressingMode() == ISD::PRE_DEC)
22657	? -1 * C->getSExtValue()
22658	: 0;
22659	uint64_t Size =
22660	MemoryLocation::getSizeOrUnknown(LSN->getMemoryVT().getStoreSize());
22661	return {LSN->isVolatile(), LSN->isAtomic(), LSN->getBasePtr(),
22662	Offset /base offset/,
22663	Optional<int64_t>(Size),
22664	LSN->getMemOperand()};
22665	}
22666	if (const auto *LN = cast<LifetimeSDNode>(N))
22667	return {false /isVolatile/, /isAtomic/ false, LN->getOperand(1),
22668	(LN->hasOffset()) ? LN->getOffset() : 0,
22669	(LN->hasOffset()) ? Optional<int64_t>(LN->getSize())
22670	: Optional<int64_t>(),
22671	(MachineMemOperand *)nullptr};
22672	// Default.
22673	return {false /isvolatile/, /isAtomic/ false, SDValue(),
22674	(int64_t)0 /offset/,
22675	Optional<int64_t>() /size/, (MachineMemOperand *)nullptr};
22676	};
22677
22678	MemUseCharacteristics MUC0 = getCharacteristics(Op0),
22679	MUC1 = getCharacteristics(Op1);
22680
22681	// If they are to the same address, then they must be aliases.
22682	if (MUC0.BasePtr.getNode() && MUC0.BasePtr == MUC1.BasePtr &&
22683	MUC0.Offset == MUC1.Offset)
22684	return true;
22685
22686	// If they are both volatile then they cannot be reordered.
22687	if (MUC0.IsVolatile && MUC1.IsVolatile)
22688	return true;
22689
22690	// Be conservative about atomics for the moment
22691	// TODO: This is way overconservative for unordered atomics (see D66309)
22692	if (MUC0.IsAtomic && MUC1.IsAtomic)
22693	return true;
22694
22695	if (MUC0.MMO && MUC1.MMO) {
22696	if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) \|\|
22697	(MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
22698	return false;
22699	}
22700
22701	// Try to prove that there is aliasing, or that there is no aliasing. Either
22702	// way, we can return now. If nothing can be proved, proceed with more tests.
22703	bool IsAlias;
22704	if (BaseIndexOffset::computeAliasing(Op0, MUC0.NumBytes, Op1, MUC1.NumBytes,
22705	DAG, IsAlias))
22706	return IsAlias;
22707
22708	// The following all rely on MMO0 and MMO1 being valid. Fail conservatively if
22709	// either are not known.
22710	if (!MUC0.MMO \|\| !MUC1.MMO)
22711	return true;
22712
22713	// If one operation reads from invariant memory, and the other may store, they
22714	// cannot alias. These should really be checking the equivalent of mayWrite,
22715	// but it only matters for memory nodes other than load /store.
22716	if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) \|\|
22717	(MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
22718	return false;
22719
22720	// If we know required SrcValue1 and SrcValue2 have relatively large
22721	// alignment compared to the size and offset of the access, we may be able
22722	// to prove they do not alias. This check is conservative for now to catch
22723	// cases created by splitting vector types, it only works when the offsets are
22724	// multiples of the size of the data.
22725	int64_t SrcValOffset0 = MUC0.MMO->getOffset();
22726	int64_t SrcValOffset1 = MUC1.MMO->getOffset();
22727	Align OrigAlignment0 = MUC0.MMO->getBaseAlign();
22728	Align OrigAlignment1 = MUC1.MMO->getBaseAlign();
22729	auto &Size0 = MUC0.NumBytes;
22730	auto &Size1 = MUC1.NumBytes;
22731	if (OrigAlignment0 == OrigAlignment1 && SrcValOffset0 != SrcValOffset1 &&
22732	Size0.hasValue() && Size1.hasValue() && Size0 == Size1 &&
22733	OrigAlignment0 > Size0 && SrcValOffset0 % Size0 == 0 &&
22734	SrcValOffset1 % *Size1 == 0) {
22735	int64_t OffAlign0 = SrcValOffset0 % OrigAlignment0.value();
22736	int64_t OffAlign1 = SrcValOffset1 % OrigAlignment1.value();
22737
22738	// There is no overlap between these relatively aligned accesses of
22739	// similar size. Return no alias.
22740	if ((OffAlign0 + Size0) <= OffAlign1 \|\| (OffAlign1 + Size1) <= OffAlign0)
22741	return false;
22742	}
22743
22744	bool UseAA = CombinerGlobalAA.getNumOccurrences() > 0
22745	? CombinerGlobalAA
22746	: DAG.getSubtarget().useAA();
22747	#ifndef NDEBUG
22748	if (CombinerAAOnlyFunc.getNumOccurrences() &&
22749	CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
22750	UseAA = false;
22751	#endif
22752
22753	if (UseAA && AA && MUC0.MMO->getValue() && MUC1.MMO->getValue() &&
22754	Size0.hasValue() && Size1.hasValue()) {
22755	// Use alias analysis information.
22756	int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1);
22757	int64_t Overlap0 = *Size0 + SrcValOffset0 - MinOffset;
22758	int64_t Overlap1 = *Size1 + SrcValOffset1 - MinOffset;
22759	AliasResult AAResult = AA->alias(
22760	MemoryLocation(MUC0.MMO->getValue(), Overlap0,
22761	UseTBAA ? MUC0.MMO->getAAInfo() : AAMDNodes()),
22762	MemoryLocation(MUC1.MMO->getValue(), Overlap1,
22763	UseTBAA ? MUC1.MMO->getAAInfo() : AAMDNodes()));
22764	if (AAResult == NoAlias)
22765	return false;
22766	}
22767
22768	// Otherwise we have to assume they alias.
22769	return true;
22770	}
22771
22772	/// Walk up chain skipping non-aliasing memory nodes,
22773	/// looking for aliasing nodes and adding them to the Aliases vector.
22774	void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
22775	SmallVectorImpl<SDValue> &Aliases) {
22776	SmallVector<SDValue, 8> Chains; // List of chains to visit.
22777	SmallPtrSet<SDNode *, 16> Visited; // Visited node set.
22778
22779	// Get alias information for node.
22780	// TODO: relax aliasing for unordered atomics (see D66309)
22781	const bool IsLoad = isa<LoadSDNode>(N) && cast<LoadSDNode>(N)->isSimple();
22782
22783	// Starting off.
22784	Chains.push_back(OriginalChain);
22785	unsigned Depth = 0;
22786
22787	// Attempt to improve chain by a single step
22788	std::function<bool(SDValue &)> ImproveChain = [&](SDValue &C) -> bool {
22789	switch (C.getOpcode()) {
22790	case ISD::EntryToken:
22791	// No need to mark EntryToken.
22792	C = SDValue();
22793	return true;
22794	case ISD::LOAD:
22795	case ISD::STORE: {
22796	// Get alias information for C.
22797	// TODO: Relax aliasing for unordered atomics (see D66309)
22798	bool IsOpLoad = isa<LoadSDNode>(C.getNode()) &&
22799	cast<LSBaseSDNode>(C.getNode())->isSimple();
22800	if ((IsLoad && IsOpLoad) \|\| !isAlias(N, C.getNode())) {
22801	// Look further up the chain.
22802	C = C.getOperand(0);
22803	return true;
22804	}
22805	// Alias, so stop here.
22806	return false;
22807	}
22808
22809	case ISD::CopyFromReg:
22810	// Always forward past past CopyFromReg.
22811	C = C.getOperand(0);
22812	return true;
22813
22814	case ISD::LIFETIME_START:
22815	case ISD::LIFETIME_END: {
22816	// We can forward past any lifetime start/end that can be proven not to
22817	// alias the memory access.
22818	if (!isAlias(N, C.getNode())) {
22819	// Look further up the chain.
22820	C = C.getOperand(0);
22821	return true;
22822	}
22823	return false;
22824	}
22825	default:
22826	return false;
22827	}
22828	};
22829
22830	// Look at each chain and determine if it is an alias. If so, add it to the
22831	// aliases list. If not, then continue up the chain looking for the next
22832	// candidate.
22833	while (!Chains.empty()) {
22834	SDValue Chain = Chains.pop_back_val();
22835
22836	// Don't bother if we've seen Chain before.
22837	if (!Visited.insert(Chain.getNode()).second)
22838	continue;
22839
22840	// For TokenFactor nodes, look at each operand and only continue up the
22841	// chain until we reach the depth limit.
22842	//
22843	// FIXME: The depth check could be made to return the last non-aliasing
22844	// chain we found before we hit a tokenfactor rather than the original
22845	// chain.
22846	if (Depth > TLI.getGatherAllAliasesMaxDepth()) {
22847	Aliases.clear();
22848	Aliases.push_back(OriginalChain);
22849	return;
22850	}
22851
22852	if (Chain.getOpcode() == ISD::TokenFactor) {
22853	// We have to check each of the operands of the token factor for "small"
22854	// token factors, so we queue them up. Adding the operands to the queue
22855	// (stack) in reverse order maintains the original order and increases the
22856	// likelihood that getNode will find a matching token factor (CSE.)
22857	if (Chain.getNumOperands() > 16) {
22858	Aliases.push_back(Chain);
22859	continue;
22860	}
22861	for (unsigned n = Chain.getNumOperands(); n;)
22862	Chains.push_back(Chain.getOperand(--n));
22863	++Depth;
22864	continue;
22865	}
22866	// Everything else
22867	if (ImproveChain(Chain)) {
22868	// Updated Chain Found, Consider new chain if one exists.
22869	if (Chain.getNode())
22870	Chains.push_back(Chain);
22871	++Depth;
22872	continue;
22873	}
22874	// No Improved Chain Possible, treat as Alias.
22875	Aliases.push_back(Chain);
22876	}
22877	}
22878
22879	/// Walk up chain skipping non-aliasing memory nodes, looking for a better chain
22880	/// (aliasing node.)
22881	SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
22882	if (OptLevel == CodeGenOpt::None)
22883	return OldChain;
22884
22885	// Ops for replacing token factor.
22886	SmallVector<SDValue, 8> Aliases;
22887
22888	// Accumulate all the aliases to this node.
22889	GatherAllAliases(N, OldChain, Aliases);
22890
22891	// If no operands then chain to entry token.
22892	if (Aliases.size() == 0)
22893	return DAG.getEntryNode();
22894
22895	// If a single operand then chain to it. We don't need to revisit it.
22896	if (Aliases.size() == 1)
22897	return Aliases[0];
22898
22899	// Construct a custom tailored token factor.
22900	return DAG.getTokenFactor(SDLoc(N), Aliases);
22901	}
22902
namespace {
// Empty tag type with a single instance, Unit; any two values compare equal.
// TODO: Replace with std::monostate when we move to C++17.
struct UnitT {};
UnitT Unit;
bool operator==(const UnitT &, const UnitT &) { return true; }
bool operator!=(const UnitT &, const UnitT &) { return false; }
} // namespace
22909
22910	// This function tries to collect a bunch of potentially interesting
22911	// nodes to improve the chains of, all at once. This might seem
22912	// redundant, as this function gets called when visiting every store
22913	// node, so why not let the work be done on each store as it's visited?
22914	//
22915	// I believe this is mainly important because mergeConsecutiveStores
22916	// is unable to deal with merging stores of different sizes, so unless
22917	// we improve the chains of all the potential candidates up-front
22918	// before running mergeConsecutiveStores, it might only see some of
22919	// the nodes that will eventually be candidates, and then not be able
22920	// to go from a partially-merged state to the desired final
22921	// fully-merged state.
22922
22923	bool DAGCombiner::parallelizeChainedStores(StoreSDNode *St) {
22924	SmallVector<StoreSDNode *, 8> ChainedStores;
22925	StoreSDNode *STChain = St;
22926	// Intervals records which offsets from BaseIndex have been covered. In
22927	// the common case, every store writes to the immediately previous address
22928	// space and thus merged with the previous interval at insertion time.
22929
22930	using IMap =
22931	llvm::IntervalMap<int64_t, UnitT, 8, IntervalMapHalfOpenInfo<int64_t>>;
22932	IMap::Allocator A;
22933	IMap Intervals(A);
22934
22935	// This holds the base pointer, index, and the offset in bytes from the base
22936	// pointer.
22937	const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
22938
22939	// We must have a base and an offset.
22940	if (!BasePtr.getBase().getNode())
22941	return false;
22942
22943	// Do not handle stores to undef base pointers.
22944	if (BasePtr.getBase().isUndef())
22945	return false;
22946
22947	// BaseIndexOffset assumes that offsets are fixed-size, which
22948	// is not valid for scalable vectors where the offsets are
22949	// scaled by `vscale`, so bail out early.
22950	if (St->getMemoryVT().isScalableVector())
22951	return false;
22952
22953	// Add ST's interval.
22954	Intervals.insert(0, (St->getMemoryVT().getSizeInBits() + 7) / 8, Unit);
22955
22956	while (StoreSDNode *Chain = dyn_cast<StoreSDNode>(STChain->getChain())) {
22957	// If the chain has more than one use, then we can't reorder the mem ops.
22958	if (!SDValue(Chain, 0)->hasOneUse())
22959	break;
22960	// TODO: Relax for unordered atomics (see D66309)
22961	if (!Chain->isSimple() \|\| Chain->isIndexed())
22962	break;
22963
22964	// Find the base pointer and offset for this memory node.
22965	const BaseIndexOffset Ptr = BaseIndexOffset::match(Chain, DAG);
22966	// Check that the base pointer is the same as the original one.
22967	int64_t Offset;
22968	if (!BasePtr.equalBaseIndex(Ptr, DAG, Offset))
22969	break;
22970	int64_t Length = (Chain->getMemoryVT().getSizeInBits() + 7) / 8;
22971	// Make sure we don't overlap with other intervals by checking the ones to
22972	// the left or right before inserting.
22973	auto I = Intervals.find(Offset);
22974	// If there's a next interval, we should end before it.
22975	if (I != Intervals.end() && I.start() < (Offset + Length))
22976	break;
22977	// If there's a previous interval, we should start after it.
22978	if (I != Intervals.begin() && (--I).stop() <= Offset)
22979	break;
22980	Intervals.insert(Offset, Offset + Length, Unit);
22981
22982	ChainedStores.push_back(Chain);
22983	STChain = Chain;
22984	}
22985
22986	// If we didn't find a chained store, exit.
22987	if (ChainedStores.size() == 0)
22988	return false;
22989
22990	// Improve all chained stores (St and ChainedStores members) starting from
22991	// where the store chain ended and return single TokenFactor.
22992	SDValue NewChain = STChain->getChain();
22993	SmallVector<SDValue, 8> TFOps;
22994	for (unsigned I = ChainedStores.size(); I;) {
22995	StoreSDNode *S = ChainedStores[--I];
22996	SDValue BetterChain = FindBetterChain(S, NewChain);
22997	S = cast<StoreSDNode>(DAG.UpdateNodeOperands(
22998	S, BetterChain, S->getOperand(1), S->getOperand(2), S->getOperand(3)));
22999	TFOps.push_back(SDValue(S, 0));
23000	ChainedStores[I] = S;
23001	}
23002
23003	// Improve St's chain. Use a new node to avoid creating a loop from CombineTo.
23004	SDValue BetterChain = FindBetterChain(St, NewChain);
23005	SDValue NewST;
23006	if (St->isTruncatingStore())
23007	NewST = DAG.getTruncStore(BetterChain, SDLoc(St), St->getValue(),
23008	St->getBasePtr(), St->getMemoryVT(),
23009	St->getMemOperand());
23010	else
23011	NewST = DAG.getStore(BetterChain, SDLoc(St), St->getValue(),
23012	St->getBasePtr(), St->getMemOperand());
23013
23014	TFOps.push_back(NewST);
23015
23016	// If we improved every element of TFOps, then we've lost the dependence on
23017	// NewChain to successors of St and we need to add it back to TFOps. Do so at
23018	// the beginning to keep relative order consistent with FindBetterChains.
23019	auto hasImprovedChain = [&](SDValue ST) -> bool {
23020	return ST->getOperand(0) != NewChain;
23021	};
23022	bool AddNewChain = llvm::all_of(TFOps, hasImprovedChain);
23023	if (AddNewChain)
23024	TFOps.insert(TFOps.begin(), NewChain);
23025
23026	SDValue TF = DAG.getTokenFactor(SDLoc(STChain), TFOps);
23027	CombineTo(St, TF);
23028
23029	// Add TF and its operands to the worklist.
23030	AddToWorklist(TF.getNode());
23031	for (const SDValue &Op : TF->ops())
23032	AddToWorklist(Op.getNode());
23033	AddToWorklist(STChain);
23034	return true;
23035	}
23036
23037	bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
23038	if (OptLevel == CodeGenOpt::None)
23039	return false;
23040
23041	const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
23042
23043	// We must have a base and an offset.
23044	if (!BasePtr.getBase().getNode())
23045	return false;
23046
23047	// Do not handle stores to undef base pointers.
23048	if (BasePtr.getBase().isUndef())
23049	return false;
23050
23051	// Directly improve a chain of disjoint stores starting at St.
23052	if (parallelizeChainedStores(St))
23053	return true;
23054
23055	// Improve St's Chain..
23056	SDValue BetterChain = FindBetterChain(St, St->getChain());
23057	if (St->getChain() != BetterChain) {
23058	replaceStoreChain(St, BetterChain);
23059	return true;
23060	}
23061	return false;
23062	}
23063
23064	/// This is the entry point for the file.
23065	void SelectionDAG::Combine(CombineLevel Level, AliasAnalysis *AA,
23066	CodeGenOpt::Level OptLevel) {
23067	/// This is the main entry point to this class.
23068	DAGCombiner(*this, AA, OptLevel).Run(Level);
23069	}