Bug Summary

File: llvm/include/llvm/CodeGen/SelectionDAGNodes.h
Warning: line 1332, column 12
Called C++ object pointer is null

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name DAGCombiner.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -fhalf-no-semantic-interposition -mframe-pointer=none -fmath-errno -fno-rounding-math -mconstructor-aliases -munwind-tables -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -ffunction-sections -fdata-sections -fcoverage-compilation-dir=/build/llvm-toolchain-snapshot-13~++20210413100635+64c24f493e5f/build-llvm/lib/CodeGen/SelectionDAG -resource-dir /usr/lib/llvm-13/lib/clang/13.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I /build/llvm-toolchain-snapshot-13~++20210413100635+64c24f493e5f/build-llvm/lib/CodeGen/SelectionDAG -I /build/llvm-toolchain-snapshot-13~++20210413100635+64c24f493e5f/llvm/lib/CodeGen/SelectionDAG -I /build/llvm-toolchain-snapshot-13~++20210413100635+64c24f493e5f/build-llvm/include -I /build/llvm-toolchain-snapshot-13~++20210413100635+64c24f493e5f/llvm/include -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0/backward 
-internal-isystem /usr/lib/llvm-13/lib/clang/13.0.0/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir=/build/llvm-toolchain-snapshot-13~++20210413100635+64c24f493e5f/build-llvm/lib/CodeGen/SelectionDAG -fdebug-prefix-map=/build/llvm-toolchain-snapshot-13~++20210413100635+64c24f493e5f=. -ferror-limit 19 -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2021-04-14-063029-18377-1 -x c++ /build/llvm-toolchain-snapshot-13~++20210413100635+64c24f493e5f/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

/build/llvm-toolchain-snapshot-13~++20210413100635+64c24f493e5f/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

1//===- DAGCombiner.cpp - Implement a DAG node combiner --------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass combines dag nodes to form fewer, simpler DAG nodes. It can be run
10// both before and after the DAG is legalized.
11//
12// This pass is not a substitute for the LLVM IR instcombine pass. This pass is
13// primarily intended to handle simplification opportunities that are implicit
14// in the LLVM IR and exposed by the various codegen lowering phases.
15//
16//===----------------------------------------------------------------------===//
17
18#include "llvm/ADT/APFloat.h"
19#include "llvm/ADT/APInt.h"
20#include "llvm/ADT/ArrayRef.h"
21#include "llvm/ADT/DenseMap.h"
22#include "llvm/ADT/IntervalMap.h"
23#include "llvm/ADT/None.h"
24#include "llvm/ADT/Optional.h"
25#include "llvm/ADT/STLExtras.h"
26#include "llvm/ADT/SetVector.h"
27#include "llvm/ADT/SmallBitVector.h"
28#include "llvm/ADT/SmallPtrSet.h"
29#include "llvm/ADT/SmallSet.h"
30#include "llvm/ADT/SmallVector.h"
31#include "llvm/ADT/Statistic.h"
32#include "llvm/Analysis/AliasAnalysis.h"
33#include "llvm/Analysis/MemoryLocation.h"
34#include "llvm/Analysis/TargetLibraryInfo.h"
35#include "llvm/Analysis/VectorUtils.h"
36#include "llvm/CodeGen/DAGCombine.h"
37#include "llvm/CodeGen/ISDOpcodes.h"
38#include "llvm/CodeGen/MachineFrameInfo.h"
39#include "llvm/CodeGen/MachineFunction.h"
40#include "llvm/CodeGen/MachineMemOperand.h"
41#include "llvm/CodeGen/RuntimeLibcalls.h"
42#include "llvm/CodeGen/SelectionDAG.h"
43#include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
44#include "llvm/CodeGen/SelectionDAGNodes.h"
45#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
46#include "llvm/CodeGen/TargetLowering.h"
47#include "llvm/CodeGen/TargetRegisterInfo.h"
48#include "llvm/CodeGen/TargetSubtargetInfo.h"
49#include "llvm/CodeGen/ValueTypes.h"
50#include "llvm/IR/Attributes.h"
51#include "llvm/IR/Constant.h"
52#include "llvm/IR/DataLayout.h"
53#include "llvm/IR/DerivedTypes.h"
54#include "llvm/IR/Function.h"
55#include "llvm/IR/LLVMContext.h"
56#include "llvm/IR/Metadata.h"
57#include "llvm/Support/Casting.h"
58#include "llvm/Support/CodeGen.h"
59#include "llvm/Support/CommandLine.h"
60#include "llvm/Support/Compiler.h"
61#include "llvm/Support/Debug.h"
62#include "llvm/Support/ErrorHandling.h"
63#include "llvm/Support/KnownBits.h"
64#include "llvm/Support/MachineValueType.h"
65#include "llvm/Support/MathExtras.h"
66#include "llvm/Support/raw_ostream.h"
67#include "llvm/Target/TargetMachine.h"
68#include "llvm/Target/TargetOptions.h"
69#include <algorithm>
70#include <cassert>
71#include <cstdint>
72#include <functional>
73#include <iterator>
74#include <string>
75#include <tuple>
76#include <utility>
77
78using namespace llvm;
79
// Debug type tag used by LLVM_DEBUG/STATISTIC in this file.
#define DEBUG_TYPE "dagcombine"
81
82STATISTIC(NodesCombined , "Number of dag nodes combined")static llvm::Statistic NodesCombined = {"dagcombine", "NodesCombined"
, "Number of dag nodes combined"}
;
83STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created")static llvm::Statistic PreIndexedNodes = {"dagcombine", "PreIndexedNodes"
, "Number of pre-indexed nodes created"}
;
84STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created")static llvm::Statistic PostIndexedNodes = {"dagcombine", "PostIndexedNodes"
, "Number of post-indexed nodes created"}
;
85STATISTIC(OpsNarrowed , "Number of load/op/store narrowed")static llvm::Statistic OpsNarrowed = {"dagcombine", "OpsNarrowed"
, "Number of load/op/store narrowed"}
;
86STATISTIC(LdStFP2Int , "Number of fp load/store pairs transformed to int")static llvm::Statistic LdStFP2Int = {"dagcombine", "LdStFP2Int"
, "Number of fp load/store pairs transformed to int"}
;
87STATISTIC(SlicedLoads, "Number of load sliced")static llvm::Statistic SlicedLoads = {"dagcombine", "SlicedLoads"
, "Number of load sliced"}
;
88STATISTIC(NumFPLogicOpsConv, "Number of logic ops converted to fp ops")static llvm::Statistic NumFPLogicOpsConv = {"dagcombine", "NumFPLogicOpsConv"
, "Number of logic ops converted to fp ops"}
;
89
90static cl::opt<bool>
91CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
92 cl::desc("Enable DAG combiner's use of IR alias analysis"));
93
94static cl::opt<bool>
95UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true),
96 cl::desc("Enable DAG combiner's use of TBAA"));
97
98#ifndef NDEBUG
99static cl::opt<std::string>
100CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden,
101 cl::desc("Only use DAG-combiner alias analysis in this"
102 " function"));
103#endif
104
105/// Hidden option to stress test load slicing, i.e., when this option
106/// is enabled, load slicing bypasses most of its profitability guards.
107static cl::opt<bool>
108StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
109 cl::desc("Bypass the profitability model of load slicing"),
110 cl::init(false));
111
112static cl::opt<bool>
113 MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
114 cl::desc("DAG combiner may split indexing from loads"));
115
116static cl::opt<bool>
117 EnableStoreMerging("combiner-store-merging", cl::Hidden, cl::init(true),
118 cl::desc("DAG combiner enable merging multiple stores "
119 "into a wider store"));
120
121static cl::opt<unsigned> TokenFactorInlineLimit(
122 "combiner-tokenfactor-inline-limit", cl::Hidden, cl::init(2048),
123 cl::desc("Limit the number of operands to inline for Token Factors"));
124
125static cl::opt<unsigned> StoreMergeDependenceLimit(
126 "combiner-store-merge-dependence-limit", cl::Hidden, cl::init(10),
127 cl::desc("Limit the number of times for the same StoreNode and RootNode "
128 "to bail out in store merging dependence check"));
129
130static cl::opt<bool> EnableReduceLoadOpStoreWidth(
131 "combiner-reduce-load-op-store-width", cl::Hidden, cl::init(true),
132 cl::desc("DAG cominber enable reducing the width of load/op/store "
133 "sequence"));
134
135static cl::opt<bool> EnableShrinkLoadReplaceStoreWithStore(
136 "combiner-shrink-load-replace-store-with-store", cl::Hidden, cl::init(true),
137 cl::desc("DAG cominber enable load/<replace bytes>/store with "
138 "a narrower store"));
139
140namespace {
141
142 class DAGCombiner {
143 SelectionDAG &DAG;
144 const TargetLowering &TLI;
145 const SelectionDAGTargetInfo *STI;
146 CombineLevel Level;
147 CodeGenOpt::Level OptLevel;
148 bool LegalDAG = false;
149 bool LegalOperations = false;
150 bool LegalTypes = false;
151 bool ForCodeSize;
152 bool DisableGenericCombines;
153
154 /// Worklist of all of the nodes that need to be simplified.
155 ///
156 /// This must behave as a stack -- new nodes to process are pushed onto the
157 /// back and when processing we pop off of the back.
158 ///
159 /// The worklist will not contain duplicates but may contain null entries
160 /// due to nodes being deleted from the underlying DAG.
161 SmallVector<SDNode *, 64> Worklist;
162
163 /// Mapping from an SDNode to its position on the worklist.
164 ///
165 /// This is used to find and remove nodes from the worklist (by nulling
166 /// them) when they are deleted from the underlying DAG. It relies on
167 /// stable indices of nodes within the worklist.
168 DenseMap<SDNode *, unsigned> WorklistMap;
 169 /// This records all nodes attempted to be added to the worklist since we
 170 /// last considered a new worklist entry. Since we do not add duplicate
 171 /// nodes to the worklist, this can differ from the tail of the worklist.
172 SmallSetVector<SDNode *, 32> PruningList;
173
174 /// Set of nodes which have been combined (at least once).
175 ///
176 /// This is used to allow us to reliably add any operands of a DAG node
177 /// which have not yet been combined to the worklist.
178 SmallPtrSet<SDNode *, 32> CombinedNodes;
179
180 /// Map from candidate StoreNode to the pair of RootNode and count.
181 /// The count is used to track how many times we have seen the StoreNode
182 /// with the same RootNode bail out in dependence check. If we have seen
183 /// the bail out for the same pair many times over a limit, we won't
184 /// consider the StoreNode with the same RootNode as store merging
185 /// candidate again.
186 DenseMap<SDNode *, std::pair<SDNode *, unsigned>> StoreRootCountMap;
187
188 // AA - Used for DAG load/store alias analysis.
189 AliasAnalysis *AA;
190
191 /// When an instruction is simplified, add all users of the instruction to
192 /// the work lists because they might get more simplified now.
193 void AddUsersToWorklist(SDNode *N) {
194 for (SDNode *Node : N->uses())
195 AddToWorklist(Node);
196 }
197
198 /// Convenient shorthand to add a node and all of its user to the worklist.
199 void AddToWorklistWithUsers(SDNode *N) {
200 AddUsersToWorklist(N);
201 AddToWorklist(N);
202 }
203
204 // Prune potentially dangling nodes. This is called after
205 // any visit to a node, but should also be called during a visit after any
206 // failed combine which may have created a DAG node.
207 void clearAddedDanglingWorklistEntries() {
208 // Check any nodes added to the worklist to see if they are prunable.
209 while (!PruningList.empty()) {
210 auto *N = PruningList.pop_back_val();
211 if (N->use_empty())
212 recursivelyDeleteUnusedNodes(N);
213 }
214 }
215
216 SDNode *getNextWorklistEntry() {
217 // Before we do any work, remove nodes that are not in use.
218 clearAddedDanglingWorklistEntries();
219 SDNode *N = nullptr;
220 // The Worklist holds the SDNodes in order, but it may contain null
221 // entries.
222 while (!N && !Worklist.empty()) {
223 N = Worklist.pop_back_val();
224 }
225
226 if (N) {
227 bool GoodWorklistEntry = WorklistMap.erase(N);
228 (void)GoodWorklistEntry;
229 assert(GoodWorklistEntry &&((GoodWorklistEntry && "Found a worklist entry without a corresponding map entry!"
) ? static_cast<void> (0) : __assert_fail ("GoodWorklistEntry && \"Found a worklist entry without a corresponding map entry!\""
, "/build/llvm-toolchain-snapshot-13~++20210413100635+64c24f493e5f/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp"
, 230, __PRETTY_FUNCTION__))
230 "Found a worklist entry without a corresponding map entry!")((GoodWorklistEntry && "Found a worklist entry without a corresponding map entry!"
) ? static_cast<void> (0) : __assert_fail ("GoodWorklistEntry && \"Found a worklist entry without a corresponding map entry!\""
, "/build/llvm-toolchain-snapshot-13~++20210413100635+64c24f493e5f/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp"
, 230, __PRETTY_FUNCTION__))
;
231 }
232 return N;
233 }
234
235 /// Call the node-specific routine that folds each particular type of node.
236 SDValue visit(SDNode *N);
237
238 public:
239 DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOpt::Level OL)
240 : DAG(D), TLI(D.getTargetLoweringInfo()),
241 STI(D.getSubtarget().getSelectionDAGInfo()),
242 Level(BeforeLegalizeTypes), OptLevel(OL), AA(AA) {
243 ForCodeSize = DAG.shouldOptForSize();
244 DisableGenericCombines = STI && STI->disableGenericCombines(OptLevel);
245
246 MaximumLegalStoreInBits = 0;
247 // We use the minimum store size here, since that's all we can guarantee
248 // for the scalable vector types.
249 for (MVT VT : MVT::all_valuetypes())
250 if (EVT(VT).isSimple() && VT != MVT::Other &&
251 TLI.isTypeLegal(EVT(VT)) &&
252 VT.getSizeInBits().getKnownMinSize() >= MaximumLegalStoreInBits)
253 MaximumLegalStoreInBits = VT.getSizeInBits().getKnownMinSize();
254 }
255
256 void ConsiderForPruning(SDNode *N) {
257 // Mark this for potential pruning.
258 PruningList.insert(N);
259 }
260
261 /// Add to the worklist making sure its instance is at the back (next to be
262 /// processed.)
263 void AddToWorklist(SDNode *N) {
264 assert(N->getOpcode() != ISD::DELETED_NODE &&((N->getOpcode() != ISD::DELETED_NODE && "Deleted Node added to Worklist"
) ? static_cast<void> (0) : __assert_fail ("N->getOpcode() != ISD::DELETED_NODE && \"Deleted Node added to Worklist\""
, "/build/llvm-toolchain-snapshot-13~++20210413100635+64c24f493e5f/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp"
, 265, __PRETTY_FUNCTION__))
265 "Deleted Node added to Worklist")((N->getOpcode() != ISD::DELETED_NODE && "Deleted Node added to Worklist"
) ? static_cast<void> (0) : __assert_fail ("N->getOpcode() != ISD::DELETED_NODE && \"Deleted Node added to Worklist\""
, "/build/llvm-toolchain-snapshot-13~++20210413100635+64c24f493e5f/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp"
, 265, __PRETTY_FUNCTION__))
;
266
267 // Skip handle nodes as they can't usefully be combined and confuse the
268 // zero-use deletion strategy.
269 if (N->getOpcode() == ISD::HANDLENODE)
270 return;
271
272 ConsiderForPruning(N);
273
274 if (WorklistMap.insert(std::make_pair(N, Worklist.size())).second)
275 Worklist.push_back(N);
276 }
277
278 /// Remove all instances of N from the worklist.
279 void removeFromWorklist(SDNode *N) {
280 CombinedNodes.erase(N);
281 PruningList.remove(N);
282 StoreRootCountMap.erase(N);
283
284 auto It = WorklistMap.find(N);
285 if (It == WorklistMap.end())
286 return; // Not in the worklist.
287
288 // Null out the entry rather than erasing it to avoid a linear operation.
289 Worklist[It->second] = nullptr;
290 WorklistMap.erase(It);
291 }
292
293 void deleteAndRecombine(SDNode *N);
294 bool recursivelyDeleteUnusedNodes(SDNode *N);
295
296 /// Replaces all uses of the results of one DAG node with new values.
297 SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
298 bool AddTo = true);
299
300 /// Replaces all uses of the results of one DAG node with new values.
301 SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
302 return CombineTo(N, &Res, 1, AddTo);
303 }
304
305 /// Replaces all uses of the results of one DAG node with new values.
306 SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
307 bool AddTo = true) {
308 SDValue To[] = { Res0, Res1 };
309 return CombineTo(N, To, 2, AddTo);
310 }
311
312 void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);
313
314 private:
315 unsigned MaximumLegalStoreInBits;
316
317 /// Check the specified integer node value to see if it can be simplified or
318 /// if things it uses can be simplified by bit propagation.
319 /// If so, return true.
320 bool SimplifyDemandedBits(SDValue Op) {
321 unsigned BitWidth = Op.getScalarValueSizeInBits();
322 APInt DemandedBits = APInt::getAllOnesValue(BitWidth);
323 return SimplifyDemandedBits(Op, DemandedBits);
324 }
325
326 bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits) {
327 TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
328 KnownBits Known;
329 if (!TLI.SimplifyDemandedBits(Op, DemandedBits, Known, TLO, 0, false))
330 return false;
331
332 // Revisit the node.
333 AddToWorklist(Op.getNode());
334
335 CommitTargetLoweringOpt(TLO);
336 return true;
337 }
338
339 /// Check the specified vector node value to see if it can be simplified or
340 /// if things it uses can be simplified as it only uses some of the
341 /// elements. If so, return true.
342 bool SimplifyDemandedVectorElts(SDValue Op) {
343 // TODO: For now just pretend it cannot be simplified.
344 if (Op.getValueType().isScalableVector())
345 return false;
346
347 unsigned NumElts = Op.getValueType().getVectorNumElements();
348 APInt DemandedElts = APInt::getAllOnesValue(NumElts);
349 return SimplifyDemandedVectorElts(Op, DemandedElts);
350 }
351
352 bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
353 const APInt &DemandedElts,
354 bool AssumeSingleUse = false);
355 bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedElts,
356 bool AssumeSingleUse = false);
357
358 bool CombineToPreIndexedLoadStore(SDNode *N);
359 bool CombineToPostIndexedLoadStore(SDNode *N);
360 SDValue SplitIndexingFromLoad(LoadSDNode *LD);
361 bool SliceUpLoad(SDNode *N);
362
363 // Scalars have size 0 to distinguish from singleton vectors.
364 SDValue ForwardStoreValueToDirectLoad(LoadSDNode *LD);
365 bool getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val);
366 bool extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val);
367
368 /// Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed
369 /// load.
370 ///
371 /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced.
372 /// \param InVecVT type of the input vector to EVE with bitcasts resolved.
373 /// \param EltNo index of the vector element to load.
374 /// \param OriginalLoad load that EVE came from to be replaced.
375 /// \returns EVE on success SDValue() on failure.
376 SDValue scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
377 SDValue EltNo,
378 LoadSDNode *OriginalLoad);
379 void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
380 SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
381 SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
382 SDValue ZExtPromoteOperand(SDValue Op, EVT PVT);
383 SDValue PromoteIntBinOp(SDValue Op);
384 SDValue PromoteIntShiftOp(SDValue Op);
385 SDValue PromoteExtend(SDValue Op);
386 bool PromoteLoad(SDValue Op);
387
388 /// Call the node-specific routine that knows how to fold each
389 /// particular type of node. If that doesn't do anything, try the
390 /// target-specific DAG combines.
391 SDValue combine(SDNode *N);
392
393 // Visitation implementation - Implement dag node combining for different
394 // node types. The semantics are as follows:
395 // Return Value:
396 // SDValue.getNode() == 0 - No change was made
397 // SDValue.getNode() == N - N was replaced, is dead and has been handled.
398 // otherwise - N should be replaced by the returned Operand.
399 //
400 SDValue visitTokenFactor(SDNode *N);
401 SDValue visitMERGE_VALUES(SDNode *N);
402 SDValue visitADD(SDNode *N);
403 SDValue visitADDLike(SDNode *N);
404 SDValue visitADDLikeCommutative(SDValue N0, SDValue N1, SDNode *LocReference);
405 SDValue visitSUB(SDNode *N);
406 SDValue visitADDSAT(SDNode *N);
407 SDValue visitSUBSAT(SDNode *N);
408 SDValue visitADDC(SDNode *N);
409 SDValue visitADDO(SDNode *N);
410 SDValue visitUADDOLike(SDValue N0, SDValue N1, SDNode *N);
411 SDValue visitSUBC(SDNode *N);
412 SDValue visitSUBO(SDNode *N);
413 SDValue visitADDE(SDNode *N);
414 SDValue visitADDCARRY(SDNode *N);
415 SDValue visitSADDO_CARRY(SDNode *N);
416 SDValue visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn, SDNode *N);
417 SDValue visitSUBE(SDNode *N);
418 SDValue visitSUBCARRY(SDNode *N);
419 SDValue visitSSUBO_CARRY(SDNode *N);
420 SDValue visitMUL(SDNode *N);
421 SDValue visitMULFIX(SDNode *N);
422 SDValue useDivRem(SDNode *N);
423 SDValue visitSDIV(SDNode *N);
424 SDValue visitSDIVLike(SDValue N0, SDValue N1, SDNode *N);
425 SDValue visitUDIV(SDNode *N);
426 SDValue visitUDIVLike(SDValue N0, SDValue N1, SDNode *N);
427 SDValue visitREM(SDNode *N);
428 SDValue visitMULHU(SDNode *N);
429 SDValue visitMULHS(SDNode *N);
430 SDValue visitSMUL_LOHI(SDNode *N);
431 SDValue visitUMUL_LOHI(SDNode *N);
432 SDValue visitMULO(SDNode *N);
433 SDValue visitIMINMAX(SDNode *N);
434 SDValue visitAND(SDNode *N);
435 SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *N);
436 SDValue visitOR(SDNode *N);
437 SDValue visitORLike(SDValue N0, SDValue N1, SDNode *N);
438 SDValue visitXOR(SDNode *N);
439 SDValue SimplifyVBinOp(SDNode *N);
440 SDValue visitSHL(SDNode *N);
441 SDValue visitSRA(SDNode *N);
442 SDValue visitSRL(SDNode *N);
443 SDValue visitFunnelShift(SDNode *N);
444 SDValue visitRotate(SDNode *N);
445 SDValue visitABS(SDNode *N);
446 SDValue visitBSWAP(SDNode *N);
447 SDValue visitBITREVERSE(SDNode *N);
448 SDValue visitCTLZ(SDNode *N);
449 SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
450 SDValue visitCTTZ(SDNode *N);
451 SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
452 SDValue visitCTPOP(SDNode *N);
453 SDValue visitSELECT(SDNode *N);
454 SDValue visitVSELECT(SDNode *N);
455 SDValue visitSELECT_CC(SDNode *N);
456 SDValue visitSETCC(SDNode *N);
457 SDValue visitSETCCCARRY(SDNode *N);
458 SDValue visitSIGN_EXTEND(SDNode *N);
459 SDValue visitZERO_EXTEND(SDNode *N);
460 SDValue visitANY_EXTEND(SDNode *N);
461 SDValue visitAssertExt(SDNode *N);
462 SDValue visitAssertAlign(SDNode *N);
463 SDValue visitSIGN_EXTEND_INREG(SDNode *N);
464 SDValue visitSIGN_EXTEND_VECTOR_INREG(SDNode *N);
465 SDValue visitZERO_EXTEND_VECTOR_INREG(SDNode *N);
466 SDValue visitTRUNCATE(SDNode *N);
467 SDValue visitBITCAST(SDNode *N);
468 SDValue visitFREEZE(SDNode *N);
469 SDValue visitBUILD_PAIR(SDNode *N);
470 SDValue visitFADD(SDNode *N);
471 SDValue visitSTRICT_FADD(SDNode *N);
472 SDValue visitFSUB(SDNode *N);
473 SDValue visitFMUL(SDNode *N);
474 SDValue visitFMA(SDNode *N);
475 SDValue visitFDIV(SDNode *N);
476 SDValue visitFREM(SDNode *N);
477 SDValue visitFSQRT(SDNode *N);
478 SDValue visitFCOPYSIGN(SDNode *N);
479 SDValue visitFPOW(SDNode *N);
480 SDValue visitSINT_TO_FP(SDNode *N);
481 SDValue visitUINT_TO_FP(SDNode *N);
482 SDValue visitFP_TO_SINT(SDNode *N);
483 SDValue visitFP_TO_UINT(SDNode *N);
484 SDValue visitFP_ROUND(SDNode *N);
485 SDValue visitFP_EXTEND(SDNode *N);
486 SDValue visitFNEG(SDNode *N);
487 SDValue visitFABS(SDNode *N);
488 SDValue visitFCEIL(SDNode *N);
489 SDValue visitFTRUNC(SDNode *N);
490 SDValue visitFFLOOR(SDNode *N);
491 SDValue visitFMINNUM(SDNode *N);
492 SDValue visitFMAXNUM(SDNode *N);
493 SDValue visitFMINIMUM(SDNode *N);
494 SDValue visitFMAXIMUM(SDNode *N);
495 SDValue visitBRCOND(SDNode *N);
496 SDValue visitBR_CC(SDNode *N);
497 SDValue visitLOAD(SDNode *N);
498
499 SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain);
500 SDValue replaceStoreOfFPConstant(StoreSDNode *ST);
501
502 SDValue visitSTORE(SDNode *N);
503 SDValue visitLIFETIME_END(SDNode *N);
504 SDValue visitINSERT_VECTOR_ELT(SDNode *N);
505 SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
506 SDValue visitBUILD_VECTOR(SDNode *N);
507 SDValue visitCONCAT_VECTORS(SDNode *N);
508 SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
509 SDValue visitVECTOR_SHUFFLE(SDNode *N);
510 SDValue visitSCALAR_TO_VECTOR(SDNode *N);
511 SDValue visitINSERT_SUBVECTOR(SDNode *N);
512 SDValue visitMLOAD(SDNode *N);
513 SDValue visitMSTORE(SDNode *N);
514 SDValue visitMGATHER(SDNode *N);
515 SDValue visitMSCATTER(SDNode *N);
516 SDValue visitFP_TO_FP16(SDNode *N);
517 SDValue visitFP16_TO_FP(SDNode *N);
518 SDValue visitVECREDUCE(SDNode *N);
519
520 SDValue visitFADDForFMACombine(SDNode *N);
521 SDValue visitFSUBForFMACombine(SDNode *N);
522 SDValue visitFMULForFMADistributiveCombine(SDNode *N);
523
524 SDValue XformToShuffleWithZero(SDNode *N);
525 bool reassociationCanBreakAddressingModePattern(unsigned Opc,
526 const SDLoc &DL, SDValue N0,
527 SDValue N1);
528 SDValue reassociateOpsCommutative(unsigned Opc, const SDLoc &DL, SDValue N0,
529 SDValue N1);
530 SDValue reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
531 SDValue N1, SDNodeFlags Flags);
532
533 SDValue visitShiftByConstant(SDNode *N);
534
535 SDValue foldSelectOfConstants(SDNode *N);
536 SDValue foldVSelectOfConstants(SDNode *N);
537 SDValue foldBinOpIntoSelect(SDNode *BO);
538 bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
539 SDValue hoistLogicOpWithSameOpcodeHands(SDNode *N);
540 SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2);
541 SDValue SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
542 SDValue N2, SDValue N3, ISD::CondCode CC,
543 bool NotExtCompare = false);
544 SDValue convertSelectOfFPConstantsToLoadOffset(
545 const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
546 ISD::CondCode CC);
547 SDValue foldSignChangeInBitcast(SDNode *N);
548 SDValue foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1,
549 SDValue N2, SDValue N3, ISD::CondCode CC);
550 SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
551 const SDLoc &DL);
552 SDValue foldSubToUSubSat(EVT DstVT, SDNode *N);
553 SDValue unfoldMaskedMerge(SDNode *N);
554 SDValue unfoldExtremeBitClearingToShifts(SDNode *N);
555 SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
556 const SDLoc &DL, bool foldBooleans);
557 SDValue rebuildSetCC(SDValue N);
558
559 bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
560 SDValue &CC, bool MatchStrict = false) const;
561 bool isOneUseSetCC(SDValue N) const;
562
563 SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
564 unsigned HiOp);
565 SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
566 SDValue CombineExtLoad(SDNode *N);
567 SDValue CombineZExtLogicopShiftLoad(SDNode *N);
568 SDValue combineRepeatedFPDivisors(SDNode *N);
569 SDValue combineInsertEltToShuffle(SDNode *N, unsigned InsIndex);
570 SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
571 SDValue BuildSDIV(SDNode *N);
572 SDValue BuildSDIVPow2(SDNode *N);
573 SDValue BuildUDIV(SDNode *N);
574 SDValue BuildLogBase2(SDValue V, const SDLoc &DL);
575 SDValue BuildDivEstimate(SDValue N, SDValue Op, SDNodeFlags Flags);
576 SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags);
577 SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags);
578 SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Recip);
579 SDValue buildSqrtNROneConst(SDValue Arg, SDValue Est, unsigned Iterations,
580 SDNodeFlags Flags, bool Reciprocal);
581 SDValue buildSqrtNRTwoConst(SDValue Arg, SDValue Est, unsigned Iterations,
582 SDNodeFlags Flags, bool Reciprocal);
583 SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
584 bool DemandHighBits = true);
585 SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
586 SDValue MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
587 SDValue InnerPos, SDValue InnerNeg,
588 unsigned PosOpcode, unsigned NegOpcode,
589 const SDLoc &DL);
590 SDValue MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos, SDValue Neg,
591 SDValue InnerPos, SDValue InnerNeg,
592 unsigned PosOpcode, unsigned NegOpcode,
593 const SDLoc &DL);
594 SDValue MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
595 SDValue MatchLoadCombine(SDNode *N);
596 SDValue mergeTruncStores(StoreSDNode *N);
597 SDValue ReduceLoadWidth(SDNode *N);
598 SDValue ReduceLoadOpStoreWidth(SDNode *N);
599 SDValue splitMergedValStore(StoreSDNode *ST);
600 SDValue TransformFPLoadStorePair(SDNode *N);
601 SDValue convertBuildVecZextToZext(SDNode *N);
602 SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
603 SDValue reduceBuildVecTruncToBitCast(SDNode *N);
604 SDValue reduceBuildVecToShuffle(SDNode *N);
605 SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N,
606 ArrayRef<int> VectorMask, SDValue VecIn1,
607 SDValue VecIn2, unsigned LeftIdx,
608 bool DidSplitVec);
609 SDValue matchVSelectOpSizesWithSetCC(SDNode *Cast);
610
611 /// Walk up chain skipping non-aliasing memory nodes,
612 /// looking for aliasing nodes and adding them to the Aliases vector.
613 void GatherAllAliases(SDNode *N, SDValue OriginalChain,
614 SmallVectorImpl<SDValue> &Aliases);
615
616 /// Return true if there is any possibility that the two addresses overlap.
617 bool isAlias(SDNode *Op0, SDNode *Op1) const;
618
619 /// Walk up chain skipping non-aliasing memory nodes, looking for a better
620 /// chain (aliasing node.)
621 SDValue FindBetterChain(SDNode *N, SDValue Chain);
622
623 /// Try to replace a store and any possibly adjacent stores on
624 /// consecutive chains with better chains. Return true only if St is
625 /// replaced.
626 ///
627 /// Notice that other chains may still be replaced even if the function
628 /// returns false.
629 bool findBetterNeighborChains(StoreSDNode *St);
630
631 // Helper for findBetterNeighborChains. Walk up store chain add additional
632 // chained stores that do not overlap and can be parallelized.
633 bool parallelizeChainedStores(StoreSDNode *St);
634
635 /// Holds a pointer to an LSBaseSDNode as well as information on where it
636 /// is located in a sequence of memory operations connected by a chain.
637 struct MemOpLink {
638 // Ptr to the mem node.
639 LSBaseSDNode *MemNode;
640
641 // Offset from the base ptr.
642 int64_t OffsetFromBase;
643
644 MemOpLink(LSBaseSDNode *N, int64_t Offset)
645 : MemNode(N), OffsetFromBase(Offset) {}
646 };
647
648 // Classify the origin of a stored value.
649 enum class StoreSource { Unknown, Constant, Extract, Load };
650 StoreSource getStoreSource(SDValue StoreVal) {
651 switch (StoreVal.getOpcode()) {
652 case ISD::Constant:
653 case ISD::ConstantFP:
654 return StoreSource::Constant;
655 case ISD::EXTRACT_VECTOR_ELT:
656 case ISD::EXTRACT_SUBVECTOR:
657 return StoreSource::Extract;
658 case ISD::LOAD:
659 return StoreSource::Load;
660 default:
661 return StoreSource::Unknown;
662 }
663 }
664
665 /// This is a helper function for visitMUL to check the profitability
666 /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
667 /// MulNode is the original multiply, AddNode is (add x, c1),
668 /// and ConstNode is c2.
669 bool isMulAddWithConstProfitable(SDNode *MulNode,
670 SDValue &AddNode,
671 SDValue &ConstNode);
672
673 /// This is a helper function for visitAND and visitZERO_EXTEND. Returns
674 /// true if the (and (load x) c) pattern matches an extload. ExtVT returns
675 /// the type of the loaded value to be extended.
676 bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
677 EVT LoadResultTy, EVT &ExtVT);
678
679 /// Helper function to calculate whether the given Load/Store can have its
680 /// width reduced to ExtVT.
681 bool isLegalNarrowLdSt(LSBaseSDNode *LDSTN, ISD::LoadExtType ExtType,
682 EVT &MemVT, unsigned ShAmt = 0);
683
684 /// Used by BackwardsPropagateMask to find suitable loads.
685 bool SearchForAndLoads(SDNode *N, SmallVectorImpl<LoadSDNode*> &Loads,
686 SmallPtrSetImpl<SDNode*> &NodesWithConsts,
687 ConstantSDNode *Mask, SDNode *&NodeToMask);
688 /// Attempt to propagate a given AND node back to load leaves so that they
689 /// can be combined into narrow loads.
690 bool BackwardsPropagateMask(SDNode *N);
691
692 /// Helper function for mergeConsecutiveStores which merges the component
693 /// store chains.
694 SDValue getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
695 unsigned NumStores);
696
697 /// This is a helper function for mergeConsecutiveStores. When the source
698 /// elements of the consecutive stores are all constants or all extracted
699 /// vector elements, try to merge them into one larger store introducing
700 /// bitcasts if necessary. \return True if a merged store was created.
701 bool mergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
702 EVT MemVT, unsigned NumStores,
703 bool IsConstantSrc, bool UseVector,
704 bool UseTrunc);
705
706 /// This is a helper function for mergeConsecutiveStores. Stores that
707 /// potentially may be merged with St are placed in StoreNodes. RootNode is
708 /// a chain predecessor to all store candidates.
709 void getStoreMergeCandidates(StoreSDNode *St,
710 SmallVectorImpl<MemOpLink> &StoreNodes,
711 SDNode *&Root);
712
713 /// Helper function for mergeConsecutiveStores. Checks if candidate stores
714 /// have indirect dependency through their operands. RootNode is the
715 /// predecessor to all stores calculated by getStoreMergeCandidates and is
716 /// used to prune the dependency check. \return True if safe to merge.
717 bool checkMergeStoreCandidatesForDependencies(
718 SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
719 SDNode *RootNode);
720
721 /// This is a helper function for mergeConsecutiveStores. Given a list of
722 /// store candidates, find the first N that are consecutive in memory.
723 /// Returns 0 if there are not at least 2 consecutive stores to try merging.
724 unsigned getConsecutiveStores(SmallVectorImpl<MemOpLink> &StoreNodes,
725 int64_t ElementSizeBytes) const;
726
727 /// This is a helper function for mergeConsecutiveStores. It is used for
728 /// store chains that are composed entirely of constant values.
729 bool tryStoreMergeOfConstants(SmallVectorImpl<MemOpLink> &StoreNodes,
730 unsigned NumConsecutiveStores,
731 EVT MemVT, SDNode *Root, bool AllowVectors);
732
733 /// This is a helper function for mergeConsecutiveStores. It is used for
734 /// store chains that are composed entirely of extracted vector elements.
735 /// When extracting multiple vector elements, try to store them in one
736 /// vector store rather than a sequence of scalar stores.
737 bool tryStoreMergeOfExtracts(SmallVectorImpl<MemOpLink> &StoreNodes,
738 unsigned NumConsecutiveStores, EVT MemVT,
739 SDNode *Root);
740
741 /// This is a helper function for mergeConsecutiveStores. It is used for
742 /// store chains that are composed entirely of loaded values.
743 bool tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
744 unsigned NumConsecutiveStores, EVT MemVT,
745 SDNode *Root, bool AllowVectors,
746 bool IsNonTemporalStore, bool IsNonTemporalLoad);
747
748 /// Merge consecutive store operations into a wide store.
749 /// This optimization uses wide integers or vectors when possible.
750 /// \return true if stores were merged.
751 bool mergeConsecutiveStores(StoreSDNode *St);
752
753 /// Try to transform a truncation where C is a constant:
754 /// (trunc (and X, C)) -> (and (trunc X), (trunc C))
755 ///
756 /// \p N needs to be a truncation and its first operand an AND. Other
757 /// requirements are checked by the function (e.g. that trunc is
758 /// single-use) and if missed an empty SDValue is returned.
759 SDValue distributeTruncateThroughAnd(SDNode *N);
760
761 /// Helper function to determine whether the target supports operation
762 /// given by \p Opcode for type \p VT, that is, whether the operation
763 /// is legal or custom before legalizing operations, and whether is
764 /// legal (but not custom) after legalization.
765 bool hasOperation(unsigned Opcode, EVT VT) {
766 return TLI.isOperationLegalOrCustom(Opcode, VT, LegalOperations);
767 }
768
769 public:
770 /// Runs the dag combiner on all nodes in the work list
771 void Run(CombineLevel AtLevel);
772
773 SelectionDAG &getDAG() const { return DAG; }
774
775 /// Returns a type large enough to hold any valid shift amount - before type
776 /// legalization these can be huge.
777 EVT getShiftAmountTy(EVT LHSTy) {
778 assert(LHSTy.isInteger() && "Shift amount is not an integer type!")((LHSTy.isInteger() && "Shift amount is not an integer type!"
) ? static_cast<void> (0) : __assert_fail ("LHSTy.isInteger() && \"Shift amount is not an integer type!\""
, "/build/llvm-toolchain-snapshot-13~++20210413100635+64c24f493e5f/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp"
, 778, __PRETTY_FUNCTION__))
;
779 return TLI.getShiftAmountTy(LHSTy, DAG.getDataLayout(), LegalTypes);
780 }
781
782 /// This method returns true if we are running before type legalization or
783 /// if the specified VT is legal.
784 bool isTypeLegal(const EVT &VT) {
785 if (!LegalTypes) return true;
786 return TLI.isTypeLegal(VT);
787 }
788
789 /// Convenience wrapper around TargetLowering::getSetCCResultType
790 EVT getSetCCResultType(EVT VT) const {
791 return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
792 }
793
794 void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
795 SDValue OrigLoad, SDValue ExtLoad,
796 ISD::NodeType ExtType);
797 };
798
799/// This class is a DAGUpdateListener that removes any deleted
800/// nodes from the worklist.
801class WorklistRemover : public SelectionDAG::DAGUpdateListener {
802 DAGCombiner &DC;
803
804public:
805 explicit WorklistRemover(DAGCombiner &dc)
806 : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
807
808 void NodeDeleted(SDNode *N, SDNode *E) override {
809 DC.removeFromWorklist(N);
810 }
811};
812
813class WorklistInserter : public SelectionDAG::DAGUpdateListener {
814 DAGCombiner &DC;
815
816public:
817 explicit WorklistInserter(DAGCombiner &dc)
818 : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
819
820 // FIXME: Ideally we could add N to the worklist, but this causes exponential
821 // compile time costs in large DAGs, e.g. Halide.
822 void NodeInserted(SDNode *N) override { DC.ConsiderForPruning(N); }
823};
824
825} // end anonymous namespace
826
827//===----------------------------------------------------------------------===//
828// TargetLowering::DAGCombinerInfo implementation
829//===----------------------------------------------------------------------===//
830
831void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) {
832 ((DAGCombiner*)DC)->AddToWorklist(N);
833}
834
835SDValue TargetLowering::DAGCombinerInfo::
836CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) {
837 return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
838}
839
840SDValue TargetLowering::DAGCombinerInfo::
841CombineTo(SDNode *N, SDValue Res, bool AddTo) {
842 return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
843}
844
845SDValue TargetLowering::DAGCombinerInfo::
846CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
847 return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
848}
849
850bool TargetLowering::DAGCombinerInfo::
851recursivelyDeleteUnusedNodes(SDNode *N) {
852 return ((DAGCombiner*)DC)->recursivelyDeleteUnusedNodes(N);
853}
854
855void TargetLowering::DAGCombinerInfo::
856CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
857 return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
858}
859
860//===----------------------------------------------------------------------===//
861// Helper Functions
862//===----------------------------------------------------------------------===//
863
864void DAGCombiner::deleteAndRecombine(SDNode *N) {
865 removeFromWorklist(N);
866
867 // If the operands of this node are only used by the node, they will now be
868 // dead. Make sure to re-visit them and recursively delete dead nodes.
869 for (const SDValue &Op : N->ops())
870 // For an operand generating multiple values, one of the values may
871 // become dead allowing further simplification (e.g. split index
872 // arithmetic from an indexed load).
873 if (Op->hasOneUse() || Op->getNumValues() > 1)
874 AddToWorklist(Op.getNode());
875
876 DAG.DeleteNode(N);
877}
878
879// APInts must be the same size for most operations, this helper
880// function zero extends the shorter of the pair so that they match.
881// We provide an Offset so that we can create bitwidths that won't overflow.
882static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) {
883 unsigned Bits = Offset + std::max(LHS.getBitWidth(), RHS.getBitWidth());
884 LHS = LHS.zextOrSelf(Bits);
885 RHS = RHS.zextOrSelf(Bits);
886}
887
888// Return true if this node is a setcc, or is a select_cc
889// that selects between the target values used for true and false, making it
890// equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to
891// the appropriate nodes based on the type of node we are checking. This
892// simplifies life a bit for the callers.
893bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
894 SDValue &CC, bool MatchStrict) const {
895 if (N.getOpcode() == ISD::SETCC) {
896 LHS = N.getOperand(0);
897 RHS = N.getOperand(1);
898 CC = N.getOperand(2);
899 return true;
900 }
901
902 if (MatchStrict &&
903 (N.getOpcode() == ISD::STRICT_FSETCC ||
904 N.getOpcode() == ISD::STRICT_FSETCCS)) {
905 LHS = N.getOperand(1);
906 RHS = N.getOperand(2);
907 CC = N.getOperand(3);
908 return true;
909 }
910
911 if (N.getOpcode() != ISD::SELECT_CC ||
912 !TLI.isConstTrueVal(N.getOperand(2).getNode()) ||
913 !TLI.isConstFalseVal(N.getOperand(3).getNode()))
914 return false;
915
916 if (TLI.getBooleanContents(N.getValueType()) ==
917 TargetLowering::UndefinedBooleanContent)
918 return false;
919
920 LHS = N.getOperand(0);
921 RHS = N.getOperand(1);
922 CC = N.getOperand(4);
923 return true;
924}
925
926/// Return true if this is a SetCC-equivalent operation with only one use.
927/// If this is true, it allows the users to invert the operation for free when
928/// it is profitable to do so.
929bool DAGCombiner::isOneUseSetCC(SDValue N) const {
930 SDValue N0, N1, N2;
931 if (isSetCCEquivalent(N, N0, N1, N2) && N.getNode()->hasOneUse())
932 return true;
933 return false;
934}
935
936static bool isConstantSplatVectorMaskForType(SDNode *N, EVT ScalarTy) {
937 if (!ScalarTy.isSimple())
938 return false;
939
940 uint64_t MaskForTy = 0ULL;
941 switch (ScalarTy.getSimpleVT().SimpleTy) {
942 case MVT::i8:
943 MaskForTy = 0xFFULL;
944 break;
945 case MVT::i16:
946 MaskForTy = 0xFFFFULL;
947 break;
948 case MVT::i32:
949 MaskForTy = 0xFFFFFFFFULL;
950 break;
951 default:
952 return false;
953 break;
954 }
955
956 APInt Val;
957 if (ISD::isConstantSplatVector(N, Val))
958 return Val.getLimitedValue() == MaskForTy;
959
960 return false;
961}
962
963// Determines if it is a constant integer or a splat/build vector of constant
964// integers (and undefs).
965// Do not permit build vector implicit truncation.
966static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
967 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N))
968 return !(Const->isOpaque() && NoOpaques);
969 if (N.getOpcode() != ISD::BUILD_VECTOR && N.getOpcode() != ISD::SPLAT_VECTOR)
970 return false;
971 unsigned BitWidth = N.getScalarValueSizeInBits();
972 for (const SDValue &Op : N->op_values()) {
973 if (Op.isUndef())
974 continue;
975 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Op);
976 if (!Const || Const->getAPIntValue().getBitWidth() != BitWidth ||
977 (Const->isOpaque() && NoOpaques))
978 return false;
979 }
980 return true;
981}
982
983// Determines if a BUILD_VECTOR is composed of all-constants possibly mixed with
984// undef's.
985static bool isAnyConstantBuildVector(SDValue V, bool NoOpaques = false) {
986 if (V.getOpcode() != ISD::BUILD_VECTOR)
987 return false;
988 return isConstantOrConstantVector(V, NoOpaques) ||
989 ISD::isBuildVectorOfConstantFPSDNodes(V.getNode());
990}
991
992// Determine if this an indexed load with an opaque target constant index.
993static bool canSplitIdx(LoadSDNode *LD) {
994 return MaySplitLoadIndex &&
995 (LD->getOperand(2).getOpcode() != ISD::TargetConstant ||
996 !cast<ConstantSDNode>(LD->getOperand(2))->isOpaque());
997}
998
999bool DAGCombiner::reassociationCanBreakAddressingModePattern(unsigned Opc,
1000 const SDLoc &DL,
1001 SDValue N0,
1002 SDValue N1) {
1003 // Currently this only tries to ensure we don't undo the GEP splits done by
1004 // CodeGenPrepare when shouldConsiderGEPOffsetSplit is true. To ensure this,
1005 // we check if the following transformation would be problematic:
1006 // (load/store (add, (add, x, offset1), offset2)) ->
1007 // (load/store (add, x, offset1+offset2)).
1008
1009 if (Opc != ISD::ADD || N0.getOpcode() != ISD::ADD)
1010 return false;
1011
1012 if (N0.hasOneUse())
1013 return false;
1014
1015 auto *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
1016 auto *C2 = dyn_cast<ConstantSDNode>(N1);
1017 if (!C1 || !C2)
1018 return false;
1019
1020 const APInt &C1APIntVal = C1->getAPIntValue();
1021 const APInt &C2APIntVal = C2->getAPIntValue();
1022 if (C1APIntVal.getBitWidth() > 64 || C2APIntVal.getBitWidth() > 64)
1023 return false;
1024
1025 const APInt CombinedValueIntVal = C1APIntVal + C2APIntVal;
1026 if (CombinedValueIntVal.getBitWidth() > 64)
1027 return false;
1028 const int64_t CombinedValue = CombinedValueIntVal.getSExtValue();
1029
1030 for (SDNode *Node : N0->uses()) {
1031 auto LoadStore = dyn_cast<MemSDNode>(Node);
1032 if (LoadStore) {
1033 // Is x[offset2] already not a legal addressing mode? If so then
1034 // reassociating the constants breaks nothing (we test offset2 because
1035 // that's the one we hope to fold into the load or store).
1036 TargetLoweringBase::AddrMode AM;
1037 AM.HasBaseReg = true;
1038 AM.BaseOffs = C2APIntVal.getSExtValue();
1039 EVT VT = LoadStore->getMemoryVT();
1040 unsigned AS = LoadStore->getAddressSpace();
1041 Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
1042 if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
1043 continue;
1044
1045 // Would x[offset1+offset2] still be a legal addressing mode?
1046 AM.BaseOffs = CombinedValue;
1047 if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
1048 return true;
1049 }
1050 }
1051
1052 return false;
1053}
1054
1055// Helper for DAGCombiner::reassociateOps. Try to reassociate an expression
1056// such as (Opc N0, N1), if \p N0 is the same kind of operation as \p Opc.
1057SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL,
1058 SDValue N0, SDValue N1) {
1059 EVT VT = N0.getValueType();
1060
1061 if (N0.getOpcode() != Opc)
1062 return SDValue();
1063
1064 if (DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) {
1065 if (DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
1066 // Reassociate: (op (op x, c1), c2) -> (op x, (op c1, c2))
1067 if (SDValue OpNode =
1068 DAG.FoldConstantArithmetic(Opc, DL, VT, {N0.getOperand(1), N1}))
1069 return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode);
1070 return SDValue();
1071 }
1072 if (N0.hasOneUse()) {
1073 // Reassociate: (op (op x, c1), y) -> (op (op x, y), c1)
1074 // iff (op x, c1) has one use
1075 SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1);
1076 if (!OpNode.getNode())
1077 return SDValue();
1078 return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1));
1079 }
1080 }
1081 return SDValue();
1082}
1083
1084// Try to reassociate commutative binops.
1085SDValue DAGCombiner::reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
1086 SDValue N1, SDNodeFlags Flags) {
1087 assert(TLI.isCommutativeBinOp(Opc) && "Operation not commutative.")((TLI.isCommutativeBinOp(Opc) && "Operation not commutative."
) ? static_cast<void> (0) : __assert_fail ("TLI.isCommutativeBinOp(Opc) && \"Operation not commutative.\""
, "/build/llvm-toolchain-snapshot-13~++20210413100635+64c24f493e5f/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp"
, 1087, __PRETTY_FUNCTION__))
;
1088
1089 // Floating-point reassociation is not allowed without loose FP math.
1090 if (N0.getValueType().isFloatingPoint() ||
1091 N1.getValueType().isFloatingPoint())
1092 if (!Flags.hasAllowReassociation() || !Flags.hasNoSignedZeros())
1093 return SDValue();
1094
1095 if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N0, N1))
1096 return Combined;
1097 if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N1, N0))
1098 return Combined;
1099 return SDValue();
1100}
1101
1102SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
1103 bool AddTo) {
1104 assert(N->getNumValues() == NumTo && "Broken CombineTo call!")((N->getNumValues() == NumTo && "Broken CombineTo call!"
) ? static_cast<void> (0) : __assert_fail ("N->getNumValues() == NumTo && \"Broken CombineTo call!\""
, "/build/llvm-toolchain-snapshot-13~++20210413100635+64c24f493e5f/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp"
, 1104, __PRETTY_FUNCTION__))
;
1105 ++NodesCombined;
1106 LLVM_DEBUG(dbgs() << "\nReplacing.1 "; N->dump(&DAG); dbgs() << "\nWith: ";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("dagcombine")) { dbgs() << "\nReplacing.1 "; N->dump
(&DAG); dbgs() << "\nWith: "; To[0].getNode()->dump
(&DAG); dbgs() << " and " << NumTo - 1 <<
" other values\n"; } } while (false)
1107 To[0].getNode()->dump(&DAG);do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("dagcombine")) { dbgs() << "\nReplacing.1 "; N->dump
(&DAG); dbgs() << "\nWith: "; To[0].getNode()->dump
(&DAG); dbgs() << " and " << NumTo - 1 <<
" other values\n"; } } while (false)
1108 dbgs() << " and " << NumTo - 1 << " other values\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("dagcombine")) { dbgs() << "\nReplacing.1 "; N->dump
(&DAG); dbgs() << "\nWith: "; To[0].getNode()->dump
(&DAG); dbgs() << " and " << NumTo - 1 <<
" other values\n"; } } while (false)
;
1109 for (unsigned i = 0, e = NumTo; i != e; ++i)
1110 assert((!To[i].getNode() ||(((!To[i].getNode() || N->getValueType(i) == To[i].getValueType
()) && "Cannot combine value to value of different type!"
) ? static_cast<void> (0) : __assert_fail ("(!To[i].getNode() || N->getValueType(i) == To[i].getValueType()) && \"Cannot combine value to value of different type!\""
, "/build/llvm-toolchain-snapshot-13~++20210413100635+64c24f493e5f/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp"
, 1112, __PRETTY_FUNCTION__))
1111 N->getValueType(i) == To[i].getValueType()) &&(((!To[i].getNode() || N->getValueType(i) == To[i].getValueType
()) && "Cannot combine value to value of different type!"
) ? static_cast<void> (0) : __assert_fail ("(!To[i].getNode() || N->getValueType(i) == To[i].getValueType()) && \"Cannot combine value to value of different type!\""
, "/build/llvm-toolchain-snapshot-13~++20210413100635+64c24f493e5f/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp"
, 1112, __PRETTY_FUNCTION__))
1112 "Cannot combine value to value of different type!")(((!To[i].getNode() || N->getValueType(i) == To[i].getValueType
()) && "Cannot combine value to value of different type!"
) ? static_cast<void> (0) : __assert_fail ("(!To[i].getNode() || N->getValueType(i) == To[i].getValueType()) && \"Cannot combine value to value of different type!\""
, "/build/llvm-toolchain-snapshot-13~++20210413100635+64c24f493e5f/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp"
, 1112, __PRETTY_FUNCTION__))
;
1113
1114 WorklistRemover DeadNodes(*this);
1115 DAG.ReplaceAllUsesWith(N, To);
1116 if (AddTo) {
1117 // Push the new nodes and any users onto the worklist
1118 for (unsigned i = 0, e = NumTo; i != e; ++i) {
1119 if (To[i].getNode()) {
1120 AddToWorklist(To[i].getNode());
1121 AddUsersToWorklist(To[i].getNode());
1122 }
1123 }
1124 }
1125
1126 // Finally, if the node is now dead, remove it from the graph. The node
1127 // may not be dead if the replacement process recursively simplified to
1128 // something else needing this node.
1129 if (N->use_empty())
1130 deleteAndRecombine(N);
1131 return SDValue(N, 0);
1132}
1133
1134void DAGCombiner::
1135CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
1136 // Replace the old value with the new one.
1137 ++NodesCombined;
1138 LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.getNode()->dump(&DAG);do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("dagcombine")) { dbgs() << "\nReplacing.2 "; TLO.Old.getNode
()->dump(&DAG); dbgs() << "\nWith: "; TLO.New.getNode
()->dump(&DAG); dbgs() << '\n'; } } while (false
)
1139 dbgs() << "\nWith: "; TLO.New.getNode()->dump(&DAG);do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("dagcombine")) { dbgs() << "\nReplacing.2 "; TLO.Old.getNode
()->dump(&DAG); dbgs() << "\nWith: "; TLO.New.getNode
()->dump(&DAG); dbgs() << '\n'; } } while (false
)
1140 dbgs() << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("dagcombine")) { dbgs() << "\nReplacing.2 "; TLO.Old.getNode
()->dump(&DAG); dbgs() << "\nWith: "; TLO.New.getNode
()->dump(&DAG); dbgs() << '\n'; } } while (false
)
;
1141
1142 // Replace all uses. If any nodes become isomorphic to other nodes and
1143 // are deleted, make sure to remove them from our worklist.
1144 WorklistRemover DeadNodes(*this);
1145 DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);
1146
1147 // Push the new node and any (possibly new) users onto the worklist.
1148 AddToWorklistWithUsers(TLO.New.getNode());
1149
1150 // Finally, if the node is now dead, remove it from the graph. The node
1151 // may not be dead if the replacement process recursively simplified to
1152 // something else needing this node.
1153 if (TLO.Old.getNode()->use_empty())
1154 deleteAndRecombine(TLO.Old.getNode());
1155}
1156
1157/// Check the specified integer node value to see if it can be simplified or if
1158/// things it uses can be simplified by bit propagation. If so, return true.
1159bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
1160 const APInt &DemandedElts,
1161 bool AssumeSingleUse) {
1162 TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1163 KnownBits Known;
1164 if (!TLI.SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, 0,
1165 AssumeSingleUse))
1166 return false;
1167
1168 // Revisit the node.
1169 AddToWorklist(Op.getNode());
1170
1171 CommitTargetLoweringOpt(TLO);
1172 return true;
1173}
1174
1175/// Check the specified vector node value to see if it can be simplified or
1176/// if things it uses can be simplified as it only uses some of the elements.
1177/// If so, return true.
1178bool DAGCombiner::SimplifyDemandedVectorElts(SDValue Op,
1179 const APInt &DemandedElts,
1180 bool AssumeSingleUse) {
1181 TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1182 APInt KnownUndef, KnownZero;
1183 if (!TLI.SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero,
1184 TLO, 0, AssumeSingleUse))
1185 return false;
1186
1187 // Revisit the node.
1188 AddToWorklist(Op.getNode());
1189
1190 CommitTargetLoweringOpt(TLO);
1191 return true;
1192}
1193
1194void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
1195 SDLoc DL(Load);
1196 EVT VT = Load->getValueType(0);
1197 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, SDValue(ExtLoad, 0));
1198
1199 LLVM_DEBUG(dbgs() << "\nReplacing.9 "; Load->dump(&DAG); dbgs() << "\nWith: ";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("dagcombine")) { dbgs() << "\nReplacing.9 "; Load->
dump(&DAG); dbgs() << "\nWith: "; Trunc.getNode()->
dump(&DAG); dbgs() << '\n'; } } while (false)
1200 Trunc.getNode()->dump(&DAG); dbgs() << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("dagcombine")) { dbgs() << "\nReplacing.9 "; Load->
dump(&DAG); dbgs() << "\nWith: "; Trunc.getNode()->
dump(&DAG); dbgs() << '\n'; } } while (false)
;
1201 WorklistRemover DeadNodes(*this);
1202 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
1203 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));
1204 deleteAndRecombine(Load);
1205 AddToWorklist(Trunc.getNode());
1206}
1207
1208SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
1209 Replace = false;
1210 SDLoc DL(Op);
1211 if (ISD::isUNINDEXEDLoad(Op.getNode())) {
1212 LoadSDNode *LD = cast<LoadSDNode>(Op);
1213 EVT MemVT = LD->getMemoryVT();
1214 ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
1215 : LD->getExtensionType();
1216 Replace = true;
1217 return DAG.getExtLoad(ExtType, DL, PVT,
1218 LD->getChain(), LD->getBasePtr(),
1219 MemVT, LD->getMemOperand());
1220 }
1221
1222 unsigned Opc = Op.getOpcode();
1223 switch (Opc) {
1224 default: break;
1225 case ISD::AssertSext:
1226 if (SDValue Op0 = SExtPromoteOperand(Op.getOperand(0), PVT))
1227 return DAG.getNode(ISD::AssertSext, DL, PVT, Op0, Op.getOperand(1));
1228 break;
1229 case ISD::AssertZext:
1230 if (SDValue Op0 = ZExtPromoteOperand(Op.getOperand(0), PVT))
1231 return DAG.getNode(ISD::AssertZext, DL, PVT, Op0, Op.getOperand(1));
1232 break;
1233 case ISD::Constant: {
1234 unsigned ExtOpc =
1235 Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
1236 return DAG.getNode(ExtOpc, DL, PVT, Op);
1237 }
1238 }
1239
1240 if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
1241 return SDValue();
1242 return DAG.getNode(ISD::ANY_EXTEND, DL, PVT, Op);
1243}
1244
1245SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
1246 if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT))
1247 return SDValue();
1248 EVT OldVT = Op.getValueType();
1249 SDLoc DL(Op);
1250 bool Replace = false;
1251 SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1252 if (!NewOp.getNode())
1253 return SDValue();
1254 AddToWorklist(NewOp.getNode());
1255
1256 if (Replace)
1257 ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1258 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, NewOp.getValueType(), NewOp,
1259 DAG.getValueType(OldVT));
1260}
1261
1262SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
1263 EVT OldVT = Op.getValueType();
1264 SDLoc DL(Op);
1265 bool Replace = false;
1266 SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1267 if (!NewOp.getNode())
1268 return SDValue();
1269 AddToWorklist(NewOp.getNode());
1270
1271 if (Replace)
1272 ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1273 return DAG.getZeroExtendInReg(NewOp, DL, OldVT);
1274}
1275
1276/// Promote the specified integer binary operation if the target indicates it is
1277/// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1278/// i32 since i16 instructions are longer.
1279SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
1280 if (!LegalOperations)
1281 return SDValue();
1282
1283 EVT VT = Op.getValueType();
1284 if (VT.isVector() || !VT.isInteger())
1285 return SDValue();
1286
1287 // If operation type is 'undesirable', e.g. i16 on x86, consider
1288 // promoting it.
1289 unsigned Opc = Op.getOpcode();
1290 if (TLI.isTypeDesirableForOp(Opc, VT))
1291 return SDValue();
1292
1293 EVT PVT = VT;
1294 // Consult target whether it is a good idea to promote this operation and
1295 // what's the right type to promote it to.
1296 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1297 assert(PVT != VT && "Don't know what type to promote to!")((PVT != VT && "Don't know what type to promote to!")
? static_cast<void> (0) : __assert_fail ("PVT != VT && \"Don't know what type to promote to!\""
, "/build/llvm-toolchain-snapshot-13~++20210413100635+64c24f493e5f/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp"
, 1297, __PRETTY_FUNCTION__))
;
1298
1299 LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG))do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("dagcombine")) { dbgs() << "\nPromoting "; Op.getNode(
)->dump(&DAG); } } while (false)
;
1300
1301 bool Replace0 = false;
1302 SDValue N0 = Op.getOperand(0);
1303 SDValue NN0 = PromoteOperand(N0, PVT, Replace0);
1304
1305 bool Replace1 = false;
1306 SDValue N1 = Op.getOperand(1);
1307 SDValue NN1 = PromoteOperand(N1, PVT, Replace1);
1308 SDLoc DL(Op);
1309
1310 SDValue RV =
1311 DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, NN0, NN1));
1312
1313 // We are always replacing N0/N1's use in N and only need additional
1314 // replacements if there are additional uses.
1315 // Note: We are checking uses of the *nodes* (SDNode) rather than values
1316 // (SDValue) here because the node may reference multiple values
1317 // (for example, the chain value of a load node).
1318 Replace0 &= !N0->hasOneUse();
1319 Replace1 &= (N0 != N1) && !N1->hasOneUse();
1320
1321 // Combine Op here so it is preserved past replacements.
1322 CombineTo(Op.getNode(), RV);
1323
1324 // If operands have a use ordering, make sure we deal with
1325 // predecessor first.
1326 if (Replace0 && Replace1 && N0.getNode()->isPredecessorOf(N1.getNode())) {
1327 std::swap(N0, N1);
1328 std::swap(NN0, NN1);
1329 }
1330
1331 if (Replace0) {
1332 AddToWorklist(NN0.getNode());
1333 ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
1334 }
1335 if (Replace1) {
1336 AddToWorklist(NN1.getNode());
1337 ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());
1338 }
1339 return Op;
1340 }
1341 return SDValue();
1342}
1343
/// Promote the specified integer shift operation if the target indicates it is
/// beneficial. e.g. On x86, it's usually better to promote i16 operations to
/// i32 since i16 instructions are longer.
///
/// Returns the truncated result of the widened shift, or a null SDValue if no
/// promotion was performed.
SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
  if (!LegalOperations)
    return SDValue();

  EVT VT = Op.getValueType();
  if (VT.isVector() || !VT.isInteger())
    return SDValue();

  // If operation type is 'undesirable', e.g. i16 on x86, consider
  // promoting it.
  unsigned Opc = Op.getOpcode();
  if (TLI.isTypeDesirableForOp(Opc, VT))
    return SDValue();

  EVT PVT = VT;
  // Consult target whether it is a good idea to promote this operation and
  // what's the right type to promote it to.
  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
    assert(PVT != VT && "Don't know what type to promote to!");

    LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));

    bool Replace = false;
    SDValue N0 = Op.getOperand(0);
    SDValue N1 = Op.getOperand(1);
    // Only the shifted value is widened; the shift amount (N1) keeps its type.
    // SRA must preserve the sign bit and SRL must clear the new high bits, so
    // they use sext/zext respectively; other shifts don't care about the high
    // bits of the widened operand.
    if (Opc == ISD::SRA)
      N0 = SExtPromoteOperand(N0, PVT);
    else if (Opc == ISD::SRL)
      N0 = ZExtPromoteOperand(N0, PVT);
    else
      N0 = PromoteOperand(N0, PVT, Replace);

    // Promotion of the operand may have failed.
    if (!N0.getNode())
      return SDValue();

    SDLoc DL(Op);
    // Perform the shift in the wider type, then truncate back to VT.
    SDValue RV =
        DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, N0, N1));

    if (Replace)
      ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());

    // Deal with Op being deleted: the replacement above may have recursively
    // combined Op away, in which case RV must not be returned as a combine of
    // a dead node.
    if (Op && Op.getOpcode() != ISD::DELETED_NODE)
      return RV;
  }
  return SDValue();
}
1395
/// Promote the specified integer extend operation if the target indicates the
/// extended type is undesirable (e.g. i16 on x86). The extend is simply
/// re-emitted with its promoted result type folded through.
SDValue DAGCombiner::PromoteExtend(SDValue Op) {
  if (!LegalOperations)
    return SDValue();

  EVT VT = Op.getValueType();
  if (VT.isVector() || !VT.isInteger())
    return SDValue();

  // If operation type is 'undesirable', e.g. i16 on x86, consider
  // promoting it.
  unsigned Opc = Op.getOpcode();
  if (TLI.isTypeDesirableForOp(Opc, VT))
    return SDValue();

  EVT PVT = VT;
  // Consult target whether it is a good idea to promote this operation and
  // what's the right type to promote it to.
  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
    assert(PVT != VT && "Don't know what type to promote to!");
    // fold (aext (aext x)) -> (aext x)
    // fold (aext (zext x)) -> (zext x)
    // fold (aext (sext x)) -> (sext x)
    LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
    // Note: the rebuilt node keeps the original opcode and result type VT but
    // extends directly from the inner value, skipping the intermediate type.
    return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0));
  }
  return SDValue();
}
1423
/// Promote an unindexed integer load to a wider type if the target indicates
/// the narrow type is undesirable. The load is replaced by an extending load
/// of the promoted type followed by a truncate back to the original type.
/// Returns true if the replacement was performed.
bool DAGCombiner::PromoteLoad(SDValue Op) {
  if (!LegalOperations)
    return false;

  // Only plain unindexed loads are handled; pre/post-indexed forms carry an
  // address-update result that this transform does not rewire.
  if (!ISD::isUNINDEXEDLoad(Op.getNode()))
    return false;

  EVT VT = Op.getValueType();
  if (VT.isVector() || !VT.isInteger())
    return false;

  // If operation type is 'undesirable', e.g. i16 on x86, consider
  // promoting it.
  unsigned Opc = Op.getOpcode();
  if (TLI.isTypeDesirableForOp(Opc, VT))
    return false;

  EVT PVT = VT;
  // Consult target whether it is a good idea to promote this operation and
  // what's the right type to promote it to.
  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
    assert(PVT != VT && "Don't know what type to promote to!");

    SDLoc DL(Op);
    SDNode *N = Op.getNode();
    LoadSDNode *LD = cast<LoadSDNode>(N);
    EVT MemVT = LD->getMemoryVT();
    // A non-extending load becomes an any-extending load of the original
    // memory type; an existing ext load keeps its extension kind.
    ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
                                                      : LD->getExtensionType();
    SDValue NewLD = DAG.getExtLoad(ExtType, DL, PVT,
                                   LD->getChain(), LD->getBasePtr(),
                                   MemVT, LD->getMemOperand());
    SDValue Result = DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD);

    LLVM_DEBUG(dbgs() << "\nPromoting "; N->dump(&DAG); dbgs() << "\nTo: ";
               Result.getNode()->dump(&DAG); dbgs() << '\n');
    WorklistRemover DeadNodes(*this);
    // Rewire both results of the old load: the loaded value (result 0) to the
    // truncated wide load, and the chain (result 1) to the new load's chain.
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
    deleteAndRecombine(N);
    AddToWorklist(Result.getNode());
    return true;
  }
  return false;
}
1469
1470/// Recursively delete a node which has no uses and any operands for
1471/// which it is the only use.
1472///
1473/// Note that this both deletes the nodes and removes them from the worklist.
1474/// It also adds any nodes who have had a user deleted to the worklist as they
1475/// may now have only one use and subject to other combines.
1476bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) {
1477 if (!N->use_empty())
1478 return false;
1479
1480 SmallSetVector<SDNode *, 16> Nodes;
1481 Nodes.insert(N);
1482 do {
1483 N = Nodes.pop_back_val();
1484 if (!N)
1485 continue;
1486
1487 if (N->use_empty()) {
1488 for (const SDValue &ChildN : N->op_values())
1489 Nodes.insert(ChildN.getNode());
1490
1491 removeFromWorklist(N);
1492 DAG.DeleteNode(N);
1493 } else {
1494 AddToWorklist(N);
1495 }
1496 } while (!Nodes.empty());
1497 return true;
1498}
1499
1500//===----------------------------------------------------------------------===//
1501// Main DAG Combiner implementation
1502//===----------------------------------------------------------------------===//
1503
/// Main driver of the DAG combiner: seed the worklist with every node in the
/// DAG, then repeatedly pop a node, (re-)legalize it if needed, combine it,
/// splice in any replacement, and garbage-collect dead nodes until the
/// worklist is exhausted.
void DAGCombiner::Run(CombineLevel AtLevel) {
  // set the instance variables, so that the various visit routines may use it.
  Level = AtLevel;
  LegalDAG = Level >= AfterLegalizeDAG;
  LegalOperations = Level >= AfterLegalizeVectorOps;
  LegalTypes = Level >= AfterLegalizeTypes;

  WorklistInserter AddNodes(*this);

  // Add all the dag nodes to the worklist.
  for (SDNode &Node : DAG.allnodes())
    AddToWorklist(&Node);

  // Create a dummy node (which is not added to allnodes), that adds a reference
  // to the root node, preventing it from being deleted, and tracking any
  // changes of the root.
  HandleSDNode Dummy(DAG.getRoot());

  // While we have a valid worklist entry node, try to combine it.
  while (SDNode *N = getNextWorklistEntry()) {
    // If N has no uses, it is dead.  Make sure to revisit all N's operands once
    // N is deleted from the DAG, since they too may now be dead or may have a
    // reduced number of uses, allowing other xforms.
    if (recursivelyDeleteUnusedNodes(N))
      continue;

    WorklistRemover DeadNodes(*this);

    // If this combine is running after legalizing the DAG, re-legalize any
    // nodes pulled off the worklist.
    if (LegalDAG) {
      SmallSetVector<SDNode *, 16> UpdatedNodes;
      bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes);

      for (SDNode *LN : UpdatedNodes)
        AddToWorklistWithUsers(LN);

      // If legalization replaced or deleted N, it must not be visited.
      if (!NIsValid)
        continue;
    }

    LLVM_DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG));

    // Add any operands of the new node which have not yet been combined to the
    // worklist as well. Because the worklist uniques things already, this
    // won't repeatedly process the same operand.
    CombinedNodes.insert(N);
    for (const SDValue &ChildN : N->op_values())
      if (!CombinedNodes.count(ChildN.getNode()))
        AddToWorklist(ChildN.getNode());

    SDValue RV = combine(N);

    if (!RV.getNode())
      continue;

    ++NodesCombined;

    // If we get back the same node we passed in, rather than a new node or
    // zero, we know that the node must have defined multiple values and
    // CombineTo was used.  Since CombineTo takes care of the worklist
    // mechanics for us, we have no work to do in this case.
    if (RV.getNode() == N)
      continue;

    assert(N->getOpcode() != ISD::DELETED_NODE &&
           RV.getOpcode() != ISD::DELETED_NODE &&
           "Node was deleted but visit returned new node!");

    LLVM_DEBUG(dbgs() << " ... into: "; RV.getNode()->dump(&DAG));

    // Splice the replacement in: either a value-for-value RAUW when the node
    // counts match, or a single-result replacement otherwise.
    if (N->getNumValues() == RV.getNode()->getNumValues())
      DAG.ReplaceAllUsesWith(N, RV.getNode());
    else {
      assert(N->getValueType(0) == RV.getValueType() &&
             N->getNumValues() == 1 && "Type mismatch");
      DAG.ReplaceAllUsesWith(N, &RV);
    }

    // Push the new node and any users onto the worklist.  Omit this if the
    // new node is the EntryToken (e.g. if a store managed to get optimized
    // out), because re-visiting the EntryToken and its users will not uncover
    // any additional opportunities, but there may be a large number of such
    // users, potentially causing compile time explosion.
    if (RV.getOpcode() != ISD::EntryToken) {
      AddToWorklist(RV.getNode());
      AddUsersToWorklist(RV.getNode());
    }

    // Finally, if the node is now dead, remove it from the graph.  The node
    // may not be dead if the replacement process recursively simplified to
    // something else needing this node.  This will also take care of adding any
    // operands which have lost a user to the worklist.
    recursivelyDeleteUnusedNodes(N);
  }

  // If the root changed (e.g. it was a dead load, update the root).
  DAG.setRoot(Dummy.getValue());
  DAG.RemoveDeadNodes();
}
1604
/// Dispatch \p N to its opcode-specific visit routine. Returns the combined
/// replacement value, or a null SDValue if no generic combine applies to this
/// opcode.
SDValue DAGCombiner::visit(SDNode *N) {
  switch (N->getOpcode()) {
  default: break;
  case ISD::TokenFactor:        return visitTokenFactor(N);
  case ISD::MERGE_VALUES:       return visitMERGE_VALUES(N);
  case ISD::ADD:                return visitADD(N);
  case ISD::SUB:                return visitSUB(N);
  case ISD::SADDSAT:
  case ISD::UADDSAT:            return visitADDSAT(N);
  case ISD::SSUBSAT:
  case ISD::USUBSAT:            return visitSUBSAT(N);
  case ISD::ADDC:               return visitADDC(N);
  case ISD::SADDO:
  case ISD::UADDO:              return visitADDO(N);
  case ISD::SUBC:               return visitSUBC(N);
  case ISD::SSUBO:
  case ISD::USUBO:              return visitSUBO(N);
  case ISD::ADDE:               return visitADDE(N);
  case ISD::ADDCARRY:           return visitADDCARRY(N);
  case ISD::SADDO_CARRY:        return visitSADDO_CARRY(N);
  case ISD::SUBE:               return visitSUBE(N);
  case ISD::SUBCARRY:           return visitSUBCARRY(N);
  case ISD::SSUBO_CARRY:        return visitSSUBO_CARRY(N);
  case ISD::SMULFIX:
  case ISD::SMULFIXSAT:
  case ISD::UMULFIX:
  case ISD::UMULFIXSAT:         return visitMULFIX(N);
  case ISD::MUL:                return visitMUL(N);
  case ISD::SDIV:               return visitSDIV(N);
  case ISD::UDIV:               return visitUDIV(N);
  case ISD::SREM:
  case ISD::UREM:               return visitREM(N);
  case ISD::MULHU:              return visitMULHU(N);
  case ISD::MULHS:              return visitMULHS(N);
  case ISD::SMUL_LOHI:          return visitSMUL_LOHI(N);
  case ISD::UMUL_LOHI:          return visitUMUL_LOHI(N);
  case ISD::SMULO:
  case ISD::UMULO:              return visitMULO(N);
  case ISD::SMIN:
  case ISD::SMAX:
  case ISD::UMIN:
  case ISD::UMAX:               return visitIMINMAX(N);
  case ISD::AND:                return visitAND(N);
  case ISD::OR:                 return visitOR(N);
  case ISD::XOR:                return visitXOR(N);
  case ISD::SHL:                return visitSHL(N);
  case ISD::SRA:                return visitSRA(N);
  case ISD::SRL:                return visitSRL(N);
  case ISD::ROTR:
  case ISD::ROTL:               return visitRotate(N);
  case ISD::FSHL:
  case ISD::FSHR:               return visitFunnelShift(N);
  case ISD::ABS:                return visitABS(N);
  case ISD::BSWAP:              return visitBSWAP(N);
  case ISD::BITREVERSE:         return visitBITREVERSE(N);
  case ISD::CTLZ:               return visitCTLZ(N);
  case ISD::CTLZ_ZERO_UNDEF:    return visitCTLZ_ZERO_UNDEF(N);
  case ISD::CTTZ:               return visitCTTZ(N);
  case ISD::CTTZ_ZERO_UNDEF:    return visitCTTZ_ZERO_UNDEF(N);
  case ISD::CTPOP:              return visitCTPOP(N);
  case ISD::SELECT:             return visitSELECT(N);
  case ISD::VSELECT:            return visitVSELECT(N);
  case ISD::SELECT_CC:          return visitSELECT_CC(N);
  case ISD::SETCC:              return visitSETCC(N);
  case ISD::SETCCCARRY:         return visitSETCCCARRY(N);
  case ISD::SIGN_EXTEND:        return visitSIGN_EXTEND(N);
  case ISD::ZERO_EXTEND:        return visitZERO_EXTEND(N);
  case ISD::ANY_EXTEND:         return visitANY_EXTEND(N);
  case ISD::AssertSext:
  case ISD::AssertZext:         return visitAssertExt(N);
  case ISD::AssertAlign:        return visitAssertAlign(N);
  case ISD::SIGN_EXTEND_INREG:  return visitSIGN_EXTEND_INREG(N);
  case ISD::SIGN_EXTEND_VECTOR_INREG: return visitSIGN_EXTEND_VECTOR_INREG(N);
  case ISD::ZERO_EXTEND_VECTOR_INREG: return visitZERO_EXTEND_VECTOR_INREG(N);
  case ISD::TRUNCATE:           return visitTRUNCATE(N);
  case ISD::BITCAST:            return visitBITCAST(N);
  case ISD::BUILD_PAIR:         return visitBUILD_PAIR(N);
  case ISD::FADD:               return visitFADD(N);
  case ISD::STRICT_FADD:        return visitSTRICT_FADD(N);
  case ISD::FSUB:               return visitFSUB(N);
  case ISD::FMUL:               return visitFMUL(N);
  case ISD::FMA:                return visitFMA(N);
  case ISD::FDIV:               return visitFDIV(N);
  case ISD::FREM:               return visitFREM(N);
  case ISD::FSQRT:              return visitFSQRT(N);
  case ISD::FCOPYSIGN:          return visitFCOPYSIGN(N);
  case ISD::FPOW:               return visitFPOW(N);
  case ISD::SINT_TO_FP:         return visitSINT_TO_FP(N);
  case ISD::UINT_TO_FP:         return visitUINT_TO_FP(N);
  case ISD::FP_TO_SINT:         return visitFP_TO_SINT(N);
  case ISD::FP_TO_UINT:         return visitFP_TO_UINT(N);
  case ISD::FP_ROUND:           return visitFP_ROUND(N);
  case ISD::FP_EXTEND:          return visitFP_EXTEND(N);
  case ISD::FNEG:               return visitFNEG(N);
  case ISD::FABS:               return visitFABS(N);
  case ISD::FFLOOR:             return visitFFLOOR(N);
  case ISD::FMINNUM:            return visitFMINNUM(N);
  case ISD::FMAXNUM:            return visitFMAXNUM(N);
  case ISD::FMINIMUM:           return visitFMINIMUM(N);
  case ISD::FMAXIMUM:           return visitFMAXIMUM(N);
  case ISD::FCEIL:              return visitFCEIL(N);
  case ISD::FTRUNC:             return visitFTRUNC(N);
  case ISD::BRCOND:             return visitBRCOND(N);
  case ISD::BR_CC:              return visitBR_CC(N);
  case ISD::LOAD:               return visitLOAD(N);
  case ISD::STORE:              return visitSTORE(N);
  case ISD::INSERT_VECTOR_ELT:  return visitINSERT_VECTOR_ELT(N);
  case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
  case ISD::BUILD_VECTOR:       return visitBUILD_VECTOR(N);
  case ISD::CONCAT_VECTORS:     return visitCONCAT_VECTORS(N);
  case ISD::EXTRACT_SUBVECTOR:  return visitEXTRACT_SUBVECTOR(N);
  case ISD::VECTOR_SHUFFLE:     return visitVECTOR_SHUFFLE(N);
  case ISD::SCALAR_TO_VECTOR:   return visitSCALAR_TO_VECTOR(N);
  case ISD::INSERT_SUBVECTOR:   return visitINSERT_SUBVECTOR(N);
  case ISD::MGATHER:            return visitMGATHER(N);
  case ISD::MLOAD:              return visitMLOAD(N);
  case ISD::MSCATTER:           return visitMSCATTER(N);
  case ISD::MSTORE:             return visitMSTORE(N);
  case ISD::LIFETIME_END:       return visitLIFETIME_END(N);
  case ISD::FP_TO_FP16:         return visitFP_TO_FP16(N);
  case ISD::FP16_TO_FP:         return visitFP16_TO_FP(N);
  case ISD::FREEZE:             return visitFREEZE(N);
  // All vector reductions share a single visitor.
  case ISD::VECREDUCE_FADD:
  case ISD::VECREDUCE_FMUL:
  case ISD::VECREDUCE_ADD:
  case ISD::VECREDUCE_MUL:
  case ISD::VECREDUCE_AND:
  case ISD::VECREDUCE_OR:
  case ISD::VECREDUCE_XOR:
  case ISD::VECREDUCE_SMAX:
  case ISD::VECREDUCE_SMIN:
  case ISD::VECREDUCE_UMAX:
  case ISD::VECREDUCE_UMIN:
  case ISD::VECREDUCE_FMAX:
  case ISD::VECREDUCE_FMIN:     return visitVECREDUCE(N);
  }
  return SDValue();
}
1743
/// Try every available combine on \p N, in order: the generic opcode visitor,
/// the target's PerformDAGCombine hook, integer-type promotion, and finally
/// CSE against the commuted form. Returns the replacement value, or a null
/// SDValue if nothing matched.
SDValue DAGCombiner::combine(SDNode *N) {
  SDValue RV;
  if (!DisableGenericCombines)
    RV = visit(N);

  // If nothing happened, try a target-specific DAG combine.
  if (!RV.getNode()) {
    assert(N->getOpcode() != ISD::DELETED_NODE &&
           "Node was deleted but visit returned NULL!");

    // Target-specific opcodes (>= BUILTIN_OP_END) always go to the target;
    // generic opcodes only when the target registered interest in them.
    if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
        TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {

      // Expose the DAG combiner to the target combiner impls.
      TargetLowering::DAGCombinerInfo
        DagCombineInfo(DAG, Level, false, this);

      RV = TLI.PerformDAGCombine(N, DagCombineInfo);
    }
  }

  // If nothing happened still, try promoting the operation.
  if (!RV.getNode()) {
    switch (N->getOpcode()) {
    default: break;
    case ISD::ADD:
    case ISD::SUB:
    case ISD::MUL:
    case ISD::AND:
    case ISD::OR:
    case ISD::XOR:
      RV = PromoteIntBinOp(SDValue(N, 0));
      break;
    case ISD::SHL:
    case ISD::SRA:
    case ISD::SRL:
      RV = PromoteIntShiftOp(SDValue(N, 0));
      break;
    case ISD::SIGN_EXTEND:
    case ISD::ZERO_EXTEND:
    case ISD::ANY_EXTEND:
      RV = PromoteExtend(SDValue(N, 0));
      break;
    case ISD::LOAD:
      // PromoteLoad replaces N in place, so returning N itself signals to the
      // caller (Run) that CombineTo-style worklist maintenance already ran.
      if (PromoteLoad(SDValue(N, 0)))
        RV = SDValue(N, 0);
      break;
    }
  }

  // If N is a commutative binary node, try to eliminate it if the commuted
  // version is already present in the DAG.
  if (!RV.getNode() && TLI.isCommutativeBinOp(N->getOpcode()) &&
      N->getNumValues() == 1) {
    SDValue N0 = N->getOperand(0);
    SDValue N1 = N->getOperand(1);

    // Constant operands are canonicalized to RHS.
    if (N0 != N1 && (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1))) {
      SDValue Ops[] = {N1, N0};
      SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
                                            N->getFlags());
      if (CSENode)
        return SDValue(CSENode, 0);
    }
  }

  return RV;
}
1813
1814/// Given a node, return its input chain if it has one, otherwise return a null
1815/// sd operand.
1816static SDValue getInputChainForNode(SDNode *N) {
1817 if (unsigned NumOps = N->getNumOperands()) {
1818 if (N->getOperand(0).getValueType() == MVT::Other)
1819 return N->getOperand(0);
1820 if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
1821 return N->getOperand(NumOps-1);
1822 for (unsigned i = 1; i < NumOps-1; ++i)
1823 if (N->getOperand(i).getValueType() == MVT::Other)
1824 return N->getOperand(i);
1825 }
1826 return SDValue();
1827}
1828
/// Simplify a TokenFactor: drop redundant/duplicate chain operands, inline
/// single-use child TokenFactors, and prune operands that are transitively
/// reachable through another operand's chain.
SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
  // If N has two operands, where one has an input chain equal to the other,
  // the 'other' chain is redundant.
  if (N->getNumOperands() == 2) {
    if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
      return N->getOperand(0);
    if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
      return N->getOperand(1);
  }

  // Don't simplify token factors if optnone.
  if (OptLevel == CodeGenOpt::None)
    return SDValue();

  // Don't simplify the token factor if the node itself has too many operands.
  if (N->getNumOperands() > TokenFactorInlineLimit)
    return SDValue();

  // If the sole user is a token factor, we should make sure we have a
  // chance to merge them together. This prevents TF chains from inhibiting
  // optimizations.
  if (N->hasOneUse() && N->use_begin()->getOpcode() == ISD::TokenFactor)
    AddToWorklist(*(N->use_begin()));

  SmallVector<SDNode *, 8> TFs;     // List of token factors to visit.
  SmallVector<SDValue, 8> Ops;      // Ops for replacing token factor.
  SmallPtrSet<SDNode*, 16> SeenOps;
  bool Changed = false;             // If we should replace this token factor.

  // Start out with this token factor.
  TFs.push_back(N);

  // Phase 1: flatten nested TokenFactors. The TFs list grows as new token
  // factors are encountered.
  for (unsigned i = 0; i < TFs.size(); ++i) {
    // Limit number of nodes to inline, to avoid quadratic compile times.
    // We have to add the outstanding Token Factors to Ops, otherwise we might
    // drop Ops from the resulting Token Factors.
    if (Ops.size() > TokenFactorInlineLimit) {
      for (unsigned j = i; j < TFs.size(); j++)
        Ops.emplace_back(TFs[j], 0);
      // Drop unprocessed Token Factors from TFs, so we do not add them to the
      // combiner worklist later.
      TFs.resize(i);
      break;
    }

    SDNode *TF = TFs[i];
    // Check each of the operands.
    for (const SDValue &Op : TF->op_values()) {
      switch (Op.getOpcode()) {
      case ISD::EntryToken:
        // Entry tokens don't need to be added to the list. They are
        // redundant.
        Changed = true;
        break;

      case ISD::TokenFactor:
        if (Op.hasOneUse() && !is_contained(TFs, Op.getNode())) {
          // Queue up for processing.
          TFs.push_back(Op.getNode());
          Changed = true;
          break;
        }
        // Multi-use child TokenFactors are kept as plain operands.
        LLVM_FALLTHROUGH;

      default:
        // Only add if it isn't already in the list.
        if (SeenOps.insert(Op.getNode()).second)
          Ops.push_back(Op);
        else
          Changed = true;
        break;
      }
    }
  }

  // Re-visit inlined Token Factors, to clean them up in case they have been
  // removed. Skip the first Token Factor, as this is the current node.
  for (unsigned i = 1, e = TFs.size(); i < e; i++)
    AddToWorklist(TFs[i]);

  // Phase 2: remove Nodes that are chained to another node in the list. Do so
  // by walking up chains breadth-first stopping when we've seen
  // another operand. In general we must climb to the EntryNode, but we can exit
  // early if we find all remaining work is associated with just one operand as
  // no further pruning is possible.

  // List of nodes to search through and original Ops from which they originate.
  SmallVector<std::pair<SDNode *, unsigned>, 8> Worklist;
  SmallVector<unsigned, 8> OpWorkCount;  // Count of work for each Op.
  SmallPtrSet<SDNode *, 16> SeenChains;
  bool DidPruneOps = false;

  unsigned NumLeftToConsider = 0;
  for (const SDValue &Op : Ops) {
    Worklist.push_back(std::make_pair(Op.getNode(), NumLeftToConsider++));
    OpWorkCount.push_back(1);
  }

  // NOTE: this local lambda deliberately shadows DAGCombiner::AddToWorklist
  // within the pruning walk below; it pushes onto the local Worklist, not the
  // combiner's.
  auto AddToWorklist = [&](unsigned CurIdx, SDNode *Op, unsigned OpNumber) {
    // If this is an Op, we can remove the op from the list. Remark any
    // search associated with it as from the current OpNumber.
    if (SeenOps.contains(Op)) {
      Changed = true;
      DidPruneOps = true;
      unsigned OrigOpNumber = 0;
      while (OrigOpNumber < Ops.size() && Ops[OrigOpNumber].getNode() != Op)
        OrigOpNumber++;
      assert((OrigOpNumber != Ops.size()) &&
             "expected to find TokenFactor Operand");
      // Re-mark worklist from OrigOpNumber to OpNumber
      for (unsigned i = CurIdx + 1; i < Worklist.size(); ++i) {
        if (Worklist[i].second == OrigOpNumber) {
          Worklist[i].second = OpNumber;
        }
      }
      OpWorkCount[OpNumber] += OpWorkCount[OrigOpNumber];
      OpWorkCount[OrigOpNumber] = 0;
      NumLeftToConsider--;
    }
    // Add if it's a new chain
    if (SeenChains.insert(Op).second) {
      OpWorkCount[OpNumber]++;
      Worklist.push_back(std::make_pair(Op, OpNumber));
    }
  };

  // Bounded breadth-first walk up the chains (hard cap of 1024 steps).
  for (unsigned i = 0; i < Worklist.size() && i < 1024; ++i) {
    // We need to consider at least 2 Ops for pruning to remain possible.
    if (NumLeftToConsider <= 1)
      break;
    auto CurNode = Worklist[i].first;
    auto CurOpNumber = Worklist[i].second;
    assert((OpWorkCount[CurOpNumber] > 0) &&
           "Node should not appear in worklist");
    switch (CurNode->getOpcode()) {
    case ISD::EntryToken:
      // Hitting EntryToken is the only way for the search to terminate without
      // hitting
      // another operand's search. Prevent us from marking this operand
      // considered.
      NumLeftToConsider++;
      break;
    case ISD::TokenFactor:
      for (const SDValue &Op : CurNode->op_values())
        AddToWorklist(i, Op.getNode(), CurOpNumber);
      break;
    case ISD::LIFETIME_START:
    case ISD::LIFETIME_END:
    case ISD::CopyFromReg:
    case ISD::CopyToReg:
      // These have a single chain input at operand 0.
      AddToWorklist(i, CurNode->getOperand(0).getNode(), CurOpNumber);
      break;
    default:
      if (auto *MemNode = dyn_cast<MemSDNode>(CurNode))
        AddToWorklist(i, MemNode->getChain().getNode(), CurOpNumber);
      break;
    }
    OpWorkCount[CurOpNumber]--;
    if (OpWorkCount[CurOpNumber] == 0)
      NumLeftToConsider--;
  }

  // If we've changed things around then replace token factor.
  if (Changed) {
    SDValue Result;
    if (Ops.empty()) {
      // The entry token is the only possible outcome.
      Result = DAG.getEntryNode();
    } else {
      if (DidPruneOps) {
        // Keep only operands that were not subsumed by another chain.
        SmallVector<SDValue, 8> PrunedOps;
        for (const SDValue &Op : Ops) {
          if (SeenChains.count(Op.getNode()) == 0)
            PrunedOps.push_back(Op);
        }
        Result = DAG.getTokenFactor(SDLoc(N), PrunedOps);
      } else {
        Result = DAG.getTokenFactor(SDLoc(N), Ops);
      }
    }
    return Result;
  }
  return SDValue();
}
2016
2017/// MERGE_VALUES can always be eliminated.
2018SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
2019 WorklistRemover DeadNodes(*this);
2020 // Replacing results may cause a different MERGE_VALUES to suddenly
2021 // be CSE'd with N, and carry its uses with it. Iterate until no
2022 // uses remain, to ensure that the node can be safely deleted.
2023 // First add the users of this node to the work list so that they
2024 // can be tried again once they have new operands.
2025 AddUsersToWorklist(N);
2026 do {
2027 // Do as a single replacement to avoid rewalking use lists.
2028 SmallVector<SDValue, 8> Ops;
2029 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
2030 Ops.push_back(N->getOperand(i));
2031 DAG.ReplaceAllUsesWith(N, Ops.data());
2032 } while (!N->use_empty());
2033 deleteAndRecombine(N);
2034 return SDValue(N, 0); // Return N so it doesn't get rechecked!
2035}
2036
2037/// If \p N is a ConstantSDNode with isOpaque() == false return it casted to a
2038/// ConstantSDNode pointer else nullptr.
2039static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) {
2040 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N);
2041 return Const != nullptr && !Const->isOpaque() ? Const : nullptr;
2042}
2043
2044/// Return true if 'Use' is a load or a store that uses N as its base pointer
2045/// and that N may be folded in the load / store addressing mode.
2046static bool canFoldInAddressingMode(SDNode *N, SDNode *Use, SelectionDAG &DAG,
2047 const TargetLowering &TLI) {
2048 EVT VT;
2049 unsigned AS;
2050
2051 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Use)) {
2052 if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
2053 return false;
2054 VT = LD->getMemoryVT();
2055 AS = LD->getAddressSpace();
2056 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Use)) {
2057 if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
2058 return false;
2059 VT = ST->getMemoryVT();
2060 AS = ST->getAddressSpace();
2061 } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(Use)) {
2062 if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
2063 return false;
2064 VT = LD->getMemoryVT();
2065 AS = LD->getAddressSpace();
2066 } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(Use)) {
2067 if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
2068 return false;
2069 VT = ST->getMemoryVT();
2070 AS = ST->getAddressSpace();
2071 } else
2072 return false;
2073
2074 TargetLowering::AddrMode AM;
2075 if (N->getOpcode() == ISD::ADD) {
2076 AM.HasBaseReg = true;
2077 ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
2078 if (Offset)
2079 // [reg +/- imm]
2080 AM.BaseOffs = Offset->getSExtValue();
2081 else
2082 // [reg +/- reg]
2083 AM.Scale = 1;
2084 } else if (N->getOpcode() == ISD::SUB) {
2085 AM.HasBaseReg = true;
2086 ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
2087 if (Offset)
2088 // [reg +/- imm]
2089 AM.BaseOffs = -Offset->getSExtValue();
2090 else
2091 // [reg +/- reg]
2092 AM.Scale = 1;
2093 } else
2094 return false;
2095
2096 return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM,
2097 VT.getTypeForEVT(*DAG.getContext()), AS);
2098}
2099
2100SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
2101 assert(TLI.isBinOp(BO->getOpcode()) && BO->getNumValues() == 1 &&((TLI.isBinOp(BO->getOpcode()) && BO->getNumValues
() == 1 && "Unexpected binary operator") ? static_cast
<void> (0) : __assert_fail ("TLI.isBinOp(BO->getOpcode()) && BO->getNumValues() == 1 && \"Unexpected binary operator\""
, "/build/llvm-toolchain-snapshot-13~++20210413100635+64c24f493e5f/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp"
, 2102, __PRETTY_FUNCTION__))
2102 "Unexpected binary operator")((TLI.isBinOp(BO->getOpcode()) && BO->getNumValues
() == 1 && "Unexpected binary operator") ? static_cast
<void> (0) : __assert_fail ("TLI.isBinOp(BO->getOpcode()) && BO->getNumValues() == 1 && \"Unexpected binary operator\""
, "/build/llvm-toolchain-snapshot-13~++20210413100635+64c24f493e5f/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp"
, 2102, __PRETTY_FUNCTION__))
;
2103
2104 // Don't do this unless the old select is going away. We want to eliminate the
2105 // binary operator, not replace a binop with a select.
2106 // TODO: Handle ISD::SELECT_CC.
2107 unsigned SelOpNo = 0;
2108 SDValue Sel = BO->getOperand(0);
2109 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
2110 SelOpNo = 1;
2111 Sel = BO->getOperand(1);
2112 }
2113
2114 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
2115 return SDValue();
2116
2117 SDValue CT = Sel.getOperand(1);
2118 if (!isConstantOrConstantVector(CT, true) &&
2119 !DAG.isConstantFPBuildVectorOrConstantFP(CT))
2120 return SDValue();
2121
2122 SDValue CF = Sel.getOperand(2);
2123 if (!isConstantOrConstantVector(CF, true) &&
2124 !DAG.isConstantFPBuildVectorOrConstantFP(CF))
2125 return SDValue();
2126
2127 // Bail out if any constants are opaque because we can't constant fold those.
2128 // The exception is "and" and "or" with either 0 or -1 in which case we can
2129 // propagate non constant operands into select. I.e.:
2130 // and (select Cond, 0, -1), X --> select Cond, 0, X
2131 // or X, (select Cond, -1, 0) --> select Cond, -1, X
2132 auto BinOpcode = BO->getOpcode();
2133 bool CanFoldNonConst =
2134 (BinOpcode == ISD::AND || BinOpcode == ISD::OR) &&
2135 (isNullOrNullSplat(CT) || isAllOnesOrAllOnesSplat(CT)) &&
2136 (isNullOrNullSplat(CF) || isAllOnesOrAllOnesSplat(CF));
2137
2138 SDValue CBO = BO->getOperand(SelOpNo ^ 1);
2139 if (!CanFoldNonConst &&
2140 !isConstantOrConstantVector(CBO, true) &&
2141 !DAG.isConstantFPBuildVectorOrConstantFP(CBO))
2142 return SDValue();
2143
2144 EVT VT = BO->getValueType(0);
2145
2146 // We have a select-of-constants followed by a binary operator with a
2147 // constant. Eliminate the binop by pulling the constant math into the select.
2148 // Example: add (select Cond, CT, CF), CBO --> select Cond, CT + CBO, CF + CBO
2149 SDLoc DL(Sel);
2150 SDValue NewCT = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CT)
2151 : DAG.getNode(BinOpcode, DL, VT, CT, CBO);
2152 if (!CanFoldNonConst && !NewCT.isUndef() &&
2153 !isConstantOrConstantVector(NewCT, true) &&
2154 !DAG.isConstantFPBuildVectorOrConstantFP(NewCT))
2155 return SDValue();
2156
2157 SDValue NewCF = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CF)
2158 : DAG.getNode(BinOpcode, DL, VT, CF, CBO);
2159 if (!CanFoldNonConst && !NewCF.isUndef() &&
2160 !isConstantOrConstantVector(NewCF, true) &&
2161 !DAG.isConstantFPBuildVectorOrConstantFP(NewCF))
2162 return SDValue();
2163
2164 SDValue SelectOp = DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF);
2165 SelectOp->setFlags(BO->getFlags());
2166 return SelectOp;
2167}
2168
2169static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, SelectionDAG &DAG) {
2170 assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&(((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::
SUB) && "Expecting add or sub") ? static_cast<void
> (0) : __assert_fail ("(N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) && \"Expecting add or sub\""
, "/build/llvm-toolchain-snapshot-13~++20210413100635+64c24f493e5f/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp"
, 2171, __PRETTY_FUNCTION__))
2171 "Expecting add or sub")(((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::
SUB) && "Expecting add or sub") ? static_cast<void
> (0) : __assert_fail ("(N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) && \"Expecting add or sub\""
, "/build/llvm-toolchain-snapshot-13~++20210413100635+64c24f493e5f/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp"
, 2171, __PRETTY_FUNCTION__))
;
2172
2173 // Match a constant operand and a zext operand for the math instruction:
2174 // add Z, C
2175 // sub C, Z
2176 bool IsAdd = N->getOpcode() == ISD::ADD;
2177 SDValue C = IsAdd ? N->getOperand(1) : N->getOperand(0);
2178 SDValue Z = IsAdd ? N->getOperand(0) : N->getOperand(1);
2179 auto *CN = dyn_cast<ConstantSDNode>(C);
2180 if (!CN || Z.getOpcode() != ISD::ZERO_EXTEND)
2181 return SDValue();
2182
2183 // Match the zext operand as a setcc of a boolean.
2184 if (Z.getOperand(0).getOpcode() != ISD::SETCC ||
2185 Z.getOperand(0).getValueType() != MVT::i1)
2186 return SDValue();
2187
2188 // Match the compare as: setcc (X & 1), 0, eq.
2189 SDValue SetCC = Z.getOperand(0);
2190 ISD::CondCode CC = cast<CondCodeSDNode>(SetCC->getOperand(2))->get();
2191 if (CC != ISD::SETEQ || !isNullConstant(SetCC.getOperand(1)) ||
2192 SetCC.getOperand(0).getOpcode() != ISD::AND ||
2193 !isOneConstant(SetCC.getOperand(0).getOperand(1)))
2194 return SDValue();
2195
2196 // We are adding/subtracting a constant and an inverted low bit. Turn that
2197 // into a subtract/add of the low bit with incremented/decremented constant:
2198 // add (zext i1 (seteq (X & 1), 0)), C --> sub C+1, (zext (X & 1))
2199 // sub C, (zext i1 (seteq (X & 1), 0)) --> add C-1, (zext (X & 1))
2200 EVT VT = C.getValueType();
2201 SDLoc DL(N);
2202 SDValue LowBit = DAG.getZExtOrTrunc(SetCC.getOperand(0), DL, VT);
2203 SDValue C1 = IsAdd ? DAG.getConstant(CN->getAPIntValue() + 1, DL, VT) :
2204 DAG.getConstant(CN->getAPIntValue() - 1, DL, VT);
2205 return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, C1, LowBit);
2206}
2207
2208/// Try to fold a 'not' shifted sign-bit with add/sub with constant operand into
2209/// a shift and add with a different constant.
2210static SDValue foldAddSubOfSignBit(SDNode *N, SelectionDAG &DAG) {
2211 assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&(((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::
SUB) && "Expecting add or sub") ? static_cast<void
> (0) : __assert_fail ("(N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) && \"Expecting add or sub\""
, "/build/llvm-toolchain-snapshot-13~++20210413100635+64c24f493e5f/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp"
, 2212, __PRETTY_FUNCTION__))
2212 "Expecting add or sub")(((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::
SUB) && "Expecting add or sub") ? static_cast<void
> (0) : __assert_fail ("(N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) && \"Expecting add or sub\""
, "/build/llvm-toolchain-snapshot-13~++20210413100635+64c24f493e5f/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp"
, 2212, __PRETTY_FUNCTION__))
;
2213
2214 // We need a constant operand for the add/sub, and the other operand is a
2215 // logical shift right: add (srl), C or sub C, (srl).
2216 bool IsAdd = N->getOpcode() == ISD::ADD;
2217 SDValue ConstantOp = IsAdd ? N->getOperand(1) : N->getOperand(0);
2218 SDValue ShiftOp = IsAdd ? N->getOperand(0) : N->getOperand(1);
2219 if (!DAG.isConstantIntBuildVectorOrConstantInt(ConstantOp) ||
2220 ShiftOp.getOpcode() != ISD::SRL)
2221 return SDValue();
2222
2223 // The shift must be of a 'not' value.
2224 SDValue Not = ShiftOp.getOperand(0);
2225 if (!Not.hasOneUse() || !isBitwiseNot(Not))
2226 return SDValue();
2227
2228 // The shift must be moving the sign bit to the least-significant-bit.
2229 EVT VT = ShiftOp.getValueType();
2230 SDValue ShAmt = ShiftOp.getOperand(1);
2231 ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
2232 if (!ShAmtC || ShAmtC->getAPIntValue() != (VT.getScalarSizeInBits() - 1))
2233 return SDValue();
2234
2235 // Eliminate the 'not' by adjusting the shift and add/sub constant:
2236 // add (srl (not X), 31), C --> add (sra X, 31), (C + 1)
2237 // sub C, (srl (not X), 31) --> add (srl X, 31), (C - 1)
2238 SDLoc DL(N);
2239 auto ShOpcode = IsAdd ? ISD::SRA : ISD::SRL;
2240 SDValue NewShift = DAG.getNode(ShOpcode, DL, VT, Not.getOperand(0), ShAmt);
2241 if (SDValue NewC =
2242 DAG.FoldConstantArithmetic(IsAdd ? ISD::ADD : ISD::SUB, DL, VT,
2243 {ConstantOp, DAG.getConstant(1, DL, VT)}))
2244 return DAG.getNode(ISD::ADD, DL, VT, NewShift, NewC);
2245 return SDValue();
2246}
2247
2248/// Try to fold a node that behaves like an ADD (note that N isn't necessarily
2249/// an ISD::ADD here, it could for example be an ISD::OR if we know that there
2250/// are no common bits set in the operands).
2251SDValue DAGCombiner::visitADDLike(SDNode *N) {
2252 SDValue N0 = N->getOperand(0);
2253 SDValue N1 = N->getOperand(1);
2254 EVT VT = N0.getValueType();
2255 SDLoc DL(N);
2256
2257 // fold vector ops
2258 if (VT.isVector()) {
2259 if (SDValue FoldedVOp = SimplifyVBinOp(N))
2260 return FoldedVOp;
2261
2262 // fold (add x, 0) -> x, vector edition
2263 if (ISD::isBuildVectorAllZeros(N1.getNode()))
2264 return N0;
2265 if (ISD::isBuildVectorAllZeros(N0.getNode()))
2266 return N1;
2267 }
2268
2269 // fold (add x, undef) -> undef
2270 if (N0.isUndef())
2271 return N0;
2272
2273 if (N1.isUndef())
2274 return N1;
2275
2276 if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
2277 // canonicalize constant to RHS
2278 if (!DAG.isConstantIntBuildVectorOrConstantInt(N1))
2279 return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
2280 // fold (add c1, c2) -> c1+c2
2281 return DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N0, N1});
2282 }
2283
2284 // fold (add x, 0) -> x
2285 if (isNullConstant(N1))
2286 return N0;
2287
2288 if (isConstantOrConstantVector(N1, /* NoOpaque */ true)) {
2289 // fold ((A-c1)+c2) -> (A+(c2-c1))
2290 if (N0.getOpcode() == ISD::SUB &&
2291 isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true)) {
2292 SDValue Sub =
2293 DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N1, N0.getOperand(1)});
2294 assert(Sub && "Constant folding failed")((Sub && "Constant folding failed") ? static_cast<
void> (0) : __assert_fail ("Sub && \"Constant folding failed\""
, "/build/llvm-toolchain-snapshot-13~++20210413100635+64c24f493e5f/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp"
, 2294, __PRETTY_FUNCTION__))
;
2295 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Sub);
2296 }
2297
2298 // fold ((c1-A)+c2) -> (c1+c2)-A
2299 if (N0.getOpcode() == ISD::SUB &&
2300 isConstantOrConstantVector(N0.getOperand(0), /* NoOpaque */ true)) {
2301 SDValue Add =
2302 DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N1, N0.getOperand(0)});
2303 assert(Add && "Constant folding failed")((Add && "Constant folding failed") ? static_cast<
void> (0) : __assert_fail ("Add && \"Constant folding failed\""
, "/build/llvm-toolchain-snapshot-13~++20210413100635+64c24f493e5f/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp"
, 2303, __PRETTY_FUNCTION__))
;
2304 return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
2305 }
2306
2307 // add (sext i1 X), 1 -> zext (not i1 X)
2308 // We don't transform this pattern:
2309 // add (zext i1 X), -1 -> sext (not i1 X)
2310 // because most (?) targets generate better code for the zext form.
2311 if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
2312 isOneOrOneSplat(N1)) {
2313 SDValue X = N0.getOperand(0);
2314 if ((!LegalOperations ||
2315 (TLI.isOperationLegal(ISD::XOR, X.getValueType()) &&
2316 TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) &&
2317 X.getScalarValueSizeInBits() == 1) {
2318 SDValue Not = DAG.getNOT(DL, X, X.getValueType());
2319 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Not);
2320 }
2321 }
2322
2323 // Fold (add (or x, c0), c1) -> (add x, (c0 + c1)) if (or x, c0) is
2324 // equivalent to (add x, c0).
2325 if (N0.getOpcode() == ISD::OR &&
2326 isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true) &&
2327 DAG.haveNoCommonBitsSet(N0.getOperand(0), N0.getOperand(1))) {
2328 if (SDValue Add0 = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT,
2329 {N1, N0.getOperand(1)}))
2330 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Add0);
2331 }
2332 }
2333
2334 if (SDValue NewSel = foldBinOpIntoSelect(N))
2335 return NewSel;
2336
2337 // reassociate add
2338 if (!reassociationCanBreakAddressingModePattern(ISD::ADD, DL, N0, N1)) {
2339 if (SDValue RADD = reassociateOps(ISD::ADD, DL, N0, N1, N->getFlags()))
2340 return RADD;
2341 }
2342 // fold ((0-A) + B) -> B-A
2343 if (N0.getOpcode() == ISD::SUB && isNullOrNullSplat(N0.getOperand(0)))
2344 return DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
2345
2346 // fold (A + (0-B)) -> A-B
2347 if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
2348 return DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(1));
2349
2350 // fold (A+(B-A)) -> B
2351 if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1))
2352 return N1.getOperand(0);
2353
2354 // fold ((B-A)+A) -> B
2355 if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1))
2356 return N0.getOperand(0);
2357
2358 // fold ((A-B)+(C-A)) -> (C-B)
2359 if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
2360 N0.getOperand(0) == N1.getOperand(1))
2361 return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2362 N0.getOperand(1));
2363
2364 // fold ((A-B)+(B-C)) -> (A-C)
2365 if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
2366 N0.getOperand(1) == N1.getOperand(0))
2367 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
2368 N1.getOperand(1));
2369
2370 // fold (A+(B-(A+C))) to (B-C)
2371 if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
2372 N0 == N1.getOperand(1).getOperand(0))
2373 return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2374 N1.getOperand(1).getOperand(1));
2375
2376 // fold (A+(B-(C+A))) to (B-C)
2377 if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
2378 N0 == N1.getOperand(1).getOperand(1))
2379 return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2380 N1.getOperand(1).getOperand(0));
2381
2382 // fold (A+((B-A)+or-C)) to (B+or-C)
2383 if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) &&
2384 N1.getOperand(0).getOpcode() == ISD::SUB &&
2385 N0 == N1.getOperand(0).getOperand(1))
2386 return DAG.getNode(N1.getOpcode(), DL, VT, N1.getOperand(0).getOperand(0),
2387 N1.getOperand(1));
2388
2389 // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
2390 if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB) {
2391 SDValue N00 = N0.getOperand(0);
2392 SDValue N01 = N0.getOperand(1);
2393 SDValue N10 = N1.getOperand(0);
2394 SDValue N11 = N1.getOperand(1);
2395
2396 if (isConstantOrConstantVector(N00) || isConstantOrConstantVector(N10))
2397 return DAG.getNode(ISD::SUB, DL, VT,
2398 DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10),
2399 DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11));
2400 }
2401
2402 // fold (add (umax X, C), -C) --> (usubsat X, C)
2403 if (N0.getOpcode() == ISD::UMAX && hasOperation(ISD::USUBSAT, VT)) {
2404 auto MatchUSUBSAT = [](ConstantSDNode *Max, ConstantSDNode *Op) {
2405 return (!Max && !Op) ||
2406 (Max && Op && Max->getAPIntValue() == (-Op->getAPIntValue()));
2407 };
2408 if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchUSUBSAT,
2409 /*AllowUndefs*/ true))
2410 return DAG.getNode(ISD::USUBSAT, DL, VT, N0.getOperand(0),
2411 N0.getOperand(1));
2412 }
2413
2414 if (SimplifyDemandedBits(SDValue(N, 0)))
2415 return SDValue(N, 0);
2416
2417 if (isOneOrOneSplat(N1)) {
2418 // fold (add (xor a, -1), 1) -> (sub 0, a)
2419 if (isBitwiseNot(N0))
2420 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
2421 N0.getOperand(0));
2422
2423 // fold (add (add (xor a, -1), b), 1) -> (sub b, a)
2424 if (N0.getOpcode() == ISD::ADD ||
2425 N0.getOpcode() == ISD::UADDO ||
2426 N0.getOpcode() == ISD::SADDO) {
2427 SDValue A, Xor;
2428
2429 if (isBitwiseNot(N0.getOperand(0))) {
2430 A = N0.getOperand(1);
2431 Xor = N0.getOperand(0);
2432 } else if (isBitwiseNot(N0.getOperand(1))) {
2433 A = N0.getOperand(0);
2434 Xor = N0.getOperand(1);
2435 }
2436
2437 if (Xor)
2438 return DAG.getNode(ISD::SUB, DL, VT, A, Xor.getOperand(0));
2439 }
2440
2441 // Look for:
2442 // add (add x, y), 1
2443 // And if the target does not like this form then turn into:
2444 // sub y, (xor x, -1)
2445 if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.hasOneUse() &&
2446 N0.getOpcode() == ISD::ADD) {
2447 SDValue Not = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
2448 DAG.getAllOnesConstant(DL, VT));
2449 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(1), Not);
2450 }
2451 }
2452
2453 // (x - y) + -1 -> add (xor y, -1), x
2454 if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
2455 isAllOnesOrAllOnesSplat(N1)) {
2456 SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1), N1);
2457 return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0));
2458 }
2459
2460 if (SDValue Combined = visitADDLikeCommutative(N0, N1, N))
2461 return Combined;
2462
2463 if (SDValue Combined = visitADDLikeCommutative(N1, N0, N))
2464 return Combined;
2465
2466 return SDValue();
2467}
2468
2469SDValue DAGCombiner::visitADD(SDNode *N) {
2470 SDValue N0 = N->getOperand(0);
2471 SDValue N1 = N->getOperand(1);
2472 EVT VT = N0.getValueType();
2473 SDLoc DL(N);
2474
2475 if (SDValue Combined = visitADDLike(N))
2476 return Combined;
2477
2478 if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
2479 return V;
2480
2481 if (SDValue V = foldAddSubOfSignBit(N, DAG))
2482 return V;
2483
2484 // fold (a+b) -> (a|b) iff a and b share no bits.
2485 if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
2486 DAG.haveNoCommonBitsSet(N0, N1))
2487 return DAG.getNode(ISD::OR, DL, VT, N0, N1);
2488
2489 // Fold (add (vscale * C0), (vscale * C1)) to (vscale * (C0 + C1)).
2490 if (N0.getOpcode() == ISD::VSCALE && N1.getOpcode() == ISD::VSCALE) {
2491 const APInt &C0 = N0->getConstantOperandAPInt(0);
2492 const APInt &C1 = N1->getConstantOperandAPInt(0);
2493 return DAG.getVScale(DL, VT, C0 + C1);
2494 }
2495
2496 // fold a+vscale(c1)+vscale(c2) -> a+vscale(c1+c2)
2497 if ((N0.getOpcode() == ISD::ADD) &&
2498 (N0.getOperand(1).getOpcode() == ISD::VSCALE) &&
2499 (N1.getOpcode() == ISD::VSCALE)) {
2500 const APInt &VS0 = N0.getOperand(1)->getConstantOperandAPInt(0);
2501 const APInt &VS1 = N1->getConstantOperandAPInt(0);
2502 SDValue VS = DAG.getVScale(DL, VT, VS0 + VS1);
2503 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), VS);
2504 }
2505
2506 return SDValue();
2507}
2508
2509SDValue DAGCombiner::visitADDSAT(SDNode *N) {
2510 unsigned Opcode = N->getOpcode();
2511 SDValue N0 = N->getOperand(0);
2512 SDValue N1 = N->getOperand(1);
2513 EVT VT = N0.getValueType();
2514 SDLoc DL(N);
2515
2516 // fold vector ops
2517 if (VT.isVector()) {
2518 // TODO SimplifyVBinOp
2519
2520 // fold (add_sat x, 0) -> x, vector edition
2521 if (ISD::isBuildVectorAllZeros(N1.getNode()))
2522 return N0;
2523 if (ISD::isBuildVectorAllZeros(N0.getNode()))
2524 return N1;
2525 }
2526
2527 // fold (add_sat x, undef) -> -1
2528 if (N0.isUndef() || N1.isUndef())
2529 return DAG.getAllOnesConstant(DL, VT);
2530
2531 if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
2532 // canonicalize constant to RHS
2533 if (!DAG.isConstantIntBuildVectorOrConstantInt(N1))
2534 return DAG.getNode(Opcode, DL, VT, N1, N0);
2535 // fold (add_sat c1, c2) -> c3
2536 return DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1});
2537 }
2538
2539 // fold (add_sat x, 0) -> x
2540 if (isNullConstant(N1))
2541 return N0;
2542
2543 // If it cannot overflow, transform into an add.
2544 if (Opcode == ISD::UADDSAT)
2545 if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2546 return DAG.getNode(ISD::ADD, DL, VT, N0, N1);
2547
2548 return SDValue();
2549}
2550
2551static SDValue getAsCarry(const TargetLowering &TLI, SDValue V) {
2552 bool Masked = false;
2553
2554 // First, peel away TRUNCATE/ZERO_EXTEND/AND nodes due to legalization.
2555 while (true) {
2556 if (V.getOpcode() == ISD::TRUNCATE || V.getOpcode() == ISD::ZERO_EXTEND) {
2557 V = V.getOperand(0);
2558 continue;
2559 }
2560
2561 if (V.getOpcode() == ISD::AND && isOneConstant(V.getOperand(1))) {
2562 Masked = true;
2563 V = V.getOperand(0);
2564 continue;
2565 }
2566
2567 break;
2568 }
2569
2570 // If this is not a carry, return.
2571 if (V.getResNo() != 1)
2572 return SDValue();
2573
2574 if (V.getOpcode() != ISD::ADDCARRY && V.getOpcode() != ISD::SUBCARRY &&
2575 V.getOpcode() != ISD::UADDO && V.getOpcode() != ISD::USUBO)
2576 return SDValue();
2577
2578 EVT VT = V.getNode()->getValueType(0);
2579 if (!TLI.isOperationLegalOrCustom(V.getOpcode(), VT))
2580 return SDValue();
2581
2582 // If the result is masked, then no matter what kind of bool it is we can
2583 // return. If it isn't, then we need to make sure the bool type is either 0 or
2584 // 1 and not other values.
2585 if (Masked ||
2586 TLI.getBooleanContents(V.getValueType()) ==
2587 TargetLoweringBase::ZeroOrOneBooleanContent)
2588 return V;
2589
2590 return SDValue();
2591}
2592
2593/// Given the operands of an add/sub operation, see if the 2nd operand is a
2594/// masked 0/1 whose source operand is actually known to be 0/-1. If so, invert
2595/// the opcode and bypass the mask operation.
2596static SDValue foldAddSubMasked1(bool IsAdd, SDValue N0, SDValue N1,
2597 SelectionDAG &DAG, const SDLoc &DL) {
2598 if (N1.getOpcode() != ISD::AND || !isOneOrOneSplat(N1->getOperand(1)))
2599 return SDValue();
2600
2601 EVT VT = N0.getValueType();
2602 if (DAG.ComputeNumSignBits(N1.getOperand(0)) != VT.getScalarSizeInBits())
2603 return SDValue();
2604
2605 // add N0, (and (AssertSext X, i1), 1) --> sub N0, X
2606 // sub N0, (and (AssertSext X, i1), 1) --> add N0, X
2607 return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, N0, N1.getOperand(0));
2608}
2609
2610/// Helper for doing combines based on N0 and N1 being added to each other.
2611SDValue DAGCombiner::visitADDLikeCommutative(SDValue N0, SDValue N1,
2612 SDNode *LocReference) {
2613 EVT VT = N0.getValueType();
2614 SDLoc DL(LocReference);
2615
2616 // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
2617 if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB &&
2618 isNullOrNullSplat(N1.getOperand(0).getOperand(0)))
2619 return DAG.getNode(ISD::SUB, DL, VT, N0,
2620 DAG.getNode(ISD::SHL, DL, VT,
2621 N1.getOperand(0).getOperand(1),
2622 N1.getOperand(1)));
2623
2624 if (SDValue V = foldAddSubMasked1(true, N0, N1, DAG, DL))
2625 return V;
2626
2627 // Look for:
2628 // add (add x, 1), y
2629 // And if the target does not like this form then turn into:
2630 // sub y, (xor x, -1)
2631 if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.hasOneUse() &&
2632 N0.getOpcode() == ISD::ADD && isOneOrOneSplat(N0.getOperand(1))) {
2633 SDValue Not = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
2634 DAG.getAllOnesConstant(DL, VT));
2635 return DAG.getNode(ISD::SUB, DL, VT, N1, Not);
2636 }
2637
2638 // Hoist one-use subtraction by non-opaque constant:
2639 // (x - C) + y -> (x + y) - C
2640 // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
2641 if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
2642 isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
2643 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), N1);
2644 return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
2645 }
2646 // Hoist one-use subtraction from non-opaque constant:
2647 // (C - x) + y -> (y - x) + C
2648 if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
2649 isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
2650 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
2651 return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(0));
2652 }
2653
2654 // If the target's bool is represented as 0/1, prefer to make this 'sub 0/1'
2655 // rather than 'add 0/-1' (the zext should get folded).
2656 // add (sext i1 Y), X --> sub X, (zext i1 Y)
2657 if (N0.getOpcode() == ISD::SIGN_EXTEND &&
2658 N0.getOperand(0).getScalarValueSizeInBits() == 1 &&
2659 TLI.getBooleanContents(VT) == TargetLowering::ZeroOrOneBooleanContent) {
2660 SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
2661 return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
2662 }
2663
2664 // add X, (sextinreg Y i1) -> sub X, (and Y 1)
2665 if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
2666 VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
2667 if (TN->getVT() == MVT::i1) {
2668 SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
2669 DAG.getConstant(1, DL, VT));
2670 return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt);
2671 }
2672 }
2673
2674 // (add X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
2675 if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1)) &&
2676 N1.getResNo() == 0)
2677 return DAG.getNode(ISD::ADDCARRY, DL, N1->getVTList(),
2678 N0, N1.getOperand(0), N1.getOperand(2));
2679
2680 // (add X, Carry) -> (addcarry X, 0, Carry)
2681 if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
2682 if (SDValue Carry = getAsCarry(TLI, N1))
2683 return DAG.getNode(ISD::ADDCARRY, DL,
2684 DAG.getVTList(VT, Carry.getValueType()), N0,
2685 DAG.getConstant(0, DL, VT), Carry);
2686
2687 return SDValue();
2688}
2689
2690SDValue DAGCombiner::visitADDC(SDNode *N) {
2691 SDValue N0 = N->getOperand(0);
2692 SDValue N1 = N->getOperand(1);
2693 EVT VT = N0.getValueType();
2694 SDLoc DL(N);
2695
2696 // If the flag result is dead, turn this into an ADD.
2697 if (!N->hasAnyUseOfValue(1))
2698 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2699 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2700
2701 // canonicalize constant to RHS.
2702 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2703 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2704 if (N0C && !N1C)
2705 return DAG.getNode(ISD::ADDC, DL, N->getVTList(), N1, N0);
2706
2707 // fold (addc x, 0) -> x + no carry out
2708 if (isNullConstant(N1))
2709 return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
2710 DL, MVT::Glue));
2711
2712 // If it cannot overflow, transform into an add.
2713 if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2714 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2715 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2716
2717 return SDValue();
2718}
2719
2720/**
2721 * Flips a boolean if it is cheaper to compute. If the Force parameter is set,
2722 * then the flip also occurs if computing the inverse is the same cost.
2723 * This function returns an empty SDValue in case it cannot flip the boolean
2724 * without increasing the cost of the computation. If you want to flip a boolean
2725 * no matter what, use DAG.getLogicalNOT.
2726 */
2727static SDValue extractBooleanFlip(SDValue V, SelectionDAG &DAG,
2728 const TargetLowering &TLI,
2729 bool Force) {
2730 if (Force && isa<ConstantSDNode>(V))
2731 return DAG.getLogicalNOT(SDLoc(V), V, V.getValueType());
2732
2733 if (V.getOpcode() != ISD::XOR)
2734 return SDValue();
2735
2736 ConstantSDNode *Const = isConstOrConstSplat(V.getOperand(1), false);
2737 if (!Const)
2738 return SDValue();
2739
2740 EVT VT = V.getValueType();
2741
2742 bool IsFlip = false;
2743 switch(TLI.getBooleanContents(VT)) {
2744 case TargetLowering::ZeroOrOneBooleanContent:
2745 IsFlip = Const->isOne();
2746 break;
2747 case TargetLowering::ZeroOrNegativeOneBooleanContent:
2748 IsFlip = Const->isAllOnesValue();
2749 break;
2750 case TargetLowering::UndefinedBooleanContent:
2751 IsFlip = (Const->getAPIntValue() & 0x01) == 1;
2752 break;
2753 }
2754
2755 if (IsFlip)
2756 return V.getOperand(0);
2757 if (Force)
2758 return DAG.getLogicalNOT(SDLoc(V), V, V.getValueType());
2759 return SDValue();
2760}
2761
2762SDValue DAGCombiner::visitADDO(SDNode *N) {
2763 SDValue N0 = N->getOperand(0);
2764 SDValue N1 = N->getOperand(1);
2765 EVT VT = N0.getValueType();
2766 bool IsSigned = (ISD::SADDO == N->getOpcode());
2767
2768 EVT CarryVT = N->getValueType(1);
2769 SDLoc DL(N);
2770
2771 // If the flag result is dead, turn this into an ADD.
2772 if (!N->hasAnyUseOfValue(1))
2773 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2774 DAG.getUNDEF(CarryVT));
2775
2776 // canonicalize constant to RHS.
2777 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
2778 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
2779 return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
2780
2781 // fold (addo x, 0) -> x + no carry out
2782 if (isNullOrNullSplat(N1))
2783 return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
2784
2785 if (!IsSigned) {
2786 // If it cannot overflow, transform into an add.
2787 if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2788 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2789 DAG.getConstant(0, DL, CarryVT));
2790
2791 // fold (uaddo (xor a, -1), 1) -> (usub 0, a) and flip carry.
2792 if (isBitwiseNot(N0) && isOneOrOneSplat(N1)) {
2793 SDValue Sub = DAG.getNode(ISD::USUBO, DL, N->getVTList(),
2794 DAG.getConstant(0, DL, VT), N0.getOperand(0));
2795 return CombineTo(
2796 N, Sub, DAG.getLogicalNOT(DL, Sub.getValue(1), Sub->getValueType(1)));
2797 }
2798
2799 if (SDValue Combined = visitUADDOLike(N0, N1, N))
2800 return Combined;
2801
2802 if (SDValue Combined = visitUADDOLike(N1, N0, N))
2803 return Combined;
2804 }
2805
2806 return SDValue();
2807}
2808
2809SDValue DAGCombiner::visitUADDOLike(SDValue N0, SDValue N1, SDNode *N) {
2810 EVT VT = N0.getValueType();
2811 if (VT.isVector())
2812 return SDValue();
2813
2814 // (uaddo X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
2815 // If Y + 1 cannot overflow.
2816 if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1))) {
2817 SDValue Y = N1.getOperand(0);
2818 SDValue One = DAG.getConstant(1, SDLoc(N), Y.getValueType());
2819 if (DAG.computeOverflowKind(Y, One) == SelectionDAG::OFK_Never)
2820 return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0, Y,
2821 N1.getOperand(2));
2822 }
2823
2824 // (uaddo X, Carry) -> (addcarry X, 0, Carry)
2825 if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
2826 if (SDValue Carry = getAsCarry(TLI, N1))
2827 return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0,
2828 DAG.getConstant(0, SDLoc(N), VT), Carry);
2829
2830 return SDValue();
2831}
2832
2833SDValue DAGCombiner::visitADDE(SDNode *N) {
2834 SDValue N0 = N->getOperand(0);
2835 SDValue N1 = N->getOperand(1);
2836 SDValue CarryIn = N->getOperand(2);
2837
2838 // canonicalize constant to RHS
2839 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2840 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2841 if (N0C && !N1C)
2842 return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
2843 N1, N0, CarryIn);
2844
2845 // fold (adde x, y, false) -> (addc x, y)
2846 if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
2847 return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1);
2848
2849 return SDValue();
2850}
2851
2852SDValue DAGCombiner::visitADDCARRY(SDNode *N) {
2853 SDValue N0 = N->getOperand(0);
2854 SDValue N1 = N->getOperand(1);
2855 SDValue CarryIn = N->getOperand(2);
2856 SDLoc DL(N);
2857
2858 // canonicalize constant to RHS
2859 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2860 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2861 if (N0C && !N1C)
2862 return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), N1, N0, CarryIn);
2863
2864 // fold (addcarry x, y, false) -> (uaddo x, y)
2865 if (isNullConstant(CarryIn)) {
2866 if (!LegalOperations ||
2867 TLI.isOperationLegalOrCustom(ISD::UADDO, N->getValueType(0)))
2868 return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N0, N1);
2869 }
2870
2871 // fold (addcarry 0, 0, X) -> (and (ext/trunc X), 1) and no carry.
2872 if (isNullConstant(N0) && isNullConstant(N1)) {
2873 EVT VT = N0.getValueType();
2874 EVT CarryVT = CarryIn.getValueType();
2875 SDValue CarryExt = DAG.getBoolExtOrTrunc(CarryIn, DL, VT, CarryVT);
2876 AddToWorklist(CarryExt.getNode());
2877 return CombineTo(N, DAG.getNode(ISD::AND, DL, VT, CarryExt,
2878 DAG.getConstant(1, DL, VT)),
2879 DAG.getConstant(0, DL, CarryVT));
2880 }
2881
2882 if (SDValue Combined = visitADDCARRYLike(N0, N1, CarryIn, N))
2883 return Combined;
2884
2885 if (SDValue Combined = visitADDCARRYLike(N1, N0, CarryIn, N))
2886 return Combined;
2887
2888 return SDValue();
2889}
2890
2891SDValue DAGCombiner::visitSADDO_CARRY(SDNode *N) {
2892 SDValue N0 = N->getOperand(0);
2893 SDValue N1 = N->getOperand(1);
2894 SDValue CarryIn = N->getOperand(2);
2895 SDLoc DL(N);
2896
2897 // canonicalize constant to RHS
2898 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2899 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2900 if (N0C && !N1C)
2901 return DAG.getNode(ISD::SADDO_CARRY, DL, N->getVTList(), N1, N0, CarryIn);
2902
2903 // fold (saddo_carry x, y, false) -> (saddo x, y)
2904 if (isNullConstant(CarryIn)) {
2905 if (!LegalOperations ||
2906 TLI.isOperationLegalOrCustom(ISD::SADDO, N->getValueType(0)))
2907 return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0, N1);
2908 }
2909
2910 return SDValue();
2911}
2912
2913/**
2914 * If we are facing some sort of diamond carry propapagtion pattern try to
2915 * break it up to generate something like:
2916 * (addcarry X, 0, (addcarry A, B, Z):Carry)
2917 *
2918 * The end result is usually an increase in operation required, but because the
2919 * carry is now linearized, other tranforms can kick in and optimize the DAG.
2920 *
2921 * Patterns typically look something like
2922 * (uaddo A, B)
2923 * / \
2924 * Carry Sum
2925 * | \
2926 * | (addcarry *, 0, Z)
2927 * | /
2928 * \ Carry
2929 * | /
2930 * (addcarry X, *, *)
2931 *
2932 * But numerous variation exist. Our goal is to identify A, B, X and Z and
2933 * produce a combine with a single path for carry propagation.
2934 */
2935static SDValue combineADDCARRYDiamond(DAGCombiner &Combiner, SelectionDAG &DAG,
2936 SDValue X, SDValue Carry0, SDValue Carry1,
2937 SDNode *N) {
2938 if (Carry1.getResNo() != 1 || Carry0.getResNo() != 1)
2939 return SDValue();
2940 if (Carry1.getOpcode() != ISD::UADDO)
2941 return SDValue();
2942
2943 SDValue Z;
2944
2945 /**
2946 * First look for a suitable Z. It will present itself in the form of
2947 * (addcarry Y, 0, Z) or its equivalent (uaddo Y, 1) for Z=true
2948 */
2949 if (Carry0.getOpcode() == ISD::ADDCARRY &&
2950 isNullConstant(Carry0.getOperand(1))) {
2951 Z = Carry0.getOperand(2);
2952 } else if (Carry0.getOpcode() == ISD::UADDO &&
2953 isOneConstant(Carry0.getOperand(1))) {
2954 EVT VT = Combiner.getSetCCResultType(Carry0.getValueType());
2955 Z = DAG.getConstant(1, SDLoc(Carry0.getOperand(1)), VT);
2956 } else {
2957 // We couldn't find a suitable Z.
2958 return SDValue();
2959 }
2960
2961
2962 auto cancelDiamond = [&](SDValue A,SDValue B) {
2963 SDLoc DL(N);
2964 SDValue NewY = DAG.getNode(ISD::ADDCARRY, DL, Carry0->getVTList(), A, B, Z);
2965 Combiner.AddToWorklist(NewY.getNode());
2966 return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), X,
2967 DAG.getConstant(0, DL, X.getValueType()),
2968 NewY.getValue(1));
2969 };
2970
2971 /**
2972 * (uaddo A, B)
2973 * |
2974 * Sum
2975 * |
2976 * (addcarry *, 0, Z)
2977 */
2978 if (Carry0.getOperand(0) == Carry1.getValue(0)) {
2979 return cancelDiamond(Carry1.getOperand(0), Carry1.getOperand(1));
2980 }
2981
2982 /**
2983 * (addcarry A, 0, Z)
2984 * |
2985 * Sum
2986 * |
2987 * (uaddo *, B)
2988 */
2989 if (Carry1.getOperand(0) == Carry0.getValue(0)) {
2990 return cancelDiamond(Carry0.getOperand(0), Carry1.getOperand(1));
2991 }
2992
2993 if (Carry1.getOperand(1) == Carry0.getValue(0)) {
2994 return cancelDiamond(Carry1.getOperand(0), Carry0.getOperand(0));
2995 }
2996
2997 return SDValue();
2998}
2999
3000// If we are facing some sort of diamond carry/borrow in/out pattern try to
3001// match patterns like:
3002//
3003// (uaddo A, B) CarryIn
3004// | \ |
3005// | \ |
3006// PartialSum PartialCarryOutX /
3007// | | /
3008// | ____|____________/
3009// | / |
3010// (uaddo *, *) \________
3011// | \ \
3012// | \ |
3013// | PartialCarryOutY |
3014// | \ |
3015// | \ /
3016// AddCarrySum | ______/
3017// | /
3018// CarryOut = (or *, *)
3019//
3020// And generate ADDCARRY (or SUBCARRY) with two result values:
3021//
3022// {AddCarrySum, CarryOut} = (addcarry A, B, CarryIn)
3023//
3024// Our goal is to identify A, B, and CarryIn and produce ADDCARRY/SUBCARRY with
3025// a single path for carry/borrow out propagation:
3026static SDValue combineCarryDiamond(DAGCombiner &Combiner, SelectionDAG &DAG,
3027 const TargetLowering &TLI, SDValue Carry0,
3028 SDValue Carry1, SDNode *N) {
3029 if (Carry0.getResNo() != 1 || Carry1.getResNo() != 1)
3030 return SDValue();
3031 unsigned Opcode = Carry0.getOpcode();
3032 if (Opcode != Carry1.getOpcode())
3033 return SDValue();
3034 if (Opcode != ISD::UADDO && Opcode != ISD::USUBO)
3035 return SDValue();
3036
3037 // Canonicalize the add/sub of A and B as Carry0 and the add/sub of the
3038 // carry/borrow in as Carry1. (The top and middle uaddo nodes respectively in
3039 // the above ASCII art.)
3040 if (Carry1.getOperand(0) != Carry0.getValue(0) &&
3041 Carry1.getOperand(1) != Carry0.getValue(0))
3042 std::swap(Carry0, Carry1);
3043 if (Carry1.getOperand(0) != Carry0.getValue(0) &&
3044 Carry1.getOperand(1) != Carry0.getValue(0))
3045 return SDValue();
3046
3047 // The carry in value must be on the righthand side for subtraction.
3048 unsigned CarryInOperandNum =
3049 Carry1.getOperand(0) == Carry0.getValue(0) ? 1 : 0;
3050 if (Opcode == ISD::USUBO && CarryInOperandNum != 1)
3051 return SDValue();
3052 SDValue CarryIn = Carry1.getOperand(CarryInOperandNum);
3053
3054 unsigned NewOp = Opcode == ISD::UADDO ? ISD::ADDCARRY : ISD::SUBCARRY;
3055 if (!TLI.isOperationLegalOrCustom(NewOp, Carry0.getValue(0).getValueType()))
3056 return SDValue();
3057
3058 // Verify that the carry/borrow in is plausibly a carry/borrow bit.
3059 // TODO: make getAsCarry() aware of how partial carries are merged.
3060 if (CarryIn.getOpcode() != ISD::ZERO_EXTEND)
3061 return SDValue();
3062 CarryIn = CarryIn.getOperand(0);
3063 if (CarryIn.getValueType() != MVT::i1)
3064 return SDValue();
3065
3066 SDLoc DL(N);
3067 SDValue Merged =
3068 DAG.getNode(NewOp, DL, Carry1->getVTList(), Carry0.getOperand(0),
3069 Carry0.getOperand(1), CarryIn);
3070
3071 // Please note that because we have proven that the result of the UADDO/USUBO
3072 // of A and B feeds into the UADDO/USUBO that does the carry/borrow in, we can
3073 // therefore prove that if the first UADDO/USUBO overflows, the second
3074 // UADDO/USUBO cannot. For example consider 8-bit numbers where 0xFF is the
3075 // maximum value.
3076 //
3077 // 0xFF + 0xFF == 0xFE with carry but 0xFE + 1 does not carry
3078 // 0x00 - 0xFF == 1 with a carry/borrow but 1 - 1 == 0 (no carry/borrow)
3079 //
3080 // This is important because it means that OR and XOR can be used to merge
3081 // carry flags; and that AND can return a constant zero.
3082 //
3083 // TODO: match other operations that can merge flags (ADD, etc)
3084 DAG.ReplaceAllUsesOfValueWith(Carry1.getValue(0), Merged.getValue(0));
3085 if (N->getOpcode() == ISD::AND)
3086 return DAG.getConstant(0, DL, MVT::i1);
3087 return Merged.getValue(1);
3088}
3089
3090SDValue DAGCombiner::visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
3091 SDNode *N) {
3092 // fold (addcarry (xor a, -1), b, c) -> (subcarry b, a, !c) and flip carry.
3093 if (isBitwiseNot(N0))
3094 if (SDValue NotC = extractBooleanFlip(CarryIn, DAG, TLI, true)) {
3095 SDLoc DL(N);
3096 SDValue Sub = DAG.getNode(ISD::SUBCARRY, DL, N->getVTList(), N1,
3097 N0.getOperand(0), NotC);
3098 return CombineTo(
3099 N, Sub, DAG.getLogicalNOT(DL, Sub.getValue(1), Sub->getValueType(1)));
3100 }
3101
3102 // Iff the flag result is dead:
3103 // (addcarry (add|uaddo X, Y), 0, Carry) -> (addcarry X, Y, Carry)
3104 // Don't do this if the Carry comes from the uaddo. It won't remove the uaddo
3105 // or the dependency between the instructions.
3106 if ((N0.getOpcode() == ISD::ADD ||
3107 (N0.getOpcode() == ISD::UADDO && N0.getResNo() == 0 &&
3108 N0.getValue(1) != CarryIn)) &&
3109 isNullConstant(N1) && !N->hasAnyUseOfValue(1))
3110 return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(),
3111 N0.getOperand(0), N0.getOperand(1), CarryIn);
3112
3113 /**
3114 * When one of the addcarry argument is itself a carry, we may be facing
3115 * a diamond carry propagation. In which case we try to transform the DAG
3116 * to ensure linear carry propagation if that is possible.
3117 */
3118 if (auto Y = getAsCarry(TLI, N1)) {
3119 // Because both are carries, Y and Z can be swapped.
3120 if (auto R = combineADDCARRYDiamond(*this, DAG, N0, Y, CarryIn, N))
3121 return R;
3122 if (auto R = combineADDCARRYDiamond(*this, DAG, N0, CarryIn, Y, N))
3123 return R;
3124 }
3125
3126 return SDValue();
3127}
3128
3129// Attempt to create a USUBSAT(LHS, RHS) node with DstVT, performing a
3130// clamp/truncation if necessary.
3131static SDValue getTruncatedUSUBSAT(EVT DstVT, EVT SrcVT, SDValue LHS,
3132 SDValue RHS, SelectionDAG &DAG,
3133 const SDLoc &DL) {
3134 assert(DstVT.getScalarSizeInBits() <= SrcVT.getScalarSizeInBits() &&((DstVT.getScalarSizeInBits() <= SrcVT.getScalarSizeInBits
() && "Illegal truncation") ? static_cast<void>
(0) : __assert_fail ("DstVT.getScalarSizeInBits() <= SrcVT.getScalarSizeInBits() && \"Illegal truncation\""
, "/build/llvm-toolchain-snapshot-13~++20210413100635+64c24f493e5f/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp"
, 3135, __PRETTY_FUNCTION__))
3135 "Illegal truncation")((DstVT.getScalarSizeInBits() <= SrcVT.getScalarSizeInBits
() && "Illegal truncation") ? static_cast<void>
(0) : __assert_fail ("DstVT.getScalarSizeInBits() <= SrcVT.getScalarSizeInBits() && \"Illegal truncation\""
, "/build/llvm-toolchain-snapshot-13~++20210413100635+64c24f493e5f/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp"
, 3135, __PRETTY_FUNCTION__))
;
3136
3137 if (DstVT == SrcVT)
3138 return DAG.getNode(ISD::USUBSAT, DL, DstVT, LHS, RHS);
3139
3140 // If the LHS is zero-extended then we can perform the USUBSAT as DstVT by
3141 // clamping RHS.
3142 APInt UpperBits = APInt::getBitsSetFrom(SrcVT.getScalarSizeInBits(),
3143 DstVT.getScalarSizeInBits());
3144 if (!DAG.MaskedValueIsZero(LHS, UpperBits))
3145 return SDValue();
3146
3147 SDValue SatLimit =
3148 DAG.getConstant(APInt::getLowBitsSet(SrcVT.getScalarSizeInBits(),
3149 DstVT.getScalarSizeInBits()),
3150 DL, SrcVT);
3151 RHS = DAG.getNode(ISD::UMIN, DL, SrcVT, RHS, SatLimit);
3152 RHS = DAG.getNode(ISD::TRUNCATE, DL, DstVT, RHS);
3153 LHS = DAG.getNode(ISD::TRUNCATE, DL, DstVT, LHS);
3154 return DAG.getNode(ISD::USUBSAT, DL, DstVT, LHS, RHS);
3155}
3156
3157// Try to find umax(a,b) - b or a - umin(a,b) patterns that may be converted to
3158// usubsat(a,b), optionally as a truncated type.
3159SDValue DAGCombiner::foldSubToUSubSat(EVT DstVT, SDNode *N) {
3160 if (N->getOpcode() != ISD::SUB ||
3161 !(!LegalOperations || hasOperation(ISD::USUBSAT, DstVT)))
3162 return SDValue();
3163
3164 EVT SubVT = N->getValueType(0);
3165 SDValue Op0 = N->getOperand(0);
3166 SDValue Op1 = N->getOperand(1);
3167
3168 // Try to find umax(a,b) - b or a - umin(a,b) patterns
3169 // they may be converted to usubsat(a,b).
3170 if (Op0.getOpcode() == ISD::UMAX) {
3171 SDValue MaxLHS = Op0.getOperand(0);
3172 SDValue MaxRHS = Op0.getOperand(1);
3173 if (MaxLHS == Op1)
3174 return getTruncatedUSUBSAT(DstVT, SubVT, MaxRHS, Op1, DAG, SDLoc(N));
3175 if (MaxRHS == Op1)
3176 return getTruncatedUSUBSAT(DstVT, SubVT, MaxLHS, Op1, DAG, SDLoc(N));
3177 }
3178
3179 if (Op1.getOpcode() == ISD::UMIN) {
3180 SDValue MinLHS = Op1.getOperand(0);
3181 SDValue MinRHS = Op1.getOperand(1);
3182 if (MinLHS == Op0)
3183 return getTruncatedUSUBSAT(DstVT, SubVT, Op0, MinRHS, DAG, SDLoc(N));
3184 if (MinRHS == Op0)
3185 return getTruncatedUSUBSAT(DstVT, SubVT, Op0, MinLHS, DAG, SDLoc(N));
3186 }
3187
3188 // sub(a,trunc(umin(zext(a),b))) -> usubsat(a,trunc(umin(b,SatLimit)))
3189 if (Op1.getOpcode() == ISD::TRUNCATE &&
3190 Op1.getOperand(0).getOpcode() == ISD::UMIN) {
3191 SDValue MinLHS = Op1.getOperand(0).getOperand(0);
3192 SDValue MinRHS = Op1.getOperand(0).getOperand(1);
3193 if (MinLHS.getOpcode() == ISD::ZERO_EXTEND && MinLHS.getOperand(0) == Op0)
3194 return getTruncatedUSUBSAT(DstVT, MinLHS.getValueType(), MinLHS, MinRHS,
3195 DAG, SDLoc(N));
3196 if (MinRHS.getOpcode() == ISD::ZERO_EXTEND && MinRHS.getOperand(0) == Op0)
3197 return getTruncatedUSUBSAT(DstVT, MinLHS.getValueType(), MinRHS, MinLHS,
3198 DAG, SDLoc(N));
3199 }
3200
3201 return SDValue();
3202}
3203
3204// Since it may not be valid to emit a fold to zero for vector initializers
3205// check if we can before folding.
3206static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT,
3207 SelectionDAG &DAG, bool LegalOperations) {
3208 if (!VT.isVector())
3209 return DAG.getConstant(0, DL, VT);
3210 if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
3211 return DAG.getConstant(0, DL, VT);
3212 return SDValue();
3213}
3214
3215SDValue DAGCombiner::visitSUB(SDNode *N) {
3216 SDValue N0 = N->getOperand(0);
3217 SDValue N1 = N->getOperand(1);
3218 EVT VT = N0.getValueType();
3219 SDLoc DL(N);
3220
3221 // fold vector ops
3222 if (VT.isVector()) {
3223 if (SDValue FoldedVOp = SimplifyVBinOp(N))
3224 return FoldedVOp;
3225
3226 // fold (sub x, 0) -> x, vector edition
3227 if (ISD::isBuildVectorAllZeros(N1.getNode()))
3228 return N0;
3229 }
3230
3231 // fold (sub x, x) -> 0
3232 // FIXME: Refactor this and xor and other similar operations together.
3233 if (N0 == N1)
3234 return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
3235
3236 // fold (sub c1, c2) -> c3
3237 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N1}))
3238 return C;
3239
3240 if (SDValue NewSel = foldBinOpIntoSelect(N))
3241 return NewSel;
3242
3243 ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
3244
3245 // fold (sub x, c) -> (add x, -c)
3246 if (N1C) {
3247 return DAG.getNode(ISD::ADD, DL, VT, N0,
3248 DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
3249 }
3250
3251 if (isNullOrNullSplat(N0)) {
3252 unsigned BitWidth = VT.getScalarSizeInBits();
3253 // Right-shifting everything out but the sign bit followed by negation is
3254 // the same as flipping arithmetic/logical shift type without the negation:
3255 // -(X >>u 31) -> (X >>s 31)
3256 // -(X >>s 31) -> (X >>u 31)
3257 if (N1->getOpcode() == ISD::SRA || N1->getOpcode() == ISD::SRL) {
3258 ConstantSDNode *ShiftAmt = isConstOrConstSplat(N1.getOperand(1));
3259 if (ShiftAmt && ShiftAmt->getAPIntValue() == (BitWidth - 1)) {
3260 auto NewSh = N1->getOpcode() == ISD::SRA ? ISD::SRL : ISD::SRA;
3261 if (!LegalOperations || TLI.isOperationLegal(NewSh, VT))
3262 return DAG.getNode(NewSh, DL, VT, N1.getOperand(0), N1.getOperand(1));
3263 }
3264 }
3265
3266 // 0 - X --> 0 if the sub is NUW.
3267 if (N->getFlags().hasNoUnsignedWrap())
3268 return N0;
3269
3270 if (DAG.MaskedValueIsZero(N1, ~APInt::getSignMask(BitWidth))) {
3271 // N1 is either 0 or the minimum signed value. If the sub is NSW, then
3272 // N1 must be 0 because negating the minimum signed value is undefined.
3273 if (N->getFlags().hasNoSignedWrap())
3274 return N0;
3275
3276 // 0 - X --> X if X is 0 or the minimum signed value.
3277 return N1;
3278 }
3279
3280 // Convert 0 - abs(x).
3281 SDValue Result;
3282 if (N1->getOpcode() == ISD::ABS &&
3283 !TLI.isOperationLegalOrCustom(ISD::ABS, VT) &&
3284 TLI.expandABS(N1.getNode(), Result, DAG, true))
3285 return Result;
3286 }
3287
3288 // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
3289 if (isAllOnesOrAllOnesSplat(N0))
3290 return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
3291
3292 // fold (A - (0-B)) -> A+B
3293 if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
3294 return DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(1));
3295
3296 // fold A-(A-B) -> B
3297 if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
3298 return N1.getOperand(1);
3299
3300 // fold (A+B)-A -> B
3301 if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
3302 return N0.getOperand(1);
3303
3304 // fold (A+B)-B -> A
3305 if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
3306 return N0.getOperand(0);
3307
3308 // fold (A+C1)-C2 -> A+(C1-C2)
3309 if (N0.getOpcode() == ISD::ADD &&
3310 isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
3311 isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
3312 SDValue NewC =
3313 DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0.getOperand(1), N1});
3314 assert(NewC && "Constant folding failed")((NewC && "Constant folding failed") ? static_cast<
void> (0) : __assert_fail ("NewC && \"Constant folding failed\""
, "/build/llvm-toolchain-snapshot-13~++20210413100635+64c24f493e5f/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp"
, 3314, __PRETTY_FUNCTION__))
;
3315 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), NewC);
3316 }
3317
3318 // fold C2-(A+C1) -> (C2-C1)-A
3319 if (N1.getOpcode() == ISD::ADD) {
3320 SDValue N11 = N1.getOperand(1);
3321 if (isConstantOrConstantVector(N0, /* NoOpaques */ true) &&
3322 isConstantOrConstantVector(N11, /* NoOpaques */ true)) {
3323 SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N11});
3324 assert(NewC && "Constant folding failed")((NewC && "Constant folding failed") ? static_cast<
void> (0) : __assert_fail ("NewC && \"Constant folding failed\""
, "/build/llvm-toolchain-snapshot-13~++20210413100635+64c24f493e5f/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp"
, 3324, __PRETTY_FUNCTION__))
;
3325 return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0));
3326 }
3327 }
3328
3329 // fold (A-C1)-C2 -> A-(C1+C2)
3330 if (N0.getOpcode() == ISD::SUB &&
3331 isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
3332 isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
3333 SDValue NewC =
3334 DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N0.getOperand(1), N1});
3335 assert(NewC && "Constant folding failed")((NewC && "Constant folding failed") ? static_cast<
void> (0) : __assert_fail ("NewC && \"Constant folding failed\""
, "/build/llvm-toolchain-snapshot-13~++20210413100635+64c24f493e5f/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp"
, 3335, __PRETTY_FUNCTION__))
;
3336 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), NewC);
3337 }
3338
3339 // fold (c1-A)-c2 -> (c1-c2)-A
3340 if (N0.getOpcode() == ISD::SUB &&
3341 isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
3342 isConstantOrConstantVector(N0.getOperand(0), /* NoOpaques */ true)) {
3343 SDValue NewC =
3344 DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0.getOperand(0), N1});
3345 assert(NewC && "Constant folding failed")((NewC && "Constant folding failed") ? static_cast<
void> (0) : __assert_fail ("NewC && \"Constant folding failed\""
, "/build/llvm-toolchain-snapshot-13~++20210413100635+64c24f493e5f/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp"
, 3345, __PRETTY_FUNCTION__))
;
3346 return DAG.getNode(ISD::SUB, DL, VT, NewC, N0.getOperand(1));
3347 }
3348
3349 // fold ((A+(B+or-C))-B) -> A+or-C
3350 if (N0.getOpcode() == ISD::ADD &&
3351 (N0.getOperand(1).getOpcode() == ISD::SUB ||
3352 N0.getOperand(1).getOpcode() == ISD::ADD) &&
3353 N0.getOperand(1).getOperand(0) == N1)
3354 return DAG.getNode(N0.getOperand(1).getOpcode(), DL, VT, N0.getOperand(0),
3355 N0.getOperand(1).getOperand(1));
3356
3357 // fold ((A+(C+B))-B) -> A+C
3358 if (N0.getOpcode() == ISD::ADD && N0.getOperand(1).getOpcode() == ISD::ADD &&
3359 N0.getOperand(1).getOperand(1) == N1)
3360 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0),
3361 N0.getOperand(1).getOperand(0));
3362
3363 // fold ((A-(B-C))-C) -> A-B
3364 if (N0.getOpcode() == ISD::SUB && N0.getOperand(1).getOpcode() == ISD::SUB &&
3365 N0.getOperand(1).getOperand(1) == N1)
3366 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
3367 N0.getOperand(1).getOperand(0));
3368
3369 // fold (A-(B-C)) -> A+(C-B)
3370 if (N1.getOpcode() == ISD::SUB && N1.hasOneUse())
3371 return DAG.getNode(ISD::ADD, DL, VT, N0,
3372 DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(1),
3373 N1.getOperand(0)));
3374
3375 // A - (A & B) -> A & (~B)
3376 if (N1.getOpcode() == ISD::AND) {
3377 SDValue A = N1.getOperand(0);
3378 SDValue B = N1.getOperand(1);
3379 if (A != N0)
3380 std::swap(A, B);
3381 if (A == N0 &&
3382 (N1.hasOneUse() || isConstantOrConstantVector(B, /*NoOpaques=*/true))) {
3383 SDValue InvB =
3384 DAG.getNode(ISD::XOR, DL, VT, B, DAG.getAllOnesConstant(DL, VT));
3385 return DAG.getNode(ISD::AND, DL, VT, A, InvB);
3386 }
3387 }
3388
3389 // fold (X - (-Y * Z)) -> (X + (Y * Z))
3390 if (N1.getOpcode() == ISD::MUL && N1.hasOneUse()) {
3391 if (N1.getOperand(0).getOpcode() == ISD::SUB &&
3392 isNullOrNullSplat(N1.getOperand(0).getOperand(0))) {
3393 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
3394 N1.getOperand(0).getOperand(1),
3395 N1.getOperand(1));
3396 return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
3397 }
3398 if (N1.getOperand(1).getOpcode() == ISD::SUB &&
3399 isNullOrNullSplat(N1.getOperand(1).getOperand(0))) {
3400 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
3401 N1.getOperand(0),
3402 N1.getOperand(1).getOperand(1));
3403 return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
3404 }
3405 }
3406
3407 // If either operand of a sub is undef, the result is undef
3408 if (N0.isUndef())
3409 return N0;
3410 if (N1.isUndef())
3411 return N1;
3412
3413 if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
3414 return V;
3415
3416 if (SDValue V = foldAddSubOfSignBit(N, DAG))
3417 return V;
3418
3419 if (SDValue V = foldAddSubMasked1(false, N0, N1, DAG, SDLoc(N)))
3420 return V;
3421
3422 if (SDValue V = foldSubToUSubSat(VT, N))
3423 return V;
3424
3425 // (x - y) - 1 -> add (xor y, -1), x
3426 if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB && isOneOrOneSplat(N1)) {
3427 SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1),
3428 DAG.getAllOnesConstant(DL, VT));
3429 return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0));
3430 }
3431
3432 // Look for:
3433 // sub y, (xor x, -1)
3434 // And if the target does not like this form then turn into:
3435 // add (add x, y), 1
3436 if (TLI.preferIncOfAddToSubOfNot(VT) && N1.hasOneUse() && isBitwiseNot(N1)) {
3437 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(0));
3438 return DAG.getNode(ISD::ADD, DL, VT, Add, DAG.getConstant(1, DL, VT));
3439 }
3440
3441 // Hoist one-use addition by non-opaque constant:
3442 // (x + C) - y -> (x - y) + C
3443 if (N0.hasOneUse() && N0.getOpcode() == ISD::ADD &&
3444 isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
3445 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
3446 return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(1));
3447 }
3448 // y - (x + C) -> (y - x) - C
3449 if (N1.hasOneUse() && N1.getOpcode() == ISD::ADD &&
3450 isConstantOrConstantVector(N1.getOperand(1), /*NoOpaques=*/true)) {
3451 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(0));
3452 return DAG.getNode(ISD::SUB, DL, VT, Sub, N1.getOperand(1));
3453 }
3454 // (x - C) - y -> (x - y) - C
3455 // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
3456 if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
3457 isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
3458 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
3459 return DAG.getNode(ISD::SUB, DL, VT, Sub, N0.getOperand(1));
3460 }
3461 // (C - x) - y -> C - (x + y)
3462 if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
3463 isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
3464 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1), N1);
3465 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), Add);
3466 }
3467
3468 // If the target's bool is represented as 0/-1, prefer to make this 'add 0/-1'
3469 // rather than 'sub 0/1' (the sext should get folded).
3470 // sub X, (zext i1 Y) --> add X, (sext i1 Y)
3471 if (N1.getOpcode() == ISD::ZERO_EXTEND &&
3472 N1.getOperand(0).getScalarValueSizeInBits() == 1 &&
3473 TLI.getBooleanContents(VT) ==
3474 TargetLowering::ZeroOrNegativeOneBooleanContent) {
3475 SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N1.getOperand(0));
3476 return DAG.getNode(ISD::ADD, DL, VT, N0, SExt);
3477 }
3478
3479 // fold Y = sra (X, size(X)-1); sub (xor (X, Y), Y) -> (abs X)
3480 if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
3481 if (N0.getOpcode() == ISD::XOR && N1.getOpcode() == ISD::SRA) {
3482 SDValue X0 = N0.getOperand(0), X1 = N0.getOperand(1);
3483 SDValue S0 = N1.getOperand(0);
3484 if ((X0 == S0 && X1 == N1) || (X0 == N1 && X1 == S0))
3485 if (ConstantSDNode *C = isConstOrConstSplat(N1.getOperand(1)))
3486 if (C->getAPIntValue() == (VT.getScalarSizeInBits() - 1))
3487 return DAG.getNode(ISD::ABS, SDLoc(N), VT, S0);
3488 }
3489 }
3490
3491 // If the relocation model supports it, consider symbol offsets.
3492 if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
3493 if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
3494 // fold (sub Sym, c) -> Sym-c
3495 if (N1C && GA->getOpcode() == ISD::GlobalAddress)
3496 return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT,
3497 GA->getOffset() -
3498 (uint64_t)N1C->getSExtValue());
3499 // fold (sub Sym+c1, Sym+c2) -> c1-c2
3500 if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
3501 if (GA->getGlobal() == GB->getGlobal())
3502 return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
3503 DL, VT);
3504 }
3505
3506 // sub X, (sextinreg Y i1) -> add X, (and Y 1)
3507 if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
3508 VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
3509 if (TN->getVT() == MVT::i1) {
3510 SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
3511 DAG.getConstant(1, DL, VT));
3512 return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt);
3513 }
3514 }
3515
3516 // canonicalize (sub X, (vscale * C)) to (add X, (vscale * -C))
3517 if (N1.getOpcode() == ISD::VSCALE) {
3518 const APInt &IntVal = N1.getConstantOperandAPInt(0);
3519 return DAG.getNode(ISD::ADD, DL, VT, N0, DAG.getVScale(DL, VT, -IntVal));
3520 }
3521
3522 // Prefer an add for more folding potential and possibly better codegen:
3523 // sub N0, (lshr N10, width-1) --> add N0, (ashr N10, width-1)
3524 if (!LegalOperations && N1.getOpcode() == ISD::SRL && N1.hasOneUse()) {
3525 SDValue ShAmt = N1.getOperand(1);
3526 ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
3527 if (ShAmtC &&
3528 ShAmtC->getAPIntValue() == (N1.getScalarValueSizeInBits() - 1)) {
3529 SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0), ShAmt);
3530 return DAG.getNode(ISD::ADD, DL, VT, N0, SRA);
3531 }
3532 }
3533
3534 if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT)) {
3535 // (sub Carry, X) -> (addcarry (sub 0, X), 0, Carry)
3536 if (SDValue Carry = getAsCarry(TLI, N0)) {
3537 SDValue X = N1;
3538 SDValue Zero = DAG.getConstant(0, DL, VT);
3539 SDValue NegX = DAG.getNode(ISD::SUB, DL, VT, Zero, X);
3540 return DAG.getNode(ISD::ADDCARRY, DL,
3541 DAG.getVTList(VT, Carry.getValueType()), NegX, Zero,
3542 Carry);
3543 }
3544 }
3545
3546 return SDValue();
3547}
3548
3549SDValue DAGCombiner::visitSUBSAT(SDNode *N) {
3550 SDValue N0 = N->getOperand(0);
3551 SDValue N1 = N->getOperand(1);
3552 EVT VT = N0.getValueType();
3553 SDLoc DL(N);
3554
3555 // fold vector ops
3556 if (VT.isVector()) {
3557 // TODO SimplifyVBinOp
3558
3559 // fold (sub_sat x, 0) -> x, vector edition
3560 if (ISD::isBuildVectorAllZeros(N1.getNode()))
3561 return N0;
3562 }
3563
3564 // fold (sub_sat x, undef) -> 0
3565 if (N0.isUndef() || N1.isUndef())
3566 return DAG.getConstant(0, DL, VT);
3567
3568 // fold (sub_sat x, x) -> 0
3569 if (N0 == N1)
3570 return DAG.getConstant(0, DL, VT);
3571
3572 // fold (sub_sat c1, c2) -> c3
3573 if (SDValue C = DAG.FoldConstantArithmetic(N->getOpcode(), DL, VT, {N0, N1}))
3574 return C;
3575
3576 // fold (sub_sat x, 0) -> x
3577 if (isNullConstant(N1))
3578 return N0;
3579
3580 return SDValue();
3581}
3582
3583SDValue DAGCombiner::visitSUBC(SDNode *N) {
3584 SDValue N0 = N->getOperand(0);
3585 SDValue N1 = N->getOperand(1);
3586 EVT VT = N0.getValueType();
3587 SDLoc DL(N);
3588
3589 // If the flag result is dead, turn this into an SUB.
3590 if (!N->hasAnyUseOfValue(1))
3591 return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
3592 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3593
3594 // fold (subc x, x) -> 0 + no borrow
3595 if (N0 == N1)
3596 return CombineTo(N, DAG.getConstant(0, DL, VT),
3597 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3598
3599 // fold (subc x, 0) -> x + no borrow
3600 if (isNullConstant(N1))
3601 return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3602
3603 // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
3604 if (isAllOnesConstant(N0))
3605 return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
3606 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3607
3608 return SDValue();
3609}
3610
3611SDValue DAGCombiner::visitSUBO(SDNode *N) {
3612 SDValue N0 = N->getOperand(0);
3613 SDValue N1 = N->getOperand(1);
3614 EVT VT = N0.getValueType();
3615 bool IsSigned = (ISD::SSUBO == N->getOpcode());
3616
3617 EVT CarryVT = N->getValueType(1);
3618 SDLoc DL(N);
3619
3620 // If the flag result is dead, turn this into an SUB.
3621 if (!N->hasAnyUseOfValue(1))
3622 return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
3623 DAG.getUNDEF(CarryVT));
3624
3625 // fold (subo x, x) -> 0 + no borrow
3626 if (N0 == N1)
3627 return CombineTo(N, DAG.getConstant(0, DL, VT),
3628 DAG.getConstant(0, DL, CarryVT));
3629
3630 ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
3631
3632 // fold (subox, c) -> (addo x, -c)
3633 if (IsSigned && N1C && !N1C->getAPIntValue().isMinSignedValue()) {
3634 return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0,
3635 DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
3636 }
3637
3638 // fold (subo x, 0) -> x + no borrow
3639 if (isNullOrNullSplat(N1))
3640 return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
3641
3642 // Canonicalize (usubo -1, x) -> ~x, i.e. (xor x, -1) + no borrow
3643 if (!IsSigned && isAllOnesOrAllOnesSplat(N0))
3644 return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
3645 DAG.getConstant(0, DL, CarryVT));
3646
3647 return SDValue();
3648}
3649
3650SDValue DAGCombiner::visitSUBE(SDNode *N) {
3651 SDValue N0 = N->getOperand(0);
3652 SDValue N1 = N->getOperand(1);
3653 SDValue CarryIn = N->getOperand(2);
3654
3655 // fold (sube x, y, false) -> (subc x, y)
3656 if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
3657 return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1);
3658
3659 return SDValue();
3660}
3661
3662SDValue DAGCombiner::visitSUBCARRY(SDNode *N) {
3663 SDValue N0 = N->getOperand(0);
3664 SDValue N1 = N->getOperand(1);
3665 SDValue CarryIn = N->getOperand(2);
3666
3667 // fold (subcarry x, y, false) -> (usubo x, y)
3668 if (isNullConstant(CarryIn)) {
3669 if (!LegalOperations ||
3670 TLI.isOperationLegalOrCustom(ISD::USUBO, N->getValueType(0)))
3671 return DAG.getNode(ISD::USUBO, SDLoc(N), N->getVTList(), N0, N1);
3672 }
3673
3674 return SDValue();
3675}
3676
3677SDValue DAGCombiner::visitSSUBO_CARRY(SDNode *N) {
3678 SDValue N0 = N->getOperand(0);
3679 SDValue N1 = N->getOperand(1);
3680 SDValue CarryIn = N->getOperand(2);
3681
3682 // fold (ssubo_carry x, y, false) -> (ssubo x, y)
3683 if (isNullConstant(CarryIn)) {
3684 if (!LegalOperations ||
3685 TLI.isOperationLegalOrCustom(ISD::SSUBO, N->getValueType(0)))
3686 return DAG.getNode(ISD::SSUBO, SDLoc(N), N->getVTList(), N0, N1);
3687 }
3688
3689 return SDValue();
3690}
3691
3692// Notice that "mulfix" can be any of SMULFIX, SMULFIXSAT, UMULFIX and
3693// UMULFIXSAT here.
3694SDValue DAGCombiner::visitMULFIX(SDNode *N) {
3695 SDValue N0 = N->getOperand(0);
3696 SDValue N1 = N->getOperand(1);
3697 SDValue Scale = N->getOperand(2);
3698 EVT VT = N0.getValueType();
3699
3700 // fold (mulfix x, undef, scale) -> 0
3701 if (N0.isUndef() || N1.isUndef())
3702 return DAG.getConstant(0, SDLoc(N), VT);
3703
3704 // Canonicalize constant to RHS (vector doesn't have to splat)
3705 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3706 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
3707 return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0, Scale);
3708
3709 // fold (mulfix x, 0, scale) -> 0
3710 if (isNullConstant(N1))
3711 return DAG.getConstant(0, SDLoc(N), VT);
3712
3713 return SDValue();
3714}
3715
/// Combine a MUL node: constant folding, canonicalization, and strength
/// reduction of multiplies by (combinations of) powers of two into
/// shift/add/sub sequences.  The order of the folds below is significant.
SDValue DAGCombiner::visitMUL(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();

  // fold (mul x, undef) -> 0
  if (N0.isUndef() || N1.isUndef())
    return DAG.getConstant(0, SDLoc(N), VT);

  bool N1IsConst = false;
  bool N1IsOpaqueConst = false;
  APInt ConstValue1;

  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    // For vectors, a "constant" RHS means a constant splat; its splat value
    // is captured in ConstValue1 for the scalar folds below.
    N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1);
    assert((!N1IsConst ||
            ConstValue1.getBitWidth() == VT.getScalarSizeInBits()) &&
           "Splat APInt should be element width");
  } else {
    N1IsConst = isa<ConstantSDNode>(N1);
    if (N1IsConst) {
      ConstValue1 = cast<ConstantSDNode>(N1)->getAPIntValue();
      N1IsOpaqueConst = cast<ConstantSDNode>(N1)->isOpaque();
    }
  }

  // fold (mul c1, c2) -> c1*c2
  if (SDValue C = DAG.FoldConstantArithmetic(ISD::MUL, SDLoc(N), VT, {N0, N1}))
    return C;

  // canonicalize constant to RHS (vector doesn't have to splat)
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
    return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0);

  // fold (mul x, 0) -> 0
  if (N1IsConst && ConstValue1.isNullValue())
    return N1;

  // fold (mul x, 1) -> x
  if (N1IsConst && ConstValue1.isOneValue())
    return N0;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // fold (mul x, -1) -> 0-x
  if (N1IsConst && ConstValue1.isAllOnesValue()) {
    SDLoc DL(N);
    return DAG.getNode(ISD::SUB, DL, VT,
                       DAG.getConstant(0, DL, VT), N0);
  }

  // fold (mul x, (1 << c)) -> x << c
  // After vector legalization the shift form may no longer be legal, hence
  // the Level check for vectors.
  if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
      DAG.isKnownToBeAPowerOfTwo(N1) &&
      (!VT.isVector() || Level <= AfterLegalizeVectorOps)) {
    SDLoc DL(N);
    SDValue LogBase2 = BuildLogBase2(N1, DL);
    EVT ShiftVT = getShiftAmountTy(N0.getValueType());
    SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
    return DAG.getNode(ISD::SHL, DL, VT, N0, Trunc);
  }

  // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
  if (N1IsConst && !N1IsOpaqueConst && (-ConstValue1).isPowerOf2()) {
    unsigned Log2Val = (-ConstValue1).logBase2();
    SDLoc DL(N);
    // FIXME: If the input is something that is easily negated (e.g. a
    // single-use add), we should put the negate there.
    return DAG.getNode(ISD::SUB, DL, VT,
                       DAG.getConstant(0, DL, VT),
                       DAG.getNode(ISD::SHL, DL, VT, N0,
                            DAG.getConstant(Log2Val, DL,
                                      getShiftAmountTy(N0.getValueType()))));
  }

  // Try to transform:
  // (1) multiply-by-(power-of-2 +/- 1) into shift and add/sub.
  // mul x, (2^N + 1) --> add (shl x, N), x
  // mul x, (2^N - 1) --> sub (shl x, N), x
  // Examples: x * 33 --> (x << 5) + x
  //           x * 15 --> (x << 4) - x
  //           x * -33 --> -((x << 5) + x)
  //           x * -15 --> -((x << 4) - x) ; this reduces --> x - (x << 4)
  // (2) multiply-by-(power-of-2 +/- power-of-2) into shifts and add/sub.
  // mul x, (2^N + 2^M) --> (add (shl x, N), (shl x, M))
  // mul x, (2^N - 2^M) --> (sub (shl x, N), (shl x, M))
  // Examples: x * 0x8800 --> (x << 15) + (x << 11)
  //           x * 0xf800 --> (x << 16) - (x << 11)
  //           x * -0x8800 --> -((x << 15) + (x << 11))
  //           x * -0xf800 --> -((x << 16) - (x << 11)) ; (x << 11) - (x << 16)
  if (N1IsConst && TLI.decomposeMulByConstant(*DAG.getContext(), VT, N1)) {
    // TODO: We could handle more general decomposition of any constant by
    //       having the target set a limit on number of ops and making a
    //       callback to determine that sequence (similar to sqrt expansion).
    unsigned MathOp = ISD::DELETED_NODE;
    APInt MulC = ConstValue1.abs();
    // The constant `2` should be treated as (2^0 + 1).
    unsigned TZeros = MulC == 2 ? 0 : MulC.countTrailingZeros();
    MulC.lshrInPlace(TZeros);
    if ((MulC - 1).isPowerOf2())
      MathOp = ISD::ADD;
    else if ((MulC + 1).isPowerOf2())
      MathOp = ISD::SUB;

    if (MathOp != ISD::DELETED_NODE) {
      unsigned ShAmt =
          MathOp == ISD::ADD ? (MulC - 1).logBase2() : (MulC + 1).logBase2();
      ShAmt += TZeros;
      assert(ShAmt < VT.getScalarSizeInBits() &&
             "multiply-by-constant generated out of bounds shift");
      SDLoc DL(N);
      SDValue Shl =
          DAG.getNode(ISD::SHL, DL, VT, N0, DAG.getConstant(ShAmt, DL, VT));
      SDValue R =
          TZeros ? DAG.getNode(MathOp, DL, VT, Shl,
                               DAG.getNode(ISD::SHL, DL, VT, N0,
                                           DAG.getConstant(TZeros, DL, VT)))
                 : DAG.getNode(MathOp, DL, VT, Shl, N0);
      // A negative constant was decomposed via its absolute value; negate
      // the whole expression to compensate.
      if (ConstValue1.isNegative())
        R = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), R);
      return R;
    }
  }

  // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
  if (N0.getOpcode() == ISD::SHL &&
      isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
      isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
    SDValue C3 = DAG.getNode(ISD::SHL, SDLoc(N), VT, N1, N0.getOperand(1));
    // The shift of two constants may not fold (e.g. non-splat vectors);
    // only rewrite when it actually produced a constant.
    if (isConstantOrConstantVector(C3))
      return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), C3);
  }

  // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
  // use.
  {
    SDValue Sh(nullptr, 0), Y(nullptr, 0);

    // Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)).
    if (N0.getOpcode() == ISD::SHL &&
        isConstantOrConstantVector(N0.getOperand(1)) &&
        N0.getNode()->hasOneUse()) {
      Sh = N0; Y = N1;
    } else if (N1.getOpcode() == ISD::SHL &&
               isConstantOrConstantVector(N1.getOperand(1)) &&
               N1.getNode()->hasOneUse()) {
      Sh = N1; Y = N0;
    }

    if (Sh.getNode()) {
      SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT, Sh.getOperand(0), Y);
      return DAG.getNode(ISD::SHL, SDLoc(N), VT, Mul, Sh.getOperand(1));
    }
  }

  // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
  if (DAG.isConstantIntBuildVectorOrConstantInt(N1) &&
      N0.getOpcode() == ISD::ADD &&
      DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)) &&
      isMulAddWithConstProfitable(N, N0, N1))
    return DAG.getNode(ISD::ADD, SDLoc(N), VT,
                       DAG.getNode(ISD::MUL, SDLoc(N0), VT,
                                   N0.getOperand(0), N1),
                       DAG.getNode(ISD::MUL, SDLoc(N1), VT,
                                   N0.getOperand(1), N1));

  // Fold (mul (vscale * C0), C1) to (vscale * (C0 * C1)).
  if (N0.getOpcode() == ISD::VSCALE)
    if (ConstantSDNode *NC1 = isConstOrConstSplat(N1)) {
      const APInt &C0 = N0.getConstantOperandAPInt(0);
      const APInt &C1 = NC1->getAPIntValue();
      return DAG.getVScale(SDLoc(N), VT, C0 * C1);
    }

  // Fold ((mul x, 0/undef) -> 0,
  //       (mul x, 1) -> x) -> x)
  // -> and(x, mask)
  // We can replace vectors with '0' and '1' factors with a clearing mask.
  if (VT.isFixedLengthVector()) {
    unsigned NumElts = VT.getVectorNumElements();
    SmallBitVector ClearMask;
    ClearMask.reserve(NumElts);
    // Records a "clear" bit per element: true for 0/undef factors (element
    // becomes zero), false for 1 factors (element kept); any other constant
    // aborts the match.
    auto IsClearMask = [&ClearMask](ConstantSDNode *V) {
      if (!V || V->isNullValue()) {
        ClearMask.push_back(true);
        return true;
      }
      ClearMask.push_back(false);
      return V->isOne();
    };
    if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::AND, VT)) &&
        ISD::matchUnaryPredicate(N1, IsClearMask, /*AllowUndefs*/ true)) {
      assert(N1.getOpcode() == ISD::BUILD_VECTOR && "Unknown constant vector");
      SDLoc DL(N);
      EVT LegalSVT = N1.getOperand(0).getValueType();
      SDValue Zero = DAG.getConstant(0, DL, LegalSVT);
      SDValue AllOnes = DAG.getAllOnesConstant(DL, LegalSVT);
      SmallVector<SDValue, 16> Mask(NumElts, AllOnes);
      for (unsigned I = 0; I != NumElts; ++I)
        if (ClearMask[I])
          Mask[I] = Zero;
      return DAG.getNode(ISD::AND, DL, VT, N0, DAG.getBuildVector(VT, DL, Mask));
    }
  }

  // reassociate mul
  if (SDValue RMUL = reassociateOps(ISD::MUL, SDLoc(N), N0, N1, N->getFlags()))
    return RMUL;

  return SDValue();
}
3933
3934/// Return true if divmod libcall is available.
3935static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
3936 const TargetLowering &TLI) {
3937 RTLIB::Libcall LC;
3938 EVT NodeType = Node->getValueType(0);
3939 if (!NodeType.isSimple())
3940 return false;
3941 switch (NodeType.getSimpleVT().SimpleTy) {
3942 default: return false; // No libcall for vector types.
3943 case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
3944 case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
3945 case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
3946 case MVT::i64: LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
3947 case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break;
3948 }
3949
3950 return TLI.getLibcallName(LC) != nullptr;
3951}
3952
/// Issue divrem if both quotient and remainder are needed.
/// Scans the users of the dividend for matching div/rem nodes with the same
/// operands and replaces them all with a single [SU]DIVREM.  Returns the
/// combined node (value 0 = quotient, value 1 = remainder) or an empty
/// SDValue if no conversion was done.
SDValue DAGCombiner::useDivRem(SDNode *Node) {
  if (Node->use_empty())
    return SDValue(); // This is a dead node, leave it alone.

  unsigned Opcode = Node->getOpcode();
  bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM);
  unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;

  // DivMod lib calls can still work on non-legal types if using lib-calls.
  EVT VT = Node->getValueType(0);
  if (VT.isVector() || !VT.isInteger())
    return SDValue();

  if (!TLI.isTypeLegal(VT) && !TLI.isOperationCustom(DivRemOpc, VT))
    return SDValue();

  // If DIVREM is going to get expanded into a libcall,
  // but there is no libcall available, then don't combine.
  if (!TLI.isOperationLegalOrCustom(DivRemOpc, VT) &&
      !isDivRemLibcallAvailable(Node, isSigned, TLI))
    return SDValue();

  // If div is legal, it's better to do the normal expansion
  unsigned OtherOpcode = 0;
  if ((Opcode == ISD::SDIV) || (Opcode == ISD::UDIV)) {
    OtherOpcode = isSigned ? ISD::SREM : ISD::UREM;
    if (TLI.isOperationLegalOrCustom(Opcode, VT))
      return SDValue();
  } else {
    OtherOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
    if (TLI.isOperationLegalOrCustom(OtherOpcode, VT))
      return SDValue();
  }

  SDValue Op0 = Node->getOperand(0);
  SDValue Op1 = Node->getOperand(1);
  SDValue combined;
  // Walk every user of the dividend looking for the sibling div/rem nodes
  // that share both operands; the first match creates (or reuses) the
  // DIVREM, and later matches are rewired to it.
  for (SDNode::use_iterator UI = Op0.getNode()->use_begin(),
         UE = Op0.getNode()->use_end(); UI != UE; ++UI) {
    SDNode *User = *UI;
    if (User == Node || User->getOpcode() == ISD::DELETED_NODE ||
        User->use_empty())
      continue;
    // Convert the other matching node(s), too;
    // otherwise, the DIVREM may get target-legalized into something
    // target-specific that we won't be able to recognize.
    unsigned UserOpc = User->getOpcode();
    if ((UserOpc == Opcode || UserOpc == OtherOpcode || UserOpc == DivRemOpc) &&
        User->getOperand(0) == Op0 &&
        User->getOperand(1) == Op1) {
      if (!combined) {
        if (UserOpc == OtherOpcode) {
          SDVTList VTs = DAG.getVTList(VT, VT);
          combined = DAG.getNode(DivRemOpc, SDLoc(Node), VTs, Op0, Op1);
        } else if (UserOpc == DivRemOpc) {
          combined = SDValue(User, 0);
        } else {
          assert(UserOpc == Opcode);
          // Node itself (or a duplicate of it) - handled by the caller via
          // the returned value.
          continue;
        }
      }
      if (UserOpc == ISD::SDIV || UserOpc == ISD::UDIV)
        CombineTo(User, combined);
      else if (UserOpc == ISD::SREM || UserOpc == ISD::UREM)
        CombineTo(User, combined.getValue(1));
    }
  }
  return combined;
}
4023
/// Simplifications common to SDIV/UDIV/SREM/UREM: fold the trivial
/// dividend/divisor cases that do not depend on signedness.  The order of
/// checks matters (undef/zero divisors must be handled before X/X etc.).
static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  unsigned Opc = N->getOpcode();
  bool IsDiv = (ISD::SDIV == Opc) || (ISD::UDIV == Opc);
  ConstantSDNode *N1C = isConstOrConstSplat(N1);

  // X / undef -> undef
  // X % undef -> undef
  // X / 0 -> undef
  // X % 0 -> undef
  // NOTE: This includes vectors where any divisor element is zero/undef.
  if (DAG.isUndef(Opc, {N0, N1}))
    return DAG.getUNDEF(VT);

  // undef / X -> 0
  // undef % X -> 0
  if (N0.isUndef())
    return DAG.getConstant(0, DL, VT);

  // 0 / X -> 0
  // 0 % X -> 0
  ConstantSDNode *N0C = isConstOrConstSplat(N0);
  if (N0C && N0C->isNullValue())
    return N0;

  // X / X -> 1
  // X % X -> 0
  if (N0 == N1)
    return DAG.getConstant(IsDiv ? 1 : 0, DL, VT);

  // X / 1 -> X
  // X % 1 -> 0
  // If this is a boolean op (single-bit element type), we can't have
  // division-by-zero or remainder-by-zero, so assume the divisor is 1.
  // TODO: Similarly, if we're zero-extending a boolean divisor, then assume
  // it's a 1.
  if ((N1C && N1C->isOne()) || (VT.getScalarType() == MVT::i1))
    return IsDiv ? N0 : DAG.getConstant(0, DL, VT);

  return SDValue();
}
4069
/// Combine an SDIV node: constant folding, special-case divisors (-1,
/// INT_MIN), strength reduction via visitSDIVLike, and sdiv+srem -> sdivrem.
SDValue DAGCombiner::visitSDIV(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  EVT CCVT = getSetCCResultType(VT);

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

  SDLoc DL(N);

  // fold (sdiv c1, c2) -> c1/c2
  ConstantSDNode *N1C = isConstOrConstSplat(N1);
  if (SDValue C = DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, {N0, N1}))
    return C;

  // fold (sdiv X, -1) -> 0-X
  if (N1C && N1C->isAllOnesValue())
    return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0);

  // fold (sdiv X, MIN_SIGNED) -> select(X == MIN_SIGNED, 1, 0)
  // (the only dividend whose signed quotient by INT_MIN is nonzero is
  // INT_MIN itself, which yields 1)
  if (N1C && N1C->getAPIntValue().isMinSignedValue())
    return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
                         DAG.getConstant(1, DL, VT),
                         DAG.getConstant(0, DL, VT));

  if (SDValue V = simplifyDivRem(N, DAG))
    return V;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // If we know the sign bits of both operands are zero, strength reduce to a
  // udiv instead.  Handles (X&15) /s 4 -> X&15 >> 2
  if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
    return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1);

  if (SDValue V = visitSDIVLike(N0, N1, N)) {
    // If the corresponding remainder node exists, update its users with
    // (Dividend - (Quotient * Divisor).
    if (SDNode *RemNode = DAG.getNodeIfExists(ISD::SREM, N->getVTList(),
                                              { N0, N1 })) {
      SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
      SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
      AddToWorklist(Mul.getNode());
      AddToWorklist(Sub.getNode());
      CombineTo(RemNode, Sub);
    }
    return V;
  }

  // sdiv, srem -> sdivrem
  // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
  // true.  Otherwise, we break the simplification logic in visitREM().
  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
  if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
    if (SDValue DivRem = useDivRem(N))
      return DivRem;

  return SDValue();
}
4133
/// Strength-reduce a signed division.  Shared by visitSDIV and visitREM (the
/// latter uses the quotient to rebuild the remainder), hence the explicit
/// N0/N1 operands rather than reading them from N.
SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) {
  SDLoc DL(N);
  EVT VT = N->getValueType(0);
  EVT CCVT = getSetCCResultType(VT);
  unsigned BitWidth = VT.getScalarSizeInBits();

  // Helper for determining whether a value is a power-2 constant scalar or a
  // vector of such elements.  Negated powers of two also qualify; zero and
  // opaque constants do not.
  auto IsPowerOfTwo = [](ConstantSDNode *C) {
    if (C->isNullValue() || C->isOpaque())
      return false;
    if (C->getAPIntValue().isPowerOf2())
      return true;
    if ((-C->getAPIntValue()).isPowerOf2())
      return true;
    return false;
  };

  // fold (sdiv X, pow2) -> simple ops after legalize
  // FIXME: We check for the exact bit here because the generic lowering gives
  // better results in that case. The target-specific lowering should learn how
  // to handle exact sdivs efficiently.
  if (!N->getFlags().hasExact() && ISD::matchUnaryPredicate(N1, IsPowerOfTwo)) {
    // Target-specific implementation of sdiv x, pow2.
    if (SDValue Res = BuildSDIVPow2(N))
      return Res;

    // Create constants that are functions of the shift amount value.
    EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
    SDValue Bits = DAG.getConstant(BitWidth, DL, ShiftAmtTy);
    SDValue C1 = DAG.getNode(ISD::CTTZ, DL, VT, N1);
    C1 = DAG.getZExtOrTrunc(C1, DL, ShiftAmtTy);
    SDValue Inexact = DAG.getNode(ISD::SUB, DL, ShiftAmtTy, Bits, C1);
    // Only proceed if the above folded to constants; otherwise the expansion
    // would be worse than the original division.
    if (!isConstantOrConstantVector(Inexact))
      return SDValue();

    // Splat the sign bit into the register
    SDValue Sign = DAG.getNode(ISD::SRA, DL, VT, N0,
                               DAG.getConstant(BitWidth - 1, DL, ShiftAmtTy));
    AddToWorklist(Sign.getNode());

    // Add (N0 < 0) ? abs2 - 1 : 0;  (rounds the shift toward zero for
    // negative dividends)
    SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, Sign, Inexact);
    AddToWorklist(Srl.getNode());
    SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Srl);
    AddToWorklist(Add.getNode());
    SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Add, C1);
    AddToWorklist(Sra.getNode());

    // Special case: (sdiv X, 1) -> X
    // Special Case: (sdiv X, -1) -> 0-X
    SDValue One = DAG.getConstant(1, DL, VT);
    SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
    SDValue IsOne = DAG.getSetCC(DL, CCVT, N1, One, ISD::SETEQ);
    SDValue IsAllOnes = DAG.getSetCC(DL, CCVT, N1, AllOnes, ISD::SETEQ);
    SDValue IsOneOrAllOnes = DAG.getNode(ISD::OR, DL, CCVT, IsOne, IsAllOnes);
    Sra = DAG.getSelect(DL, VT, IsOneOrAllOnes, N0, Sra);

    // If dividing by a positive value, we're done.  Otherwise, the result must
    // be negated.
    SDValue Zero = DAG.getConstant(0, DL, VT);
    SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, Zero, Sra);

    // FIXME: Use SELECT_CC once we improve SELECT_CC constant-folding.
    SDValue IsNeg = DAG.getSetCC(DL, CCVT, N1, Zero, ISD::SETLT);
    SDValue Res = DAG.getSelect(DL, VT, IsNeg, Sub, Sra);
    return Res;
  }

  // If integer divide is expensive and we satisfy the requirements, emit an
  // alternate sequence.  Targets may check function attributes for size/speed
  // trade-offs.
  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
  if (isConstantOrConstantVector(N1) &&
      !TLI.isIntDivCheap(N->getValueType(0), Attr))
    if (SDValue Op = BuildSDIV(N))
      return Op;

  return SDValue();
}
4214
/// Combine a UDIV node: constant folding, special-case divisor (-1),
/// strength reduction via visitUDIVLike, and udiv+urem -> udivrem.
SDValue DAGCombiner::visitUDIV(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  EVT CCVT = getSetCCResultType(VT);

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

  SDLoc DL(N);

  // fold (udiv c1, c2) -> c1/c2
  ConstantSDNode *N1C = isConstOrConstSplat(N1);
  if (SDValue C = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT, {N0, N1}))
    return C;

  // fold (udiv X, -1) -> select(X == -1, 1, 0)
  // (UINT_MAX is the only unsigned dividend that divides by UINT_MAX to 1)
  if (N1C && N1C->getAPIntValue().isAllOnesValue())
    return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
                         DAG.getConstant(1, DL, VT),
                         DAG.getConstant(0, DL, VT));

  if (SDValue V = simplifyDivRem(N, DAG))
    return V;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  if (SDValue V = visitUDIVLike(N0, N1, N)) {
    // If the corresponding remainder node exists, update its users with
    // (Dividend - (Quotient * Divisor).
    if (SDNode *RemNode = DAG.getNodeIfExists(ISD::UREM, N->getVTList(),
                                              { N0, N1 })) {
      SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
      SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
      AddToWorklist(Mul.getNode());
      AddToWorklist(Sub.getNode());
      CombineTo(RemNode, Sub);
    }
    return V;
  }

  // udiv, urem -> udivrem
  // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
  // true.  Otherwise, we break the simplification logic in visitREM().
  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
  if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
    if (SDValue DivRem = useDivRem(N))
      return DivRem;

  return SDValue();
}
4269
/// Strength-reduce an unsigned division.  Shared by visitUDIV and visitREM
/// (the latter uses the quotient to rebuild the remainder), hence the
/// explicit N0/N1 operands rather than reading them from N.
SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
  SDLoc DL(N);
  EVT VT = N->getValueType(0);

  // fold (udiv x, (1 << c)) -> x >>u c
  if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
      DAG.isKnownToBeAPowerOfTwo(N1)) {
    SDValue LogBase2 = BuildLogBase2(N1, DL);
    AddToWorklist(LogBase2.getNode());

    EVT ShiftVT = getShiftAmountTy(N0.getValueType());
    SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
    AddToWorklist(Trunc.getNode());
    return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
  }

  // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
  if (N1.getOpcode() == ISD::SHL) {
    SDValue N10 = N1.getOperand(0);
    if (isConstantOrConstantVector(N10, /*NoOpaques*/ true) &&
        DAG.isKnownToBeAPowerOfTwo(N10)) {
      SDValue LogBase2 = BuildLogBase2(N10, DL);
      AddToWorklist(LogBase2.getNode());

      EVT ADDVT = N1.getOperand(1).getValueType();
      SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ADDVT);
      AddToWorklist(Trunc.getNode());
      SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT, N1.getOperand(1), Trunc);
      AddToWorklist(Add.getNode());
      return DAG.getNode(ISD::SRL, DL, VT, N0, Add);
    }
  }

  // fold (udiv x, c) -> alternate (multiply-by-magic-constant sequence when
  // division is expensive for this function's attributes)
  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
  if (isConstantOrConstantVector(N1) &&
      !TLI.isIntDivCheap(N->getValueType(0), Attr))
    if (SDValue Op = BuildUDIV(N))
      return Op;

  return SDValue();
}
4312
// handles ISD::SREM and ISD::UREM
/// Combine a remainder node: constant folding, power-of-two masking for
/// unsigned remainders, and rebuilding X%C as X - (X/C)*C when the division
/// itself can be strength-reduced.
SDValue DAGCombiner::visitREM(SDNode *N) {
  unsigned Opcode = N->getOpcode();
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  EVT CCVT = getSetCCResultType(VT);

  bool isSigned = (Opcode == ISD::SREM);
  SDLoc DL(N);

  // fold (rem c1, c2) -> c1%c2
  ConstantSDNode *N1C = isConstOrConstSplat(N1);
  if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
    return C;

  // fold (urem X, -1) -> select(X == -1, 0, x)
  if (!isSigned && N1C && N1C->getAPIntValue().isAllOnesValue())
    return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
                         DAG.getConstant(0, DL, VT), N0);

  if (SDValue V = simplifyDivRem(N, DAG))
    return V;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  if (isSigned) {
    // If we know the sign bits of both operands are zero, strength reduce to a
    // urem instead.  Handles (X & 0x0FFFFFFF) %s 16 -> X&15
    if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
      return DAG.getNode(ISD::UREM, DL, VT, N0, N1);
  } else {
    if (DAG.isKnownToBeAPowerOfTwo(N1)) {
      // fold (urem x, pow2) -> (and x, pow2-1)
      SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
      SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
      AddToWorklist(Add.getNode());
      return DAG.getNode(ISD::AND, DL, VT, N0, Add);
    }
    if (N1.getOpcode() == ISD::SHL &&
        DAG.isKnownToBeAPowerOfTwo(N1.getOperand(0))) {
      // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
      SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
      SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
      AddToWorklist(Add.getNode());
      return DAG.getNode(ISD::AND, DL, VT, N0, Add);
    }
  }

  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();

  // If X/C can be simplified by the division-by-constant logic, lower
  // X%C to the equivalent of X-X/C*C.
  // Reuse the SDIVLike/UDIVLike combines - to avoid mangling nodes, the
  // speculative DIV must not cause a DIVREM conversion.  We guard against this
  // by skipping the simplification if isIntDivCheap().  When div is not cheap,
  // combine will not return a DIVREM.  Regardless, checking cheapness here
  // makes sense since the simplification results in fatter code.
  if (DAG.isKnownNeverZero(N1) && !TLI.isIntDivCheap(VT, Attr)) {
    SDValue OptimizedDiv =
        isSigned ? visitSDIVLike(N0, N1, N) : visitUDIVLike(N0, N1, N);
    if (OptimizedDiv.getNode()) {
      // If the equivalent Div node also exists, update its users.
      unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
      if (SDNode *DivNode = DAG.getNodeIfExists(DivOpcode, N->getVTList(),
                                                { N0, N1 }))
        CombineTo(DivNode, OptimizedDiv);
      SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, OptimizedDiv, N1);
      SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
      AddToWorklist(OptimizedDiv.getNode());
      AddToWorklist(Mul.getNode());
      return Sub;
    }
  }

  // sdiv, srem -> sdivrem / udiv, urem -> udivrem
  if (SDValue DivRem = useDivRem(N))
    return DivRem.getValue(1);

  return SDValue();
}
4395
/// Combine a MULHS node (signed multiply-high: the top half of the
/// double-width product).
SDValue DAGCombiner::visitMULHS(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  if (VT.isVector()) {
    // fold (mulhs x, 0) -> 0
    // do not return N0/N1, because undef node may exist.
    if (ISD::isBuildVectorAllZeros(N0.getNode()) ||
        ISD::isBuildVectorAllZeros(N1.getNode()))
      return DAG.getConstant(0, DL, VT);
  }

  // fold (mulhs x, 0) -> 0
  if (isNullConstant(N1))
    return N1;
  // fold (mulhs x, 1) -> (sra x, size(x)-1)
  // (the high half of x*1 is just x's sign bit replicated)
  if (isOneConstant(N1))
    return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0,
                       DAG.getConstant(N0.getScalarValueSizeInBits() - 1, DL,
                                       getShiftAmountTy(N0.getValueType())));

  // fold (mulhs x, undef) -> 0
  if (N0.isUndef() || N1.isUndef())
    return DAG.getConstant(0, DL, VT);

  // If the type twice as wide is legal, transform the mulhs to a wider multiply
  // plus a shift.
  if (!TLI.isOperationLegalOrCustom(ISD::MULHS, VT) && VT.isSimple() &&
      !VT.isVector()) {
    MVT Simple = VT.getSimpleVT();
    unsigned SimpleSize = Simple.getSizeInBits();
    EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
    if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
      N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
      N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
      N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
      N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
                       DAG.getConstant(SimpleSize, DL,
                                       getShiftAmountTy(N1.getValueType())));
      return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
    }
  }

  return SDValue();
}
4443
/// Combine a MULHU node (unsigned multiply-high: the top half of the
/// double-width product).
SDValue DAGCombiner::visitMULHU(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  if (VT.isVector()) {
    // fold (mulhu x, 0) -> 0
    // do not return N0/N1, because undef node may exist.
    if (ISD::isBuildVectorAllZeros(N0.getNode()) ||
        ISD::isBuildVectorAllZeros(N1.getNode()))
      return DAG.getConstant(0, DL, VT);
  }

  // fold (mulhu x, 0) -> 0
  if (isNullConstant(N1))
    return N1;
  // fold (mulhu x, 1) -> 0
  // (the high half of x*1 is always zero for unsigned values)
  if (isOneConstant(N1))
    return DAG.getConstant(0, DL, N0.getValueType());
  // fold (mulhu x, undef) -> 0
  if (N0.isUndef() || N1.isUndef())
    return DAG.getConstant(0, DL, VT);

  // fold (mulhu x, (1 << c)) -> x >> (bitwidth - c)
  if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
      DAG.isKnownToBeAPowerOfTwo(N1) && hasOperation(ISD::SRL, VT)) {
    unsigned NumEltBits = VT.getScalarSizeInBits();
    SDValue LogBase2 = BuildLogBase2(N1, DL);
    SDValue SRLAmt = DAG.getNode(
        ISD::SUB, DL, VT, DAG.getConstant(NumEltBits, DL, VT), LogBase2);
    EVT ShiftVT = getShiftAmountTy(N0.getValueType());
    SDValue Trunc = DAG.getZExtOrTrunc(SRLAmt, DL, ShiftVT);
    return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
  }

  // If the type twice as wide is legal, transform the mulhu to a wider multiply
  // plus a shift.
  if (!TLI.isOperationLegalOrCustom(ISD::MULHU, VT) && VT.isSimple() &&
      !VT.isVector()) {
    MVT Simple = VT.getSimpleVT();
    unsigned SimpleSize = Simple.getSizeInBits();
    EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
    if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
      N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
      N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
      N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
      N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
                       DAG.getConstant(SimpleSize, DL,
                                       getShiftAmountTy(N1.getValueType())));
      return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
    }
  }

  return SDValue();
}
4500
4501/// Perform optimizations common to nodes that compute two values. LoOp and HiOp
4502/// give the opcodes for the two computations that are being performed. Return
4503/// true if a simplification was made.
4504SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
4505 unsigned HiOp) {
4506 // If the high half is not needed, just compute the low half.
4507 bool HiExists = N->hasAnyUseOfValue(1);
4508 if (!HiExists && (!LegalOperations ||
4509 TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) {
4510 SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
4511 return CombineTo(N, Res, Res);
4512 }
4513
4514 // If the low half is not needed, just compute the high half.
4515 bool LoExists = N->hasAnyUseOfValue(0);
4516 if (!LoExists && (!LegalOperations ||
4517 TLI.isOperationLegalOrCustom(HiOp, N->getValueType(1)))) {
4518 SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
4519 return CombineTo(N, Res, Res);
4520 }
4521
4522 // If both halves are used, return as it is.
4523 if (LoExists && HiExists)
4524 return SDValue();
4525
4526 // If the two computed results can be simplified separately, separate them.
4527 if (LoExists) {
4528 SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
4529 AddToWorklist(Lo.getNode());
4530 SDValue LoOpt = combine(Lo.getNode());
4531 if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
4532 (!LegalOperations ||
4533 TLI.isOperationLegalOrCustom(LoOpt.getOpcode(), LoOpt.getValueType())))
4534 return CombineTo(N, LoOpt, LoOpt);
4535 }
4536
4537 if (HiExists) {
4538 SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
4539 AddToWorklist(Hi.getNode());
4540 SDValue HiOpt = combine(Hi.getNode());
4541 if (HiOpt.getNode() && HiOpt != Hi &&
4542 (!LegalOperations ||
4543 TLI.isOperationLegalOrCustom(HiOpt.getOpcode(), HiOpt.getValueType())))
4544 return CombineTo(N, HiOpt, HiOpt);
4545 }
4546
4547 return SDValue();
4548}
4549
4550SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
4551 if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS))
4552 return Res;
4553
4554 EVT VT = N->getValueType(0);
4555 SDLoc DL(N);
4556
4557 // If the type is twice as wide is legal, transform the mulhu to a wider
4558 // multiply plus a shift.
4559 if (VT.isSimple() && !VT.isVector()) {
4560 MVT Simple = VT.getSimpleVT();
4561 unsigned SimpleSize = Simple.getSizeInBits();
4562 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
4563 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
4564 SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(0));
4565 SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(1));
4566 Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
4567 // Compute the high part as N1.
4568 Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
4569 DAG.getConstant(SimpleSize, DL,
4570 getShiftAmountTy(Lo.getValueType())));
4571 Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
4572 // Compute the low part as N0.
4573 Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
4574 return CombineTo(N, Lo, Hi);
4575 }
4576 }
4577
4578 return SDValue();
4579}
4580
4581SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
4582 if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU))
4583 return Res;
4584
4585 EVT VT = N->getValueType(0);
4586 SDLoc DL(N);
4587
4588 // (umul_lohi N0, 0) -> (0, 0)
4589 if (isNullConstant(N->getOperand(1))) {
4590 SDValue Zero = DAG.getConstant(0, DL, VT);
4591 return CombineTo(N, Zero, Zero);
4592 }
4593
4594 // (umul_lohi N0, 1) -> (N0, 0)
4595 if (isOneConstant(N->getOperand(1))) {
4596 SDValue Zero = DAG.getConstant(0, DL, VT);
4597 return CombineTo(N, N->getOperand(0), Zero);
4598 }
4599
4600 // If the type is twice as wide is legal, transform the mulhu to a wider
4601 // multiply plus a shift.
4602 if (VT.isSimple() && !VT.isVector()) {
4603 MVT Simple = VT.getSimpleVT();
4604 unsigned SimpleSize = Simple.getSizeInBits();
4605 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
4606 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
4607 SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(0));
4608 SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(1));
4609 Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
4610 // Compute the high part as N1.
4611 Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
4612 DAG.getConstant(SimpleSize, DL,
4613 getShiftAmountTy(Lo.getValueType())));
4614 Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
4615 // Compute the low part as N0.
4616 Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
4617 return CombineTo(N, Lo, Hi);
4618 }
4619 }
4620
4621 return SDValue();
4622}
4623
4624SDValue DAGCombiner::visitMULO(SDNode *N) {
4625 SDValue N0 = N->getOperand(0);
4626 SDValue N1 = N->getOperand(1);
4627 EVT VT = N0.getValueType();
4628 bool IsSigned = (ISD::SMULO == N->getOpcode());
4629
4630 EVT CarryVT = N->getValueType(1);
4631 SDLoc DL(N);
4632
4633 ConstantSDNode *N0C = isConstOrConstSplat(N0);
4634 ConstantSDNode *N1C = isConstOrConstSplat(N1);
4635
4636 // fold operation with constant operands.
4637 // TODO: Move this to FoldConstantArithmetic when it supports nodes with
4638 // multiple results.
4639 if (N0C && N1C) {
4640 bool Overflow;
4641 APInt Result =
4642 IsSigned ? N0C->getAPIntValue().smul_ov(N1C->getAPIntValue(), Overflow)
4643 : N0C->getAPIntValue().umul_ov(N1C->getAPIntValue(), Overflow);
4644 return CombineTo(N, DAG.getConstant(Result, DL, VT),
4645 DAG.getBoolConstant(Overflow, DL, CarryVT, CarryVT));
4646 }
4647
4648 // canonicalize constant to RHS.
4649 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4650 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4651 return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
4652
4653 // fold (mulo x, 0) -> 0 + no carry out
4654 if (isNullOrNullSplat(N1))
4655 return CombineTo(N, DAG.getConstant(0, DL, VT),
4656 DAG.getConstant(0, DL, CarryVT));
4657
4658 // (mulo x, 2) -> (addo x, x)
4659 if (N1C && N1C->getAPIntValue() == 2)
4660 return DAG.getNode(IsSigned ? ISD::SADDO : ISD::UADDO, DL,
4661 N->getVTList(), N0, N0);
4662
4663 if (IsSigned) {
4664 // A 1 bit SMULO overflows if both inputs are 1.
4665 if (VT.getScalarSizeInBits() == 1) {
4666 SDValue And = DAG.getNode(ISD::AND, DL, VT, N0, N1);
4667 return CombineTo(N, And,
4668 DAG.getSetCC(DL, CarryVT, And,
4669 DAG.getConstant(0, DL, VT), ISD::SETNE));
4670 }
4671
4672 // Multiplying n * m significant bits yields a result of n + m significant
4673 // bits. If the total number of significant bits does not exceed the
4674 // result bit width (minus 1), there is no overflow.
4675 unsigned SignBits = DAG.ComputeNumSignBits(N0);
4676 if (SignBits > 1)
4677 SignBits += DAG.ComputeNumSignBits(N1);
4678 if (SignBits > VT.getScalarSizeInBits() + 1)
4679 return CombineTo(N, DAG.getNode(ISD::MUL, DL, VT, N0, N1),
4680 DAG.getConstant(0, DL, CarryVT));
4681 } else {
4682 KnownBits N1Known = DAG.computeKnownBits(N1);
4683 KnownBits N0Known = DAG.computeKnownBits(N0);
4684 bool Overflow;
4685 (void)N0Known.getMaxValue().umul_ov(N1Known.getMaxValue(), Overflow);
4686 if (!Overflow)
4687 return CombineTo(N, DAG.getNode(ISD::MUL, DL, VT, N0, N1),
4688 DAG.getConstant(0, DL, CarryVT));
4689 }
4690
4691 return SDValue();
4692}
4693
4694SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
4695 SDValue N0 = N->getOperand(0);
4696 SDValue N1 = N->getOperand(1);
4697 EVT VT = N0.getValueType();
4698 unsigned Opcode = N->getOpcode();
4699
4700 // fold vector ops
4701 if (VT.isVector())
4702 if (SDValue FoldedVOp = SimplifyVBinOp(N))
4703 return FoldedVOp;
4704
4705 // fold operation with constant operands.
4706 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, SDLoc(N), VT, {N0, N1}))
4707 return C;
4708
4709 // canonicalize constant to RHS
4710 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4711 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4712 return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
4713
4714 // Is sign bits are zero, flip between UMIN/UMAX and SMIN/SMAX.
4715 // Only do this if the current op isn't legal and the flipped is.
4716 if (!TLI.isOperationLegal(Opcode, VT) &&
4717 (N0.isUndef() || DAG.SignBitIsZero(N0)) &&
4718 (N1.isUndef() || DAG.SignBitIsZero(N1))) {
4719 unsigned AltOpcode;
4720 switch (Opcode) {
4721 case ISD::SMIN: AltOpcode = ISD::UMIN; break;
4722 case ISD::SMAX: AltOpcode = ISD::UMAX; break;
4723 case ISD::UMIN: AltOpcode = ISD::SMIN; break;
4724 case ISD::UMAX: AltOpcode = ISD::SMAX; break;
4725 default: llvm_unreachable("Unknown MINMAX opcode")::llvm::llvm_unreachable_internal("Unknown MINMAX opcode", "/build/llvm-toolchain-snapshot-13~++20210413100635+64c24f493e5f/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp"
, 4725)
;
4726 }
4727 if (TLI.isOperationLegal(AltOpcode, VT))
4728 return DAG.getNode(AltOpcode, SDLoc(N), VT, N0, N1);
4729 }
4730
4731 // Simplify the operands using demanded-bits information.
4732 if (SimplifyDemandedBits(SDValue(N, 0)))
4733 return SDValue(N, 0);
4734
4735 return SDValue();
4736}
4737
4738/// If this is a bitwise logic instruction and both operands have the same
4739/// opcode, try to sink the other opcode after the logic instruction.
4740SDValue DAGCombiner::hoistLogicOpWithSameOpcodeHands(SDNode *N) {
4741 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
4742 EVT VT = N0.getValueType();
4743 unsigned LogicOpcode = N->getOpcode();
4744 unsigned HandOpcode = N0.getOpcode();
4745 assert((LogicOpcode == ISD::AND || LogicOpcode == ISD::OR ||(((LogicOpcode == ISD::AND || LogicOpcode == ISD::OR || LogicOpcode
== ISD::XOR) && "Expected logic opcode") ? static_cast
<void> (0) : __assert_fail ("(LogicOpcode == ISD::AND || LogicOpcode == ISD::OR || LogicOpcode == ISD::XOR) && \"Expected logic opcode\""
, "/build/llvm-toolchain-snapshot-13~++20210413100635+64c24f493e5f/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp"
, 4746, __PRETTY_FUNCTION__))
4746 LogicOpcode == ISD::XOR) && "Expected logic opcode")(((LogicOpcode == ISD::AND || LogicOpcode == ISD::OR || LogicOpcode
== ISD::XOR) && "Expected logic opcode") ? static_cast
<void> (0) : __assert_fail ("(LogicOpcode == ISD::AND || LogicOpcode == ISD::OR || LogicOpcode == ISD::XOR) && \"Expected logic opcode\""
, "/build/llvm-toolchain-snapshot-13~++20210413100635+64c24f493e5f/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp"
, 4746, __PRETTY_FUNCTION__))
;
4747 assert(HandOpcode == N1.getOpcode() && "Bad input!")((HandOpcode == N1.getOpcode() && "Bad input!") ? static_cast
<void> (0) : __assert_fail ("HandOpcode == N1.getOpcode() && \"Bad input!\""
, "/build/llvm-toolchain-snapshot-13~++20210413100635+64c24f493e5f/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp"
, 4747, __PRETTY_FUNCTION__))
;
4748
4749 // Bail early if none of these transforms apply.
4750 if (N0.getNumOperands() == 0)
4751 return SDValue();
4752
4753 // FIXME: We should check number of uses of the operands to not increase
4754 // the instruction count for all transforms.
4755
4756 // Handle size-changing casts.
4757 SDValue X = N0.getOperand(0);
4758 SDValue Y = N1.getOperand(0);
4759 EVT XVT = X.getValueType();
4760 SDLoc DL(N);
4761 if (HandOpcode == ISD::ANY_EXTEND || HandOpcode == ISD::ZERO_EXTEND ||
4762 HandOpcode == ISD::SIGN_EXTEND) {
4763 // If both operands have other uses, this transform would create extra
4764 // instructions without eliminating anything.
4765 if (!N0.hasOneUse() && !N1.hasOneUse())
4766 return SDValue();
4767 // We need matching integer source types.
4768 if (XVT != Y.getValueType())
4769 return SDValue();
4770 // Don't create an illegal op during or after legalization. Don't ever
4771 // create an unsupported vector op.
4772 if ((VT.isVector() || LegalOperations) &&
4773 !TLI.isOperationLegalOrCustom(LogicOpcode, XVT))
4774 return SDValue();
4775 // Avoid infinite looping with PromoteIntBinOp.
4776 // TODO: Should we apply desirable/legal constraints to all opcodes?
4777 if (HandOpcode == ISD::ANY_EXTEND && LegalTypes &&
4778 !TLI.isTypeDesirableForOp(LogicOpcode, XVT))
4779 return SDValue();
4780 // logic_op (hand_op X), (hand_op Y) --> hand_op (logic_op X, Y)
4781 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4782 return DAG.getNode(HandOpcode, DL, VT, Logic);
4783 }
4784
4785 // logic_op (truncate x), (truncate y) --> truncate (logic_op x, y)
4786 if (HandOpcode == ISD::TRUNCATE) {
4787 // If both operands have other uses, this transform would create extra
4788 // instructions without eliminating anything.
4789 if (!N0.hasOneUse() && !N1.hasOneUse())
4790 return SDValue();
4791 // We need matching source types.
4792 if (XVT != Y.getValueType())
4793 return SDValue();
4794 // Don't create an illegal op during or after legalization.
4795 if (LegalOperations && !TLI.isOperationLegal(LogicOpcode, XVT))
4796 return SDValue();
4797 // Be extra careful sinking truncate. If it's free, there's no benefit in
4798 // widening a binop. Also, don't create a logic op on an illegal type.
4799 if (TLI.isZExtFree(VT, XVT) && TLI.isTruncateFree(XVT, VT))
4800 return SDValue();
4801 if (!TLI.isTypeLegal(XVT))
4802 return SDValue();
4803 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4804 return DAG.getNode(HandOpcode, DL, VT, Logic);
4805 }
4806
4807 // For binops SHL/SRL/SRA/AND:
4808 // logic_op (OP x, z), (OP y, z) --> OP (logic_op x, y), z
4809 if ((HandOpcode == ISD::SHL || HandOpcode == ISD::SRL ||
4810 HandOpcode == ISD::SRA || HandOpcode == ISD::AND) &&
4811 N0.getOperand(1) == N1.getOperand(1)) {
4812 // If either operand has other uses, this transform is not an improvement.
4813 if (!N0.hasOneUse() || !N1.hasOneUse())
4814 return SDValue();
4815 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4816 return DAG.getNode(HandOpcode, DL, VT, Logic, N0.getOperand(1));
4817 }
4818
4819 // Unary ops: logic_op (bswap x), (bswap y) --> bswap (logic_op x, y)
4820 if (HandOpcode == ISD::BSWAP) {
4821 // If either operand has other uses, this transform is not an improvement.
4822 if (!N0.hasOneUse() || !N1.hasOneUse())
4823 return SDValue();
4824 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4825 return DAG.getNode(HandOpcode, DL, VT, Logic);
4826 }
4827
4828 // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
4829 // Only perform this optimization up until type legalization, before
4830 // LegalizeVectorOprs. LegalizeVectorOprs promotes vector operations by
4831 // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and
4832 // we don't want to undo this promotion.
4833 // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
4834 // on scalars.
4835 if ((HandOpcode == ISD::BITCAST || HandOpcode == ISD::SCALAR_TO_VECTOR) &&
4836 Level <= AfterLegalizeTypes) {
4837 // Input types must be integer and the same.
4838 if (XVT.isInteger() && XVT == Y.getValueType() &&
4839 !(VT.isVector() && TLI.isTypeLegal(VT) &&
4840 !XVT.isVector() && !TLI.isTypeLegal(XVT))) {
4841 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4842 return DAG.getNode(HandOpcode, DL, VT, Logic);
4843 }
4844 }
4845
4846 // Xor/and/or are indifferent to the swizzle operation (shuffle of one value).
4847 // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B))
4848 // If both shuffles use the same mask, and both shuffle within a single
4849 // vector, then it is worthwhile to move the swizzle after the operation.
4850 // The type-legalizer generates this pattern when loading illegal
4851 // vector types from memory. In many cases this allows additional shuffle
4852 // optimizations.
4853 // There are other cases where moving the shuffle after the xor/and/or
4854 // is profitable even if shuffles don't perform a swizzle.
4855 // If both shuffles use the same mask, and both shuffles have the same first
4856 // or second operand, then it might still be profitable to move the shuffle
4857 // after the xor/and/or operation.
4858 if (HandOpcode == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) {
4859 auto *SVN0 = cast<ShuffleVectorSDNode>(N0);
4860 auto *SVN1 = cast<ShuffleVectorSDNode>(N1);
4861 assert(X.getValueType() == Y.getValueType() &&((X.getValueType() == Y.getValueType() && "Inputs to shuffles are not the same type"
) ? static_cast<void> (0) : __assert_fail ("X.getValueType() == Y.getValueType() && \"Inputs to shuffles are not the same type\""
, "/build/llvm-toolchain-snapshot-13~++20210413100635+64c24f493e5f/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp"
, 4862, __PRETTY_FUNCTION__))
4862 "Inputs to shuffles are not the same type")((X.getValueType() == Y.getValueType() && "Inputs to shuffles are not the same type"
) ? static_cast<void> (0) : __assert_fail ("X.getValueType() == Y.getValueType() && \"Inputs to shuffles are not the same type\""
, "/build/llvm-toolchain-snapshot-13~++20210413100635+64c24f493e5f/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp"
, 4862, __PRETTY_FUNCTION__))
;
4863
4864 // Check that both shuffles use the same mask. The masks are known to be of
4865 // the same length because the result vector type is the same.
4866 // Check also that shuffles have only one use to avoid introducing extra
4867 // instructions.
4868 if (!SVN0->hasOneUse() || !SVN1->hasOneUse() ||
4869 !SVN0->getMask().equals(SVN1->getMask()))
4870 return SDValue();
4871
4872 // Don't try to fold this node if it requires introducing a
4873 // build vector of all zeros that might be illegal at this stage.
4874 SDValue ShOp = N0.getOperand(1);
4875 if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
4876 ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
4877
4878 // (logic_op (shuf (A, C), shuf (B, C))) --> shuf (logic_op (A, B), C)
4879 if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) {
4880 SDValue Logic = DAG.getNode(LogicOpcode, DL, VT,
4881 N0.getOperand(0), N1.getOperand(0));
4882 return DAG.getVectorShuffle(VT, DL, Logic, ShOp, SVN0->getMask());
4883 }
4884
4885 // Don't try to fold this node if it requires introducing a
4886 // build vector of all zeros that might be illegal at this stage.
4887 ShOp = N0.getOperand(0);
4888 if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
4889 ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
4890
4891 // (logic_op (shuf (C, A), shuf (C, B))) --> shuf (C, logic_op (A, B))
4892 if (N0.getOperand(0) == N1.getOperand(0) && ShOp.getNode()) {
4893 SDValue Logic = DAG.getNode(LogicOpcode, DL, VT, N0.getOperand(1),
4894 N1.getOperand(1));
4895 return DAG.getVectorShuffle(VT, DL, ShOp, Logic, SVN0->getMask());
4896 }
4897 }
4898
4899 return SDValue();
4900}
4901
4902/// Try to make (and/or setcc (LL, LR), setcc (RL, RR)) more efficient.
4903SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
4904 const SDLoc &DL) {
4905 SDValue LL, LR, RL, RR, N0CC, N1CC;
4906 if (!isSetCCEquivalent(N0, LL, LR, N0CC) ||
4907 !isSetCCEquivalent(N1, RL, RR, N1CC))
4908 return SDValue();
4909
4910 assert(N0.getValueType() == N1.getValueType() &&((N0.getValueType() == N1.getValueType() && "Unexpected operand types for bitwise logic op"
) ? static_cast<void> (0) : __assert_fail ("N0.getValueType() == N1.getValueType() && \"Unexpected operand types for bitwise logic op\""
, "/build/llvm-toolchain-snapshot-13~++20210413100635+64c24f493e5f/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp"
, 4911, __PRETTY_FUNCTION__))
4911 "Unexpected operand types for bitwise logic op")((N0.getValueType() == N1.getValueType() && "Unexpected operand types for bitwise logic op"
) ? static_cast<void> (0) : __assert_fail ("N0.getValueType() == N1.getValueType() && \"Unexpected operand types for bitwise logic op\""
, "/build/llvm-toolchain-snapshot-13~++20210413100635+64c24f493e5f/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp"
, 4911, __PRETTY_FUNCTION__))
;
4912 assert(LL.getValueType() == LR.getValueType() &&((LL.getValueType() == LR.getValueType() && RL.getValueType
() == RR.getValueType() && "Unexpected operand types for setcc"
) ? static_cast<void> (0) : __assert_fail ("LL.getValueType() == LR.getValueType() && RL.getValueType() == RR.getValueType() && \"Unexpected operand types for setcc\""
, "/build/llvm-toolchain-snapshot-13~++20210413100635+64c24f493e5f/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp"
, 4914, __PRETTY_FUNCTION__))
4913 RL.getValueType() == RR.getValueType() &&((LL.getValueType() == LR.getValueType() && RL.getValueType
() == RR.getValueType() && "Unexpected operand types for setcc"
) ? static_cast<void> (0) : __assert_fail ("LL.getValueType() == LR.getValueType() && RL.getValueType() == RR.getValueType() && \"Unexpected operand types for setcc\""
, "/build/llvm-toolchain-snapshot-13~++20210413100635+64c24f493e5f/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp"
, 4914, __PRETTY_FUNCTION__))
4914 "Unexpected operand types for setcc")((LL.getValueType() == LR.getValueType() && RL.getValueType
() == RR.getValueType() && "Unexpected operand types for setcc"
) ? static_cast<void> (0) : __assert_fail ("LL.getValueType() == LR.getValueType() && RL.getValueType() == RR.getValueType() && \"Unexpected operand types for setcc\""
, "/build/llvm-toolchain-snapshot-13~++20210413100635+64c24f493e5f/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp"
, 4914, __PRETTY_FUNCTION__))
;
4915
4916 // If we're here post-legalization or the logic op type is not i1, the logic
4917 // op type must match a setcc result type. Also, all folds require new
4918 // operations on the left and right operands, so those types must match.
4919 EVT VT = N0.getValueType();
4920 EVT OpVT = LL.getValueType();
4921 if (LegalOperations || VT.getScalarType() != MVT::i1)
4922 if (VT != getSetCCResultType(OpVT))
4923 return SDValue();
4924 if (OpVT != RL.getValueType())
4925 return SDValue();
4926
4927 ISD::CondCode CC0 = cast<CondCodeSDNode>(N0CC)->get();
4928 ISD::CondCode CC1 = cast<CondCodeSDNode>(N1CC)->get();
4929 bool IsInteger = OpVT.isInteger();
4930 if (LR == RR && CC0 == CC1 && IsInteger) {
4931 bool IsZero = isNullOrNullSplat(LR);
4932 bool IsNeg1 = isAllOnesOrAllOnesSplat(LR);
4933
4934 // All bits clear?
4935 bool AndEqZero = IsAnd && CC1 == ISD::SETEQ && IsZero;
4936 // All sign bits clear?
4937 bool AndGtNeg1 = IsAnd && CC1 == ISD::SETGT && IsNeg1;
4938 // Any bits set?
4939 bool OrNeZero = !IsAnd && CC1 == ISD::SETNE && IsZero;
4940 // Any sign bits set?
4941 bool OrLtZero = !IsAnd && CC1 == ISD::SETLT && IsZero;
4942
4943 // (and (seteq X, 0), (seteq Y, 0)) --> (seteq (or X, Y), 0)
4944 // (and (setgt X, -1), (setgt Y, -1)) --> (setgt (or X, Y), -1)
4945 // (or (setne X, 0), (setne Y, 0)) --> (setne (or X, Y), 0)
4946 // (or (setlt X, 0), (setlt Y, 0)) --> (setlt (or X, Y), 0)
4947 if (AndEqZero || AndGtNeg1 || OrNeZero || OrLtZero) {
4948 SDValue Or = DAG.getNode(ISD::OR, SDLoc(N0), OpVT, LL, RL);
4949 AddToWorklist(Or.getNode());
4950 return DAG.getSetCC(DL, VT, Or, LR, CC1);
4951 }
4952
4953 // All bits set?
4954 bool AndEqNeg1 = IsAnd && CC1 == ISD::SETEQ && IsNeg1;
4955 // All sign bits set?
4956 bool AndLtZero = IsAnd && CC1 == ISD::SETLT && IsZero;
4957 // Any bits clear?
4958 bool OrNeNeg1 = !IsAnd && CC1 == ISD::SETNE && IsNeg1;
4959 // Any sign bits clear?
4960 bool OrGtNeg1 = !IsAnd && CC1 == ISD::SETGT && IsNeg1;
4961
4962 // (and (seteq X, -1), (seteq Y, -1)) --> (seteq (and X, Y), -1)
4963 // (and (setlt X, 0), (setlt Y, 0)) --> (setlt (and X, Y), 0)
4964 // (or (setne X, -1), (setne Y, -1)) --> (setne (and X, Y), -1)
4965 // (or (setgt X, -1), (setgt Y -1)) --> (setgt (and X, Y), -1)
4966 if (AndEqNeg1 || AndLtZero || OrNeNeg1 || OrGtNeg1) {
4967 SDValue And = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, LL, RL);
4968 AddToWorklist(And.getNode());
4969 return DAG.getSetCC(DL, VT, And, LR, CC1);
4970 }
4971 }
4972
4973 // TODO: What is the 'or' equivalent of this fold?
4974 // (and (setne X, 0), (setne X, -1)) --> (setuge (add X, 1), 2)
4975 if (IsAnd && LL == RL && CC0 == CC1 && OpVT.getScalarSizeInBits() > 1 &&
4976 IsInteger && CC0 == ISD::SETNE &&
4977 ((isNullConstant(LR) && isAllOnesConstant(RR)) ||
4978 (isAllOnesConstant(LR) && isNullConstant(RR)))) {
4979 SDValue One = DAG.getConstant(1, DL, OpVT);
4980 SDValue Two = DAG.getConstant(2, DL, OpVT);
4981 SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N0), OpVT, LL, One);
4982 AddToWorklist(Add.getNode());
4983 return DAG.getSetCC(DL, VT, Add, Two, ISD::SETUGE);
4984 }
4985
4986 // Try more general transforms if the predicates match and the only user of
4987 // the compares is the 'and' or 'or'.
4988 if (IsInteger && TLI.convertSetCCLogicToBitwiseLogic(OpVT) && CC0 == CC1 &&
4989 N0.hasOneUse() && N1.hasOneUse()) {
4990 // and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0
4991 // or (setne A, B), (setne C, D) --> setne (or (xor A, B), (xor C, D)), 0
4992 if ((IsAnd && CC1 == ISD::SETEQ) || (!IsAnd && CC1 == ISD::SETNE)) {
4993 SDValue XorL = DAG.getNode(ISD::XOR, SDLoc(N0), OpVT, LL, LR);
4994 SDValue XorR = DAG.getNode(ISD::XOR, SDLoc(N1), OpVT, RL, RR);
4995 SDValue Or = DAG.getNode(ISD::OR, DL, OpVT, XorL, XorR);
4996 SDValue Zero = DAG.getConstant(0, DL, OpVT);
4997 return DAG.getSetCC(DL, VT, Or, Zero, CC1);
4998 }
4999
5000 // Turn compare of constants whose difference is 1 bit into add+and+setcc.
5001 // TODO - support non-uniform vector amounts.
5002 if ((IsAnd && CC1 == ISD::SETNE) || (!IsAnd && CC1 == ISD::SETEQ)) {
5003 // Match a shared variable operand and 2 non-opaque constant operands.
5004 ConstantSDNode *C0 = isConstOrConstSplat(LR);
5005 ConstantSDNode *C1 = isConstOrConstSplat(RR);
5006 if (LL == RL && C0 && C1 && !C0->isOpaque() && !C1->isOpaque()) {
5007 const APInt &CMax =
5008 APIntOps::umax(C0->getAPIntValue(), C1->getAPIntValue());
5009 const APInt &CMin =
5010 APIntOps::umin(C0->getAPIntValue(), C1->getAPIntValue());
5011 // The difference of the constants must be a single bit.
5012 if ((CMax - CMin).isPowerOf2()) {
5013 // and/or (setcc X, CMax, ne), (setcc X, CMin, ne/eq) -->
5014 // setcc ((sub X, CMin), ~(CMax - CMin)), 0, ne/eq
5015 SDValue Max = DAG.getNode(ISD::UMAX, DL, OpVT, LR, RR);
5016 SDValue Min = DAG.getNode(ISD::UMIN, DL, OpVT, LR, RR);
5017 SDValue Offset = DAG.getNode(ISD::SUB, DL, OpVT, LL, Min);
5018 SDValue Diff = DAG.getNode(ISD::SUB, DL, OpVT, Max, Min);
5019 SDValue Mask = DAG.getNOT(DL, Diff, OpVT);
5020 SDValue And = DAG.getNode(ISD::AND, DL, OpVT, Offset, Mask);
5021 SDValue Zero = DAG.getConstant(0, DL, OpVT);
5022 return DAG.getSetCC(DL, VT, And, Zero, CC0);
5023 }
5024 }
5025 }
5026 }
5027
5028 // Canonicalize equivalent operands to LL == RL.
5029 if (LL == RR && LR == RL) {
5030 CC1 = ISD::getSetCCSwappedOperands(CC1);
5031 std::swap(RL, RR);
5032 }
5033
5034 // (and (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
5035 // (or (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
5036 if (LL == RL && LR == RR) {
5037 ISD::CondCode NewCC = IsAnd ? ISD::getSetCCAndOperation(CC0, CC1, OpVT)
5038 : ISD::getSetCCOrOperation(CC0, CC1, OpVT);
5039 if (NewCC != ISD::SETCC_INVALID &&
5040 (!LegalOperations ||
5041 (TLI.isCondCodeLegal(NewCC, LL.getSimpleValueType()) &&
5042 TLI.isOperationLegal(ISD::SETCC, OpVT))))
5043 return DAG.getSetCC(DL, VT, LL, LR, NewCC);
5044 }
5045
5046 return SDValue();
5047}
5048
5049/// This contains all DAGCombine rules which reduce two values combined by
5050/// an And operation to a single value. This makes them reusable in the context
5051/// of visitSELECT(). Rules involving constants are not included as
5052/// visitSELECT() already handles those cases.
5053SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, SDNode *N) {
5054 EVT VT = N1.getValueType();
5055 SDLoc DL(N);
5056
5057 // fold (and x, undef) -> 0
5058 if (N0.isUndef() || N1.isUndef())
5059 return DAG.getConstant(0, DL, VT);
5060
5061 if (SDValue V = foldLogicOfSetCCs(true, N0, N1, DL))
5062 return V;
5063
5064 if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
5065 VT.getSizeInBits() <= 64) {
5066 if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
5067 if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
5068 // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal
5069 // immediate for an add, but it is legal if its top c2 bits are set,
5070 // transform the ADD so the immediate doesn't need to be materialized
5071 // in a register.
5072 APInt ADDC = ADDI->getAPIntValue();
5073 APInt SRLC = SRLI->getAPIntValue();
5074 if (ADDC.getMinSignedBits() <= 64 &&
5075 SRLC.ult(VT.getSizeInBits()) &&
5076 !TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
5077 APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
5078 SRLC.getZExtValue());
5079 if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
5080 ADDC |= Mask;
5081 if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
5082 SDLoc DL0(N0);
5083 SDValue NewAdd =
5084 DAG.getNode(ISD::ADD, DL0, VT,
5085 N0.getOperand(0), DAG.getConstant(ADDC, DL, VT));
5086 CombineTo(N0.getNode(), NewAdd);
5087 // Return N so it doesn't get rechecked!
5088 return SDValue(N, 0);
5089 }
5090 }
5091 }
5092 }
5093 }
5094 }
5095
5096 // Reduce bit extract of low half of an integer to the narrower type.
5097 // (and (srl i64:x, K), KMask) ->
5098 // (i64 zero_extend (and (srl (i32 (trunc i64:x)), K)), KMask)
5099 if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
5100 if (ConstantSDNode *CAnd = dyn_cast<ConstantSDNode>(N1)) {
5101 if (ConstantSDNode *CShift = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
5102 unsigned Size = VT.getSizeInBits();
5103 const APInt &AndMask = CAnd->getAPIntValue();
5104 unsigned ShiftBits = CShift->getZExtValue();
5105
5106 // Bail out, this node will probably disappear anyway.
5107 if (ShiftBits == 0)
5108 return SDValue();
5109
5110 unsigned MaskBits = AndMask.countTrailingOnes();
5111 EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), Size / 2);
5112
5113 if (AndMask.isMask() &&
5114 // Required bits must not span the two halves of the integer and
5115 // must fit in the half size type.
5116 (ShiftBits + MaskBits <= Size / 2) &&
5117 TLI.isNarrowingProfitable(VT, HalfVT) &&
5118 TLI.isTypeDesirableForOp(ISD::AND, HalfVT) &&
5119 TLI.isTypeDesirableForOp(ISD::SRL, HalfVT) &&
5120 TLI.isTruncateFree(VT, HalfVT) &&
5121 TLI.isZExtFree(HalfVT, VT)) {
5122 // The isNarrowingProfitable is to avoid regressions on PPC and
5123 // AArch64 which match a few 64-bit bit insert / bit extract patterns
5124 // on downstream users of this. Those patterns could probably be
5125 // extended to handle extensions mixed in.
5126
5127 SDValue SL(N0);
5128 assert(MaskBits <= Size)((MaskBits <= Size) ? static_cast<void> (0) : __assert_fail
("MaskBits <= Size", "/build/llvm-toolchain-snapshot-13~++20210413100635+64c24f493e5f/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp"
, 5128, __PRETTY_FUNCTION__))
;
5129
5130 // Extracting the highest bit of the low half.
5131 EVT ShiftVT = TLI.getShiftAmountTy(HalfVT, DAG.getDataLayout());
5132 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, HalfVT,
5133 N0.getOperand(0));
5134
5135 SDValue NewMask = DAG.getConstant(AndMask.trunc(Size / 2), SL, HalfVT);
5136 SDValue ShiftK = DAG.getConstant(ShiftBits, SL, ShiftVT);
5137 SDValue Shift = DAG.getNode(ISD::SRL, SL, HalfVT, Trunc, ShiftK);
5138 SDValue And = DAG.getNode(ISD::AND, SL, HalfVT, Shift, NewMask);
5139 return DAG.getNode(ISD::ZERO_EXTEND, SL, VT, And);
5140 }
5141 }
5142 }
5143 }
5144
5145 return SDValue();
5146}
5147
5148bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
5149 EVT LoadResultTy, EVT &ExtVT) {
5150 if (!AndC->getAPIntValue().isMask())
5151 return false;
5152
5153 unsigned ActiveBits = AndC->getAPIntValue().countTrailingOnes();
5154
5155 ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
5156 EVT LoadedVT = LoadN->getMemoryVT();
5157
5158 if (ExtVT == LoadedVT &&
5159 (!LegalOperations ||
5160 TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))) {
5161 // ZEXTLOAD will match without needing to change the size of the value being
5162 // loaded.
5163 return true;
5164 }
5165
5166 // Do not change the width of a volatile or atomic loads.
5167 if (!LoadN->isSimple())
5168 return false;
5169
5170 // Do not generate loads of non-round integer types since these can
5171 // be expensive (and would be wrong if the type is not byte sized).
5172 if (!LoadedVT.bitsGT(ExtVT) || !ExtVT.isRound())
5173 return false;
5174
5175 if (LegalOperations &&
5176 !TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))
5177 return false;
5178
5179 if (!TLI.shouldReduceLoadWidth(LoadN, ISD::ZEXTLOAD, ExtVT))
5180 return false;
5181
5182 return true;
5183}
5184
5185bool DAGCombiner::isLegalNarrowLdSt(LSBaseSDNode *LDST,
5186 ISD::LoadExtType ExtType, EVT &MemVT,
5187 unsigned ShAmt) {
5188 if (!LDST)
5189 return false;
5190 // Only allow byte offsets.
5191 if (ShAmt % 8)
5192 return false;
5193
5194 // Do not generate loads of non-round integer types since these can
5195 // be expensive (and would be wrong if the type is not byte sized).
5196 if (!MemVT.isRound())
5197 return false;
5198
5199 // Don't change the width of a volatile or atomic loads.
5200 if (!LDST->isSimple())
5201 return false;
5202
5203 EVT LdStMemVT = LDST->getMemoryVT();
5204
5205 // Bail out when changing the scalable property, since we can't be sure that
5206 // we're actually narrowing here.
5207 if (LdStMemVT.isScalableVector() != MemVT.isScalableVector())
5208 return false;
5209
5210 // Verify that we are actually reducing a load width here.
5211 if (LdStMemVT.bitsLT(MemVT))
5212 return false;
5213
5214 // Ensure that this isn't going to produce an unsupported memory access.
5215 if (ShAmt) {
5216 assert(ShAmt % 8 == 0 && "ShAmt is byte offset")((ShAmt % 8 == 0 && "ShAmt is byte offset") ? static_cast
<void> (0) : __assert_fail ("ShAmt % 8 == 0 && \"ShAmt is byte offset\""
, "/build/llvm-toolchain-snapshot-13~++20210413100635+64c24f493e5f/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp"
, 5216, __PRETTY_FUNCTION__))
;
5217 const unsigned ByteShAmt = ShAmt / 8;
5218 const Align LDSTAlign = LDST->getAlign();
5219 const Align NarrowAlign = commonAlignment(LDSTAlign, ByteShAmt);
5220 if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
5221 LDST->getAddressSpace(), NarrowAlign,
5222 LDST->getMemOperand()->getFlags()))
5223 return false;
5224 }
5225
5226 // It's not possible to generate a constant of extended or untyped type.
5227 EVT PtrType = LDST->getBasePtr().getValueType();
5228 if (PtrType == MVT::Untyped || PtrType.isExtended())
5229 return false;
5230
5231 if (isa<LoadSDNode>(LDST)) {
5232 LoadSDNode *Load = cast<LoadSDNode>(LDST);
5233 // Don't transform one with multiple uses, this would require adding a new
5234 // load.
5235 if (!SDValue(Load, 0).hasOneUse())
5236 return false;
5237
5238 if (LegalOperations &&
5239 !TLI.isLoadExtLegal(ExtType, Load->getValueType(0), MemVT))
5240 return false;
5241
5242 // For the transform to be legal, the load must produce only two values
5243 // (the value loaded and the chain). Don't transform a pre-increment
5244 // load, for example, which produces an extra value. Otherwise the
5245 // transformation is not equivalent, and the downstream logic to replace
5246 // uses gets things wrong.
5247 if (Load->getNumValues() > 2)
5248 return false;
5249
5250 // If the load that we're shrinking is an extload and we're not just
5251 // discarding the extension we can't simply shrink the load. Bail.
5252 // TODO: It would be possible to merge the extensions in some cases.
5253 if (Load->getExtensionType() != ISD::NON_EXTLOAD &&
5254 Load->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
5255 return false;
5256
5257 if (!TLI.shouldReduceLoadWidth(Load, ExtType, MemVT))
5258 return false;
5259 } else {
5260 assert(isa<StoreSDNode>(LDST) && "It is not a Load nor a Store SDNode")((isa<StoreSDNode>(LDST) && "It is not a Load nor a Store SDNode"
) ? static_cast<void> (0) : __assert_fail ("isa<StoreSDNode>(LDST) && \"It is not a Load nor a Store SDNode\""
, "/build/llvm-toolchain-snapshot-13~++20210413100635+64c24f493e5f/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp"
, 5260, __PRETTY_FUNCTION__))
;
5261 StoreSDNode *Store = cast<StoreSDNode>(LDST);
5262 // Can't write outside the original store
5263 if (Store->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
5264 return false;
5265
5266 if (LegalOperations &&
5267 !TLI.isTruncStoreLegal(Store->getValue().getValueType(), MemVT))
5268 return false;
5269 }
5270 return true;
5271}
5272
/// Recursively walk the operands of \p N (an AND/OR/XOR tree rooted under an
/// AND with constant mask \p Mask), collecting loads that can be narrowed
/// into \p Loads, logic nodes whose constant operands will need re-masking
/// into \p NodesWithConsts, and at most one other node (recorded in
/// \p NodeToMask) that must have the mask applied explicitly. Returns false
/// if any operand makes the backwards mask propagation unsafe.
bool DAGCombiner::SearchForAndLoads(SDNode *N,
                                    SmallVectorImpl<LoadSDNode*> &Loads,
                                    SmallPtrSetImpl<SDNode*> &NodesWithConsts,
                                    ConstantSDNode *Mask,
                                    SDNode *&NodeToMask) {
  // Recursively search for the operands, looking for loads which can be
  // narrowed.
  for (SDValue Op : N->op_values()) {
    // Vector operands are not handled by this transform.
    if (Op.getValueType().isVector())
      return false;

    // Some constants may need fixing up later if they are too large.
    // A constant on an OR/XOR whose bits are not covered by the mask must be
    // re-masked later, so remember the parent node.
    if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
      if ((N->getOpcode() == ISD::OR || N->getOpcode() == ISD::XOR) &&
          (Mask->getAPIntValue() & C->getAPIntValue()) != C->getAPIntValue())
        NodesWithConsts.insert(N);
      continue;
    }

    // Any non-constant operand shared with other users cannot be rewritten.
    if (!Op.hasOneUse())
      return false;

    switch(Op.getOpcode()) {
    case ISD::LOAD: {
      auto *Load = cast<LoadSDNode>(Op);
      EVT ExtVT;
      if (isAndLoadExtLoad(Mask, Load, Load->getValueType(0), ExtVT) &&
          isLegalNarrowLdSt(Load, ISD::ZEXTLOAD, ExtVT)) {

        // ZEXTLOAD is already small enough.
        if (Load->getExtensionType() == ISD::ZEXTLOAD &&
            ExtVT.bitsGE(Load->getMemoryVT()))
          continue;

        // Use LE to convert equal sized loads to zext.
        if (ExtVT.bitsLE(Load->getMemoryVT()))
          Loads.push_back(Load);

        continue;
      }
      return false;
    }
    case ISD::ZERO_EXTEND:
    case ISD::AssertZext: {
      unsigned ActiveBits = Mask->getAPIntValue().countTrailingOnes();
      EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
      // For AssertZext the asserted width is operand 1; for ZERO_EXTEND it is
      // the width of the value being extended.
      EVT VT = Op.getOpcode() == ISD::AssertZext ?
        cast<VTSDNode>(Op.getOperand(1))->getVT() :
        Op.getOperand(0).getValueType();

      // We can accept extending nodes if the mask is wider or an equal
      // width to the original type.
      if (ExtVT.bitsGE(VT))
        continue;
      // Otherwise fall through below and try to treat this node as the one
      // explicitly-masked node.
      break;
    }
    case ISD::OR:
    case ISD::XOR:
    case ISD::AND:
      // Recurse into nested logic ops; they may hide more loads.
      if (!SearchForAndLoads(Op.getNode(), Loads, NodesWithConsts, Mask,
                             NodeToMask))
        return false;
      continue;
    }

    // Allow one node which will masked along with any loads found.
    if (NodeToMask)
      return false;

    // Also ensure that the node to be masked only produces one data result.
    NodeToMask = Op.getNode();
    if (NodeToMask->getNumValues() > 1) {
      bool HasValue = false;
      for (unsigned i = 0, e = NodeToMask->getNumValues(); i < e; ++i) {
        MVT VT = SDValue(NodeToMask, i).getSimpleValueType();
        // Glue/chain results don't count as data results.
        if (VT != MVT::Glue && VT != MVT::Other) {
          if (HasValue) {
            NodeToMask = nullptr;
            return false;
          }
          HasValue = true;
        }
      }
      assert(HasValue && "Node to be masked has no data result?");
    }
  }
  return true;
}
5361
/// Given an AND node \p N with a low-bit mask as operand 1, try to push the
/// mask back through the logic tree feeding operand 0 so that the loads at
/// the leaves can be narrowed (to ZEXTLOADs) and the AND itself removed.
/// Returns true and rewrites the DAG on success.
bool DAGCombiner::BackwardsPropagateMask(SDNode *N) {
  auto *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
  if (!Mask)
    return false;

  // The transform only makes sense for contiguous low-bit masks.
  if (!Mask->getAPIntValue().isMask())
    return false;

  // No need to do anything if the and directly uses a load.
  if (isa<LoadSDNode>(N->getOperand(0)))
    return false;

  SmallVector<LoadSDNode*, 8> Loads;
  SmallPtrSet<SDNode*, 2> NodesWithConsts;
  SDNode *FixupNode = nullptr;
  if (SearchForAndLoads(N, Loads, NodesWithConsts, Mask, FixupNode)) {
    // Without at least one narrowable load there is nothing to gain.
    if (Loads.size() == 0)
      return false;

    LLVM_DEBUG(dbgs() << "Backwards propagate AND: "; N->dump());
    SDValue MaskOp = N->getOperand(1);

    // If it exists, fixup the single node we allow in the tree that needs
    // masking.
    if (FixupNode) {
      LLVM_DEBUG(dbgs() << "First, need to fix up: "; FixupNode->dump());
      SDValue And = DAG.getNode(ISD::AND, SDLoc(FixupNode),
                                FixupNode->getValueType(0),
                                SDValue(FixupNode, 0), MaskOp);
      DAG.ReplaceAllUsesOfValueWith(SDValue(FixupNode, 0), And);
      // RAUW above also rewired the new AND's own operand onto itself;
      // restore it to point at the original node.
      if (And.getOpcode() == ISD ::AND)
        DAG.UpdateNodeOperands(And.getNode(), SDValue(FixupNode, 0), MaskOp);
    }

    // Narrow any constants that need it.
    for (auto *LogicN : NodesWithConsts) {
      SDValue Op0 = LogicN->getOperand(0);
      SDValue Op1 = LogicN->getOperand(1);

      // Canonicalize so the constant (if any) is Op1.
      if (isa<ConstantSDNode>(Op0))
        std::swap(Op0, Op1);

      SDValue And = DAG.getNode(ISD::AND, SDLoc(Op1), Op1.getValueType(),
                                Op1, MaskOp);

      DAG.UpdateNodeOperands(LogicN, Op0, And);
    }

    // Create narrow loads.
    for (auto *Load : Loads) {
      LLVM_DEBUG(dbgs() << "Propagate AND back to: "; Load->dump());
      SDValue And = DAG.getNode(ISD::AND, SDLoc(Load), Load->getValueType(0),
                                SDValue(Load, 0), MaskOp);
      DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), And);
      // Same self-reference fixup as for FixupNode above.
      if (And.getOpcode() == ISD ::AND)
        And = SDValue(
            DAG.UpdateNodeOperands(And.getNode(), SDValue(Load, 0), MaskOp), 0);
      // NOTE(review): ReduceLoadWidth may in principle return a null SDValue;
      // with NDEBUG the assert below compiles away and NewLoad.getValue(1)
      // would dereference a null node (this is the path the static analyzer
      // flags at SelectionDAGNodes.h:1332). SearchForAndLoads is expected to
      // have guaranteed narrowability here — confirm that invariant holds.
      SDValue NewLoad = ReduceLoadWidth(And.getNode());
      assert(NewLoad &&
             "Shouldn't be masking the load if it can't be narrowed");
      CombineTo(Load, NewLoad, NewLoad.getValue(1));
    }
    // Finally, drop the AND itself: its input already has the mask applied.
    DAG.ReplaceAllUsesWith(N, N->getOperand(0).getNode());
    return true;
  }
  return false;
}
5429
5430// Unfold
5431// x & (-1 'logical shift' y)
5432// To
5433// (x 'opposite logical shift' y) 'logical shift' y
5434// if it is better for performance.
5435SDValue DAGCombiner::unfoldExtremeBitClearingToShifts(SDNode *N) {
5436 assert(N->getOpcode() == ISD::AND)((N->getOpcode() == ISD::AND) ? static_cast<void> (0
) : __assert_fail ("N->getOpcode() == ISD::AND", "/build/llvm-toolchain-snapshot-13~++20210413100635+64c24f493e5f/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp"
, 5436, __PRETTY_FUNCTION__))
;
5437
5438 SDValue N0 = N->getOperand(0);
5439 SDValue N1 = N->getOperand(1);
5440
5441 // Do we actually prefer shifts over mask?
5442 if (!TLI.shouldFoldMaskToVariableShiftPair(N0))
5443 return SDValue();
5444
5445 // Try to match (-1 '[outer] logical shift' y)
5446 unsigned OuterShift;
5447 unsigned InnerShift; // The opposite direction to the OuterShift.
5448 SDValue Y; // Shift amount.
5449 auto matchMask = [&OuterShift, &InnerShift, &Y](SDValue M) -> bool {
5450 if (!M.hasOneUse())
5451 return false;
5452 OuterShift = M->getOpcode();
5453 if (OuterShift == ISD::SHL)
5454 InnerShift = ISD::SRL;
5455 else if (OuterShift == ISD::SRL)
5456 InnerShift = ISD::SHL;
5457 else
5458 return false;
5459 if (!isAllOnesConstant(M->getOperand(0)))
5460 return false;
5461 Y = M->getOperand(1);
5462 return true;
5463 };
5464
5465 SDValue X;
5466 if (matchMask(N1))
5467 X = N0;
5468 else if (matchMask(N0))
5469 X = N1;
5470 else
5471 return SDValue();
5472
5473 SDLoc DL(N);
5474 EVT VT = N->getValueType(0);
5475
5476 // tmp = x 'opposite logical shift' y
5477 SDValue T0 = DAG.getNode(InnerShift, DL, VT, X, Y);
5478 // ret = tmp 'logical shift' y
5479 SDValue T1 = DAG.getNode(OuterShift, DL, VT, T0, Y);
5480
5481 return T1;
5482}
5483
5484/// Try to replace shift/logic that tests if a bit is clear with mask + setcc.
5485/// For a target with a bit test, this is expected to become test + set and save
5486/// at least 1 instruction.
5487static SDValue combineShiftAnd1ToBitTest(SDNode *And, SelectionDAG &DAG) {
5488 assert(And->getOpcode() == ISD::AND && "Expected an 'and' op")((And->getOpcode() == ISD::AND && "Expected an 'and' op"
) ? static_cast<void> (0) : __assert_fail ("And->getOpcode() == ISD::AND && \"Expected an 'and' op\""
, "/build/llvm-toolchain-snapshot-13~++20210413100635+64c24f493e5f/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp"
, 5488, __PRETTY_FUNCTION__))
;
5489
5490 // This is probably not worthwhile without a supported type.
5491 EVT VT = And->getValueType(0);
5492 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
5493 if (!TLI.isTypeLegal(VT))
5494 return SDValue();
5495
5496 // Look through an optional extension and find a 'not'.
5497 // TODO: Should we favor test+set even without the 'not' op?
5498 SDValue Not = And->getOperand(0), And1 = And->getOperand(1);
5499 if (Not.getOpcode() == ISD::ANY_EXTEND)
5500 Not = Not.getOperand(0);
5501 if (!isBitwiseNot(Not) || !Not.hasOneUse() || !isOneConstant(And1))
5502 return SDValue();
5503
5504 // Look though an optional truncation. The source operand may not be the same
5505 // type as the original 'and', but that is ok because we are masking off
5506 // everything but the low bit.
5507 SDValue Srl = Not.getOperand(0);
5508 if (Srl.getOpcode() == ISD::TRUNCATE)
5509 Srl = Srl.getOperand(0);
5510
5511 // Match a shift-right by constant.
5512 if (Srl.getOpcode() != ISD::SRL || !Srl.hasOneUse() ||
5513 !isa<ConstantSDNode>(Srl.getOperand(1)))
5514 return SDValue();
5515
5516 // We might have looked through casts that make this transform invalid.
5517 // TODO: If the source type is wider than the result type, do the mask and
5518 // compare in the source type.
5519 const APInt &ShiftAmt = Srl.getConstantOperandAPInt(1);
5520 unsigned VTBitWidth = VT.getSizeInBits();
5521 if (ShiftAmt.uge(VTBitWidth))
5522 return SDValue();
5523
5524 // Turn this into a bit-test pattern using mask op + setcc:
5525 // and (not (srl X, C)), 1 --> (and X, 1<<C) == 0
5526 SDLoc DL(And);
5527 SDValue X = DAG.getZExtOrTrunc(Srl.getOperand(0), DL, VT);
5528 EVT CCVT = TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
5529 SDValue Mask = DAG.getConstant(
5530 APInt::getOneBitSet(VTBitWidth, ShiftAmt.getZExtValue()), DL, VT);
5531 SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, X, Mask);
5532 SDValue Zero = DAG.getConstant(0, DL, VT);
5533 SDValue Setcc = DAG.getSetCC(DL, CCVT, NewAnd, Zero, ISD::SETEQ);
5534 return DAG.getZExtOrTrunc(Setcc, DL, VT);
5535}
5536
/// Combine an ISD::AND node. Applies, in order: trivial identities, vector
/// folds, constant folding/canonicalization, known-bits simplification,
/// load-narrowing folds (zextload formation), backwards mask propagation,
/// bswap matching, bit-test formation, and sign->zero extension rewrites.
/// Returns the replacement value, SDValue(N, 0) when N was updated in place,
/// or an empty SDValue when no combine applied.
SDValue DAGCombiner::visitAND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N1.getValueType();

  // x & x --> x
  if (N0 == N1)
    return N0;

  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    // fold (and x, 0) -> 0, vector edition
    if (ISD::isBuildVectorAllZeros(N0.getNode()))
      // do not return N0, because undef node may exist in N0
      return DAG.getConstant(APInt::getNullValue(N0.getScalarValueSizeInBits()),
                             SDLoc(N), N0.getValueType());
    if (ISD::isBuildVectorAllZeros(N1.getNode()))
      // do not return N1, because undef node may exist in N1
      return DAG.getConstant(APInt::getNullValue(N1.getScalarValueSizeInBits()),
                             SDLoc(N), N1.getValueType());

    // fold (and x, -1) -> x, vector edition
    if (ISD::isBuildVectorAllOnes(N0.getNode()))
      return N1;
    if (ISD::isBuildVectorAllOnes(N1.getNode()))
      return N0;

    // fold (and (masked_load) (build_vec (x, ...))) to zext_masked_load
    auto *MLoad = dyn_cast<MaskedLoadSDNode>(N0);
    auto *BVec = dyn_cast<BuildVectorSDNode>(N1);
    if (MLoad && BVec && MLoad->getExtensionType() == ISD::EXTLOAD &&
        N0.hasOneUse() && N1.hasOneUse()) {
      EVT LoadVT = MLoad->getMemoryVT();
      EVT ExtVT = VT;
      if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT, LoadVT)) {
        // For this AND to be a zero extension of the masked load the elements
        // of the BuildVec must mask the bottom bits of the extended element
        // type
        if (ConstantSDNode *Splat = BVec->getConstantSplatNode()) {
          uint64_t ElementSize =
              LoadVT.getVectorElementType().getScalarSizeInBits();
          if (Splat->getAPIntValue().isMask(ElementSize)) {
            // Rebuild the masked load as a ZEXTLOAD; the AND becomes a no-op.
            return DAG.getMaskedLoad(
                ExtVT, SDLoc(N), MLoad->getChain(), MLoad->getBasePtr(),
                MLoad->getOffset(), MLoad->getMask(), MLoad->getPassThru(),
                LoadVT, MLoad->getMemOperand(), MLoad->getAddressingMode(),
                ISD::ZEXTLOAD, MLoad->isExpandingLoad());
          }
        }
      }
    }
  }

  // fold (and c1, c2) -> c1&c2
  ConstantSDNode *N1C = isConstOrConstSplat(N1);
  if (SDValue C = DAG.FoldConstantArithmetic(ISD::AND, SDLoc(N), VT, {N0, N1}))
    return C;

  // canonicalize constant to RHS
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
    return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0);

  // fold (and x, -1) -> x
  if (isAllOnesConstant(N1))
    return N0;

  // if (and x, c) is known to be zero, return 0
  unsigned BitWidth = VT.getScalarSizeInBits();
  if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
                                   APInt::getAllOnesValue(BitWidth)))
    return DAG.getConstant(0, SDLoc(N), VT);

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // reassociate and
  if (SDValue RAND = reassociateOps(ISD::AND, SDLoc(N), N0, N1, N->getFlags()))
    return RAND;

  // Try to convert a constant mask AND into a shuffle clear mask.
  if (VT.isVector())
    if (SDValue Shuffle = XformToShuffleWithZero(N))
      return Shuffle;

  if (SDValue Combined = combineCarryDiamond(*this, DAG, TLI, N0, N1, N))
    return Combined;

  // fold (and (or x, C), D) -> D if (C & D) == D
  auto MatchSubset = [](ConstantSDNode *LHS, ConstantSDNode *RHS) {
    return RHS->getAPIntValue().isSubsetOf(LHS->getAPIntValue());
  };
  if (N0.getOpcode() == ISD::OR &&
      ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchSubset))
    return N1;
  // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
  if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
    SDValue N0Op0 = N0.getOperand(0);
    APInt Mask = ~N1C->getAPIntValue();
    Mask = Mask.trunc(N0Op0.getScalarValueSizeInBits());
    if (DAG.MaskedValueIsZero(N0Op0, Mask)) {
      SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N),
                                 N0.getValueType(), N0Op0);

      // Replace uses of the AND with uses of the Zero extend node.
      CombineTo(N, Zext);

      // We actually want to replace all uses of the any_extend with the
      // zero_extend, to avoid duplicating things. This will later cause this
      // AND to be folded.
      CombineTo(N0.getNode(), Zext);
      return SDValue(N, 0); // Return N so it doesn't get rechecked!
    }
  }

  // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) ->
  // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must
  // already be zero by virtue of the width of the base type of the load.
  //
  // the 'X' node here can either be nothing or an extract_vector_elt to catch
  // more cases.
  if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
       N0.getValueSizeInBits() == N0.getOperand(0).getScalarValueSizeInBits() &&
       N0.getOperand(0).getOpcode() == ISD::LOAD &&
       N0.getOperand(0).getResNo() == 0) ||
      (N0.getOpcode() == ISD::LOAD && N0.getResNo() == 0)) {
    LoadSDNode *Load = cast<LoadSDNode>( (N0.getOpcode() == ISD::LOAD) ?
                                         N0 : N0.getOperand(0) );

    // Get the constant (if applicable) the zero'th operand is being ANDed with.
    // This can be a pure constant or a vector splat, in which case we treat the
    // vector as a scalar and use the splat value.
    APInt Constant = APInt::getNullValue(1);
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
      Constant = C->getAPIntValue();
    } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) {
      APInt SplatValue, SplatUndef;
      unsigned SplatBitSize;
      bool HasAnyUndefs;
      bool IsSplat = Vector->isConstantSplat(SplatValue, SplatUndef,
                                             SplatBitSize, HasAnyUndefs);
      if (IsSplat) {
        // Undef bits can contribute to a possible optimisation if set, so
        // set them.
        SplatValue |= SplatUndef;

        // The splat value may be something like "0x00FFFFFF", which means 0 for
        // the first vector value and FF for the rest, repeating. We need a mask
        // that will apply equally to all members of the vector, so AND all the
        // lanes of the constant together.
        unsigned EltBitWidth = Vector->getValueType(0).getScalarSizeInBits();

        // If the splat value has been compressed to a bitlength lower
        // than the size of the vector lane, we need to re-expand it to
        // the lane size.
        if (EltBitWidth > SplatBitSize)
          for (SplatValue = SplatValue.zextOrTrunc(EltBitWidth);
               SplatBitSize < EltBitWidth; SplatBitSize = SplatBitSize * 2)
            SplatValue |= SplatValue.shl(SplatBitSize);

        // Make sure that variable 'Constant' is only set if 'SplatBitSize' is a
        // multiple of 'BitWidth'. Otherwise, we could propagate a wrong value.
        if ((SplatBitSize % EltBitWidth) == 0) {
          Constant = APInt::getAllOnesValue(EltBitWidth);
          for (unsigned i = 0, n = (SplatBitSize / EltBitWidth); i < n; ++i)
            Constant &= SplatValue.extractBits(EltBitWidth, i * EltBitWidth);
        }
      }
    }

    // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is
    // actually legal and isn't going to get expanded, else this is a false
    // optimisation.
    bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD,
                                                    Load->getValueType(0),
                                                    Load->getMemoryVT());

    // Resize the constant to the same size as the original memory access before
    // extension. If it is still the AllOnesValue then this AND is completely
    // unneeded.
    Constant = Constant.zextOrTrunc(Load->getMemoryVT().getScalarSizeInBits());

    // B records whether the AND can be removed given the load's extension
    // kind (EXTLOAD only if converting it to ZEXTLOAD is profitable).
    bool B;
    switch (Load->getExtensionType()) {
    default: B = false; break;
    case ISD::EXTLOAD: B = CanZextLoadProfitably; break;
    case ISD::ZEXTLOAD:
    case ISD::NON_EXTLOAD: B = true; break;
    }

    if (B && Constant.isAllOnesValue()) {
      // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to
      // preserve semantics once we get rid of the AND.
      SDValue NewLoad(Load, 0);

      // Fold the AND away. NewLoad may get replaced immediately.
      CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0);

      if (Load->getExtensionType() == ISD::EXTLOAD) {
        NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD,
                              Load->getValueType(0), SDLoc(Load),
                              Load->getChain(), Load->getBasePtr(),
                              Load->getOffset(), Load->getMemoryVT(),
                              Load->getMemOperand());
        // Replace uses of the EXTLOAD with the new ZEXTLOAD.
        if (Load->getNumValues() == 3) {
          // PRE/POST_INC loads have 3 values.
          SDValue To[] = { NewLoad.getValue(0), NewLoad.getValue(1),
                           NewLoad.getValue(2) };
          CombineTo(Load, To, 3, true);
        } else {
          CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1));
        }
      }

      return SDValue(N, 0); // Return N so it doesn't get rechecked!
    }
  }

  // fold (and (masked_gather x)) -> (zext_masked_gather x)
  if (auto *GN0 = dyn_cast<MaskedGatherSDNode>(N0)) {
    EVT MemVT = GN0->getMemoryVT();
    EVT ScalarVT = MemVT.getScalarType();

    if (SDValue(GN0, 0).hasOneUse() &&
        isConstantSplatVectorMaskForType(N1.getNode(), ScalarVT) &&
        TLI.isVectorLoadExtDesirable(SDValue(SDValue(GN0, 0)))) {
      SDValue Ops[] = {GN0->getChain(), GN0->getPassThru(), GN0->getMask(),
                       GN0->getBasePtr(), GN0->getIndex(), GN0->getScale()};

      SDValue ZExtLoad = DAG.getMaskedGather(
          DAG.getVTList(VT, MVT::Other), MemVT, SDLoc(N), Ops,
          GN0->getMemOperand(), GN0->getIndexType(), ISD::ZEXTLOAD);

      CombineTo(N, ZExtLoad);
      AddToWorklist(ZExtLoad.getNode());
      // Avoid recheck of N.
      return SDValue(N, 0);
    }
  }

  // fold (and (load x), 255) -> (zextload x, i8)
  // fold (and (extload x, i16), 255) -> (zextload x, i8)
  // fold (and (any_ext (extload x, i16)), 255) -> (zextload x, i8)
  if (!VT.isVector() && N1C && (N0.getOpcode() == ISD::LOAD ||
                                (N0.getOpcode() == ISD::ANY_EXTEND &&
                                 N0.getOperand(0).getOpcode() == ISD::LOAD))) {
    // NOTE(review): the static analyzer reports a possible null dereference
    // (SelectionDAGNodes.h:1332) on a path through this region; it appears to
    // assume ReduceLoadWidth/RAUW can leave a null node in play. Verify the
    // ReduceLoadWidth success path always yields a non-null result here.
    if (SDValue Res = ReduceLoadWidth(N)) {
      LoadSDNode *LN0 = N0->getOpcode() == ISD::ANY_EXTEND
        ? cast<LoadSDNode>(N0.getOperand(0)) : cast<LoadSDNode>(N0);
      AddToWorklist(N);
      DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 0), Res);
      return SDValue(N, 0);
    }
  }

  if (LegalTypes) {
    // Attempt to propagate the AND back up to the leaves which, if they're
    // loads, can be combined to narrow loads and the AND node can be removed.
    // Perform after legalization so that extend nodes will already be
    // combined into the loads.
    if (BackwardsPropagateMask(N))
      return SDValue(N, 0);
  }

  if (SDValue Combined = visitANDLike(N0, N1, N))
    return Combined;

  // Simplify: (and (op x...), (op y...)) -> (op (and x, y))
  if (N0.getOpcode() == N1.getOpcode())
    if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
      return V;

  // Masking the negated extension of a boolean is just the zero-extended
  // boolean:
  // and (sub 0, zext(bool X)), 1 --> zext(bool X)
  // and (sub 0, sext(bool X)), 1 --> zext(bool X)
  //
  // Note: the SimplifyDemandedBits fold below can make an information-losing
  // transform, and then we have no way to find this better fold.
  if (N1C && N1C->isOne() && N0.getOpcode() == ISD::SUB) {
    if (isNullOrNullSplat(N0.getOperand(0))) {
      SDValue SubRHS = N0.getOperand(1);
      if (SubRHS.getOpcode() == ISD::ZERO_EXTEND &&
          SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
        return SubRHS;
      if (SubRHS.getOpcode() == ISD::SIGN_EXTEND &&
          SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
        return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, SubRHS.getOperand(0));
    }
  }

  // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
  // fold (and (sra)) -> (and (srl)) when possible.
  if (SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // fold (zext_inreg (extload x)) -> (zextload x)
  // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
  if (ISD::isUNINDEXEDLoad(N0.getNode()) &&
      (ISD::isEXTLoad(N0.getNode()) ||
       (ISD::isSEXTLoad(N0.getNode()) && N0.hasOneUse()))) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    EVT MemVT = LN0->getMemoryVT();
    // If we zero all the possible extended bits, then we can turn this into
    // a zextload if we are running before legalize or the operation is legal.
    unsigned ExtBitSize = N1.getScalarValueSizeInBits();
    unsigned MemBitSize = MemVT.getScalarSizeInBits();
    APInt ExtBits = APInt::getHighBitsSet(ExtBitSize, ExtBitSize - MemBitSize);
    if (DAG.MaskedValueIsZero(N1, ExtBits) &&
        ((!LegalOperations && LN0->isSimple()) ||
         TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
      SDValue ExtLoad =
          DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, LN0->getChain(),
                         LN0->getBasePtr(), MemVT, LN0->getMemOperand());
      AddToWorklist(N);
      CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
      return SDValue(N, 0); // Return N so it doesn't get rechecked!
    }
  }

  // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const)
  if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) {
    if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
                                           N0.getOperand(1), false))
      return BSwap;
  }

  if (SDValue Shifts = unfoldExtremeBitClearingToShifts(N))
    return Shifts;

  if (TLI.hasBitTest(N0, N1))
    if (SDValue V = combineShiftAnd1ToBitTest(N, DAG))
      return V;

  // Recognize the following pattern:
  //
  //   AndVT = (and (sign_extend NarrowVT to AndVT) #bitmask)
  //
  // where bitmask is a mask that clears the upper bits of AndVT. The
  // number of bits in bitmask must be a power of two.
  auto IsAndZeroExtMask = [](SDValue LHS, SDValue RHS) {
    if (LHS->getOpcode() != ISD::SIGN_EXTEND)
      return false;

    auto *C = dyn_cast<ConstantSDNode>(RHS);
    if (!C)
      return false;

    if (!C->getAPIntValue().isMask(
            LHS.getOperand(0).getValueType().getFixedSizeInBits()))
      return false;

    return true;
  };

  // Replace (and (sign_extend ...) #bitmask) with (zero_extend ...).
  if (IsAndZeroExtMask(N0, N1))
    return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, N0.getOperand(0));

  return SDValue();
}
5902
/// Match (a >> 8) | (a << 8) as (bswap a) >> 16. (For i16 no trailing shift
/// is needed; for wider types the shift amount is bitwidth - 16.)
SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
                                        bool DemandHighBits) {
  // Only attempt this once operations are legal, so the BSWAP legality
  // query below is meaningful.
  if (!LegalOperations)
    return SDValue();

  // Only scalar integer types with a legal/custom native BSWAP are handled.
  EVT VT = N->getValueType(0);
  if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16)
    return SDValue();
  if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
    return SDValue();

  // Recognize (and (shl a, 8), 0xff00), (and (srl a, 8), 0xff)
  // First canonicalize: if either side is an AND wrapping a shift, arrange
  // for the AND-of-SHL to land in N0 and the AND-of-SRL in N1. The
  // LookPassAnd0/1 flags record that the mask on that side has already been
  // matched and stripped.
  bool LookPassAnd0 = false;
  bool LookPassAnd1 = false;
  if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL)
    std::swap(N0, N1);
  if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL)
    std::swap(N0, N1);
  if (N0.getOpcode() == ISD::AND) {
    if (!N0.getNode()->hasOneUse())
      return SDValue();
    ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
    // Also handle 0xffff since the LHS is guaranteed to have zeros there.
    // This is needed for X86.
    if (!N01C || (N01C->getZExtValue() != 0xFF00 &&
                  N01C->getZExtValue() != 0xFFFF))
      return SDValue();
    // Strip the mask; N0 now refers to the shift underneath it.
    N0 = N0.getOperand(0);
    LookPassAnd0 = true;
  }

  if (N1.getOpcode() == ISD::AND) {
    if (!N1.getNode()->hasOneUse())
      return SDValue();
    ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
    if (!N11C || N11C->getZExtValue() != 0xFF)
      return SDValue();
    // Strip the mask; N1 now refers to the shift underneath it.
    N1 = N1.getOperand(0);
    LookPassAnd1 = true;
  }

  // Canonicalize again so the SHL is in N0 and the SRL in N1; then both
  // shifts must be present, single-use, and by a constant 8.
  if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
    std::swap(N0, N1);
  if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
    return SDValue();
  if (!N0.getNode()->hasOneUse() || !N1.getNode()->hasOneUse())
    return SDValue();

  ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
  ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
  if (!N01C || !N11C)
    return SDValue();
  if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 8)
    return SDValue();

  // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8) -- the byte
  // masks may also sit *inside* the shifts instead of outside them. Only
  // accept an inner mask on a side whose outer mask wasn't already matched.
  SDValue N00 = N0->getOperand(0);
  if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) {
    if (!N00.getNode()->hasOneUse())
      return SDValue();
    ConstantSDNode *N001C = dyn_cast<ConstantSDNode>(N00.getOperand(1));
    if (!N001C || N001C->getZExtValue() != 0xFF)
      return SDValue();
    N00 = N00.getOperand(0);
    LookPassAnd0 = true;
  }

  SDValue N10 = N1->getOperand(0);
  if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) {
    if (!N10.getNode()->hasOneUse())
      return SDValue();
    ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N10.getOperand(1));
    // Also allow 0xFFFF since the bits will be shifted out. This is needed
    // for X86.
    if (!N101C || (N101C->getZExtValue() != 0xFF00 &&
                   N101C->getZExtValue() != 0xFFFF))
      return SDValue();
    N10 = N10.getOperand(0);
    LookPassAnd1 = true;
  }

  // Both halves must come from the same source value 'a'.
  if (N00 != N10)
    return SDValue();

  // Make sure everything beyond the low halfword gets set to zero since the SRL
  // 16 will clear the top bits.
  unsigned OpSizeInBits = VT.getSizeInBits();
  if (DemandHighBits && OpSizeInBits > 16) {
    // If the left-shift isn't masked out then the only way this is a bswap is
    // if all bits beyond the low 8 are 0. In that case the entire pattern
    // reduces to a left shift anyway: leave it for other parts of the combiner.
    if (!LookPassAnd0)
      return SDValue();

    // However, if the right shift isn't masked out then it might be because
    // it's not needed. See if we can spot that too.
    if (!LookPassAnd1 &&
        !DAG.MaskedValueIsZero(
            N10, APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - 16)))
      return SDValue();
  }

  // Emit (bswap a); for types wider than 16 bits the swapped halfword lands
  // in the top 16 bits, so shift it back down into the low halfword.
  SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00);
  if (OpSizeInBits > 16) {
    SDLoc DL(N);
    Res = DAG.getNode(ISD::SRL, DL, VT, Res,
                      DAG.getConstant(OpSizeInBits - 16, DL,
                                      getShiftAmountTy(VT)));
  }
  return Res;
}
6015
6016/// Return true if the specified node is an element that makes up a 32-bit
6017/// packed halfword byteswap.
6018/// ((x & 0x000000ff) << 8) |
6019/// ((x & 0x0000ff00) >> 8) |
6020/// ((x & 0x00ff0000) << 8) |
6021/// ((x & 0xff000000) >> 8)
6022static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) {
6023 if (!N.getNode()->hasOneUse())
6024 return false;
6025
6026 unsigned Opc = N.getOpcode();
6027 if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL)
6028 return false;
6029
6030 SDValue N0 = N.getOperand(0);
6031 unsigned Opc0 = N0.getOpcode();
6032 if (Opc0 != ISD::AND && Opc0 != ISD::SHL && Opc0 != ISD::SRL)
6033 return false;
6034
6035 ConstantSDNode *N1C = nullptr;
6036 // SHL or SRL: look upstream for AND mask operand
6037 if (Opc == ISD::AND)
6038 N1C = dyn_cast<ConstantSDNode>(N.getOperand(1));
6039 else if (Opc0 == ISD::AND)
6040 N1C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
6041 if (!N1C)
6042 return false;
6043
6044 unsigned MaskByteOffset;
6045 switch (N1C->getZExtValue()) {
6046 default:
6047 return false;
6048 case 0xFF: MaskByteOffset = 0; break;
6049 case 0xFF00: MaskByteOffset = 1; break;
6050 case 0xFFFF:
6051 // In case demanded bits didn't clear the bits that will be shifted out.