1 //===- DAGCombiner.cpp - Implement a DAG node combiner --------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass combines dag nodes to form fewer, simpler DAG nodes. It can be run
10 // both before and after the DAG is legalized.
11 //
12 // This pass is not a substitute for the LLVM IR instcombine pass. This pass is
13 // primarily intended to handle simplification opportunities that are implicit
14 // in the LLVM IR and exposed by the various codegen lowering phases.
15 //
16 //===----------------------------------------------------------------------===//
17 
18 #include "llvm/ADT/APFloat.h"
19 #include "llvm/ADT/APInt.h"
20 #include "llvm/ADT/ArrayRef.h"
21 #include "llvm/ADT/DenseMap.h"
22 #include "llvm/ADT/IntervalMap.h"
23 #include "llvm/ADT/None.h"
24 #include "llvm/ADT/Optional.h"
25 #include "llvm/ADT/STLExtras.h"
26 #include "llvm/ADT/SetVector.h"
28 #include "llvm/ADT/SmallPtrSet.h"
29 #include "llvm/ADT/SmallSet.h"
30 #include "llvm/ADT/SmallVector.h"
31 #include "llvm/ADT/Statistic.h"
49 #include "llvm/IR/Attributes.h"
50 #include "llvm/IR/Constant.h"
51 #include "llvm/IR/DataLayout.h"
52 #include "llvm/IR/DerivedTypes.h"
53 #include "llvm/IR/Function.h"
54 #include "llvm/IR/Metadata.h"
55 #include "llvm/Support/Casting.h"
56 #include "llvm/Support/CodeGen.h"
57 #include "llvm/Support/CommandLine.h"
58 #include "llvm/Support/Compiler.h"
59 #include "llvm/Support/Debug.h"
61 #include "llvm/Support/KnownBits.h"
67 #include <algorithm>
68 #include <cassert>
69 #include <cstdint>
70 #include <functional>
71 #include <iterator>
72 #include <optional>
73 #include <string>
74 #include <tuple>
75 #include <utility>
76 #include <variant>
77 
78 using namespace llvm;
79 
80 #define DEBUG_TYPE "dagcombine"
81 
82 STATISTIC(NodesCombined , "Number of dag nodes combined");
83 STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
84 STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
85 STATISTIC(OpsNarrowed , "Number of load/op/store narrowed");
86 STATISTIC(LdStFP2Int , "Number of fp load/store pairs transformed to int");
87 STATISTIC(SlicedLoads, "Number of loads sliced");
88 STATISTIC(NumFPLogicOpsConv, "Number of logic ops converted to fp ops");
89 
90 static cl::opt<bool>
91 CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
92  cl::desc("Enable DAG combiner's use of IR alias analysis"));
93 
94 static cl::opt<bool>
95 UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true),
96  cl::desc("Enable DAG combiner's use of TBAA"));
97 
98 #ifndef NDEBUG
 99 static cl::opt<std::string>
100 CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden,
101  cl::desc("Only use DAG-combiner alias analysis in this"
102  " function"));
103 #endif
104 
105 /// Hidden option to stress test load slicing, i.e., when this option
106 /// is enabled, load slicing bypasses most of its profitability guards.
107 static cl::opt<bool>
108 StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
109  cl::desc("Bypass the profitability model of load slicing"),
110  cl::init(false));
111 
112 static cl::opt<bool>
113  MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
114  cl::desc("DAG combiner may split indexing from loads"));
115 
116 static cl::opt<bool>
117  EnableStoreMerging("combiner-store-merging", cl::Hidden, cl::init(true),
118  cl::desc("DAG combiner enable merging multiple stores "
119  "into a wider store"));
120 
121 static cl::opt<unsigned> TokenFactorInlineLimit(
122  "combiner-tokenfactor-inline-limit", cl::Hidden, cl::init(2048),
123  cl::desc("Limit the number of operands to inline for Token Factors"));
124 
125 static cl::opt<unsigned> StoreMergeDependenceLimit(
126  "combiner-store-merge-dependence-limit", cl::Hidden, cl::init(10),
127  cl::desc("Limit the number of times for the same StoreNode and RootNode "
128  "to bail out in store merging dependence check"));
129 
130 static cl::opt<bool> EnableReduceLoadOpStoreWidth(
131  "combiner-reduce-load-op-store-width", cl::Hidden, cl::init(true),
132  cl::desc("DAG combiner enable reducing the width of load/op/store "
133  "sequence"));
134 
135 static cl::opt<bool> EnableShrinkLoadReplaceStoreWithStore(
136  "combiner-shrink-load-replace-store-with-store", cl::Hidden, cl::init(true),
137  cl::desc("DAG combiner enable load/<replace bytes>/store with "
138  "a narrower store"));
139 
140 static cl::opt<bool> EnableVectorFCopySignExtendRound(
141  "combiner-vector-fcopysign-extend-round", cl::Hidden, cl::init(false),
142  cl::desc(
143  "Enable merging extends and rounds into FCOPYSIGN on vector types"));
144 
145 namespace {
146 
147  class DAGCombiner {
148  SelectionDAG &DAG;
149  const TargetLowering &TLI;
150  const SelectionDAGTargetInfo *STI;
151  CombineLevel Level = BeforeLegalizeTypes;
152  CodeGenOpt::Level OptLevel;
153  bool LegalDAG = false;
154  bool LegalOperations = false;
155  bool LegalTypes = false;
156  bool ForCodeSize;
157  bool DisableGenericCombines;
158 
159  /// Worklist of all of the nodes that need to be simplified.
160  ///
161  /// This must behave as a stack -- new nodes to process are pushed onto the
162  /// back and when processing we pop off of the back.
163  ///
164  /// The worklist will not contain duplicates but may contain null entries
165  /// due to nodes being deleted from the underlying DAG.
166  SmallVector<SDNode *, 64> Worklist;
167 
168  /// Mapping from an SDNode to its position on the worklist.
169  ///
170  /// This is used to find and remove nodes from the worklist (by nulling
171  /// them) when they are deleted from the underlying DAG. It relies on
172  /// stable indices of nodes within the worklist.
173  DenseMap<SDNode *, unsigned> WorklistMap;
174  /// This records all nodes attempted to be added to the worklist since we
175  /// last considered a new worklist entry. Because duplicate nodes are not
176  /// added to the worklist, this is different from the tail of the worklist.
177  SmallSetVector<SDNode *, 32> PruningList;
178 
179  /// Set of nodes which have been combined (at least once).
180  ///
181  /// This is used to allow us to reliably add any operands of a DAG node
182  /// which have not yet been combined to the worklist.
183  SmallPtrSet<SDNode *, 32> CombinedNodes;
184 
185  /// Map from candidate StoreNode to the pair of RootNode and count.
186  /// The count is used to track how many times we have seen the StoreNode
187  /// with the same RootNode bail out in dependence check. If we have seen
188  /// the bail out for the same pair many times over a limit, we won't
189  /// consider the StoreNode with the same RootNode as store merging
190  /// candidate again.
191  DenseMap<SDNode *, std::pair<SDNode *, unsigned>> StoreRootCountMap;
192 
193  // AA - Used for DAG load/store alias analysis.
194  AliasAnalysis *AA;
195 
196  /// When an instruction is simplified, add all users of the instruction to
197  /// the work lists because they might get more simplified now.
198  void AddUsersToWorklist(SDNode *N) {
199  for (SDNode *Node : N->uses())
200  AddToWorklist(Node);
201  }
202 
203  /// Convenient shorthand to add a node and all of its users to the worklist.
204  void AddToWorklistWithUsers(SDNode *N) {
205  AddUsersToWorklist(N);
206  AddToWorklist(N);
207  }
208 
209  // Prune potentially dangling nodes. This is called after
210  // any visit to a node, but should also be called during a visit after any
211  // failed combine which may have created a DAG node.
212  void clearAddedDanglingWorklistEntries() {
213  // Check any nodes added to the worklist to see if they are prunable.
214  while (!PruningList.empty()) {
215  auto *N = PruningList.pop_back_val();
216  if (N->use_empty())
217  recursivelyDeleteUnusedNodes(N);
218  }
219  }
220 
221  SDNode *getNextWorklistEntry() {
222  // Before we do any work, remove nodes that are not in use.
223  clearAddedDanglingWorklistEntries();
224  SDNode *N = nullptr;
225  // The Worklist holds the SDNodes in order, but it may contain null
226  // entries.
227  while (!N && !Worklist.empty()) {
228  N = Worklist.pop_back_val();
229  }
230 
231  if (N) {
232  bool GoodWorklistEntry = WorklistMap.erase(N);
233  (void)GoodWorklistEntry;
234  assert(GoodWorklistEntry &&
235  "Found a worklist entry without a corresponding map entry!");
236  }
237  return N;
238  }
239 
240  /// Call the node-specific routine that folds each particular type of node.
241  SDValue visit(SDNode *N);
242 
243  public:
244  DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOpt::Level OL)
245  : DAG(D), TLI(D.getTargetLoweringInfo()),
246  STI(D.getSubtarget().getSelectionDAGInfo()), OptLevel(OL), AA(AA) {
247  ForCodeSize = DAG.shouldOptForSize();
248  DisableGenericCombines = STI && STI->disableGenericCombines(OptLevel);
249 
250  MaximumLegalStoreInBits = 0;
251  // We use the minimum store size here, since that's all we can guarantee
252  // for the scalable vector types.
253  for (MVT VT : MVT::all_valuetypes())
254  if (EVT(VT).isSimple() && VT != MVT::Other &&
255  TLI.isTypeLegal(EVT(VT)) &&
256  VT.getSizeInBits().getKnownMinSize() >= MaximumLegalStoreInBits)
257  MaximumLegalStoreInBits = VT.getSizeInBits().getKnownMinSize();
258  }
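  // Illustrative example: on a hypothetical target whose widest legal type is
  // v4i32, the loop above leaves MaximumLegalStoreInBits == 128; for scalable
  // vector types only the known minimum size contributes.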
259 
260  void ConsiderForPruning(SDNode *N) {
261  // Mark this for potential pruning.
262  PruningList.insert(N);
263  }
264 
265  /// Add to the worklist, making sure its instance is at the back (next to be
266  /// processed).
267  void AddToWorklist(SDNode *N) {
268  assert(N->getOpcode() != ISD::DELETED_NODE &&
269  "Deleted Node added to Worklist");
270 
271  // Skip handle nodes as they can't usefully be combined and confuse the
272  // zero-use deletion strategy.
273  if (N->getOpcode() == ISD::HANDLENODE)
274  return;
275 
276  ConsiderForPruning(N);
277 
278  if (WorklistMap.insert(std::make_pair(N, Worklist.size())).second)
279  Worklist.push_back(N);
280  }
281 
282  /// Remove all instances of N from the worklist.
283  void removeFromWorklist(SDNode *N) {
284  CombinedNodes.erase(N);
285  PruningList.remove(N);
286  StoreRootCountMap.erase(N);
287 
288  auto It = WorklistMap.find(N);
289  if (It == WorklistMap.end())
290  return; // Not in the worklist.
291 
292  // Null out the entry rather than erasing it to avoid a linear operation.
293  Worklist[It->second] = nullptr;
294  WorklistMap.erase(It);
295  }
296 
297  void deleteAndRecombine(SDNode *N);
298  bool recursivelyDeleteUnusedNodes(SDNode *N);
299 
300  /// Replaces all uses of the results of one DAG node with new values.
301  SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
302  bool AddTo = true);
303 
304  /// Replaces all uses of the results of one DAG node with new values.
305  SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
306  return CombineTo(N, &Res, 1, AddTo);
307  }
308 
309  /// Replaces all uses of the results of one DAG node with new values.
310  SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
311  bool AddTo = true) {
312  SDValue To[] = { Res0, Res1 };
313  return CombineTo(N, To, 2, AddTo);
314  }
315 
316  void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);
317 
318  private:
319  unsigned MaximumLegalStoreInBits;
320 
321  /// Check the specified integer node value to see if it can be simplified or
322  /// if things it uses can be simplified by bit propagation.
323  /// If so, return true.
324  bool SimplifyDemandedBits(SDValue Op) {
325  unsigned BitWidth = Op.getScalarValueSizeInBits();
326  APInt DemandedBits = APInt::getAllOnes(BitWidth);
327  return SimplifyDemandedBits(Op, DemandedBits);
328  }
329 
330  bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits) {
331  TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
332  KnownBits Known;
333  if (!TLI.SimplifyDemandedBits(Op, DemandedBits, Known, TLO, 0, false))
334  return false;
335 
336  // Revisit the node.
337  AddToWorklist(Op.getNode());
338 
339  CommitTargetLoweringOpt(TLO);
340  return true;
341  }
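  // For example, if only the low 8 bits of an AND's result are demanded,
  // TLI.SimplifyDemandedBits may shrink the mask constant or replace the AND
  // with its input; the node is then re-added to the worklist for revisiting.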
342 
343  /// Check the specified vector node value to see if it can be simplified or
344  /// if things it uses can be simplified as it only uses some of the
345  /// elements. If so, return true.
346  bool SimplifyDemandedVectorElts(SDValue Op) {
347  // TODO: For now just pretend it cannot be simplified.
348  if (Op.getValueType().isScalableVector())
349  return false;
350 
351  unsigned NumElts = Op.getValueType().getVectorNumElements();
352  APInt DemandedElts = APInt::getAllOnes(NumElts);
353  return SimplifyDemandedVectorElts(Op, DemandedElts);
354  }
355 
356  bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
357  const APInt &DemandedElts,
358  bool AssumeSingleUse = false);
359  bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedElts,
360  bool AssumeSingleUse = false);
361 
362  bool CombineToPreIndexedLoadStore(SDNode *N);
363  bool CombineToPostIndexedLoadStore(SDNode *N);
364  SDValue SplitIndexingFromLoad(LoadSDNode *LD);
365  bool SliceUpLoad(SDNode *N);
366 
367  // Scalars have size 0 to distinguish from singleton vectors.
368  SDValue ForwardStoreValueToDirectLoad(LoadSDNode *LD);
369  bool getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val);
370  bool extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val);
371 
372  /// Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed
373  /// load.
374  ///
375  /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced.
376  /// \param InVecVT type of the input vector to EVE with bitcasts resolved.
377  /// \param EltNo index of the vector element to load.
378  /// \param OriginalLoad load that EVE came from to be replaced.
379  /// \returns EVE on success SDValue() on failure.
380  SDValue scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
381  SDValue EltNo,
382  LoadSDNode *OriginalLoad);
383  void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
384  SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
385  SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
386  SDValue ZExtPromoteOperand(SDValue Op, EVT PVT);
387  SDValue PromoteIntBinOp(SDValue Op);
388  SDValue PromoteIntShiftOp(SDValue Op);
389  SDValue PromoteExtend(SDValue Op);
390  bool PromoteLoad(SDValue Op);
391 
392  /// Call the node-specific routine that knows how to fold each
393  /// particular type of node. If that doesn't do anything, try the
394  /// target-specific DAG combines.
395  SDValue combine(SDNode *N);
396 
397  // Visitation implementation - Implement dag node combining for different
398  // node types. The semantics are as follows:
399  // Return Value:
400  // SDValue.getNode() == 0 - No change was made
401  // SDValue.getNode() == N - N was replaced, is dead and has been handled.
402  // otherwise - N should be replaced by the returned Operand.
403  //
404  SDValue visitTokenFactor(SDNode *N);
405  SDValue visitMERGE_VALUES(SDNode *N);
406  SDValue visitADD(SDNode *N);
407  SDValue visitADDLike(SDNode *N);
408  SDValue visitADDLikeCommutative(SDValue N0, SDValue N1, SDNode *LocReference);
409  SDValue visitSUB(SDNode *N);
410  SDValue visitADDSAT(SDNode *N);
411  SDValue visitSUBSAT(SDNode *N);
412  SDValue visitADDC(SDNode *N);
413  SDValue visitADDO(SDNode *N);
414  SDValue visitUADDOLike(SDValue N0, SDValue N1, SDNode *N);
415  SDValue visitSUBC(SDNode *N);
416  SDValue visitSUBO(SDNode *N);
417  SDValue visitADDE(SDNode *N);
418  SDValue visitADDCARRY(SDNode *N);
419  SDValue visitSADDO_CARRY(SDNode *N);
420  SDValue visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn, SDNode *N);
421  SDValue visitSUBE(SDNode *N);
422  SDValue visitSUBCARRY(SDNode *N);
423  SDValue visitSSUBO_CARRY(SDNode *N);
424  SDValue visitMUL(SDNode *N);
425  SDValue visitMULFIX(SDNode *N);
426  SDValue useDivRem(SDNode *N);
427  SDValue visitSDIV(SDNode *N);
428  SDValue visitSDIVLike(SDValue N0, SDValue N1, SDNode *N);
429  SDValue visitUDIV(SDNode *N);
430  SDValue visitUDIVLike(SDValue N0, SDValue N1, SDNode *N);
431  SDValue visitREM(SDNode *N);
432  SDValue visitMULHU(SDNode *N);
433  SDValue visitMULHS(SDNode *N);
434  SDValue visitAVG(SDNode *N);
435  SDValue visitSMUL_LOHI(SDNode *N);
436  SDValue visitUMUL_LOHI(SDNode *N);
437  SDValue visitMULO(SDNode *N);
438  SDValue visitIMINMAX(SDNode *N);
439  SDValue visitAND(SDNode *N);
440  SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *N);
441  SDValue visitOR(SDNode *N);
442  SDValue visitORLike(SDValue N0, SDValue N1, SDNode *N);
443  SDValue visitXOR(SDNode *N);
444  SDValue SimplifyVBinOp(SDNode *N, const SDLoc &DL);
445  SDValue visitSHL(SDNode *N);
446  SDValue visitSRA(SDNode *N);
447  SDValue visitSRL(SDNode *N);
448  SDValue visitFunnelShift(SDNode *N);
449  SDValue visitSHLSAT(SDNode *N);
450  SDValue visitRotate(SDNode *N);
451  SDValue visitABS(SDNode *N);
452  SDValue visitBSWAP(SDNode *N);
453  SDValue visitBITREVERSE(SDNode *N);
454  SDValue visitCTLZ(SDNode *N);
455  SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
456  SDValue visitCTTZ(SDNode *N);
457  SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
458  SDValue visitCTPOP(SDNode *N);
459  SDValue visitSELECT(SDNode *N);
460  SDValue visitVSELECT(SDNode *N);
461  SDValue visitSELECT_CC(SDNode *N);
462  SDValue visitSETCC(SDNode *N);
463  SDValue visitSETCCCARRY(SDNode *N);
464  SDValue visitSIGN_EXTEND(SDNode *N);
465  SDValue visitZERO_EXTEND(SDNode *N);
466  SDValue visitANY_EXTEND(SDNode *N);
467  SDValue visitAssertExt(SDNode *N);
468  SDValue visitAssertAlign(SDNode *N);
469  SDValue visitSIGN_EXTEND_INREG(SDNode *N);
470  SDValue visitEXTEND_VECTOR_INREG(SDNode *N);
471  SDValue visitTRUNCATE(SDNode *N);
472  SDValue visitBITCAST(SDNode *N);
473  SDValue visitFREEZE(SDNode *N);
474  SDValue visitBUILD_PAIR(SDNode *N);
475  SDValue visitFADD(SDNode *N);
476  SDValue visitSTRICT_FADD(SDNode *N);
477  SDValue visitFSUB(SDNode *N);
478  SDValue visitFMUL(SDNode *N);
479  SDValue visitFMA(SDNode *N);
480  SDValue visitFDIV(SDNode *N);
481  SDValue visitFREM(SDNode *N);
482  SDValue visitFSQRT(SDNode *N);
483  SDValue visitFCOPYSIGN(SDNode *N);
484  SDValue visitFPOW(SDNode *N);
485  SDValue visitSINT_TO_FP(SDNode *N);
486  SDValue visitUINT_TO_FP(SDNode *N);
487  SDValue visitFP_TO_SINT(SDNode *N);
488  SDValue visitFP_TO_UINT(SDNode *N);
489  SDValue visitFP_ROUND(SDNode *N);
490  SDValue visitFP_EXTEND(SDNode *N);
491  SDValue visitFNEG(SDNode *N);
492  SDValue visitFABS(SDNode *N);
493  SDValue visitFCEIL(SDNode *N);
494  SDValue visitFTRUNC(SDNode *N);
495  SDValue visitFFLOOR(SDNode *N);
496  SDValue visitFMinMax(SDNode *N);
497  SDValue visitBRCOND(SDNode *N);
498  SDValue visitBR_CC(SDNode *N);
499  SDValue visitLOAD(SDNode *N);
500 
501  SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain);
502  SDValue replaceStoreOfFPConstant(StoreSDNode *ST);
503 
504  SDValue visitSTORE(SDNode *N);
505  SDValue visitLIFETIME_END(SDNode *N);
506  SDValue visitINSERT_VECTOR_ELT(SDNode *N);
507  SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
508  SDValue visitBUILD_VECTOR(SDNode *N);
509  SDValue visitCONCAT_VECTORS(SDNode *N);
510  SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
511  SDValue visitVECTOR_SHUFFLE(SDNode *N);
512  SDValue visitSCALAR_TO_VECTOR(SDNode *N);
513  SDValue visitINSERT_SUBVECTOR(SDNode *N);
514  SDValue visitMLOAD(SDNode *N);
515  SDValue visitMSTORE(SDNode *N);
516  SDValue visitMGATHER(SDNode *N);
517  SDValue visitMSCATTER(SDNode *N);
518  SDValue visitVPGATHER(SDNode *N);
519  SDValue visitVPSCATTER(SDNode *N);
520  SDValue visitFP_TO_FP16(SDNode *N);
521  SDValue visitFP16_TO_FP(SDNode *N);
522  SDValue visitFP_TO_BF16(SDNode *N);
523  SDValue visitVECREDUCE(SDNode *N);
524  SDValue visitVPOp(SDNode *N);
525 
526  SDValue visitFADDForFMACombine(SDNode *N);
527  SDValue visitFSUBForFMACombine(SDNode *N);
528  SDValue visitFMULForFMADistributiveCombine(SDNode *N);
529 
530  SDValue XformToShuffleWithZero(SDNode *N);
531  bool reassociationCanBreakAddressingModePattern(unsigned Opc,
532  const SDLoc &DL,
533  SDNode *N,
534  SDValue N0,
535  SDValue N1);
536  SDValue reassociateOpsCommutative(unsigned Opc, const SDLoc &DL, SDValue N0,
537  SDValue N1);
538  SDValue reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
539  SDValue N1, SDNodeFlags Flags);
540 
541  SDValue visitShiftByConstant(SDNode *N);
542 
543  SDValue foldSelectOfConstants(SDNode *N);
544  SDValue foldVSelectOfConstants(SDNode *N);
545  SDValue foldBinOpIntoSelect(SDNode *BO);
546  bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
547  SDValue hoistLogicOpWithSameOpcodeHands(SDNode *N);
548  SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2);
549  SDValue SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
550  SDValue N2, SDValue N3, ISD::CondCode CC,
551  bool NotExtCompare = false);
552  SDValue convertSelectOfFPConstantsToLoadOffset(
553  const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
554  ISD::CondCode CC);
555  SDValue foldSignChangeInBitcast(SDNode *N);
556  SDValue foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1,
557  SDValue N2, SDValue N3, ISD::CondCode CC);
558  SDValue foldSelectOfBinops(SDNode *N);
559  SDValue foldSextSetcc(SDNode *N);
560  SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
561  const SDLoc &DL);
562  SDValue foldSubToUSubSat(EVT DstVT, SDNode *N);
563  SDValue unfoldMaskedMerge(SDNode *N);
564  SDValue unfoldExtremeBitClearingToShifts(SDNode *N);
565  SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
566  const SDLoc &DL, bool foldBooleans);
567  SDValue rebuildSetCC(SDValue N);
568 
569  bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
570  SDValue &CC, bool MatchStrict = false) const;
571  bool isOneUseSetCC(SDValue N) const;
572 
573  SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
574  unsigned HiOp);
575  SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
576  SDValue CombineExtLoad(SDNode *N);
577  SDValue CombineZExtLogicopShiftLoad(SDNode *N);
578  SDValue combineRepeatedFPDivisors(SDNode *N);
579  SDValue combineInsertEltToShuffle(SDNode *N, unsigned InsIndex);
580  SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
581  SDValue BuildSDIV(SDNode *N);
582  SDValue BuildSDIVPow2(SDNode *N);
583  SDValue BuildUDIV(SDNode *N);
584  SDValue BuildSREMPow2(SDNode *N);
585  SDValue buildOptimizedSREM(SDValue N0, SDValue N1, SDNode *N);
586  SDValue BuildLogBase2(SDValue V, const SDLoc &DL);
587  SDValue BuildDivEstimate(SDValue N, SDValue Op, SDNodeFlags Flags);
588  SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags);
589  SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags);
590  SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Recip);
591  SDValue buildSqrtNROneConst(SDValue Arg, SDValue Est, unsigned Iterations,
592  SDNodeFlags Flags, bool Reciprocal);
593  SDValue buildSqrtNRTwoConst(SDValue Arg, SDValue Est, unsigned Iterations,
594  SDNodeFlags Flags, bool Reciprocal);
595  SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
596  bool DemandHighBits = true);
597  SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
598  SDValue MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
599  SDValue InnerPos, SDValue InnerNeg, bool HasPos,
600  unsigned PosOpcode, unsigned NegOpcode,
601  const SDLoc &DL);
602  SDValue MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos, SDValue Neg,
603  SDValue InnerPos, SDValue InnerNeg, bool HasPos,
604  unsigned PosOpcode, unsigned NegOpcode,
605  const SDLoc &DL);
606  SDValue MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
607  SDValue MatchLoadCombine(SDNode *N);
608  SDValue mergeTruncStores(StoreSDNode *N);
609  SDValue reduceLoadWidth(SDNode *N);
610  SDValue ReduceLoadOpStoreWidth(SDNode *N);
611  SDValue splitMergedValStore(StoreSDNode *ST);
612  SDValue TransformFPLoadStorePair(SDNode *N);
613  SDValue convertBuildVecZextToZext(SDNode *N);
614  SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
615  SDValue reduceBuildVecTruncToBitCast(SDNode *N);
616  SDValue reduceBuildVecToShuffle(SDNode *N);
617  SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N,
618  ArrayRef<int> VectorMask, SDValue VecIn1,
619  SDValue VecIn2, unsigned LeftIdx,
620  bool DidSplitVec);
621  SDValue matchVSelectOpSizesWithSetCC(SDNode *Cast);
622 
623  /// Walk up chain skipping non-aliasing memory nodes,
624  /// looking for aliasing nodes and adding them to the Aliases vector.
625  void GatherAllAliases(SDNode *N, SDValue OriginalChain,
626  SmallVectorImpl<SDValue> &Aliases);
627 
628  /// Return true if there is any possibility that the two addresses overlap.
629  bool mayAlias(SDNode *Op0, SDNode *Op1) const;
630 
631  /// Walk up chain skipping non-aliasing memory nodes, looking for a better
632  /// chain (aliasing node.)
633  SDValue FindBetterChain(SDNode *N, SDValue Chain);
634 
635  /// Try to replace a store and any possibly adjacent stores on
636  /// consecutive chains with better chains. Return true only if St is
637  /// replaced.
638  ///
639  /// Notice that other chains may still be replaced even if the function
640  /// returns false.
641  bool findBetterNeighborChains(StoreSDNode *St);
642 
643  // Helper for findBetterNeighborChains. Walk up the store chain and add
644  // additional chained stores that do not overlap and can be parallelized.
645  bool parallelizeChainedStores(StoreSDNode *St);
646 
647  /// Holds a pointer to an LSBaseSDNode as well as information on where it
648  /// is located in a sequence of memory operations connected by a chain.
649  struct MemOpLink {
650  // Ptr to the mem node.
651  LSBaseSDNode *MemNode;
652 
653  // Offset from the base ptr.
654  int64_t OffsetFromBase;
655 
656  MemOpLink(LSBaseSDNode *N, int64_t Offset)
657  : MemNode(N), OffsetFromBase(Offset) {}
658  };
659 
660  // Classify the origin of a stored value.
661  enum class StoreSource { Unknown, Constant, Extract, Load };
662  StoreSource getStoreSource(SDValue StoreVal) {
663  switch (StoreVal.getOpcode()) {
664  case ISD::Constant:
665  case ISD::ConstantFP:
666  return StoreSource::Constant;
667  case ISD::EXTRACT_VECTOR_ELT:
668  case ISD::EXTRACT_SUBVECTOR:
669  return StoreSource::Extract;
670  case ISD::LOAD:
671  return StoreSource::Load;
672  default:
673  return StoreSource::Unknown;
674  }
675  }
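  // e.g. storing (Constant 42) or (ConstantFP 1.0) classifies as Constant,
  // storing (extract_vector_elt v, i) or (extract_subvector v, i) as Extract,
  // and storing a loaded value as Load; everything else is Unknown.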
676 
677  /// This is a helper function for visitMUL to check the profitability
678  /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
679  /// MulNode is the original multiply, AddNode is (add x, c1),
680  /// and ConstNode is c2.
681  bool isMulAddWithConstProfitable(SDNode *MulNode, SDValue AddNode,
682  SDValue ConstNode);
683 
684  /// This is a helper function for visitAND and visitZERO_EXTEND. Returns
685  /// true if the (and (load x) c) pattern matches an extload. ExtVT returns
686  /// the type of the loaded value to be extended.
687  bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
688  EVT LoadResultTy, EVT &ExtVT);
689 
690  /// Helper function to calculate whether the given Load/Store can have its
691  /// width reduced to ExtVT.
692  bool isLegalNarrowLdSt(LSBaseSDNode *LDSTN, ISD::LoadExtType ExtType,
693  EVT &MemVT, unsigned ShAmt = 0);
694 
695  /// Used by BackwardsPropagateMask to find suitable loads.
696  bool SearchForAndLoads(SDNode *N, SmallVectorImpl<LoadSDNode*> &Loads,
697  SmallPtrSetImpl<SDNode*> &NodesWithConsts,
698  ConstantSDNode *Mask, SDNode *&NodeToMask);
699  /// Attempt to propagate a given AND node back to load leaves so that they
700  /// can be combined into narrow loads.
701  bool BackwardsPropagateMask(SDNode *N);
702 
703  /// Helper function for mergeConsecutiveStores which merges the component
704  /// store chains.
705  SDValue getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
706  unsigned NumStores);
707 
708  /// This is a helper function for mergeConsecutiveStores. When the source
709  /// elements of the consecutive stores are all constants or all extracted
710  /// vector elements, try to merge them into one larger store introducing
711  /// bitcasts if necessary. \return True if a merged store was created.
712  bool mergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
713  EVT MemVT, unsigned NumStores,
714  bool IsConstantSrc, bool UseVector,
715  bool UseTrunc);
716 
717  /// This is a helper function for mergeConsecutiveStores. Stores that
718  /// potentially may be merged with St are placed in StoreNodes. RootNode is
719  /// a chain predecessor to all store candidates.
720  void getStoreMergeCandidates(StoreSDNode *St,
721  SmallVectorImpl<MemOpLink> &StoreNodes,
722  SDNode *&Root);
723 
724  /// Helper function for mergeConsecutiveStores. Checks if candidate stores
725  /// have indirect dependency through their operands. RootNode is the
726  /// predecessor to all stores calculated by getStoreMergeCandidates and is
727  /// used to prune the dependency check. \return True if safe to merge.
728  bool checkMergeStoreCandidatesForDependencies(
729  SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
730  SDNode *RootNode);
731 
732  /// This is a helper function for mergeConsecutiveStores. Given a list of
733  /// store candidates, find the first N that are consecutive in memory.
734  /// Returns 0 if there are not at least 2 consecutive stores to try merging.
735  unsigned getConsecutiveStores(SmallVectorImpl<MemOpLink> &StoreNodes,
736  int64_t ElementSizeBytes) const;
737 
738  /// This is a helper function for mergeConsecutiveStores. It is used for
739  /// store chains that are composed entirely of constant values.
740  bool tryStoreMergeOfConstants(SmallVectorImpl<MemOpLink> &StoreNodes,
741  unsigned NumConsecutiveStores,
742  EVT MemVT, SDNode *Root, bool AllowVectors);
743 
744  /// This is a helper function for mergeConsecutiveStores. It is used for
745  /// store chains that are composed entirely of extracted vector elements.
746  /// When extracting multiple vector elements, try to store them in one
747  /// vector store rather than a sequence of scalar stores.
748  bool tryStoreMergeOfExtracts(SmallVectorImpl<MemOpLink> &StoreNodes,
749  unsigned NumConsecutiveStores, EVT MemVT,
750  SDNode *Root);
751 
752  /// This is a helper function for mergeConsecutiveStores. It is used for
753  /// store chains that are composed entirely of loaded values.
754  bool tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
755  unsigned NumConsecutiveStores, EVT MemVT,
756  SDNode *Root, bool AllowVectors,
757  bool IsNonTemporalStore, bool IsNonTemporalLoad);
758 
759  /// Merge consecutive store operations into a wide store.
760  /// This optimization uses wide integers or vectors when possible.
761  /// \return true if stores were merged.
762  bool mergeConsecutiveStores(StoreSDNode *St);
763 
764  /// Try to transform a truncation where C is a constant:
765  /// (trunc (and X, C)) -> (and (trunc X), (trunc C))
766  ///
767  /// \p N needs to be a truncation and its first operand an AND. Other
768  /// requirements are checked by the function (e.g. that trunc is
769  /// single-use) and if missed an empty SDValue is returned.
770  SDValue distributeTruncateThroughAnd(SDNode *N);
771 
772  /// Helper function to determine whether the target supports operation
773  /// given by \p Opcode for type \p VT, that is, whether the operation
774  /// is legal or custom before legalizing operations, and whether it is
775  /// legal (but not custom) after legalization.
776  bool hasOperation(unsigned Opcode, EVT VT) {
777  return TLI.isOperationLegalOrCustom(Opcode, VT, LegalOperations);
778  }
779 
780  public:
781  /// Runs the dag combiner on all nodes in the work list
782  void Run(CombineLevel AtLevel);
783 
784  SelectionDAG &getDAG() const { return DAG; }
785 
786  /// Returns a type large enough to hold any valid shift amount - before type
787  /// legalization these can be huge.
788  EVT getShiftAmountTy(EVT LHSTy) {
789  assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
790  return TLI.getShiftAmountTy(LHSTy, DAG.getDataLayout(), LegalTypes);
791  }
792 
793  /// This method returns true if we are running before type legalization or
794  /// if the specified VT is legal.
795  bool isTypeLegal(const EVT &VT) {
796  if (!LegalTypes) return true;
797  return TLI.isTypeLegal(VT);
798  }
799 
800  /// Convenience wrapper around TargetLowering::getSetCCResultType
801  EVT getSetCCResultType(EVT VT) const {
802  return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
803  }
804 
805  void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
806  SDValue OrigLoad, SDValue ExtLoad,
807  ISD::NodeType ExtType);
808  };
809 
810 /// This class is a DAGUpdateListener that removes any deleted
811 /// nodes from the worklist.
812 class WorklistRemover : public SelectionDAG::DAGUpdateListener {
813  DAGCombiner &DC;
814 
815 public:
816  explicit WorklistRemover(DAGCombiner &dc)
817  : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
818 
819  void NodeDeleted(SDNode *N, SDNode *E) override {
820  DC.removeFromWorklist(N);
821  }
822 };
823 
824 class WorklistInserter : public SelectionDAG::DAGUpdateListener {
825  DAGCombiner &DC;
826 
827 public:
828  explicit WorklistInserter(DAGCombiner &dc)
829  : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
830 
831  // FIXME: Ideally we could add N to the worklist, but this causes exponential
832  // compile time costs in large DAGs, e.g. Halide.
833  void NodeInserted(SDNode *N) override { DC.ConsiderForPruning(N); }
834 };
835 
836 } // end anonymous namespace
837 
838 //===----------------------------------------------------------------------===//
839 // TargetLowering::DAGCombinerInfo implementation
840 //===----------------------------------------------------------------------===//
841 
842 void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) {
843  ((DAGCombiner*)DC)->AddToWorklist(N);
844 }
845 
846 SDValue TargetLowering::DAGCombinerInfo::
847 CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) {
848  return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
849 }
850 
851 SDValue TargetLowering::DAGCombinerInfo::
852 CombineTo(SDNode *N, SDValue Res, bool AddTo) {
853  return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
854 }
855 
856 SDValue TargetLowering::DAGCombinerInfo::
857 CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
858  return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
859 }
860 
861 bool TargetLowering::DAGCombinerInfo::
862 recursivelyDeleteUnusedNodes(SDNode *N) {
863  return ((DAGCombiner*)DC)->recursivelyDeleteUnusedNodes(N);
864 }
865 
866 void TargetLowering::DAGCombinerInfo::
867 CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
868  return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
869 }
870 
871 //===----------------------------------------------------------------------===//
872 // Helper Functions
873 //===----------------------------------------------------------------------===//
874 
875 void DAGCombiner::deleteAndRecombine(SDNode *N) {
876  removeFromWorklist(N);
877 
878  // If the operands of this node are only used by the node, they will now be
879  // dead. Make sure to re-visit them and recursively delete dead nodes.
880  for (const SDValue &Op : N->ops())
881  // For an operand generating multiple values, one of the values may
882  // become dead allowing further simplification (e.g. split index
883  // arithmetic from an indexed load).
884  if (Op->hasOneUse() || Op->getNumValues() > 1)
885  AddToWorklist(Op.getNode());
886 
887  DAG.DeleteNode(N);
888 }
889 
890 // APInts must be the same size for most operations; this helper function
891 // zero-extends the shorter of the pair so that they match.
892 // We provide an Offset so that we can create bitwidths that won't overflow.
893 static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) {
894  unsigned Bits = Offset + std::max(LHS.getBitWidth(), RHS.getBitWidth());
895  LHS = LHS.zext(Bits);
896  RHS = RHS.zext(Bits);
897 }
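// e.g. with an 8-bit LHS, a 16-bit RHS and Offset == 1, both APInts are
// zero-extended to 17 bits, giving the caller one extra bit of headroom.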
898 
899 // Return true if this node is a setcc, or is a select_cc
900 // that selects between the target values used for true and false, making it
901 // equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to
902 // the appropriate nodes based on the type of node we are checking. This
903 // simplifies life a bit for the callers.
904 bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
905  SDValue &CC, bool MatchStrict) const {
906  if (N.getOpcode() == ISD::SETCC) {
907  LHS = N.getOperand(0);
908  RHS = N.getOperand(1);
909  CC = N.getOperand(2);
910  return true;
911  }
912 
913  if (MatchStrict &&
914  (N.getOpcode() == ISD::STRICT_FSETCC ||
915  N.getOpcode() == ISD::STRICT_FSETCCS)) {
916  LHS = N.getOperand(1);
917  RHS = N.getOperand(2);
918  CC = N.getOperand(3);
919  return true;
920  }
921 
922  if (N.getOpcode() != ISD::SELECT_CC || !TLI.isConstTrueVal(N.getOperand(2)) ||
923  !TLI.isConstFalseVal(N.getOperand(3)))
924  return false;
925 
926  if (TLI.getBooleanContents(N.getValueType()) ==
927  TargetLowering::UndefinedBooleanContent)
928  return false;
929 
930  LHS = N.getOperand(0);
931  RHS = N.getOperand(1);
932  CC = N.getOperand(4);
933  return true;
934 }
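// e.g. (select_cc lhs, rhs, TrueVal, FalseVal, cc) is treated like
// (setcc lhs, rhs, cc) when TrueVal/FalseVal are the target's canonical
// true/false constants, with LHS/RHS/CC taken from operands 0, 1 and 4.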
935 
936 /// Return true if this is a SetCC-equivalent operation with only one use.
937 /// If this is true, it allows the users to invert the operation for free when
938 /// it is profitable to do so.
939 bool DAGCombiner::isOneUseSetCC(SDValue N) const {
940  SDValue N0, N1, N2;
941  if (isSetCCEquivalent(N, N0, N1, N2) && N->hasOneUse())
942  return true;
943  return false;
944 }
945 
946 static bool isConstantSplatVectorMaskForType(SDNode *N, EVT ScalarTy) {
947  if (!ScalarTy.isSimple())
948  return false;
949 
950  uint64_t MaskForTy = 0ULL;
951  switch (ScalarTy.getSimpleVT().SimpleTy) {
952  case MVT::i8:
953  MaskForTy = 0xFFULL;
954  break;
955  case MVT::i16:
956  MaskForTy = 0xFFFFULL;
957  break;
958  case MVT::i32:
959  MaskForTy = 0xFFFFFFFFULL;
960  break;
961  default:
962  return false;
963  break;
964  }
965 
966  APInt Val;
967  if (ISD::isConstantSplatVector(N, Val))
968  return Val.getLimitedValue() == MaskForTy;
969 
970  return false;
971 }
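// e.g. a constant splat of 0xFFFF checked against an i16 scalar type returns
// true, while the same splat checked against i32 does not.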
972 
973 // Determines if it is a constant integer or a splat/build vector of constant
974 // integers (and undefs).
975 // Do not permit build vector implicit truncation.
976 static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
977  if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N))
978  return !(Const->isOpaque() && NoOpaques);
979  if (N.getOpcode() != ISD::BUILD_VECTOR && N.getOpcode() != ISD::SPLAT_VECTOR)
980  return false;
981  unsigned BitWidth = N.getScalarValueSizeInBits();
982  for (const SDValue &Op : N->op_values()) {
983  if (Op.isUndef())
984  continue;
985  ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Op);
986  if (!Const || Const->getAPIntValue().getBitWidth() != BitWidth ||
987  (Const->isOpaque() && NoOpaques))
988  return false;
989  }
990  return true;
991 }
992 
993 // Determines if a BUILD_VECTOR is composed of all-constants possibly mixed with
994 // undef's.
995 static bool isAnyConstantBuildVector(SDValue V, bool NoOpaques = false) {
996  if (V.getOpcode() != ISD::BUILD_VECTOR)
997  return false;
998  return isConstantOrConstantVector(V, NoOpaques) ||
999  ISD::isBuildVectorOfConstantFPSDNodes(V.getNode());
1000 }
1001 
1002 // Determine if an indexed load's indexing can be split off; an opaque
1002 // target constant index prevents the split.
1003 static bool canSplitIdx(LoadSDNode *LD) {
1004  return MaySplitLoadIndex &&
1005  (LD->getOperand(2).getOpcode() != ISD::TargetConstant ||
1006  !cast<ConstantSDNode>(LD->getOperand(2))->isOpaque());
1007 }
1008 
1009 bool DAGCombiner::reassociationCanBreakAddressingModePattern(unsigned Opc,
1010  const SDLoc &DL,
1011  SDNode *N,
1012  SDValue N0,
1013  SDValue N1) {
1014  // Currently this only tries to ensure we don't undo the GEP splits done by
1015  // CodeGenPrepare when shouldConsiderGEPOffsetSplit is true. To ensure this,
1016  // we check if the following transformation would be problematic:
1017  // (load/store (add, (add, x, offset1), offset2)) ->
1018  // (load/store (add, x, offset1+offset2)).
1019 
1020  // (load/store (add, (add, x, y), offset2)) ->
1021  // (load/store (add, (add, x, offset2), y)).
1022 
1023  if (Opc != ISD::ADD || N0.getOpcode() != ISD::ADD)
1024  return false;
1025 
1026  auto *C2 = dyn_cast<ConstantSDNode>(N1);
1027  if (!C2)
1028  return false;
1029 
1030  const APInt &C2APIntVal = C2->getAPIntValue();
1031  if (C2APIntVal.getSignificantBits() > 64)
1032  return false;
1033 
1034  if (auto *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
1035  if (N0.hasOneUse())
1036  return false;
1037 
1038  const APInt &C1APIntVal = C1->getAPIntValue();
1039  const APInt CombinedValueIntVal = C1APIntVal + C2APIntVal;
1040  if (CombinedValueIntVal.getSignificantBits() > 64)
1041  return false;
1042  const int64_t CombinedValue = CombinedValueIntVal.getSExtValue();
1043 
1044  for (SDNode *Node : N->uses()) {
1045  if (auto *LoadStore = dyn_cast<MemSDNode>(Node)) {
1046  // Is x[offset2] already not a legal addressing mode? If so then
1047  // reassociating the constants breaks nothing (we test offset2 because
1048  // that's the one we hope to fold into the load or store).
1049  TargetLoweringBase::AddrMode AM;
1050  AM.HasBaseReg = true;
1051  AM.BaseOffs = C2APIntVal.getSExtValue();
1052  EVT VT = LoadStore->getMemoryVT();
1053  unsigned AS = LoadStore->getAddressSpace();
1054  Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
1055  if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
1056  continue;
1057 
1058  // Would x[offset1+offset2] still be a legal addressing mode?
1059  AM.BaseOffs = CombinedValue;
1060  if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
1061  return true;
1062  }
1063  }
1064  } else {
1065  if (auto *GA = dyn_cast<GlobalAddressSDNode>(N0.getOperand(1)))
1066  if (GA->getOpcode() == ISD::GlobalAddress && TLI.isOffsetFoldingLegal(GA))
1067  return false;
1068 
1069  for (SDNode *Node : N->uses()) {
1070  auto *LoadStore = dyn_cast<MemSDNode>(Node);
1071  if (!LoadStore)
1072  return false;
1073 
1074  // Is x[offset2] a legal addressing mode? If so, then reassociating
1075  // the constants breaks the address pattern.
1076  TargetLoweringBase::AddrMode AM;
1077  AM.HasBaseReg = true;
1078  AM.BaseOffs = C2APIntVal.getSExtValue();
1079  EVT VT = LoadStore->getMemoryVT();
1080  unsigned AS = LoadStore->getAddressSpace();
1081  Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
1082  if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
1083  return false;
1084  }
1085  return true;
1086  }
1087 
1088  return false;
1089 }
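// Illustrative example: for (load (add (add x, 8), 4088)) on a hypothetical
// target whose immediate offsets stop at 4095, offset 4088 is legal but the
// combined offset 4096 is not, so the reassociation is reported as harmful.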
1090 
1091 // Helper for DAGCombiner::reassociateOps. Try to reassociate an expression
1092 // such as (Opc N0, N1), if \p N0 is the same kind of operation as \p Opc.
1093 SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL,
1094  SDValue N0, SDValue N1) {
1095  EVT VT = N0.getValueType();
1096 
1097  if (N0.getOpcode() != Opc)
1098  return SDValue();
1099 
1100  SDValue N00 = N0.getOperand(0);
1101  SDValue N01 = N0.getOperand(1);
1102 
1103  if (DAG.isConstantIntBuildVectorOrConstantInt(N01)) {
1104  if (DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
1105  // Reassociate: (op (op x, c1), c2) -> (op x, (op c1, c2))
1106  if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, {N01, N1}))
1107  return DAG.getNode(Opc, DL, VT, N00, OpNode);
1108  return SDValue();
1109  }
1110  if (TLI.isReassocProfitable(DAG, N0, N1)) {
1111  // Reassociate: (op (op x, c1), y) -> (op (op x, y), c1)
1112  // iff (op x, c1) has one use
1113  SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N00, N1);
1114  return DAG.getNode(Opc, DL, VT, OpNode, N01);
1115  }
1116  }
1117 
1118  // Check for repeated operand logic simplifications.
1119  if (Opc == ISD::AND || Opc == ISD::OR) {
1120  // (N00 & N01) & N00 --> N00 & N01
1121  // (N00 & N01) & N01 --> N00 & N01
1122  // (N00 | N01) | N00 --> N00 | N01
1123  // (N00 | N01) | N01 --> N00 | N01
1124  if (N1 == N00 || N1 == N01)
1125  return N0;
1126  }
1127  if (Opc == ISD::XOR) {
1128  // (N00 ^ N01) ^ N00 --> N01
1129  if (N1 == N00)
1130  return N01;
1131  // (N00 ^ N01) ^ N01 --> N00
1132  if (N1 == N01)
1133  return N00;
1134  }
1135 
1136  if (TLI.isReassocProfitable(DAG, N0, N1)) {
1137  if (N1 != N01) {
1138  // Reassociate if (op N00, N1) already exists.
1139  if (SDNode *NE = DAG.getNodeIfExists(Opc, DAG.getVTList(VT), {N00, N1})) {
1140  // If (op (op N00, N1), N01) already exists, we need to stop
1141  // reassociating to avoid an infinite loop.
1142  if (!DAG.doesNodeExist(Opc, DAG.getVTList(VT), {SDValue(NE, 0), N01}))
1143  return DAG.getNode(Opc, DL, VT, SDValue(NE, 0), N01);
1144  }
1145  }
1146 
1147  if (N1 != N00) {
1148  // Reassociate if (op N01, N1) already exists.
1149  if (SDNode *NE = DAG.getNodeIfExists(Opc, DAG.getVTList(VT), {N01, N1})) {
1150  // If (op (op N01, N1), N00) already exists, we need to stop
1151  // reassociating to avoid an infinite loop.
1152  if (!DAG.doesNodeExist(Opc, DAG.getVTList(VT), {SDValue(NE, 0), N00}))
1153  return DAG.getNode(Opc, DL, VT, SDValue(NE, 0), N00);
1154  }
1155  }
1156  }
1157 
1158  return SDValue();
1159 }
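// In summary: (op (op x, c1), c2) -> (op x, (op c1, c2)) when the constants
// fold, (op (op x, c1), y) -> (op (op x, y), c1) when the target reports the
// reassociation as profitable, and repeated-operand cases such as
// (x & y) & x -> x & y are handled directly above.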
1160 
1161 // Try to reassociate commutative binops.
1162 SDValue DAGCombiner::reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
1163  SDValue N1, SDNodeFlags Flags) {
1164  assert(TLI.isCommutativeBinOp(Opc) && "Operation not commutative.");
1165 
1166  // Floating-point reassociation is not allowed without loose FP math.
1167  if (N0.getValueType().isFloatingPoint() ||
1168  N1.getValueType().isFloatingPoint())
1169  if (!Flags.hasAllowReassociation() || !Flags.hasNoSignedZeros())
1170  return SDValue();
1171 
1172  if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N0, N1))
1173  return Combined;
1174  if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N1, N0))
1175  return Combined;
1176  return SDValue();
1177 }
1178 
1179 SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
1180  bool AddTo) {
1181  assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
1182  ++NodesCombined;
1183  LLVM_DEBUG(dbgs() << "\nReplacing.1 "; N->dump(&DAG); dbgs() << "\nWith: ";
1184  To[0].dump(&DAG);
1185  dbgs() << " and " << NumTo - 1 << " other values\n");
1186  for (unsigned i = 0, e = NumTo; i != e; ++i)
1187  assert((!To[i].getNode() ||
1188  N->getValueType(i) == To[i].getValueType()) &&
1189  "Cannot combine value to value of different type!");
1190 
1191  WorklistRemover DeadNodes(*this);
1192  DAG.ReplaceAllUsesWith(N, To);
1193  if (AddTo) {
1194  // Push the new nodes and any users onto the worklist
1195  for (unsigned i = 0, e = NumTo; i != e; ++i) {
1196  if (To[i].getNode())
1197  AddToWorklistWithUsers(To[i].getNode());
1198  }
1199  }
1200 
1201  // Finally, if the node is now dead, remove it from the graph. The node
1202  // may not be dead if the replacement process recursively simplified to
1203  // something else needing this node.
1204  if (N->use_empty())
1205  deleteAndRecombine(N);
1206  return SDValue(N, 0);
1207 }
1208 
1209 void DAGCombiner::
1210 CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
1211  // Replace the old value with the new one.
1212  ++NodesCombined;
1213  LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.dump(&DAG);
1214  dbgs() << "\nWith: "; TLO.New.dump(&DAG); dbgs() << '\n');
1215 
1216  // Replace all uses.
1217  DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);
1218 
1219  // Push the new node and any (possibly new) users onto the worklist.
1220  AddToWorklistWithUsers(TLO.New.getNode());
1221 
1222  // Finally, if the node is now dead, remove it from the graph.
1223  recursivelyDeleteUnusedNodes(TLO.Old.getNode());
1224 }
1225 
1226 /// Check the specified integer node value to see if it can be simplified or if
1227 /// things it uses can be simplified by bit propagation. If so, return true.
1228 bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
1229  const APInt &DemandedElts,
1230  bool AssumeSingleUse) {
1231  TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1232  KnownBits Known;
1233  if (!TLI.SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, 0,
1234  AssumeSingleUse))
1235  return false;
1236 
1237  // Revisit the node.
1238  AddToWorklist(Op.getNode());
1239 
1240  CommitTargetLoweringOpt(TLO);
1241  return true;
1242 }
1243 
1244 /// Check the specified vector node value to see if it can be simplified or
1245 /// if things it uses can be simplified as it only uses some of the elements.
1246 /// If so, return true.
1247 bool DAGCombiner::SimplifyDemandedVectorElts(SDValue Op,
1248  const APInt &DemandedElts,
1249  bool AssumeSingleUse) {
1250  TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1251  APInt KnownUndef, KnownZero;
1252  if (!TLI.SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero,
1253  TLO, 0, AssumeSingleUse))
1254  return false;
1255 
1256  // Revisit the node.
1257  AddToWorklist(Op.getNode());
1258 
1259  CommitTargetLoweringOpt(TLO);
1260  return true;
1261 }
1262 
1263 void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
1264  SDLoc DL(Load);
1265  EVT VT = Load->getValueType(0);
1266  SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, SDValue(ExtLoad, 0));
1267 
1268  LLVM_DEBUG(dbgs() << "\nReplacing.9 "; Load->dump(&DAG); dbgs() << "\nWith: ";
1269  Trunc.dump(&DAG); dbgs() << '\n');
1270 
1271  DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
1272  DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));
1273 
1274  AddToWorklist(Trunc.getNode());
1275  recursivelyDeleteUnusedNodes(Load);
1276 }
1277 
1278 SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
1279  Replace = false;
1280  SDLoc DL(Op);
1281  if (ISD::isUNINDEXEDLoad(Op.getNode())) {
1282  LoadSDNode *LD = cast<LoadSDNode>(Op);
1283  EVT MemVT = LD->getMemoryVT();
1284  ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
1285  : LD->getExtensionType();
1286  Replace = true;
1287  return DAG.getExtLoad(ExtType, DL, PVT,
1288  LD->getChain(), LD->getBasePtr(),
1289  MemVT, LD->getMemOperand());
1290  }
1291 
1292  unsigned Opc = Op.getOpcode();
1293  switch (Opc) {
1294  default: break;
1295  case ISD::AssertSext:
1296  if (SDValue Op0 = SExtPromoteOperand(Op.getOperand(0), PVT))
1297  return DAG.getNode(ISD::AssertSext, DL, PVT, Op0, Op.getOperand(1));
1298  break;
1299  case ISD::AssertZext:
1300  if (SDValue Op0 = ZExtPromoteOperand(Op.getOperand(0), PVT))
1301  return DAG.getNode(ISD::AssertZext, DL, PVT, Op0, Op.getOperand(1));
1302  break;
1303  case ISD::Constant: {
1304  unsigned ExtOpc =
1305  Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
1306  return DAG.getNode(ExtOpc, DL, PVT, Op);
1307  }
1308  }
1309 
1310  if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
1311  return SDValue();
1312  return DAG.getNode(ISD::ANY_EXTEND, DL, PVT, Op);
1313 }
1314 
1315 SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
1316  if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT))
1317  return SDValue();
1318  EVT OldVT = Op.getValueType();
1319  SDLoc DL(Op);
1320  bool Replace = false;
1321  SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1322  if (!NewOp.getNode())
1323  return SDValue();
1324  AddToWorklist(NewOp.getNode());
1325 
1326  if (Replace)
1327  ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1328  return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, NewOp.getValueType(), NewOp,
1329  DAG.getValueType(OldVT));
1330 }
1331 
1332 SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
1333  EVT OldVT = Op.getValueType();
1334  SDLoc DL(Op);
1335  bool Replace = false;
1336  SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1337  if (!NewOp.getNode())
1338  return SDValue();
1339  AddToWorklist(NewOp.getNode());
1340 
1341  if (Replace)
1342  ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1343  return DAG.getZeroExtendInReg(NewOp, DL, OldVT);
1344 }
1345 
1346 /// Promote the specified integer binary operation if the target indicates it is
1347 /// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1348 /// i32 since i16 instructions are longer.
1349 SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
1350  if (!LegalOperations)
1351  return SDValue();
1352 
1353  EVT VT = Op.getValueType();
1354  if (VT.isVector() || !VT.isInteger())
1355  return SDValue();
1356 
1357  // If operation type is 'undesirable', e.g. i16 on x86, consider
1358  // promoting it.
1359  unsigned Opc = Op.getOpcode();
1360  if (TLI.isTypeDesirableForOp(Opc, VT))
1361  return SDValue();
1362 
1363  EVT PVT = VT;
1364  // Consult target whether it is a good idea to promote this operation and
1365  // what's the right type to promote it to.
1366  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1367  assert(PVT != VT && "Don't know what type to promote to!");
1368 
1369  LLVM_DEBUG(dbgs() << "\nPromoting "; Op.dump(&DAG));
1370 
1371  bool Replace0 = false;
1372  SDValue N0 = Op.getOperand(0);
1373  SDValue NN0 = PromoteOperand(N0, PVT, Replace0);
1374 
1375  bool Replace1 = false;
1376  SDValue N1 = Op.getOperand(1);
1377  SDValue NN1 = PromoteOperand(N1, PVT, Replace1);
1378  SDLoc DL(Op);
1379 
1380  SDValue RV =
1381  DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, NN0, NN1));
1382 
1383  // We are always replacing N0/N1's use in N and only need additional
1384  // replacements if there are additional uses.
1385  // Note: We are checking uses of the *nodes* (SDNode) rather than values
1386  // (SDValue) here because the node may reference multiple values
1387  // (for example, the chain value of a load node).
1388  Replace0 &= !N0->hasOneUse();
1389  Replace1 &= (N0 != N1) && !N1->hasOneUse();
1390 
1391  // Combine Op here so it is preserved past replacements.
1392  CombineTo(Op.getNode(), RV);
1393 
1394  // If operands have a use ordering, make sure we deal with
1395  // predecessor first.
1396  if (Replace0 && Replace1 && N0->isPredecessorOf(N1.getNode())) {
1397  std::swap(N0, N1);
1398  std::swap(NN0, NN1);
1399  }
1400 
1401  if (Replace0) {
1402  AddToWorklist(NN0.getNode());
1403  ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
1404  }
1405  if (Replace1) {
1406  AddToWorklist(NN1.getNode());
1407  ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());
1408  }
1409  return Op;
1410  }
1411  return SDValue();
1412 }
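// e.g. on a target where i16 arithmetic is undesirable but i32 is fine,
// (i16 add a, b) becomes (trunc (i32 add (anyext a), (anyext b))), subject to
// TLI.IsDesirableToPromoteOp choosing the promoted type.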
1413 
1414 /// Promote the specified integer shift operation if the target indicates it is
1415 /// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1416 /// i32 since i16 instructions are longer.
1417 SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
1418  if (!LegalOperations)
1419  return SDValue();
1420 
1421  EVT VT = Op.getValueType();
1422  if (VT.isVector() || !VT.isInteger())
1423  return SDValue();
1424 
1425  // If operation type is 'undesirable', e.g. i16 on x86, consider
1426  // promoting it.
1427  unsigned Opc = Op.getOpcode();
1428  if (TLI.isTypeDesirableForOp(Opc, VT))
1429  return SDValue();
1430 
1431  EVT PVT = VT;
1432  // Consult target whether it is a good idea to promote this operation and
1433  // what's the right type to promote it to.
1434  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1435  assert(PVT != VT && "Don't know what type to promote to!");
1436 
1437  LLVM_DEBUG(dbgs() << "\nPromoting "; Op.dump(&DAG));
1438 
1439  bool Replace = false;
1440  SDValue N0 = Op.getOperand(0);
1441  if (Opc == ISD::SRA)
1442  N0 = SExtPromoteOperand(N0, PVT);
1443  else if (Opc == ISD::SRL)
1444  N0 = ZExtPromoteOperand(N0, PVT);
1445  else
1446  N0 = PromoteOperand(N0, PVT, Replace);
1447 
1448  if (!N0.getNode())
1449  return SDValue();
1450 
1451  SDLoc DL(Op);
1452  SDValue N1 = Op.getOperand(1);
1453  SDValue RV =
1454  DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, N0, N1));
1455 
1456  if (Replace)
1457  ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());
1458 
1459  // Deal with Op being deleted.
1460  if (Op && Op.getOpcode() != ISD::DELETED_NODE)
1461  return RV;
1462  }
1463  return SDValue();
1464 }
1465 
1466 SDValue DAGCombiner::PromoteExtend(SDValue Op) {
1467  if (!LegalOperations)
1468  return SDValue();
1469 
1470  EVT VT = Op.getValueType();
1471  if (VT.isVector() || !VT.isInteger())
1472  return SDValue();
1473 
1474  // If operation type is 'undesirable', e.g. i16 on x86, consider
1475  // promoting it.
1476  unsigned Opc = Op.getOpcode();
1477  if (TLI.isTypeDesirableForOp(Opc, VT))
1478  return SDValue();
1479 
1480  EVT PVT = VT;
1481  // Consult target whether it is a good idea to promote this operation and
1482  // what's the right type to promote it to.
1483  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1484  assert(PVT != VT && "Don't know what type to promote to!");
1485  // fold (aext (aext x)) -> (aext x)
1486  // fold (aext (zext x)) -> (zext x)
1487  // fold (aext (sext x)) -> (sext x)
1488  LLVM_DEBUG(dbgs() << "\nPromoting "; Op.dump(&DAG));
1489  return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0));
1490  }
1491  return SDValue();
1492 }
1493 
1494 bool DAGCombiner::PromoteLoad(SDValue Op) {
1495  if (!LegalOperations)
1496  return false;
1497 
1498  if (!ISD::isUNINDEXEDLoad(Op.getNode()))
1499  return false;
1500 
1501  EVT VT = Op.getValueType();
1502  if (VT.isVector() || !VT.isInteger())
1503  return false;
1504 
1505  // If operation type is 'undesirable', e.g. i16 on x86, consider
1506  // promoting it.
1507  unsigned Opc = Op.getOpcode();
1508  if (TLI.isTypeDesirableForOp(Opc, VT))
1509  return false;
1510 
1511  EVT PVT = VT;
1512  // Consult target whether it is a good idea to promote this operation and
1513  // what's the right type to promote it to.
1514  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1515  assert(PVT != VT && "Don't know what type to promote to!");
1516 
1517  SDLoc DL(Op);
1518  SDNode *N = Op.getNode();
1519  LoadSDNode *LD = cast<LoadSDNode>(N);
1520  EVT MemVT = LD->getMemoryVT();
1521  ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
1522  : LD->getExtensionType();
1523  SDValue NewLD = DAG.getExtLoad(ExtType, DL, PVT,
1524  LD->getChain(), LD->getBasePtr(),
1525  MemVT, LD->getMemOperand());
1526  SDValue Result = DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD);
1527 
1528  LLVM_DEBUG(dbgs() << "\nPromoting "; N->dump(&DAG); dbgs() << "\nTo: ";
1529  Result.dump(&DAG); dbgs() << '\n');
1530 
1531  DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
1532  DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
1533 
1534  AddToWorklist(Result.getNode());
1535  recursivelyDeleteUnusedNodes(N);
1536  return true;
1537  }
1538 
1539  return false;
1540 }
1541 
1542 /// Recursively delete a node which has no uses and any operands for
1543 /// which it is the only use.
1544 ///
1545 /// Note that this both deletes the nodes and removes them from the worklist.
1546 /// It also adds any nodes that have had a user deleted to the worklist, as they
1547 /// may now have only one use and be subject to other combines.
1548 bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) {
1549  if (!N->use_empty())
1550  return false;
1551 
1552  SmallSetVector<SDNode *, 16> Nodes;
1553  Nodes.insert(N);
1554  do {
1555  N = Nodes.pop_back_val();
1556  if (!N)
1557  continue;
1558 
1559  if (N->use_empty()) {
1560  for (const SDValue &ChildN : N->op_values())
1561  Nodes.insert(ChildN.getNode());
1562 
1563  removeFromWorklist(N);
1564  DAG.DeleteNode(N);
1565  } else {
1566  AddToWorklist(N);
1567  }
1568  } while (!Nodes.empty());
1569  return true;
1570 }
1571 
1572 //===----------------------------------------------------------------------===//
1573 // Main DAG Combiner implementation
1574 //===----------------------------------------------------------------------===//
1575 
1576 void DAGCombiner::Run(CombineLevel AtLevel) {
1577  // Set the instance variables, so that the various visit routines may use them.
1578  Level = AtLevel;
1579  LegalDAG = Level >= AfterLegalizeDAG;
1580  LegalOperations = Level >= AfterLegalizeVectorOps;
1581  LegalTypes = Level >= AfterLegalizeTypes;
1582 
1583  WorklistInserter AddNodes(*this);
1584 
1585  // Add all the dag nodes to the worklist.
1586  for (SDNode &Node : DAG.allnodes())
1587  AddToWorklist(&Node);
1588 
1589  // Create a dummy node (which is not added to allnodes), that adds a reference
1590  // to the root node, preventing it from being deleted, and tracking any
1591  // changes of the root.
1592  HandleSDNode Dummy(DAG.getRoot());
1593 
1594  // While we have a valid worklist entry node, try to combine it.
1595  while (SDNode *N = getNextWorklistEntry()) {
1596  // If N has no uses, it is dead. Make sure to revisit all N's operands once
1597  // N is deleted from the DAG, since they too may now be dead or may have a
1598  // reduced number of uses, allowing other xforms.
1599  if (recursivelyDeleteUnusedNodes(N))
1600  continue;
1601 
1602  WorklistRemover DeadNodes(*this);
1603 
1604  // If this combine is running after legalizing the DAG, re-legalize any
1605  // nodes pulled off the worklist.
1606  if (LegalDAG) {
1607  SmallSetVector<SDNode *, 16> UpdatedNodes;
1608  bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes);
1609 
1610  for (SDNode *LN : UpdatedNodes)
1611  AddToWorklistWithUsers(LN);
1612 
1613  if (!NIsValid)
1614  continue;
1615  }
1616 
1617  LLVM_DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG));
1618 
1619  // Add any operands of the new node which have not yet been combined to the
1620  // worklist as well. Because the worklist uniques things already, this
1621  // won't repeatedly process the same operand.
1622  CombinedNodes.insert(N);
1623  for (const SDValue &ChildN : N->op_values())
1624  if (!CombinedNodes.count(ChildN.getNode()))
1625  AddToWorklist(ChildN.getNode());
1626 
1627  SDValue RV = combine(N);
1628 
1629  if (!RV.getNode())
1630  continue;
1631 
1632  ++NodesCombined;
1633 
1634  // If we get back the same node we passed in, rather than a new node or
1635  // zero, we know that the node must have defined multiple values and
1636  // CombineTo was used. Since CombineTo takes care of the worklist
1637  // mechanics for us, we have no work to do in this case.
1638  if (RV.getNode() == N)
1639  continue;
1640 
1641  assert(N->getOpcode() != ISD::DELETED_NODE &&
1642  RV.getOpcode() != ISD::DELETED_NODE &&
1643  "Node was deleted but visit returned new node!");
1644 
1645  LLVM_DEBUG(dbgs() << " ... into: "; RV.dump(&DAG));
1646 
1647  if (N->getNumValues() == RV->getNumValues())
1648  DAG.ReplaceAllUsesWith(N, RV.getNode());
1649  else {
1650  assert(N->getValueType(0) == RV.getValueType() &&
1651  N->getNumValues() == 1 && "Type mismatch");
1652  DAG.ReplaceAllUsesWith(N, &RV);
1653  }
1654 
1655  // Push the new node and any users onto the worklist. Omit this if the
1656  // new node is the EntryToken (e.g. if a store managed to get optimized
1657  // out), because re-visiting the EntryToken and its users will not uncover
1658  // any additional opportunities, but there may be a large number of such
1659  // users, potentially causing compile time explosion.
1660  if (RV.getOpcode() != ISD::EntryToken) {
1661  AddToWorklist(RV.getNode());
1662  AddUsersToWorklist(RV.getNode());
1663  }
1664 
1665  // Finally, if the node is now dead, remove it from the graph. The node
1666  // may not be dead if the replacement process recursively simplified to
1667  // something else needing this node. This will also take care of adding any
1668  // operands which have lost a user to the worklist.
1669  recursivelyDeleteUnusedNodes(N);
1670  }
1671 
1672  // If the root changed (e.g. it was a dead load), update the root.
1673  DAG.setRoot(Dummy.getValue());
1674  DAG.RemoveDeadNodes();
1675 }
1676 
1677 SDValue DAGCombiner::visit(SDNode *N) {
1678  switch (N->getOpcode()) {
1679  default: break;
1680  case ISD::TokenFactor: return visitTokenFactor(N);
1681  case ISD::MERGE_VALUES: return visitMERGE_VALUES(N);
1682  case ISD::ADD: return visitADD(N);
1683  case ISD::SUB: return visitSUB(N);
1684  case ISD::SADDSAT:
1685  case ISD::UADDSAT: return visitADDSAT(N);
1686  case ISD::SSUBSAT:
1687  case ISD::USUBSAT: return visitSUBSAT(N);
1688  case ISD::ADDC: return visitADDC(N);
1689  case ISD::SADDO:
1690  case ISD::UADDO: return visitADDO(N);
1691  case ISD::SUBC: return visitSUBC(N);
1692  case ISD::SSUBO:
1693  case ISD::USUBO: return visitSUBO(N);
1694  case ISD::ADDE: return visitADDE(N);
1695  case ISD::ADDCARRY: return visitADDCARRY(N);
1696  case ISD::SADDO_CARRY: return visitSADDO_CARRY(N);
1697  case ISD::SUBE: return visitSUBE(N);
1698  case ISD::SUBCARRY: return visitSUBCARRY(N);
1699  case ISD::SSUBO_CARRY: return visitSSUBO_CARRY(N);
1700  case ISD::SMULFIX:
1701  case ISD::SMULFIXSAT:
1702  case ISD::UMULFIX:
1703  case ISD::UMULFIXSAT: return visitMULFIX(N);
1704  case ISD::MUL: return visitMUL(N);
1705  case ISD::SDIV: return visitSDIV(N);
1706  case ISD::UDIV: return visitUDIV(N);
1707  case ISD::SREM:
1708  case ISD::UREM: return visitREM(N);
1709  case ISD::MULHU: return visitMULHU(N);
1710  case ISD::MULHS: return visitMULHS(N);
1711  case ISD::AVGFLOORS:
1712  case ISD::AVGFLOORU:
1713  case ISD::AVGCEILS:
1714  case ISD::AVGCEILU: return visitAVG(N);
1715  case ISD::SMUL_LOHI: return visitSMUL_LOHI(N);
1716  case ISD::UMUL_LOHI: return visitUMUL_LOHI(N);
1717  case ISD::SMULO:
1718  case ISD::UMULO: return visitMULO(N);
1719  case ISD::SMIN:
1720  case ISD::SMAX:
1721  case ISD::UMIN:
1722  case ISD::UMAX: return visitIMINMAX(N);
1723  case ISD::AND: return visitAND(N);
1724  case ISD::OR: return visitOR(N);
1725  case ISD::XOR: return visitXOR(N);
1726  case ISD::SHL: return visitSHL(N);
1727  case ISD::SRA: return visitSRA(N);
1728  case ISD::SRL: return visitSRL(N);
1729  case ISD::ROTR:
1730  case ISD::ROTL: return visitRotate(N);
1731  case ISD::FSHL:
1732  case ISD::FSHR: return visitFunnelShift(N);
1733  case ISD::SSHLSAT:
1734  case ISD::USHLSAT: return visitSHLSAT(N);
1735  case ISD::ABS: return visitABS(N);
1736  case ISD::BSWAP: return visitBSWAP(N);
1737  case ISD::BITREVERSE: return visitBITREVERSE(N);
1738  case ISD::CTLZ: return visitCTLZ(N);
1739  case ISD::CTLZ_ZERO_UNDEF: return visitCTLZ_ZERO_UNDEF(N);
1740  case ISD::CTTZ: return visitCTTZ(N);
1741  case ISD::CTTZ_ZERO_UNDEF: return visitCTTZ_ZERO_UNDEF(N);
1742  case ISD::CTPOP: return visitCTPOP(N);
1743  case ISD::SELECT: return visitSELECT(N);
1744  case ISD::VSELECT: return visitVSELECT(N);
1745  case ISD::SELECT_CC: return visitSELECT_CC(N);
1746  case ISD::SETCC: return visitSETCC(N);
1747  case ISD::SETCCCARRY: return visitSETCCCARRY(N);
1748  case ISD::SIGN_EXTEND: return visitSIGN_EXTEND(N);
1749  case ISD::ZERO_EXTEND: return visitZERO_EXTEND(N);
1750  case ISD::ANY_EXTEND: return visitANY_EXTEND(N);
1751  case ISD::AssertSext:
1752  case ISD::AssertZext: return visitAssertExt(N);
1753  case ISD::AssertAlign: return visitAssertAlign(N);
1754  case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N);
1755  case ISD::SIGN_EXTEND_VECTOR_INREG:
1756  case ISD::ZERO_EXTEND_VECTOR_INREG: return visitEXTEND_VECTOR_INREG(N);
1757  case ISD::TRUNCATE: return visitTRUNCATE(N);
1758  case ISD::BITCAST: return visitBITCAST(N);
1759  case ISD::BUILD_PAIR: return visitBUILD_PAIR(N);
1760  case ISD::FADD: return visitFADD(N);
1761  case ISD::STRICT_FADD: return visitSTRICT_FADD(N);
1762  case ISD::FSUB: return visitFSUB(N);
1763  case ISD::FMUL: return visitFMUL(N);
1764  case ISD::FMA: return visitFMA(N);
1765  case ISD::FDIV: return visitFDIV(N);
1766  case ISD::FREM: return visitFREM(N);
1767  case ISD::FSQRT: return visitFSQRT(N);
1768  case ISD::FCOPYSIGN: return visitFCOPYSIGN(N);
1769  case ISD::FPOW: return visitFPOW(N);
1770  case ISD::SINT_TO_FP: return visitSINT_TO_FP(N);
1771  case ISD::UINT_TO_FP: return visitUINT_TO_FP(N);
1772  case ISD::FP_TO_SINT: return visitFP_TO_SINT(N);
1773  case ISD::FP_TO_UINT: return visitFP_TO_UINT(N);
1774  case ISD::FP_ROUND: return visitFP_ROUND(N);
1775  case ISD::FP_EXTEND: return visitFP_EXTEND(N);
1776  case ISD::FNEG: return visitFNEG(N);
1777  case ISD::FABS: return visitFABS(N);
1778  case ISD::FFLOOR: return visitFFLOOR(N);
1779  case ISD::FMINNUM:
1780  case ISD::FMAXNUM:
1781  case ISD::FMINIMUM:
1782  case ISD::FMAXIMUM: return visitFMinMax(N);
1783  case ISD::FCEIL: return visitFCEIL(N);
1784  case ISD::FTRUNC: return visitFTRUNC(N);
1785  case ISD::BRCOND: return visitBRCOND(N);
1786  case ISD::BR_CC: return visitBR_CC(N);
1787  case ISD::LOAD: return visitLOAD(N);
1788  case ISD::STORE: return visitSTORE(N);
1789  case ISD::INSERT_VECTOR_ELT: return visitINSERT_VECTOR_ELT(N);
1790  case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
1791  case ISD::BUILD_VECTOR: return visitBUILD_VECTOR(N);
1792  case ISD::CONCAT_VECTORS: return visitCONCAT_VECTORS(N);
1793  case ISD::EXTRACT_SUBVECTOR: return visitEXTRACT_SUBVECTOR(N);
1794  case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N);
1795  case ISD::SCALAR_TO_VECTOR: return visitSCALAR_TO_VECTOR(N);
1796  case ISD::INSERT_SUBVECTOR: return visitINSERT_SUBVECTOR(N);
1797  case ISD::MGATHER: return visitMGATHER(N);
1798  case ISD::MLOAD: return visitMLOAD(N);
1799  case ISD::MSCATTER: return visitMSCATTER(N);
1800  case ISD::MSTORE: return visitMSTORE(N);
1801  case ISD::LIFETIME_END: return visitLIFETIME_END(N);
1802  case ISD::FP_TO_FP16: return visitFP_TO_FP16(N);
1803  case ISD::FP16_TO_FP: return visitFP16_TO_FP(N);
1804  case ISD::FP_TO_BF16: return visitFP_TO_BF16(N);
1805  case ISD::FREEZE: return visitFREEZE(N);
1806  case ISD::VECREDUCE_FADD:
1807  case ISD::VECREDUCE_FMUL:
1808  case ISD::VECREDUCE_ADD:
1809  case ISD::VECREDUCE_MUL:
1810  case ISD::VECREDUCE_AND:
1811  case ISD::VECREDUCE_OR:
1812  case ISD::VECREDUCE_XOR:
1813  case ISD::VECREDUCE_SMAX:
1814  case ISD::VECREDUCE_SMIN:
1815  case ISD::VECREDUCE_UMAX:
1816  case ISD::VECREDUCE_UMIN:
1817  case ISD::VECREDUCE_FMAX:
1818  case ISD::VECREDUCE_FMIN: return visitVECREDUCE(N);
1819 #define BEGIN_REGISTER_VP_SDNODE(SDOPC, ...) case ISD::SDOPC:
1820 #include "llvm/IR/VPIntrinsics.def"
1821  return visitVPOp(N);
1822  }
1823  return SDValue();
1824 }
1825 
1826 SDValue DAGCombiner::combine(SDNode *N) {
1827  SDValue RV;
1828  if (!DisableGenericCombines)
1829  RV = visit(N);
1830 
1831  // If nothing happened, try a target-specific DAG combine.
1832  if (!RV.getNode()) {
1833  assert(N->getOpcode() != ISD::DELETED_NODE &&
1834  "Node was deleted but visit returned NULL!");
1835 
1836  if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
1837  TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {
1838 
1839  // Expose the DAG combiner to the target combiner impls.
1840  TargetLowering::DAGCombinerInfo
1841  DagCombineInfo(DAG, Level, false, this);
1842 
1843  RV = TLI.PerformDAGCombine(N, DagCombineInfo);
1844  }
1845  }
1846 
1847  // If nothing happened still, try promoting the operation.
1848  if (!RV.getNode()) {
1849  switch (N->getOpcode()) {
1850  default: break;
1851  case ISD::ADD:
1852  case ISD::SUB:
1853  case ISD::MUL:
1854  case ISD::AND:
1855  case ISD::OR:
1856  case ISD::XOR:
1857  RV = PromoteIntBinOp(SDValue(N, 0));
1858  break;
1859  case ISD::SHL:
1860  case ISD::SRA:
1861  case ISD::SRL:
1862  RV = PromoteIntShiftOp(SDValue(N, 0));
1863  break;
1864  case ISD::SIGN_EXTEND:
1865  case ISD::ZERO_EXTEND:
1866  case ISD::ANY_EXTEND:
1867  RV = PromoteExtend(SDValue(N, 0));
1868  break;
1869  case ISD::LOAD:
1870  if (PromoteLoad(SDValue(N, 0)))
1871  RV = SDValue(N, 0);
1872  break;
1873  }
1874  }
1875 
1876  // If N is a commutative binary node, try to eliminate it if the commuted
1877  // version is already present in the DAG.
1878  if (!RV.getNode() && TLI.isCommutativeBinOp(N->getOpcode())) {
1879  SDValue N0 = N->getOperand(0);
1880  SDValue N1 = N->getOperand(1);
1881 
1882  // Constant operands are canonicalized to RHS.
1883  if (N0 != N1 && (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1))) {
1884  SDValue Ops[] = {N1, N0};
1885  SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
1886  N->getFlags());
1887  if (CSENode)
1888  return SDValue(CSENode, 0);
1889  }
1890  }
1891 
1892  return RV;
1893 }
1894 
1895 /// Given a node, return its input chain if it has one, otherwise return a null
1896 /// sd operand.
1897 static SDValue getInputChainForNode(SDNode *N) {
1898  if (unsigned NumOps = N->getNumOperands()) {
1899  if (N->getOperand(0).getValueType() == MVT::Other)
1900  return N->getOperand(0);
1901  if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
1902  return N->getOperand(NumOps-1);
1903  for (unsigned i = 1; i < NumOps-1; ++i)
1904  if (N->getOperand(i).getValueType() == MVT::Other)
1905  return N->getOperand(i);
1906  }
1907  return SDValue();
1908 }
1909 
1910 SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
1911  // If N has two operands, where one has an input chain equal to the other,
1912  // the 'other' chain is redundant.
1913  if (N->getNumOperands() == 2) {
1914  if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
1915  return N->getOperand(0);
1916  if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
1917  return N->getOperand(1);
1918  }
1919 
1920  // Don't simplify token factors if optnone.
1921  if (OptLevel == CodeGenOpt::None)
1922  return SDValue();
1923 
1924  // Don't simplify the token factor if the node itself has too many operands.
1925  if (N->getNumOperands() > TokenFactorInlineLimit)
1926  return SDValue();
1927 
1928  // If the sole user is a token factor, we should make sure we have a
1929  // chance to merge them together. This prevents TF chains from inhibiting
1930  // optimizations.
1931  if (N->hasOneUse() && N->use_begin()->getOpcode() == ISD::TokenFactor)
1932  AddToWorklist(*(N->use_begin()));
1933 
1934  SmallVector<SDNode *, 8> TFs; // List of token factors to visit.
1935  SmallVector<SDValue, 8> Ops; // Ops for replacing token factor.
1936  SmallPtrSet<SDNode*, 16> SeenOps;
1937  bool Changed = false; // If we should replace this token factor.
1938 
1939  // Start out with this token factor.
1940  TFs.push_back(N);
1941 
1942  // Iterate through token factors. The TFs list grows when new token factors
1943  // are encountered.
1944  for (unsigned i = 0; i < TFs.size(); ++i) {
1945  // Limit number of nodes to inline, to avoid quadratic compile times.
1946  // We have to add the outstanding Token Factors to Ops, otherwise we might
1947  // drop Ops from the resulting Token Factors.
1948  if (Ops.size() > TokenFactorInlineLimit) {
1949  for (unsigned j = i; j < TFs.size(); j++)
1950  Ops.emplace_back(TFs[j], 0);
1951  // Drop unprocessed Token Factors from TFs, so we do not add them to the
1952  // combiner worklist later.
1953  TFs.resize(i);
1954  break;
1955  }
1956 
1957  SDNode *TF = TFs[i];
1958  // Check each of the operands.
1959  for (const SDValue &Op : TF->op_values()) {
1960  switch (Op.getOpcode()) {
1961  case ISD::EntryToken:
1962  // Entry tokens don't need to be added to the list. They are
1963  // redundant.
1964  Changed = true;
1965  break;
1966 
1967  case ISD::TokenFactor:
1968  if (Op.hasOneUse() && !is_contained(TFs, Op.getNode())) {
1969  // Queue up for processing.
1970  TFs.push_back(Op.getNode());
1971  Changed = true;
1972  break;
1973  }
1974  [[fallthrough]];
1975 
1976  default:
1977  // Only add if it isn't already in the list.
1978  if (SeenOps.insert(Op.getNode()).second)
1979  Ops.push_back(Op);
1980  else
1981  Changed = true;
1982  break;
1983  }
1984  }
1985  }
1986 
1987  // Re-visit inlined Token Factors, to clean them up in case they have been
1988  // removed. Skip the first Token Factor, as this is the current node.
1989  for (unsigned i = 1, e = TFs.size(); i < e; i++)
1990  AddToWorklist(TFs[i]);
1991 
1992  // Remove Nodes that are chained to another node in the list. Do so
1993  // by walking up chains breadth-first, stopping when we've seen
1994  // another operand. In general we must climb to the EntryNode, but we can exit
1995  // early if we find all remaining work is associated with just one operand as
1996  // no further pruning is possible.
1997 
1998  // List of nodes to search through and original Ops from which they originate.
1999  SmallVector<std::pair<SDNode *, unsigned>, 8> Worklist;
2000  SmallVector<unsigned, 8> OpWorkCount; // Count of work for each Op.
2001  SmallPtrSet<SDNode *, 16> SeenChains;
2002  bool DidPruneOps = false;
2003 
2004  unsigned NumLeftToConsider = 0;
2005  for (const SDValue &Op : Ops) {
2006  Worklist.push_back(std::make_pair(Op.getNode(), NumLeftToConsider++));
2007  OpWorkCount.push_back(1);
2008  }
2009 
2010  auto AddToWorklist = [&](unsigned CurIdx, SDNode *Op, unsigned OpNumber) {
2011  // If this is an Op, we can remove the op from the list. Re-mark any
2012  // search associated with it as being from the current OpNumber.
2013  if (SeenOps.contains(Op)) {
2014  Changed = true;
2015  DidPruneOps = true;
2016  unsigned OrigOpNumber = 0;
2017  while (OrigOpNumber < Ops.size() && Ops[OrigOpNumber].getNode() != Op)
2018  OrigOpNumber++;
2019  assert((OrigOpNumber != Ops.size()) &&
2020  "expected to find TokenFactor Operand");
2021  // Re-mark worklist from OrigOpNumber to OpNumber
2022  for (unsigned i = CurIdx + 1; i < Worklist.size(); ++i) {
2023  if (Worklist[i].second == OrigOpNumber) {
2024  Worklist[i].second = OpNumber;
2025  }
2026  }
2027  OpWorkCount[OpNumber] += OpWorkCount[OrigOpNumber];
2028  OpWorkCount[OrigOpNumber] = 0;
2029  NumLeftToConsider--;
2030  }
2031  // Add if it's a new chain
2032  if (SeenChains.insert(Op).second) {
2033  OpWorkCount[OpNumber]++;
2034  Worklist.push_back(std::make_pair(Op, OpNumber));
2035  }
2036  };
2037 
2038  for (unsigned i = 0; i < Worklist.size() && i < 1024; ++i) {
2039  // We need to consider at least 2 Ops to prune.
2040  if (NumLeftToConsider <= 1)
2041  break;
2042  auto CurNode = Worklist[i].first;
2043  auto CurOpNumber = Worklist[i].second;
2044  assert((OpWorkCount[CurOpNumber] > 0) &&
2045  "Node should not appear in worklist");
2046  switch (CurNode->getOpcode()) {
2047  case ISD::EntryToken:
2048  // Hitting EntryToken is the only way for the search to terminate without
2049  // hitting
2050  // another operand's search. Prevent us from marking this operand
2051  // considered.
2052  NumLeftToConsider++;
2053  break;
2054  case ISD::TokenFactor:
2055  for (const SDValue &Op : CurNode->op_values())
2056  AddToWorklist(i, Op.getNode(), CurOpNumber);
2057  break;
2058  case ISD::LIFETIME_START:
2059  case ISD::LIFETIME_END:
2060  case ISD::CopyFromReg:
2061  case ISD::CopyToReg:
2062  AddToWorklist(i, CurNode->getOperand(0).getNode(), CurOpNumber);
2063  break;
2064  default:
2065  if (auto *MemNode = dyn_cast<MemSDNode>(CurNode))
2066  AddToWorklist(i, MemNode->getChain().getNode(), CurOpNumber);
2067  break;
2068  }
2069  OpWorkCount[CurOpNumber]--;
2070  if (OpWorkCount[CurOpNumber] == 0)
2071  NumLeftToConsider--;
2072  }
2073 
2074  // If we've changed things around then replace token factor.
2075  if (Changed) {
2076  SDValue Result;
2077  if (Ops.empty()) {
2078  // The entry token is the only possible outcome.
2079  Result = DAG.getEntryNode();
2080  } else {
2081  if (DidPruneOps) {
2082  SmallVector<SDValue, 8> PrunedOps;
2083  //
2084  for (const SDValue &Op : Ops) {
2085  if (SeenChains.count(Op.getNode()) == 0)
2086  PrunedOps.push_back(Op);
2087  }
2088  Result = DAG.getTokenFactor(SDLoc(N), PrunedOps);
2089  } else {
2090  Result = DAG.getTokenFactor(SDLoc(N), Ops);
2091  }
2092  }
2093  return Result;
2094  }
2095  return SDValue();
2096 }
2097 
2098 /// MERGE_VALUES can always be eliminated.
2099 SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
2100  WorklistRemover DeadNodes(*this);
2101  // Replacing results may cause a different MERGE_VALUES to suddenly
2102  // be CSE'd with N, and carry its uses with it. Iterate until no
2103  // uses remain, to ensure that the node can be safely deleted.
2104  // First add the users of this node to the work list so that they
2105  // can be tried again once they have new operands.
2106  AddUsersToWorklist(N);
2107  do {
2108  // Do as a single replacement to avoid rewalking use lists.
2109  SmallVector<SDValue, 8> Ops;
2110  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
2111  Ops.push_back(N->getOperand(i));
2112  DAG.ReplaceAllUsesWith(N, Ops.data());
2113  } while (!N->use_empty());
2114  deleteAndRecombine(N);
2115  return SDValue(N, 0); // Return N so it doesn't get rechecked!
2116 }
2117 
2118 /// If \p N is a ConstantSDNode with isOpaque() == false, return it cast to a
2119 /// ConstantSDNode pointer, else nullptr.
2120 static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) {
2121  ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N);
2122  return Const != nullptr && !Const->isOpaque() ? Const : nullptr;
2123 }
2124 
2125 /// Return true if 'Use' is a load or a store that uses N as its base pointer
2126 /// and that N may be folded in the load / store addressing mode.
2127 static bool canFoldInAddressingMode(SDNode *N, SDNode *Use, SelectionDAG &DAG,
2128  const TargetLowering &TLI) {
2129  EVT VT;
2130  unsigned AS;
2131 
2132  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Use)) {
2133  if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
2134  return false;
2135  VT = LD->getMemoryVT();
2136  AS = LD->getAddressSpace();
2137  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Use)) {
2138  if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
2139  return false;
2140  VT = ST->getMemoryVT();
2141  AS = ST->getAddressSpace();
2142  } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(Use)) {
2143  if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
2144  return false;
2145  VT = LD->getMemoryVT();
2146  AS = LD->getAddressSpace();
2147  } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(Use)) {
2148  if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
2149  return false;
2150  VT = ST->getMemoryVT();
2151  AS = ST->getAddressSpace();
2152  } else {
2153  return false;
2154  }
2155 
2156  TargetLowering::AddrMode AM;
2157  if (N->getOpcode() == ISD::ADD) {
2158  AM.HasBaseReg = true;
2159  ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
2160  if (Offset)
2161  // [reg +/- imm]
2162  AM.BaseOffs = Offset->getSExtValue();
2163  else
2164  // [reg +/- reg]
2165  AM.Scale = 1;
2166  } else if (N->getOpcode() == ISD::SUB) {
2167  AM.HasBaseReg = true;
2168  ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
2169  if (Offset)
2170  // [reg +/- imm]
2171  AM.BaseOffs = -Offset->getSExtValue();
2172  else
2173  // [reg +/- reg]
2174  AM.Scale = 1;
2175  } else {
2176  return false;
2177  }
2178 
2179  return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM,
2180  VT.getTypeForEVT(*DAG.getContext()), AS);
2181 }
2182 
2183 /// This inverts a canonicalization in IR that replaces a variable select arm
2184 /// with an identity constant. Codegen improves if we re-use the variable
2185 /// operand rather than load a constant. This can also be converted into a
2186 /// masked vector operation if the target supports it.
2187 static SDValue foldSelectWithIdentityConstant(SDNode *N, SelectionDAG &DAG,
2188  bool ShouldCommuteOperands) {
2189  // Match a select as operand 1. The identity constant that we are looking for
2190  // is only valid as operand 1 of a non-commutative binop.
2191  SDValue N0 = N->getOperand(0);
2192  SDValue N1 = N->getOperand(1);
2193  if (ShouldCommuteOperands)
2194  std::swap(N0, N1);
2195 
2196  // TODO: Should this apply to scalar select too?
2197  if (N1.getOpcode() != ISD::VSELECT || !N1.hasOneUse())
2198  return SDValue();
2199 
2200  // We can't hoist div/rem because of immediate UB (not speculatable).
2201  unsigned Opcode = N->getOpcode();
2202  if (!DAG.isSafeToSpeculativelyExecute(Opcode))
2203  return SDValue();
2204 
2205  EVT VT = N->getValueType(0);
2206  SDValue Cond = N1.getOperand(0);
2207  SDValue TVal = N1.getOperand(1);
2208  SDValue FVal = N1.getOperand(2);
2209 
2210  // This transform increases uses of N0, so freeze it to be safe.
2211  // binop N0, (vselect Cond, IDC, FVal) --> vselect Cond, N0, (binop N0, FVal)
2212  unsigned OpNo = ShouldCommuteOperands ? 0 : 1;
2213  if (isNeutralConstant(Opcode, N->getFlags(), TVal, OpNo)) {
2214  SDValue F0 = DAG.getFreeze(N0);
2215  SDValue NewBO = DAG.getNode(Opcode, SDLoc(N), VT, F0, FVal, N->getFlags());
2216  return DAG.getSelect(SDLoc(N), VT, Cond, F0, NewBO);
2217  }
2218  // binop N0, (vselect Cond, TVal, IDC) --> vselect Cond, (binop N0, TVal), N0
2219  if (isNeutralConstant(Opcode, N->getFlags(), FVal, OpNo)) {
2220  SDValue F0 = DAG.getFreeze(N0);
2221  SDValue NewBO = DAG.getNode(Opcode, SDLoc(N), VT, F0, TVal, N->getFlags());
2222  return DAG.getSelect(SDLoc(N), VT, Cond, NewBO, F0);
2223  }
2224 
2225  return SDValue();
2226 }
2227 
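// ----- Editor's illustrative sketch; not part of the upstream DAGCombiner.cpp. -----
// Scalar model of the rewrite above for ISD::ADD, whose identity constant is 0:
//   add X, (select Cond, 0, FVal)  ==  select Cond, X, (add X, FVal)
// The constexpr helpers and values are hypothetical.
#include <cstdint>

constexpr int32_t beforeFold(bool Cond, int32_t X, int32_t FVal) {
  return X + (Cond ? 0 : FVal);
}
constexpr int32_t afterFold(bool Cond, int32_t X, int32_t FVal) {
  return Cond ? X : (X + FVal);
}

static_assert(beforeFold(true, 7, 42) == afterFold(true, 7, 42),
              "identity arm simply reuses X");
static_assert(beforeFold(false, 7, 42) == afterFold(false, 7, 42),
              "other arm keeps the add");
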
2228 SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
2229  assert(TLI.isBinOp(BO->getOpcode()) && BO->getNumValues() == 1 &&
2230  "Unexpected binary operator");
2231 
2232  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2233  auto BinOpcode = BO->getOpcode();
2234  EVT VT = BO->getValueType(0);
2235  if (TLI.shouldFoldSelectWithIdentityConstant(BinOpcode, VT)) {
2236  if (SDValue Sel = foldSelectWithIdentityConstant(BO, DAG, false))
2237  return Sel;
2238 
2239  if (TLI.isCommutativeBinOp(BO->getOpcode()))
2240  if (SDValue Sel = foldSelectWithIdentityConstant(BO, DAG, true))
2241  return Sel;
2242  }
2243 
2244  // Don't do this unless the old select is going away. We want to eliminate the
2245  // binary operator, not replace a binop with a select.
2246  // TODO: Handle ISD::SELECT_CC.
2247  unsigned SelOpNo = 0;
2248  SDValue Sel = BO->getOperand(0);
2249  if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
2250  SelOpNo = 1;
2251  Sel = BO->getOperand(1);
2252  }
2253 
2254  if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
2255  return SDValue();
2256 
2257  SDValue CT = Sel.getOperand(1);
2258  if (!isConstantOrConstantVector(CT, true) &&
2259  !DAG.isConstantFPBuildVectorOrConstantFP(CT))
2260  return SDValue();
2261 
2262  SDValue CF = Sel.getOperand(2);
2263  if (!isConstantOrConstantVector(CF, true) &&
2264  !DAG.isConstantFPBuildVectorOrConstantFP(CF))
2265  return SDValue();
2266 
2267  // Bail out if any constants are opaque because we can't constant fold those.
2268  // The exception is "and" and "or" with either 0 or -1 in which case we can
2269  // propagate non-constant operands into the select. I.e.:
2270  // and (select Cond, 0, -1), X --> select Cond, 0, X
2271  // or X, (select Cond, -1, 0) --> select Cond, -1, X
2272  bool CanFoldNonConst =
2273  (BinOpcode == ISD::AND || BinOpcode == ISD::OR) &&
2274  ((isNullOrNullSplat(CT) && isAllOnesOrAllOnesSplat(CF)) ||
2275  (isNullOrNullSplat(CF) && isAllOnesOrAllOnesSplat(CT)));
2276 
2277  SDValue CBO = BO->getOperand(SelOpNo ^ 1);
2278  if (!CanFoldNonConst &&
2279  !isConstantOrConstantVector(CBO, true) &&
2280  !DAG.isConstantFPBuildVectorOrConstantFP(CBO))
2281  return SDValue();
2282 
2283  SDLoc DL(Sel);
2284  SDValue NewCT, NewCF;
2285 
2286  if (CanFoldNonConst) {
2287  // If CBO is an opaque constant, we can't rely on getNode to constant fold.
2288  if ((BinOpcode == ISD::AND && isNullOrNullSplat(CT)) ||
2289  (BinOpcode == ISD::OR && isAllOnesOrAllOnesSplat(CT)))
2290  NewCT = CT;
2291  else
2292  NewCT = CBO;
2293 
2294  if ((BinOpcode == ISD::AND && isNullOrNullSplat(CF)) ||
2295  (BinOpcode == ISD::OR && isAllOnesOrAllOnesSplat(CF)))
2296  NewCF = CF;
2297  else
2298  NewCF = CBO;
2299  } else {
2300  // We have a select-of-constants followed by a binary operator with a
2301  // constant. Eliminate the binop by pulling the constant math into the
2302  // select. Example: add (select Cond, CT, CF), CBO --> select Cond, CT +
2303  // CBO, CF + CBO
2304  NewCT = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CT)
2305  : DAG.getNode(BinOpcode, DL, VT, CT, CBO);
2306  if (!CanFoldNonConst && !NewCT.isUndef() &&
2307  !isConstantOrConstantVector(NewCT, true) &&
2308  !DAG.isConstantFPBuildVectorOrConstantFP(NewCT))
2309  return SDValue();
2310 
2311  NewCF = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CF)
2312  : DAG.getNode(BinOpcode, DL, VT, CF, CBO);
2313  if (!CanFoldNonConst && !NewCF.isUndef() &&
2314  !isConstantOrConstantVector(NewCF, true) &&
2315  !DAG.isConstantFPBuildVectorOrConstantFP(NewCF))
2316  return SDValue();
2317  }
2318 
2319  SDValue SelectOp = DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF);
2320  SelectOp->setFlags(BO->getFlags());
2321  return SelectOp;
2322 }
2323 
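// ----- Editor's illustrative sketch; not part of the upstream DAGCombiner.cpp. -----
// Scalar model of the select-of-constants case above:
//   add (select Cond, CT, CF), CBO  -->  select Cond, CT + CBO, CF + CBO
// so the binary operator disappears and only folded constants remain in the
// select arms. The concrete constants are hypothetical.
#include <cstdint>

constexpr int32_t beforeFold(bool Cond) { return (Cond ? 10 : -3) + 5; }
constexpr int32_t afterFold(bool Cond) { return Cond ? 15 : 2; }

static_assert(beforeFold(true) == afterFold(true) &&
              beforeFold(false) == afterFold(false),
              "constant math pulled into the select arms");
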
2324 static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, SelectionDAG &DAG) {
2325  assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2326  "Expecting add or sub");
2327 
2328  // Match a constant operand and a zext operand for the math instruction:
2329  // add Z, C
2330  // sub C, Z
2331  bool IsAdd = N->getOpcode() == ISD::ADD;
2332  SDValue C = IsAdd ? N->getOperand(1) : N->getOperand(0);
2333  SDValue Z = IsAdd ? N->getOperand(0) : N->getOperand(1);
2334  auto *CN = dyn_cast<ConstantSDNode>(C);
2335  if (!CN || Z.getOpcode() != ISD::ZERO_EXTEND)
2336  return SDValue();
2337 
2338  // Match the zext operand as a setcc of a boolean.
2339  if (Z.getOperand(0).getOpcode() != ISD::SETCC ||
2340  Z.getOperand(0).getValueType() != MVT::i1)
2341  return SDValue();
2342 
2343  // Match the compare as: setcc (X & 1), 0, eq.
2344  SDValue SetCC = Z.getOperand(0);
2345  ISD::CondCode CC = cast<CondCodeSDNode>(SetCC->getOperand(2))->get();
2346  if (CC != ISD::SETEQ || !isNullConstant(SetCC.getOperand(1)) ||
2347  SetCC.getOperand(0).getOpcode() != ISD::AND ||
2348  !isOneConstant(SetCC.getOperand(0).getOperand(1)))
2349  return SDValue();
2350 
2351  // We are adding/subtracting a constant and an inverted low bit. Turn that
2352  // into a subtract/add of the low bit with incremented/decremented constant:
2353  // add (zext i1 (seteq (X & 1), 0)), C --> sub C+1, (zext (X & 1))
2354  // sub C, (zext i1 (seteq (X & 1), 0)) --> add C-1, (zext (X & 1))
2355  EVT VT = C.getValueType();
2356  SDLoc DL(N);
2357  SDValue LowBit = DAG.getZExtOrTrunc(SetCC.getOperand(0), DL, VT);
2358  SDValue C1 = IsAdd ? DAG.getConstant(CN->getAPIntValue() + 1, DL, VT) :
2359  DAG.getConstant(CN->getAPIntValue() - 1, DL, VT);
2360  return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, C1, LowBit);
2361 }
2362 
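// ----- Editor's illustrative sketch; not part of the upstream DAGCombiner.cpp. -----
// Arithmetic behind the rewrite above: zext(seteq (X & 1), 0) equals 1 - (X & 1),
// hence  C + zext(...) == (C + 1) - (X & 1)  and  C - zext(...) == (C - 1) + (X & 1).
// The test constants are hypothetical.
#include <cassert>
#include <cstdint>

int main() {
  const uint32_t C = 100;
  const uint32_t Xs[] = {0u, 1u, 2u, 3u, 0xFFFFFFFFu};
  for (uint32_t X : Xs) {
    const uint32_t ZextEq = ((X & 1) == 0) ? 1u : 0u; // zext i1 (seteq (X & 1), 0)
    assert(C + ZextEq == (C + 1) - (X & 1));
    assert(C - ZextEq == (C - 1) + (X & 1));
  }
  return 0;
}
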
2363 /// Try to fold a 'not' shifted sign-bit with add/sub with constant operand into
2364 /// a shift and add with a different constant.
2365 static SDValue foldAddSubOfSignBit(SDNode *N, SelectionDAG &DAG) {
2366  assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2367  "Expecting add or sub");
2368 
2369  // We need a constant operand for the add/sub, and the other operand is a
2370  // logical shift right: add (srl), C or sub C, (srl).
2371  bool IsAdd = N->getOpcode() == ISD::ADD;
2372  SDValue ConstantOp = IsAdd ? N->getOperand(1) : N->getOperand(0);
2373  SDValue ShiftOp = IsAdd ? N->getOperand(0) : N->getOperand(1);
2374  if (!DAG.isConstantIntBuildVectorOrConstantInt(ConstantOp) ||
2375  ShiftOp.getOpcode() != ISD::SRL)
2376  return SDValue();
2377 
2378  // The shift must be of a 'not' value.
2379  SDValue Not = ShiftOp.getOperand(0);
2380  if (!Not.hasOneUse() || !isBitwiseNot(Not))
2381  return SDValue();
2382 
2383  // The shift must be moving the sign bit to the least-significant-bit.
2384  EVT VT = ShiftOp.getValueType();
2385  SDValue ShAmt = ShiftOp.getOperand(1);
2386  ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
2387  if (!ShAmtC || ShAmtC->getAPIntValue() != (VT.getScalarSizeInBits() - 1))
2388  return SDValue();
2389 
2390  // Eliminate the 'not' by adjusting the shift and add/sub constant:
2391  // add (srl (not X), 31), C --> add (sra X, 31), (C + 1)
2392  // sub C, (srl (not X), 31) --> add (srl X, 31), (C - 1)
2393  SDLoc DL(N);
2394  if (SDValue NewC = DAG.FoldConstantArithmetic(
2395  IsAdd ? ISD::ADD : ISD::SUB, DL, VT,
2396  {ConstantOp, DAG.getConstant(1, DL, VT)})) {
2397  SDValue NewShift = DAG.getNode(IsAdd ? ISD::SRA : ISD::SRL, DL, VT,
2398  Not.getOperand(0), ShAmt);
2399  return DAG.getNode(ISD::ADD, DL, VT, NewShift, NewC);
2400  }
2401 
2402  return SDValue();
2403 }
2404 
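// ----- Editor's illustrative sketch; not part of the upstream DAGCombiner.cpp. -----
// Arithmetic behind the rewrite above, for i32: (not X) >>u 31 is 1 when X is
// non-negative and 0 otherwise, while X >>s 31 is 0 or -1, so
//   add (srl (not X), 31), C  ==  add (sra X, 31), (C + 1)
//   sub C, (srl (not X), 31)  ==  add (srl X, 31), (C - 1)
// The test constants are hypothetical; wrapping unsigned arithmetic models i32.
#include <cassert>
#include <cstdint>

int main() {
  const uint32_t C = 100;
  const uint32_t Xs[] = {0u, 1u, 0x7FFFFFFFu, 0x80000000u, 0xFFFFFFFFu};
  for (uint32_t X : Xs) {
    const uint32_t SrlNot = ~X >> 31;                          // inverted sign bit
    const uint32_t Sra = (X & 0x80000000u) ? 0xFFFFFFFFu : 0u; // sign splat
    assert(SrlNot + C == Sra + (C + 1));
    assert(C - SrlNot == (X >> 31) + (C - 1));
  }
  return 0;
}
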
2405 static bool isADDLike(SDValue V, const SelectionDAG &DAG) {
2406  unsigned Opcode = V.getOpcode();
2407  if (Opcode == ISD::OR)
2408  return DAG.haveNoCommonBitsSet(V.getOperand(0), V.getOperand(1));
2409  if (Opcode == ISD::XOR)
2410  return isMinSignedConstant(V.getOperand(1));
2411  return false;
2412 }
2413 
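// ----- Editor's illustrative sketch; not part of the upstream DAGCombiner.cpp. -----
// Why the two cases above are "ADD-like", shown with wrapping i32 arithmetic:
//  * or X, C equals add X, C when X and C share no set bits (no carries occur).
//  * xor X, 0x80000000 equals add X, 0x80000000: both just flip the sign bit,
//    since the carry out of the top bit is discarded.
// The test values are hypothetical.
#include <cassert>
#include <cstdint>

int main() {
  const uint32_t Xs[] = {0u, 0x12340000u, 0x7FFF0000u, 0xFFFF0000u};
  for (uint32_t X : Xs) {
    const uint32_t C = 0x0000ABCDu; // disjoint from every X above
    assert((X & C) == 0 && (X | C) == X + C);
    const uint32_t MinSigned = 0x80000000u;
    assert((X ^ MinSigned) == X + MinSigned);
  }
  return 0;
}
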
2414 /// Try to fold a node that behaves like an ADD (note that N isn't necessarily
2415 /// an ISD::ADD here, it could for example be an ISD::OR if we know that there
2416 /// are no common bits set in the operands).
2417 SDValue DAGCombiner::visitADDLike(SDNode *N) {
2418  SDValue N0 = N->getOperand(0);
2419  SDValue N1 = N->getOperand(1);
2420  EVT VT = N0.getValueType();
2421  SDLoc DL(N);
2422 
2423  // fold (add x, undef) -> undef
2424  if (N0.isUndef())
2425  return N0;
2426  if (N1.isUndef())
2427  return N1;
2428 
2429  // fold (add c1, c2) -> c1+c2
2430  if (SDValue C = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N0, N1}))
2431  return C;
2432 
2433  // canonicalize constant to RHS
2434  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
2435  !DAG.isConstantIntBuildVectorOrConstantInt(N1))
2436  return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
2437 
2438  // fold vector ops
2439  if (VT.isVector()) {
2440  if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
2441  return FoldedVOp;
2442 
2443  // fold (add x, 0) -> x, vector edition
2444  if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
2445  return N0;
2446  }
2447 
2448  // fold (add x, 0) -> x
2449  if (isNullConstant(N1))
2450  return N0;
2451 
2452  if (N0.getOpcode() == ISD::SUB) {
2453  SDValue N00 = N0.getOperand(0);
2454  SDValue N01 = N0.getOperand(1);
2455 
2456  // fold ((A-c1)+c2) -> (A+(c2-c1))
2457  if (SDValue Sub = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N1, N01}))
2458  return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Sub);
2459 
2460  // fold ((c1-A)+c2) -> (c1+c2)-A
2461  if (SDValue Add = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N1, N00}))
2462  return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
2463  }
2464 
2465  // add (sext i1 X), 1 -> zext (not i1 X)
2466  // We don't transform this pattern:
2467  // add (zext i1 X), -1 -> sext (not i1 X)
2468  // because most (?) targets generate better code for the zext form.
2469  if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
2470  isOneOrOneSplat(N1)) {
2471  SDValue X = N0.getOperand(0);
2472  if ((!LegalOperations ||
2473  (TLI.isOperationLegal(ISD::XOR, X.getValueType()) &&
2474  TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) &&
2475  X.getScalarValueSizeInBits() == 1) {
2476  SDValue Not = DAG.getNOT(DL, X, X.getValueType());
2477  return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Not);
2478  }
2479  }
2480 
2481  // Fold (add (or x, c0), c1) -> (add x, (c0 + c1))
2482  // iff (or x, c0) is equivalent to (add x, c0).
2483  // Fold (add (xor x, c0), c1) -> (add x, (c0 + c1))
2484  // iff (xor x, c0) is equivalent to (add x, c0).
2485  if (isADDLike(N0, DAG)) {
2486  SDValue N01 = N0.getOperand(1);
2487  if (SDValue Add = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N1, N01}))
2488  return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Add);
2489  }
2490 
2491  if (SDValue NewSel = foldBinOpIntoSelect(N))
2492  return NewSel;
2493 
2494  // reassociate add
2495  if (!reassociationCanBreakAddressingModePattern(ISD::ADD, DL, N, N0, N1)) {
2496  if (SDValue RADD = reassociateOps(ISD::ADD, DL, N0, N1, N->getFlags()))
2497  return RADD;
2498 
2499  // Reassociate (add (or x, c), y) -> (add add(x, y), c)) if (or x, c) is
2500  // equivalent to (add x, c).
2501  // Reassociate (add (xor x, c), y) -> (add add(x, y), c)) if (xor x, c) is
2502  // equivalent to (add x, c).
2503  auto ReassociateAddOr = [&](SDValue N0, SDValue N1) {
2504  if (isADDLike(N0, DAG) && N0.hasOneUse() &&
2505  isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true)) {
2506  return DAG.getNode(ISD::ADD, DL, VT,
2507  DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(0)),
2508  N0.getOperand(1));
2509  }
2510  return SDValue();
2511  };
2512  if (SDValue Add = ReassociateAddOr(N0, N1))
2513  return Add;
2514  if (SDValue Add = ReassociateAddOr(N1, N0))
2515  return Add;
2516  }
2517  // fold ((0-A) + B) -> B-A
2518  if (N0.getOpcode() == ISD::SUB && isNullOrNullSplat(N0.getOperand(0)))
2519  return DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
2520 
2521  // fold (A + (0-B)) -> A-B
2522  if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
2523  return DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(1));
2524 
2525  // fold (A+(B-A)) -> B
2526  if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1))
2527  return N1.getOperand(0);
2528 
2529  // fold ((B-A)+A) -> B
2530  if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1))
2531  return N0.getOperand(0);
2532 
2533  // fold ((A-B)+(C-A)) -> (C-B)
2534  if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
2535  N0.getOperand(0) == N1.getOperand(1))
2536  return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2537  N0.getOperand(1));
2538 
2539  // fold ((A-B)+(B-C)) -> (A-C)
2540  if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
2541  N0.getOperand(1) == N1.getOperand(0))
2542  return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
2543  N1.getOperand(1));
2544 
2545  // fold (A+(B-(A+C))) to (B-C)
2546  if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
2547  N0 == N1.getOperand(1).getOperand(0))
2548  return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2549  N1.getOperand(1).getOperand(1));
2550 
2551  // fold (A+(B-(C+A))) to (B-C)
2552  if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
2553  N0 == N1.getOperand(1).getOperand(1))
2554  return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2555  N1.getOperand(1).getOperand(0));
2556 
2557  // fold (A+((B-A)+or-C)) to (B+or-C)
2558  if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) &&
2559  N1.getOperand(0).getOpcode() == ISD::SUB &&
2560  N0 == N1.getOperand(0).getOperand(1))
2561  return DAG.getNode(N1.getOpcode(), DL, VT, N1.getOperand(0).getOperand(0),
2562  N1.getOperand(1));
2563 
2564  // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
2565  if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
2566  N0->hasOneUse() && N1->hasOneUse()) {
2567  SDValue N00 = N0.getOperand(0);
2568  SDValue N01 = N0.getOperand(1);
2569  SDValue N10 = N1.getOperand(0);
2570  SDValue N11 = N1.getOperand(1);
2571 
2572  if (isConstantOrConstantVector(N00) || isConstantOrConstantVector(N10))
2573  return DAG.getNode(ISD::SUB, DL, VT,
2574  DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10),
2575  DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11));
2576  }
2577 
2578  // fold (add (umax X, C), -C) --> (usubsat X, C)
2579  if (N0.getOpcode() == ISD::UMAX && hasOperation(ISD::USUBSAT, VT)) {
2580  auto MatchUSUBSAT = [](ConstantSDNode *Max, ConstantSDNode *Op) {
2581  return (!Max && !Op) ||
2582  (Max && Op && Max->getAPIntValue() == (-Op->getAPIntValue()));
2583  };
2584  if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchUSUBSAT,
2585  /*AllowUndefs*/ true))
2586  return DAG.getNode(ISD::USUBSAT, DL, VT, N0.getOperand(0),
2587  N0.getOperand(1));
2588  }
2589 
2590  if (SimplifyDemandedBits(SDValue(N, 0)))
2591  return SDValue(N, 0);
2592 
2593  if (isOneOrOneSplat(N1)) {
2594  // fold (add (xor a, -1), 1) -> (sub 0, a)
2595  if (isBitwiseNot(N0))
2596  return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
2597  N0.getOperand(0));
2598 
2599  // fold (add (add (xor a, -1), b), 1) -> (sub b, a)
2600  if (N0.getOpcode() == ISD::ADD) {
2601  SDValue A, Xor;
2602 
2603  if (isBitwiseNot(N0.getOperand(0))) {
2604  A = N0.getOperand(1);
2605  Xor = N0.getOperand(0);
2606  } else if (isBitwiseNot(N0.getOperand(1))) {
2607  A = N0.getOperand(0);
2608  Xor = N0.getOperand(1);
2609  }
2610 
2611  if (Xor)
2612  return DAG.getNode(ISD::SUB, DL, VT, A, Xor.getOperand(0));
2613  }
2614 
2615  // Look for:
2616  // add (add x, y), 1
2617  // And if the target does not like this form then turn into:
2618  // sub y, (xor x, -1)
2619  if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.getOpcode() == ISD::ADD &&
2620  N0.hasOneUse()) {
2621  SDValue Not = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
2622  DAG.getAllOnesConstant(DL, VT));
2623  return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(1), Not);
2624  }
2625  }
2626 
2627  // (x - y) + -1 -> add (xor y, -1), x
2628  if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
2629  isAllOnesOrAllOnesSplat(N1)) {
2630  SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1), N1);
2631  return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0));
2632  }
2633 
2634  if (SDValue Combined = visitADDLikeCommutative(N0, N1, N))
2635  return Combined;
2636 
2637  if (SDValue Combined = visitADDLikeCommutative(N1, N0, N))
2638  return Combined;
2639 
2640  return SDValue();
2641 }
2642 
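// ----- Editor's illustrative sketch; not part of the upstream DAGCombiner.cpp. -----
// A few of the visitADDLike identities checked in plain wrapping i32 arithmetic;
// each holds modulo 2^32 regardless of overflow. The test constants are
// hypothetical.
//   ((A - c1) + c2)     == (A + (c2 - c1))
//   ((0 - A) + B)       == (B - A)
//   (A + (B - (A + C))) == (B - C)
#include <cassert>
#include <cstdint>

int main() {
  const uint32_t A = 0xDEADBEEFu, B = 17u, C = 0x80000001u;
  const uint32_t c1 = 0xFFFFFFF0u, c2 = 3u;
  assert((A - c1) + c2 == A + (c2 - c1));
  assert((0u - A) + B == B - A);
  assert(A + (B - (A + C)) == B - C);
  return 0;
}
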
2643 SDValue DAGCombiner::visitADD(SDNode *N) {
2644  SDValue N0 = N->getOperand(0);
2645  SDValue N1 = N->getOperand(1);
2646  EVT VT = N0.getValueType();
2647  SDLoc DL(N);
2648 
2649  if (SDValue Combined = visitADDLike(N))
2650  return Combined;
2651 
2652  if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
2653  return V;
2654 
2655  if (SDValue V = foldAddSubOfSignBit(N, DAG))
2656  return V;
2657 
2658  // fold (a+b) -> (a|b) iff a and b share no bits.
2659  if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
2660  DAG.haveNoCommonBitsSet(N0, N1))
2661  return DAG.getNode(ISD::OR, DL, VT, N0, N1);
2662 
2663  // Fold (add (vscale * C0), (vscale * C1)) to (vscale * (C0 + C1)).
2664  if (N0.getOpcode() == ISD::VSCALE && N1.getOpcode() == ISD::VSCALE) {
2665  const APInt &C0 = N0->getConstantOperandAPInt(0);
2666  const APInt &C1 = N1->getConstantOperandAPInt(0);
2667  return DAG.getVScale(DL, VT, C0 + C1);
2668  }
2669 
2670  // fold a+vscale(c1)+vscale(c2) -> a+vscale(c1+c2)
2671  if (N0.getOpcode() == ISD::ADD &&
2672  N0.getOperand(1).getOpcode() == ISD::VSCALE &&
2673  N1.getOpcode() == ISD::VSCALE) {
2674  const APInt &VS0 = N0.getOperand(1)->getConstantOperandAPInt(0);
2675  const APInt &VS1 = N1->getConstantOperandAPInt(0);
2676  SDValue VS = DAG.getVScale(DL, VT, VS0 + VS1);
2677  return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), VS);
2678  }
2679 
2680  // Fold (add step_vector(c1), step_vector(c2) to step_vector(c1+c2))
2681  if (N0.getOpcode() == ISD::STEP_VECTOR &&
2682  N1.getOpcode() == ISD::STEP_VECTOR) {
2683  const APInt &C0 = N0->getConstantOperandAPInt(0);
2684  const APInt &C1 = N1->getConstantOperandAPInt(0);
2685  APInt NewStep = C0 + C1;
2686  return DAG.getStepVector(DL, VT, NewStep);
2687  }
2688 
2689  // Fold a + step_vector(c1) + step_vector(c2) to a + step_vector(c1+c2)
2690  if (N0.getOpcode() == ISD::ADD &&
2691  N0.getOperand(1).getOpcode() == ISD::STEP_VECTOR &&
2692  N1.getOpcode() == ISD::STEP_VECTOR) {
2693  const APInt &SV0 = N0.getOperand(1)->getConstantOperandAPInt(0);
2694  const APInt &SV1 = N1->getConstantOperandAPInt(0);
2695  APInt NewStep = SV0 + SV1;
2696  SDValue SV = DAG.getStepVector(DL, VT, NewStep);
2697  return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), SV);
2698  }
2699 
2700  return SDValue();
2701 }
2702 
2703 SDValue DAGCombiner::visitADDSAT(SDNode *N) {
2704  unsigned Opcode = N->getOpcode();
2705  SDValue N0 = N->getOperand(0);
2706  SDValue N1 = N->getOperand(1);
2707  EVT VT = N0.getValueType();
2708  SDLoc DL(N);
2709 
2710  // fold (add_sat x, undef) -> -1
2711  if (N0.isUndef() || N1.isUndef())
2712  return DAG.getAllOnesConstant(DL, VT);
2713 
2714  // fold (add_sat c1, c2) -> c3
2715  if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
2716  return C;
2717 
2718  // canonicalize constant to RHS
2719  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
2720  !DAG.isConstantIntBuildVectorOrConstantInt(N1))
2721  return DAG.getNode(Opcode, DL, VT, N1, N0);
2722 
2723  // fold vector ops
2724  if (VT.isVector()) {
2725  if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
2726  return FoldedVOp;
2727 
2728  // fold (add_sat x, 0) -> x, vector edition
2729  if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
2730  return N0;
2731  }
2732 
2733  // fold (add_sat x, 0) -> x
2734  if (isNullConstant(N1))
2735  return N0;
2736 
2737  // If it cannot overflow, transform into an add.
2738  if (Opcode == ISD::UADDSAT)
2739  if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2740  return DAG.getNode(ISD::ADD, DL, VT, N0, N1);
2741 
2742  return SDValue();
2743 }
2744 
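// ----- Editor's illustrative sketch; not part of the upstream DAGCombiner.cpp. -----
// The "cannot overflow" case above, modeled for i32 UADDSAT: whenever the wide sum
// still fits in 32 bits, the saturating add and a plain add agree, so the cheaper
// ADD can be used. The helper and test values are hypothetical.
#include <cassert>
#include <cstdint>

static uint32_t uaddsat32(uint32_t A, uint32_t B) {
  const uint64_t Wide = static_cast<uint64_t>(A) + B;
  return Wide > 0xFFFFFFFFull ? 0xFFFFFFFFu : static_cast<uint32_t>(Wide);
}

int main() {
  // Overflow is provably impossible here: both operands are below 2^31.
  const uint32_t A = 0x7FFFFFFFu, B = 0x40000000u;
  assert(uaddsat32(A, B) == A + B);
  return 0;
}
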
2745 static SDValue getAsCarry(const TargetLowering &TLI, SDValue V) {
2746  bool Masked = false;
2747 
2748  // First, peel away TRUNCATE/ZERO_EXTEND/AND nodes due to legalization.
2749  while (true) {
2750  if (V.getOpcode() == ISD::TRUNCATE || V.getOpcode() == ISD::ZERO_EXTEND) {
2751  V = V.getOperand(0);
2752  continue;
2753  }
2754 
2755  if (V.getOpcode() == ISD::AND && isOneConstant(V.getOperand(1))) {
2756  Masked = true;
2757  V = V.getOperand(0);
2758  continue;
2759  }
2760 
2761  break;
2762  }
2763 
2764  // If this is not a carry, return.
2765  if (V.getResNo() != 1)
2766  return SDValue();
2767 
2768  if (V.getOpcode() != ISD::ADDCARRY && V.getOpcode() != ISD::SUBCARRY &&
2769  V.getOpcode() != ISD::UADDO && V.getOpcode() != ISD::USUBO)
2770  return SDValue();
2771 
2772  EVT VT = V->getValueType(0);
2773  if (!TLI.isOperationLegalOrCustom(V.getOpcode(), VT))
2774  return SDValue();
2775 
2776  // If the result is masked, then no matter what kind of bool it is we can
2777  // return. If it isn't, then we need to make sure the bool type is either 0 or
2778  // 1 and not other values.
2779  if (Masked ||
2780  TLI.getBooleanContents(V.getValueType()) ==
2781  TargetLowering::ZeroOrOneBooleanContent)
2782  return V;
2783 
2784  return SDValue();
2785 }
2786 
2787 /// Given the operands of an add/sub operation, see if the 2nd operand is a
2788 /// masked 0/1 whose source operand is actually known to be 0/-1. If so, invert
2789 /// the opcode and bypass the mask operation.
2790 static SDValue foldAddSubMasked1(bool IsAdd, SDValue N0, SDValue N1,
2791  SelectionDAG &DAG, const SDLoc &DL) {
2792  if (N1.getOpcode() != ISD::AND || !isOneOrOneSplat(N1->getOperand(1)))
2793  return SDValue();
2794 
2795  EVT VT = N0.getValueType();
2796  if (DAG.ComputeNumSignBits(N1.getOperand(0)) != VT.getScalarSizeInBits())
2797  return SDValue();
2798 
2799  // add N0, (and (AssertSext X, i1), 1) --> sub N0, X
2800  // sub N0, (and (AssertSext X, i1), 1) --> add N0, X
2801  return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, N0, N1.getOperand(0));
2802 }
2803 
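// ----- Editor's illustrative sketch; not part of the upstream DAGCombiner.cpp. -----
// The precondition above (ComputeNumSignBits equals the bit width) means the
// operand under the mask is known to be 0 or -1. For exactly those two values:
//   N0 + (Y & 1) == N0 - Y   and   N0 - (Y & 1) == N0 + Y
// which is why the opcode can be inverted and the mask bypassed. Test values are
// hypothetical.
#include <cassert>
#include <cstdint>

int main() {
  const uint32_t N0 = 1234u;
  const uint32_t Ys[] = {0u, 0xFFFFFFFFu}; // the only all-sign-bits values
  for (uint32_t Y : Ys) {
    assert(N0 + (Y & 1) == N0 - Y);
    assert(N0 - (Y & 1) == N0 + Y);
  }
  return 0;
}
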
2804 /// Helper for doing combines based on N0 and N1 being added to each other.
2805 SDValue DAGCombiner::visitADDLikeCommutative(SDValue N0, SDValue N1,
2806  SDNode *LocReference) {
2807  EVT VT = N0.getValueType();
2808  SDLoc DL(LocReference);
2809 
2810  // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
2811  if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB &&
2812  isNullOrNullSplat(N1.getOperand(0).getOperand(0)))
2813  return DAG.getNode(ISD::SUB, DL, VT, N0,
2814  DAG.getNode(ISD::SHL, DL, VT,
2815  N1.getOperand(0).getOperand(1),
2816  N1.getOperand(1)));
2817 
2818  if (SDValue V = foldAddSubMasked1(true, N0, N1, DAG, DL))
2819  return V;
2820 
2821  // Look for:
2822  // add (add x, 1), y
2823  // And if the target does not like this form then turn into:
2824  // sub y, (xor x, -1)
2825  if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.getOpcode() == ISD::ADD &&
2826  N0.hasOneUse() && isOneOrOneSplat(N0.getOperand(1))) {
2827  SDValue Not = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
2828  DAG.getAllOnesConstant(DL, VT));
2829  return DAG.getNode(ISD::SUB, DL, VT, N1, Not);
2830  }
2831 
2832  if (N0.getOpcode() == ISD::SUB && N0.hasOneUse()) {
2833  // Hoist one-use subtraction by non-opaque constant:
2834  // (x - C) + y -> (x + y) - C
2835  // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
2836  if (isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
2837  SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), N1);
2838  return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
2839  }
2840  // Hoist one-use subtraction from non-opaque constant:
2841  // (C - x) + y -> (y - x) + C
2842  if (isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
2843  SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
2844  return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(0));
2845  }
2846  }
2847 
2848  // If the target's bool is represented as 0/1, prefer to make this 'sub 0/1'
2849  // rather than 'add 0/-1' (the zext should get folded).
2850  // add (sext i1 Y), X --> sub X, (zext i1 Y)
2851  if (N0.getOpcode() == ISD::SIGN_EXTEND &&
2852  N0.getOperand(0).getScalarValueSizeInBits() == 1 &&
2853  TLI.getBooleanContents(VT) == TargetLowering::ZeroOrOneBooleanContent) {
2854  SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
2855  return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
2856  }
2857 
2858  // add X, (sextinreg Y i1) -> sub X, (and Y 1)
2859  if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
2860  VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
2861  if (TN->getVT() == MVT::i1) {
2862  SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
2863  DAG.getConstant(1, DL, VT));
2864  return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt);
2865  }
2866  }
2867 
2868  // (add X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
2869  if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1)) &&
2870  N1.getResNo() == 0)
2871  return DAG.getNode(ISD::ADDCARRY, DL, N1->getVTList(),
2872  N0, N1.getOperand(0), N1.getOperand(2));
2873 
2874  // (add X, Carry) -> (addcarry X, 0, Carry)
2875  if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
2876  if (SDValue Carry = getAsCarry(TLI, N1))
2877  return DAG.getNode(ISD::ADDCARRY, DL,
2878  DAG.getVTList(VT, Carry.getValueType()), N0,
2879  DAG.getConstant(0, DL, VT), Carry);
2880 
2881  return SDValue();
2882 }
2883 
2884 SDValue DAGCombiner::visitADDC(SDNode *N) {
2885  SDValue N0 = N->getOperand(0);
2886  SDValue N1 = N->getOperand(1);
2887  EVT VT = N0.getValueType();
2888  SDLoc DL(N);
2889 
2890  // If the flag result is dead, turn this into an ADD.
2891  if (!N->hasAnyUseOfValue(1))
2892  return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2893  DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2894 
2895  // canonicalize constant to RHS.
2896  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2897  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2898  if (N0C && !N1C)
2899  return DAG.getNode(ISD::ADDC, DL, N->getVTList(), N1, N0);
2900 
2901  // fold (addc x, 0) -> x + no carry out
2902  if (isNullConstant(N1))
2903  return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
2904  DL, MVT::Glue));
2905 
2906  // If it cannot overflow, transform into an add.
2907  if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2908  return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2909  DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2910 
2911  return SDValue();
2912 }
2913 
2914 /**
2915  * Flips a boolean if it is cheaper to compute. If the Force parameter is set,
2916  * then the flip also occurs if computing the inverse is the same cost.
2917  * This function returns an empty SDValue in case it cannot flip the boolean
2918  * without increasing the cost of the computation. If you want to flip a boolean
2919  * no matter what, use DAG.getLogicalNOT.
2920  */
2921 static SDValue extractBooleanFlip(SDValue V, SelectionDAG &DAG,
2922  const TargetLowering &TLI,
2923  bool Force) {
2924  if (Force && isa<ConstantSDNode>(V))
2925  return DAG.getLogicalNOT(SDLoc(V), V, V.getValueType());
2926 
2927  if (V.getOpcode() != ISD::XOR)
2928  return SDValue();
2929 
2930  ConstantSDNode *Const = isConstOrConstSplat(V.getOperand(1), false);
2931  if (!Const)
2932  return SDValue();
2933 
2934  EVT VT = V.getValueType();
2935 
2936  bool IsFlip = false;
2937  switch(TLI.getBooleanContents(VT)) {
2938  case TargetLowering::ZeroOrOneBooleanContent:
2939  IsFlip = Const->isOne();
2940  break;
2941  case TargetLowering::ZeroOrNegativeOneBooleanContent:
2942  IsFlip = Const->isAllOnes();
2943  break;
2944  case TargetLowering::UndefinedBooleanContent:
2945  IsFlip = (Const->getAPIntValue() & 0x01) == 1;
2946  break;
2947  }
2948 
2949  if (IsFlip)
2950  return V.getOperand(0);
2951  if (Force)
2952  return DAG.getLogicalNOT(SDLoc(V), V, V.getValueType());
2953  return SDValue();
2954 }
2955 
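// ----- Editor's illustrative sketch; not part of the upstream DAGCombiner.cpp. -----
// The XOR constant that flips a boolean depends on the target's boolean contents,
// mirroring the switch above: with 0/1 booleans, xor 1 is logical not; with 0/-1
// booleans, xor -1 is.
#include <cstdint>

static_assert((0u ^ 1u) == 1u && (1u ^ 1u) == 0u,
              "ZeroOrOneBooleanContent: xor 1 flips the boolean");
static_assert((0u ^ 0xFFFFFFFFu) == 0xFFFFFFFFu &&
                  (0xFFFFFFFFu ^ 0xFFFFFFFFu) == 0u,
              "ZeroOrNegativeOneBooleanContent: xor -1 flips the boolean");
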
2956 SDValue DAGCombiner::visitADDO(SDNode *N) {
2957  SDValue N0 = N->getOperand(0);
2958  SDValue N1 = N->getOperand(1);
2959  EVT VT = N0.getValueType();
2960  bool IsSigned = (ISD::SADDO == N->getOpcode());
2961 
2962  EVT CarryVT = N->getValueType(1);
2963  SDLoc DL(N);
2964 
2965  // If the flag result is dead, turn this into an ADD.
2966  if (!N->hasAnyUseOfValue(1))
2967  return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2968  DAG.getUNDEF(CarryVT));
2969 
2970  // canonicalize constant to RHS.
2971  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
2972  !DAG.isConstantIntBuildVectorOrConstantInt(N1))
2973  return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
2974 
2975  // fold (addo x, 0) -> x + no carry out
2976  if (isNullOrNullSplat(N1))
2977  return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
2978 
2979  if (!IsSigned) {
2980  // If it cannot overflow, transform into an add.
2981  if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2982  return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2983  DAG.getConstant(0, DL, CarryVT));
2984 
2985  // fold (uaddo (xor a, -1), 1) -> (usub 0, a) and flip carry.
2986  if (isBitwiseNot(N0) && isOneOrOneSplat(N1)) {
2987  SDValue Sub = DAG.getNode(ISD::USUBO, DL, N->getVTList(),
2988  DAG.getConstant(0, DL, VT), N0.getOperand(0));
2989  return CombineTo(
2990  N, Sub, DAG.getLogicalNOT(DL, Sub.getValue(1), Sub->getValueType(1)));
2991  }
2992 
2993  if (SDValue Combined = visitUADDOLike(N0, N1, N))
2994  return Combined;
2995 
2996  if (SDValue Combined = visitUADDOLike(N1, N0, N))
2997  return Combined;
2998  }
2999 
3000  return SDValue();
3001 }
3002 
3003 SDValue DAGCombiner::visitUADDOLike(SDValue N0, SDValue N1, SDNode *N) {
3004  EVT VT = N0.getValueType();
3005  if (VT.isVector())
3006  return SDValue();
3007 
3008  // (uaddo X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
3009  // If Y + 1 cannot overflow.
3010  if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1))) {
3011  SDValue Y = N1.getOperand(0);
3012  SDValue One = DAG.getConstant(1, SDLoc(N), Y.getValueType());
3013  if (DAG.computeOverflowKind(Y, One) == SelectionDAG::OFK_Never)
3014  return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0, Y,
3015  N1.getOperand(2));
3016  }
3017 
3018  // (uaddo X, Carry) -> (addcarry X, 0, Carry)
3019  if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
3020  if (SDValue Carry = getAsCarry(TLI, N1))
3021  return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0,
3022  DAG.getConstant(0, SDLoc(N), VT), Carry);
3023 
3024  return SDValue();
3025 }
3026 
3027 SDValue DAGCombiner::visitADDE(SDNode *N) {
3028  SDValue N0 = N->getOperand(0);
3029  SDValue N1 = N->getOperand(1);
3030  SDValue CarryIn = N->getOperand(2);
3031 
3032  // canonicalize constant to RHS
3033  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
3034  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
3035  if (N0C && !N1C)
3036  return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
3037  N1, N0, CarryIn);
3038 
3039  // fold (adde x, y, false) -> (addc x, y)
3040  if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
3041  return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1);
3042 
3043  return SDValue();
3044 }
3045 
3046 SDValue DAGCombiner::visitADDCARRY(SDNode *N) {
3047  SDValue N0 = N->getOperand(0);
3048  SDValue N1 = N->getOperand(1);
3049  SDValue CarryIn = N->getOperand(2);
3050  SDLoc DL(N);
3051 
3052  // canonicalize constant to RHS
3053  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
3054  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
3055  if (N0C && !N1C)
3056  return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), N1, N0, CarryIn);
3057 
3058  // fold (addcarry x, y, false) -> (uaddo x, y)
3059  if (isNullConstant(CarryIn)) {
3060  if (!LegalOperations ||
3061  TLI.isOperationLegalOrCustom(ISD::UADDO, N->getValueType(0)))
3062  return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N0, N1);
3063  }
3064 
3065  // fold (addcarry 0, 0, X) -> (and (ext/trunc X), 1) and no carry.
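  // With a 0/1 carry-in C, 0 + 0 + C is just C widened to VT, and that sum
  // can never produce a carry-out of its own.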
3066  if (isNullConstant(N0) && isNullConstant(N1)) {
3067  EVT VT = N0.getValueType();
3068  EVT CarryVT = CarryIn.getValueType();
3069  SDValue CarryExt = DAG.getBoolExtOrTrunc(CarryIn, DL, VT, CarryVT);
3070  AddToWorklist(CarryExt.getNode());
3071  return CombineTo(N, DAG.getNode(ISD::AND, DL, VT, CarryExt,
3072  DAG.getConstant(1, DL, VT)),
3073  DAG.getConstant(0, DL, CarryVT));
3074  }
3075 
3076  if (SDValue Combined = visitADDCARRYLike(N0, N1, CarryIn, N))
3077  return Combined;
3078 
3079  if (SDValue Combined = visitADDCARRYLike(N1, N0, CarryIn, N))
3080  return Combined;
3081 
3082  // We want to avoid useless duplication.
3083  // TODO: This is done automatically for binary operations. As ADDCARRY is
3084  // not a binary operation, it is not really possible to leverage this
3085  // existing mechanism for it. However, if more operations require the same
3086  // deduplication logic, then it may be worth generalizing.
3087  SDValue Ops[] = {N1, N0, CarryIn};
3088  SDNode *CSENode =
3089  DAG.getNodeIfExists(ISD::ADDCARRY, N->getVTList(), Ops, N->getFlags());
3090  if (CSENode)
3091  return SDValue(CSENode, 0);
3092 
3093  return SDValue();
3094 }
3095 
3096 SDValue DAGCombiner::visitSADDO_CARRY(SDNode *N) {
3097  SDValue N0 = N->getOperand(0);
3098  SDValue N1 = N->getOperand(1);
3099  SDValue CarryIn = N->getOperand(2);
3100  SDLoc DL(N);
3101 
3102  // canonicalize constant to RHS
3103  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
3104  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
3105  if (N0C && !N1C)
3106  return DAG.getNode(ISD::SADDO_CARRY, DL, N->getVTList(), N1, N0, CarryIn);
3107 
3108  // fold (saddo_carry x, y, false) -> (saddo x, y)
3109  if (isNullConstant(CarryIn)) {
3110  if (!LegalOperations ||
3111  TLI.isOperationLegalOrCustom(ISD::SADDO, N->getValueType(0)))
3112  return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0, N1);
3113  }
3114 
3115  return SDValue();
3116 }
3117 
3118 /**
3119  * If we are facing some sort of diamond carry propagation pattern, try to
3120  * break it up to generate something like:
3121  * (addcarry X, 0, (addcarry A, B, Z):Carry)
3122  *
3123  * The end result is usually an increase in the number of operations required, but because the
3124  * carry is now linearized, other transforms can kick in and optimize the DAG.
3125  *
3126  * Patterns typically look something like
3127  *                (uaddo A, B)
3128  *                 /       \
3129  *              Carry      Sum
3130  *                |          \
3131  *                |    (addcarry *, 0, Z)
3132  *                |       /
3133  *                 \   Carry
3134  *                  |   /
3135  *     (addcarry X, *, *)
3136  *
3137  * But numerous variations exist. Our goal is to identify A, B, X and Z and
3138  * produce a combine with a single path for carry propagation.
3139  */
3140 static SDValue combineADDCARRYDiamond(DAGCombiner &Combiner, SelectionDAG &DAG,
3141  SDValue X, SDValue Carry0, SDValue Carry1,
3142  SDNode *N) {
3143  if (Carry1.getResNo() != 1 || Carry0.getResNo() != 1)
3144  return SDValue();
3145  if (Carry1.getOpcode() != ISD::UADDO)
3146  return SDValue();
3147 
3148  SDValue Z;
3149 
3150  /**
3151  * First look for a suitable Z. It will present itself in the form of
3152  * (addcarry Y, 0, Z) or its equivalent (uaddo Y, 1) for Z=true
3153  */
3154  if (Carry0.getOpcode() == ISD::ADDCARRY &&
3155  isNullConstant(Carry0.getOperand(1))) {
3156  Z = Carry0.getOperand(2);
3157  } else if (Carry0.getOpcode() == ISD::UADDO &&
3158  isOneConstant(Carry0.getOperand(1))) {
3159  EVT VT = Combiner.getSetCCResultType(Carry0.getValueType());
3160  Z = DAG.getConstant(1, SDLoc(Carry0.getOperand(1)), VT);
3161  } else {
3162  // We couldn't find a suitable Z.
3163  return SDValue();
3164  }
3165 
3166 
3167  auto cancelDiamond = [&](SDValue A, SDValue B) {
3168  SDLoc DL(N);
3169  SDValue NewY = DAG.getNode(ISD::ADDCARRY, DL, Carry0->getVTList(), A, B, Z);
3170  Combiner.AddToWorklist(NewY.getNode());
3171  return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), X,
3172  DAG.getConstant(0, DL, X.getValueType()),
3173  NewY.getValue(1));
3174  };
3175 
3176  /**
3177  * (uaddo A, B)
3178  * |
3179  * Sum
3180  * |
3181  * (addcarry *, 0, Z)
3182  */
3183  if (Carry0.getOperand(0) == Carry1.getValue(0)) {
3184  return cancelDiamond(Carry1.getOperand(0), Carry1.getOperand(1));
3185  }
3186 
3187  /**
3188  * (addcarry A, 0, Z)
3189  * |
3190  * Sum
3191  * |
3192  * (uaddo *, B)
3193  */
3194  if (Carry1.getOperand(0) == Carry0.getValue(0)) {
3195  return cancelDiamond(Carry0.getOperand(0), Carry1.getOperand(1));
3196  }
3197 
3198  if (Carry1.getOperand(1) == Carry0.getValue(0)) {
3199  return cancelDiamond(Carry1.getOperand(0), Carry0.getOperand(0));
3200  }
3201 
3202  return SDValue();
3203 }
3204 
3205 // If we are facing some sort of diamond carry/borrow in/out pattern, try to
3206 // match patterns like:
3207 //
3208 //          (uaddo A, B)            CarryIn
3209 //            |  \                      |
3210 //            |   \                     |
3211 //      PartialSum  PartialCarryOutX    /
3212 //            |            |           /
3213 //            |       _____|__________/
3214 //            |      /     |
3215 //       (uaddo *, *)      \________
3216 //         |  \                     \
3217 //         |   \                     |
3218 //         |    PartialCarryOutY     |
3219 //         |         \               |
3220 //         |          \             /
3221 //    AddCarrySum      |     ______/
3222 //                     |    /
3223 //    CarryOut = (or *, *)
3224 //
3225 // And generate ADDCARRY (or SUBCARRY) with two result values:
3226 //
3227 // {AddCarrySum, CarryOut} = (addcarry A, B, CarryIn)
3228 //
3229 // Our goal is to identify A, B, and CarryIn and produce ADDCARRY/SUBCARRY with
3230 // a single path for carry/borrow out propagation:
3231 static SDValue combineCarryDiamond(SelectionDAG &DAG, const TargetLowering &TLI,
3232  SDValue N0, SDValue N1, SDNode *N) {
3233  SDValue Carry0 = getAsCarry(TLI, N0);
3234  if (!Carry0)
3235  return SDValue();
3236  SDValue Carry1 = getAsCarry(TLI, N1);
3237  if (!Carry1)
3238  return SDValue();
3239 
3240  unsigned Opcode = Carry0.getOpcode();
3241  if (Opcode != Carry1.getOpcode())
3242  return SDValue();
3243  if (Opcode != ISD::UADDO && Opcode != ISD::USUBO)
3244  return SDValue();
3245 
3246  // Canonicalize the add/sub of A and B (the top node in the above ASCII art)
3247  // as Carry0 and the add/sub of the carry in as Carry1 (the middle node).
3248  if (Carry1.getNode()->isOperandOf(Carry0.getNode()))
3249  std::swap(Carry0, Carry1);
3250 
3251  // Check if nodes are connected in expected way.
3252  if (Carry1.getOperand(0) != Carry0.getValue(0) &&
3253  Carry1.getOperand(1) != Carry0.getValue(0))
3254  return SDValue();
3255 
3256  // The carry in value must be on the right-hand side for subtraction.
3257  unsigned CarryInOperandNum =
3258  Carry1.getOperand(0) == Carry0.getValue(0) ? 1 : 0;
3259  if (Opcode == ISD::USUBO && CarryInOperandNum != 1)
3260  return SDValue();
3261  SDValue CarryIn = Carry1.getOperand(CarryInOperandNum);
3262 
3263  unsigned NewOp = Opcode == ISD::UADDO ? ISD::ADDCARRY : ISD::SUBCARRY;
3264  if (!TLI.isOperationLegalOrCustom(NewOp, Carry0.getValue(0).getValueType()))
3265  return SDValue();
3266 
3267  // Verify that the carry/borrow in is plausibly a carry/borrow bit.
3268  // TODO: make getAsCarry() aware of how partial carries are merged.
3269  if (CarryIn.getOpcode() != ISD::ZERO_EXTEND)
3270  return SDValue();
3271  CarryIn = CarryIn.getOperand(0);
3272  if (CarryIn.getValueType() != MVT::i1)
3273  return SDValue();
3274 
3275  SDLoc DL(N);
3276  SDValue Merged =
3277  DAG.getNode(NewOp, DL, Carry1->getVTList(), Carry0.getOperand(0),
3278  Carry0.getOperand(1), CarryIn);
3279 
3280  // Because we have proven that the result of the UADDO/USUBO of A and B
3281  // feeds into the UADDO/USUBO that does the carry/borrow in, we know that
3282  // if the first UADDO/USUBO overflows, the second UADDO/USUBO cannot. For
3283  // example, consider 8-bit numbers where 0xFF is the
3284  // maximum value.
3285  //
3286  // 0xFF + 0xFF == 0xFE with carry but 0xFE + 1 does not carry
3287  // 0x00 - 0xFF == 1 with a carry/borrow but 1 - 1 == 0 (no carry/borrow)
3288  //
3289  // This is important because it means that OR and XOR can be used to merge
3290  // carry flags; and that AND can return a constant zero.
3291  //
3292  // TODO: match other operations that can merge flags (ADD, etc)
3293  DAG.ReplaceAllUsesOfValueWith(Carry1.getValue(0), Merged.getValue(0));
3294  if (N->getOpcode() == ISD::AND)
3295  return DAG.getConstant(0, DL, MVT::i1);
3296  return Merged.getValue(1);
3297 }
3298 
3299 SDValue DAGCombiner::visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
3300  SDNode *N) {
3301  // fold (addcarry (xor a, -1), b, c) -> (subcarry b, a, !c) and flip carry.
3302  if (isBitwiseNot(N0))
3303  if (SDValue NotC = extractBooleanFlip(CarryIn, DAG, TLI, true)) {
3304  SDLoc DL(N);
3305  SDValue Sub = DAG.getNode(ISD::SUBCARRY, DL, N->getVTList(), N1,
3306  N0.getOperand(0), NotC);
3307  return CombineTo(
3308  N, Sub, DAG.getLogicalNOT(DL, Sub.getValue(1), Sub->getValueType(1)));
3309  }
3310 
3311  // Iff the flag result is dead:
3312  // (addcarry (add|uaddo X, Y), 0, Carry) -> (addcarry X, Y, Carry)
3313  // Don't do this if the Carry comes from the uaddo. It won't remove the uaddo
3314  // or the dependency between the instructions.
3315  if ((N0.getOpcode() == ISD::ADD ||
3316  (N0.getOpcode() == ISD::UADDO && N0.getResNo() == 0 &&
3317  N0.getValue(1) != CarryIn)) &&
3318  isNullConstant(N1) && !N->hasAnyUseOfValue(1))
3319  return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(),
3320  N0.getOperand(0), N0.getOperand(1), CarryIn);
3321 
3322  /**
3323  * When one of the addcarry arguments is itself a carry, we may be facing
3324  * a diamond carry propagation, in which case we try to transform the DAG
3325  * to ensure linear carry propagation if that is possible.
3326  */
3327  if (auto Y = getAsCarry(TLI, N1)) {
3328  // Because both are carries, Y and Z can be swapped.
3329  if (auto R = combineADDCARRYDiamond(*this, DAG, N0, Y, CarryIn, N))
3330  return R;
3331  if (auto R = combineADDCARRYDiamond(*this, DAG, N0, CarryIn, Y, N))
3332  return R;
3333  }
3334 
3335  return SDValue();
3336 }
3337 
3338 // Attempt to create a USUBSAT(LHS, RHS) node with DstVT, performing a
3339 // clamp/truncation if necessary.
3340 static SDValue getTruncatedUSUBSAT(EVT DstVT, EVT SrcVT, SDValue LHS,
3341  SDValue RHS, SelectionDAG &DAG,
3342  const SDLoc &DL) {
3343  assert(DstVT.getScalarSizeInBits() <= SrcVT.getScalarSizeInBits() &&
3344  "Illegal truncation");
3345 
3346  if (DstVT == SrcVT)
3347  return DAG.getNode(ISD::USUBSAT, DL, DstVT, LHS, RHS);
3348 
3349  // If the LHS is zero-extended then we can perform the USUBSAT as DstVT by
3350  // clamping RHS.
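  // For example, with SrcVT = i16, DstVT = i8 and a zero-extended LHS,
  // clamping RHS to umin(RHS, 255) makes the i8 usubsat of the truncated
  // operands produce the same result as the i16 subtraction would.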
3351  APInt UpperBits = APInt::getBitsSetFrom(SrcVT.getScalarSizeInBits(),
3352  DstVT.getScalarSizeInBits());
3353  if (!DAG.MaskedValueIsZero(LHS, UpperBits))
3354  return SDValue();
3355 
3356  SDValue SatLimit =
3357  DAG.getConstant(APInt::getLowBitsSet(SrcVT.getScalarSizeInBits(),
3358  DstVT.getScalarSizeInBits()),
3359  DL, SrcVT);
3360  RHS = DAG.getNode(ISD::UMIN, DL, SrcVT, RHS, SatLimit);
3361  RHS = DAG.getNode(ISD::TRUNCATE, DL, DstVT, RHS);
3362  LHS = DAG.getNode(ISD::TRUNCATE, DL, DstVT, LHS);
3363  return DAG.getNode(ISD::USUBSAT, DL, DstVT, LHS, RHS);
3364 }
3365 
3366 // Try to find umax(a,b) - b or a - umin(a,b) patterns that may be converted to
3367 // usubsat(a,b), optionally as a truncated type.
3368 SDValue DAGCombiner::foldSubToUSubSat(EVT DstVT, SDNode *N) {
3369  if (N->getOpcode() != ISD::SUB ||
3370  !(!LegalOperations || hasOperation(ISD::USUBSAT, DstVT)))
3371  return SDValue();
3372 
3373  EVT SubVT = N->getValueType(0);
3374  SDValue Op0 = N->getOperand(0);
3375  SDValue Op1 = N->getOperand(1);
3376 
3377  // Try to find umax(a,b) - b or a - umin(a,b) patterns
3378  // that may be converted to usubsat(a,b).
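  // For example, umax(a,b) - b is a - b when a >= b and b - b == 0 otherwise,
  // which matches the saturating semantics of usubsat(a,b).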
3379  if (Op0.getOpcode() == ISD::UMAX && Op0.hasOneUse()) {
3380  SDValue MaxLHS = Op0.getOperand(0);
3381  SDValue MaxRHS = Op0.getOperand(1);
3382  if (MaxLHS == Op1)
3383  return getTruncatedUSUBSAT(DstVT, SubVT, MaxRHS, Op1, DAG, SDLoc(N));
3384  if (MaxRHS == Op1)
3385  return getTruncatedUSUBSAT(DstVT, SubVT, MaxLHS, Op1, DAG, SDLoc(N));
3386  }
3387 
3388  if (Op1.getOpcode() == ISD::UMIN && Op1.hasOneUse()) {
3389  SDValue MinLHS = Op1.getOperand(0);
3390  SDValue MinRHS = Op1.getOperand(1);
3391  if (MinLHS == Op0)
3392  return getTruncatedUSUBSAT(DstVT, SubVT, Op0, MinRHS, DAG, SDLoc(N));
3393  if (MinRHS == Op0)
3394  return getTruncatedUSUBSAT(DstVT, SubVT, Op0, MinLHS, DAG, SDLoc(N));
3395  }
3396 
3397  // sub(a,trunc(umin(zext(a),b))) -> usubsat(a,trunc(umin(b,SatLimit)))
3398  if (Op1.getOpcode() == ISD::TRUNCATE &&
3399  Op1.getOperand(0).getOpcode() == ISD::UMIN &&
3400  Op1.getOperand(0).hasOneUse()) {
3401  SDValue MinLHS = Op1.getOperand(0).getOperand(0);
3402  SDValue MinRHS = Op1.getOperand(0).getOperand(1);
3403  if (MinLHS.getOpcode() == ISD::ZERO_EXTEND && MinLHS.getOperand(0) == Op0)
3404  return getTruncatedUSUBSAT(DstVT, MinLHS.getValueType(), MinLHS, MinRHS,
3405  DAG, SDLoc(N));
3406  if (MinRHS.getOpcode() == ISD::ZERO_EXTEND && MinRHS.getOperand(0) == Op0)
3407  return getTruncatedUSUBSAT(DstVT, MinLHS.getValueType(), MinRHS, MinLHS,
3408  DAG, SDLoc(N));
3409  }
3410 
3411  return SDValue();
3412 }
3413 
3414 // Since it may not be valid to emit a fold to zero for vector initializers,
3415 // check if we can before folding.
3416 static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT,
3417  SelectionDAG &DAG, bool LegalOperations) {
3418  if (!VT.isVector())
3419  return DAG.getConstant(0, DL, VT);
3420  if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
3421  return DAG.getConstant(0, DL, VT);
3422  return SDValue();
3423 }
3424 
3425 SDValue DAGCombiner::visitSUB(SDNode *N) {
3426  SDValue N0 = N->getOperand(0);
3427  SDValue N1 = N->getOperand(1);
3428  EVT VT = N0.getValueType();
3429  SDLoc DL(N);
3430 
3431  auto PeekThroughFreeze = [](SDValue N) {
3432  if (N->getOpcode() == ISD::FREEZE && N.hasOneUse())
3433  return N->getOperand(0);
3434  return N;
3435  };
3436 
3437  // fold (sub x, x) -> 0
3438  // FIXME: Refactor this and xor and other similar operations together.
3439  if (PeekThroughFreeze(N0) == PeekThroughFreeze(N1))
3440  return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
3441 
3442  // fold (sub c1, c2) -> c3
3443  if (SDValue C = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N1}))
3444  return C;
3445 
3446  // fold vector ops
3447  if (VT.isVector()) {
3448  if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
3449  return FoldedVOp;
3450 
3451  // fold (sub x, 0) -> x, vector edition
3452  if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
3453  return N0;
3454  }
3455 
3456  if (SDValue NewSel = foldBinOpIntoSelect(N))
3457  return NewSel;
3458 
3459  ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
3460 
3461  // fold (sub x, c) -> (add x, -c)
3462  if (N1C) {
3463  return DAG.getNode(ISD::ADD, DL, VT, N0,
3464  DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
3465  }
3466 
3467  if (isNullOrNullSplat(N0)) {
3468  unsigned BitWidth = VT.getScalarSizeInBits();
3469  // Right-shifting everything out but the sign bit followed by negation is
3470  // the same as flipping arithmetic/logical shift type without the negation:
3471  // -(X >>u 31) -> (X >>s 31)
3472  // -(X >>s 31) -> (X >>u 31)
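  // For i32, X >>u 31 is 0 or 1 and its negation is 0 or -1, which is exactly
  // X >>s 31 (and the symmetric argument covers the other direction).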
3473  if (N1->getOpcode() == ISD::SRA || N1->getOpcode() == ISD::SRL) {
3474  ConstantSDNode *ShiftAmt = isConstOrConstSplat(N1.getOperand(1));
3475  if (ShiftAmt && ShiftAmt->getAPIntValue() == (BitWidth - 1)) {
3476  auto NewSh = N1->getOpcode() == ISD::SRA ? ISD::SRL : ISD::SRA;
3477  if (!LegalOperations || TLI.isOperationLegal(NewSh, VT))
3478  return DAG.getNode(NewSh, DL, VT, N1.getOperand(0), N1.getOperand(1));
3479  }
3480  }
3481 
3482  // 0 - X --> 0 if the sub is NUW.
3483  if (N->getFlags().hasNoUnsignedWrap())
3484  return N0;
3485 
3486  if (DAG.MaskedValueIsZero(N1, ~APInt::getSignMask(BitWidth))) {
3487  // N1 is either 0 or the minimum signed value. If the sub is NSW, then
3488  // N1 must be 0 because negating the minimum signed value is undefined.
3489  if (N->getFlags().hasNoSignedWrap())
3490  return N0;
3491 
3492  // 0 - X --> X if X is 0 or the minimum signed value.
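  // (Negating the minimum signed value wraps back to itself, e.g. for i8,
  // 0 - (-128) == -128 in two's complement.)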
3493  return N1;
3494  }
3495 
3496  // Convert 0 - abs(x).
3497  if (N1.getOpcode() == ISD::ABS && N1.hasOneUse() &&
3498  !TLI.isOperationLegalOrCustom(ISD::ABS, VT))
3499  if (SDValue Result = TLI.expandABS(N1.getNode(), DAG, true))
3500  return Result;
3501 
3502  // Fold neg(splat(neg(x))) -> splat(x)
3503  if (VT.isVector()) {
3504  SDValue N1S = DAG.getSplatValue(N1, true);
3505  if (N1S && N1S.getOpcode() == ISD::SUB &&
3506  isNullConstant(N1S.getOperand(0)))
3507  return DAG.getSplat(VT, DL, N1S.getOperand(1));
3508  }
3509  }
3510 
3511  // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
3512  if (isAllOnesOrAllOnesSplat(N0))
3513  return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
3514 
3515  // fold (A - (0-B)) -> A+B
3516  if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
3517  return DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(1));
3518 
3519  // fold A-(A-B) -> B
3520  if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
3521  return N1.getOperand(1);
3522 
3523  // fold (A+B)-A -> B
3524  if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
3525  return N0.getOperand(1);
3526 
3527  // fold (A+B)-B -> A
3528  if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
3529  return N0.getOperand(0);
3530 
3531  // fold (A+C1)-C2 -> A+(C1-C2)
3532  if (N0.getOpcode() == ISD::ADD) {
3533  SDValue N01 = N0.getOperand(1);
3534  if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N01, N1}))
3535  return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), NewC);
3536  }
3537 
3538  // fold C2-(A+C1) -> (C2-C1)-A
3539  if (N1.getOpcode() == ISD::ADD) {
3540  SDValue N11 = N1.getOperand(1);
3541  if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N11}))
3542  return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0));
3543  }
3544 
3545  // fold (A-C1)-C2 -> A-(C1+C2)
3546  if (N0.getOpcode() == ISD::SUB) {
3547  SDValue N01 = N0.getOperand(1);
3548  if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N01, N1}))
3549  return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), NewC);
3550  }
3551 
3552  // fold (c1-A)-c2 -> (c1-c2)-A
3553  if (N0.getOpcode() == ISD::SUB) {
3554  SDValue N00 = N0.getOperand(0);
3555  if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N00, N1}))
3556  return DAG.getNode(ISD::SUB, DL, VT, NewC, N0.getOperand(1));
3557  }
3558 
3559  // fold ((A+(B+or-C))-B) -> A+or-C
3560  if (N0.getOpcode() == ISD::ADD &&
3561  (N0.getOperand(1).getOpcode() == ISD::SUB ||
3562  N0.getOperand(1).getOpcode() == ISD::ADD) &&
3563  N0.getOperand(1).getOperand(0) == N1)
3564  return DAG.getNode(N0.getOperand(1).getOpcode(), DL, VT, N0.getOperand(0),
3565  N0.getOperand(1).getOperand(1));
3566 
3567  // fold ((A+(C+B))-B) -> A+C
3568  if (N0.getOpcode() == ISD::ADD && N0.getOperand(1).getOpcode() == ISD::ADD &&
3569  N0.getOperand(1).getOperand(1) == N1)
3570  return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0),
3571  N0.getOperand(1).getOperand(0));
3572 
3573  // fold ((A-(B-C))-C) -> A-B
3574  if (N0.getOpcode() == ISD::SUB && N0.getOperand(1).getOpcode() == ISD::SUB &&
3575  N0.getOperand(1).getOperand(1) == N1)
3576  return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
3577  N0.getOperand(1).getOperand(0));
3578 
3579  // fold (A-(B-C)) -> A+(C-B)
3580  if (N1.getOpcode() == ISD::SUB && N1.hasOneUse())
3581  return DAG.getNode(ISD::ADD, DL, VT, N0,
3582  DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(1),
3583  N1.getOperand(0)));
3584 
3585  // A - (A & B) -> A & (~B)
3586  if (N1.getOpcode() == ISD::AND) {
3587  SDValue A = N1.getOperand(0);
3588  SDValue B = N1.getOperand(1);
3589  if (A != N0)
3590  std::swap(A, B);
3591  if (A == N0 &&
3592  (N1.hasOneUse() || isConstantOrConstantVector(B, /*NoOpaques=*/true))) {
3593  SDValue InvB =
3594  DAG.getNode(ISD::XOR, DL, VT, B, DAG.getAllOnesConstant(DL, VT));
3595  return DAG.getNode(ISD::AND, DL, VT, A, InvB);
3596  }
3597  }
3598 
3599  // fold (X - (-Y * Z)) -> (X + (Y * Z))
3600  if (N1.getOpcode() == ISD::MUL && N1.hasOneUse()) {
3601  if (N1.getOperand(0).getOpcode() == ISD::SUB &&
3602  isNullOrNullSplat(N1.getOperand(0).getOperand(0))) {
3603  SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
3604  N1.getOperand(0).getOperand(1),
3605  N1.getOperand(1));
3606  return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
3607  }
3608  if (N1.getOperand(1).getOpcode() == ISD::SUB &&
3609  isNullOrNullSplat(N1.getOperand(1).getOperand(0))) {
3610  SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
3611  N1.getOperand(0),
3612  N1.getOperand(1).getOperand(1));
3613  return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
3614  }
3615  }
3616 
3617  // If either operand of a sub is undef, the result is undef
3618  if (N0.isUndef())
3619  return N0;
3620  if (N1.isUndef())
3621  return N1;
3622 
3623  if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
3624  return V;
3625 
3626  if (SDValue V = foldAddSubOfSignBit(N, DAG))
3627  return V;
3628 
3629  if (SDValue V = foldAddSubMasked1(false, N0, N1, DAG, SDLoc(N)))
3630  return V;
3631 
3632  if (SDValue V = foldSubToUSubSat(VT, N))
3633  return V;
3634 
3635  // (x - y) - 1 -> add (xor y, -1), x
3636  if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() && isOneOrOneSplat(N1)) {
3637  SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1),
3638  DAG.getAllOnesConstant(DL, VT));
3639  return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0));
3640  }
3641 
3642  // Look for:
3643  // sub y, (xor x, -1)
3644  // And if the target does not like this form then turn into:
3645  // add (add x, y), 1
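  // This holds because (xor x, -1) == -x - 1, so y - (xor x, -1) == x + y + 1.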
3646  if (TLI.preferIncOfAddToSubOfNot(VT) && N1.hasOneUse() && isBitwiseNot(N1)) {
3647  SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(0));
3648  return DAG.getNode(ISD::ADD, DL, VT, Add, DAG.getConstant(1, DL, VT));
3649  }
3650 
3651  // Hoist one-use addition by non-opaque constant:
3652  // (x + C) - y -> (x - y) + C
3653  if (N0.getOpcode() == ISD::ADD && N0.hasOneUse() &&
3654  isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
3655  SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
3656  return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(1));
3657  }
3658  // y - (x + C) -> (y - x) - C
3659  if (N1.getOpcode() == ISD::ADD && N1.hasOneUse() &&
3660  isConstantOrConstantVector(N1.getOperand(1), /*NoOpaques=*/true)) {
3661  SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(0));
3662  return DAG.getNode(ISD::SUB, DL, VT, Sub, N1.getOperand(1));
3663  }
3664  // (x - C) - y -> (x - y) - C
3665  // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
3666  if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
3667  isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
3668  SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
3669  return DAG.getNode(ISD::SUB, DL, VT, Sub, N0.getOperand(1));
3670  }
3671  // (C - x) - y -> C - (x + y)
3672  if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
3673  isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
3674  SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1), N1);
3675  return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), Add);
3676  }
3677 
3678  // If the target's bool is represented as 0/-1, prefer to make this 'add 0/-1'
3679  // rather than 'sub 0/1' (the sext should get folded).
3680  // sub X, (zext i1 Y) --> add X, (sext i1 Y)
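  // zext i1 Y is 0 or 1 and sext i1 Y is 0 or -1, so subtracting the former
  // is the same as adding the latter.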
3681  if (N1.getOpcode() == ISD::ZERO_EXTEND &&
3682  N1.getOperand(0).getScalarValueSizeInBits() == 1 &&
3683  TLI.getBooleanContents(VT) ==
3684  TargetLowering::ZeroOrNegativeOneBooleanContent) {
3685  SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N1.getOperand(0));
3686  return DAG.getNode(ISD::ADD, DL, VT, N0, SExt);
3687  }
3688 
3689  // fold Y = sra (X, size(X)-1); sub (xor (X, Y), Y) -> (abs X)
3690  if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
3691  if (N0.getOpcode() == ISD::XOR && N1.getOpcode() == ISD::SRA) {
3692  SDValue X0 = N0.getOperand(0), X1 = N0.getOperand(1);
3693  SDValue S0 = N1.getOperand(0);
3694  if ((X0 == S0 && X1 == N1) || (X0 == N1 && X1 == S0))
3695  if (ConstantSDNode *C = isConstOrConstSplat(N1.getOperand(1)))
3696  if (C->getAPIntValue() == (VT.getScalarSizeInBits() - 1))
3697  return DAG.getNode(ISD::ABS, SDLoc(N), VT, S0);
3698  }
3699  }
3700 
3701  // If the relocation model supports it, consider symbol offsets.
3702  if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
3703  if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
3704  // fold (sub Sym, c) -> Sym-c
3705  if (N1C && GA->getOpcode() == ISD::GlobalAddress)
3706  return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT,
3707  GA->getOffset() -
3708  (uint64_t)N1C->getSExtValue());
3709  // fold (sub Sym+c1, Sym+c2) -> c1-c2
3710  if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
3711  if (GA->getGlobal() == GB->getGlobal())
3712  return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
3713  DL, VT);
3714  }
3715 
3716  // sub X, (sextinreg Y i1) -> add X, (and Y 1)
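  // The sign-extended i1 is 0 or -1, and subtracting 0 or -1 is the same as
  // adding 0 or 1, i.e. (and Y, 1).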
3717  if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
3718  VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
3719  if (TN->getVT() == MVT::i1) {
3720  SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
3721  DAG.getConstant(1, DL, VT));
3722  return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt);
3723  }
3724  }
3725 
3726  // canonicalize (sub X, (vscale * C)) to (add X, (vscale * -C))
3727  if (N1.getOpcode() == ISD::VSCALE && N1.hasOneUse()) {
3728  const APInt &IntVal = N1.getConstantOperandAPInt(0);
3729  return DAG.getNode(ISD::ADD, DL, VT, N0, DAG.getVScale(DL, VT, -IntVal));
3730  }
3731 
3732  // canonicalize (sub X, step_vector(C)) to (add X, step_vector(-C))
3733  if (N1.getOpcode() == ISD::STEP_VECTOR && N1.hasOneUse()) {
3734  APInt NewStep = -N1.getConstantOperandAPInt(0);
3735  return DAG.getNode(ISD::ADD, DL, VT, N0,
3736  DAG.getStepVector(DL, VT, NewStep));
3737  }
3738 
3739  // Prefer an add for more folding potential and possibly better codegen:
3740  // sub N0, (lshr N10, width-1) --> add N0, (ashr N10, width-1)
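  // The logical shift yields 0 or 1 while the arithmetic shift yields 0 or -1,
  // so subtracting the former equals adding the latter.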
3741  if (!LegalOperations && N1.getOpcode() == ISD::SRL && N1.hasOneUse()) {
3742  SDValue ShAmt = N1.getOperand(1);
3743  ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
3744  if (ShAmtC &&
3745  ShAmtC->getAPIntValue() == (N1.getScalarValueSizeInBits() - 1)) {
3746  SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0), ShAmt);
3747  return DAG.getNode(ISD::ADD, DL, VT, N0, SRA);
3748  }
3749  }
3750 
3751  // As with the previous fold, prefer add for more folding potential.
3752  // Subtracting SMIN/0 is the same as adding SMIN/0:
3753  // N0 - (X << BW-1) --> N0 + (X << BW-1)
3754  if (N1.getOpcode() == ISD::SHL) {
3755  ConstantSDNode *ShlC = isConstOrConstSplat(N1.getOperand(1));
3756  if (ShlC && ShlC->getAPIntValue() == VT.getScalarSizeInBits() - 1)
3757  return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
3758  }
3759 
3760  // (sub (subcarry X, 0, Carry), Y) -> (subcarry X, Y, Carry)
3761  if (N0.getOpcode() == ISD::SUBCARRY && isNullConstant(N0.getOperand(1)) &&
3762  N0.getResNo() == 0 && N0.hasOneUse())
3763  return DAG.getNode(ISD::SUBCARRY, DL, N0->getVTList(),
3764  N0.getOperand(0), N1, N0.getOperand(2));
3765 
3766  if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT)) {
3767  // (sub Carry, X) -> (addcarry (sub 0, X), 0, Carry)
3768  if (SDValue Carry = getAsCarry(TLI, N0)) {
3769  SDValue X = N1;
3770  SDValue Zero = DAG.getConstant(0, DL, VT);
3771  SDValue NegX = DAG.getNode(ISD::SUB, DL, VT, Zero, X);
3772  return DAG.getNode(ISD::ADDCARRY, DL,
3773  DAG.getVTList(VT, Carry.getValueType()), NegX, Zero,
3774  Carry);
3775  }
3776  }
3777 
3778  // If there's no chance of borrowing from adjacent bits, then sub is xor:
3779  // sub C0, X --> xor X, C0
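  // For example, 0x0F - X == 0x0F ^ X whenever X is known to have no bits set
  // outside the low nibble, because no bit position can borrow.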
3780  if (ConstantSDNode *C0 = isConstOrConstSplat(N0)) {
3781  if (!C0->isOpaque()) {
3782  const APInt &C0Val = C0->getAPIntValue();
3783  const APInt &MaybeOnes = ~DAG.computeKnownBits(N1).Zero;
3784  if ((C0Val - MaybeOnes) == (C0Val ^ MaybeOnes))
3785  return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
3786  }
3787  }
3788 
3789  return SDValue();
3790 }
3791 
3792 SDValue DAGCombiner::visitSUBSAT(SDNode *N) {
3793  SDValue N0 = N->getOperand(0);
3794  SDValue N1 = N->getOperand(1);
3795  EVT VT = N0.getValueType();
3796  SDLoc DL(N);
3797 
3798  // fold (sub_sat x, undef) -> 0
3799  if (N0.isUndef() || N1.isUndef())
3800  return DAG.getConstant(0, DL, VT);
3801 
3802  // fold (sub_sat x, x) -> 0
3803  if (N0 == N1)
3804  return DAG.getConstant(0, DL, VT);
3805 
3806  // fold (sub_sat c1, c2) -> c3
3807  if (SDValue C = DAG.FoldConstantArithmetic(N->getOpcode(), DL, VT, {N0, N1}))
3808  return C;
3809 
3810  // fold vector ops
3811  if (VT.isVector()) {
3812  if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
3813  return FoldedVOp;
3814 
3815  // fold (sub_sat x, 0) -> x, vector edition
3816  if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
3817  return N0;
3818  }
3819 
3820  // fold (sub_sat x, 0) -> x
3821  if (isNullConstant(N1))
3822  return N0;
3823 
3824  return SDValue();
3825 }
3826 
3827 SDValue DAGCombiner::visitSUBC(SDNode *N) {
3828  SDValue N0 = N->getOperand(0);
3829  SDValue N1 = N->getOperand(1);
3830  EVT VT = N0.getValueType();
3831  SDLoc DL(N);
3832 
3833  // If the flag result is dead, turn this into an SUB.
3834  if (!N->hasAnyUseOfValue(1))
3835  return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
3836  DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3837 
3838  // fold (subc x, x) -> 0 + no borrow
3839  if (N0 == N1)
3840  return CombineTo(N, DAG.getConstant(0, DL, VT),
3841  DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3842 
3843  // fold (subc x, 0) -> x + no borrow
3844  if (isNullConstant(N1))
3845  return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3846 
3847  // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
3848  if (isAllOnesConstant(N0))
3849  return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
3850  DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3851 
3852  return SDValue();
3853 }
3854 
3855 SDValue DAGCombiner::visitSUBO(SDNode *N) {
3856  SDValue N0 = N->getOperand(0);
3857  SDValue N1 = N->getOperand(1);
3858  EVT VT = N0.getValueType();
3859  bool IsSigned = (ISD::SSUBO == N->getOpcode());
3860 
3861  EVT CarryVT = N->getValueType(1);
3862  SDLoc DL(N);
3863 
3864  // If the flag result is dead, turn this into an SUB.
3865  if (!N->hasAnyUseOfValue(1))
3866  return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
3867  DAG.getUNDEF(CarryVT));
3868 
3869  // fold (subo x, x) -> 0 + no borrow
3870  if (N0 == N1)
3871  return CombineTo(N, DAG.getConstant(0, DL, VT),
3872  DAG.getConstant(0, DL, CarryVT));
3873 
3874  ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
3875 
3876  // fold (subo x, c) -> (addo x, -c)
3877  if (IsSigned && N1C && !N1C->getAPIntValue().isMinSignedValue()) {
3878  return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0,
3879  DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
3880  }
3881 
3882  // fold (subo x, 0) -> x + no borrow
3883  if (isNullOrNullSplat(N1))
3884  return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
3885 
3886  // Canonicalize (usubo -1, x) -> ~x, i.e. (xor x, -1) + no borrow
3887  if (!IsSigned && isAllOnesOrAllOnesSplat(N0))
3888  return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
3889  DAG.getConstant(0, DL, CarryVT));
3890 
3891  return SDValue();
3892 }
3893 
3894 SDValue DAGCombiner::visitSUBE(SDNode *N) {
3895  SDValue N0 = N->getOperand(0);
3896  SDValue N1 = N->getOperand(1);
3897  SDValue CarryIn = N->getOperand(2);
3898 
3899  // fold (sube x, y, false) -> (subc x, y)
3900  if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
3901  return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1);
3902 
3903  return SDValue();
3904 }
3905 
3906 SDValue DAGCombiner::visitSUBCARRY(SDNode *N) {
3907  SDValue N0 = N->getOperand(0);
3908  SDValue N1 = N->getOperand(1);
3909  SDValue CarryIn = N->getOperand(2);
3910 
3911  // fold (subcarry x, y, false) -> (usubo x, y)
3912  if (isNullConstant(CarryIn)) {
3913  if (!LegalOperations ||
3914  TLI.isOperationLegalOrCustom(ISD::USUBO, N->getValueType(0)))
3915  return DAG.getNode(ISD::USUBO, SDLoc(N), N->getVTList(), N0, N1);
3916  }
3917 
3918  return SDValue();
3919 }
3920 
3921 SDValue DAGCombiner::visitSSUBO_CARRY(SDNode *N) {
3922  SDValue N0 = N->getOperand(0);
3923  SDValue N1 = N->getOperand(1);
3924  SDValue CarryIn = N->getOperand(2);
3925 
3926  // fold (ssubo_carry x, y, false) -> (ssubo x, y)
3927  if (isNullConstant(CarryIn)) {
3928  if (!LegalOperations ||
3929  TLI.isOperationLegalOrCustom(ISD::SSUBO, N->getValueType(0)))
3930  return DAG.getNode(ISD::SSUBO, SDLoc(N), N->getVTList(), N0, N1);
3931  }
3932 
3933  return SDValue();
3934 }
3935 
3936 // Notice that "mulfix" can be any of SMULFIX, SMULFIXSAT, UMULFIX and
3937 // UMULFIXSAT here.
3938 SDValue DAGCombiner::visitMULFIX(SDNode *N) {
3939  SDValue N0 = N->getOperand(0);
3940  SDValue N1 = N->getOperand(1);
3941  SDValue Scale = N->getOperand(2);
3942  EVT VT = N0.getValueType();
3943 
3944  // fold (mulfix x, undef, scale) -> 0
3945  if (N0.isUndef() || N1.isUndef())
3946  return DAG.getConstant(0, SDLoc(N), VT);
3947 
3948  // Canonicalize constant to RHS (vector doesn't have to splat)
3949  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3950  !DAG.isConstantIntBuildVectorOrConstantInt(N1))
3951  return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0, Scale);
3952 
3953  // fold (mulfix x, 0, scale) -> 0
3954  if (isNullConstant(N1))
3955  return DAG.getConstant(0, SDLoc(N), VT);
3956 
3957  return SDValue();
3958 }
3959 
3960 SDValue DAGCombiner::visitMUL(SDNode *N) {
3961  SDValue N0 = N->getOperand(0);
3962  SDValue N1 = N->getOperand(1);
3963  EVT VT = N0.getValueType();
3964  SDLoc DL(N);
3965 
3966  // fold (mul x, undef) -> 0
3967  if (N0.isUndef() || N1.isUndef())
3968  return DAG.getConstant(0, DL, VT);
3969 
3970  // fold (mul c1, c2) -> c1*c2
3971  if (SDValue C = DAG.FoldConstantArithmetic(ISD::MUL, DL, VT, {N0, N1}))
3972  return C;
3973 
3974  // canonicalize constant to RHS (vector doesn't have to splat)
3975  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3976  !DAG.isConstantIntBuildVectorOrConstantInt(N1))
3977  return DAG.getNode(ISD::MUL, DL, VT, N1, N0);
3978 
3979  bool N1IsConst = false;
3980  bool N1IsOpaqueConst = false;
3981  APInt ConstValue1;
3982 
3983  // fold vector ops
3984  if (VT.isVector()) {
3985  if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
3986  return FoldedVOp;
3987 
3988  N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1);
3989  assert((!N1IsConst ||
3990  ConstValue1.getBitWidth() == VT.getScalarSizeInBits()) &&
3991  "Splat APInt should be element width");
3992  } else {
3993  N1IsConst = isa<ConstantSDNode>(N1);
3994  if (N1IsConst) {
3995  ConstValue1 = cast<ConstantSDNode>(N1)->getAPIntValue();
3996  N1IsOpaqueConst = cast<ConstantSDNode>(N1)->isOpaque();
3997  }
3998  }
3999 
4000  // fold (mul x, 0) -> 0
4001  if (N1IsConst && ConstValue1.isZero())
4002  return N1;
4003 
4004  // fold (mul x, 1) -> x
4005  if (N1IsConst && ConstValue1.isOne())
4006  return N0;
4007 
4008  if (SDValue NewSel = foldBinOpIntoSelect(N))
4009  return NewSel;
4010 
4011  // fold (mul x, -1) -> 0-x
4012  if (N1IsConst && ConstValue1.isAllOnes())
4013  return DAG.getNegative(N0, DL, VT);
4014 
4015  // fold (mul x, (1 << c)) -> x << c
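  // For example, x * 8 --> x << 3.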
4016  if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
4017  DAG.isKnownToBeAPowerOfTwo(N1) &&
4018  (!VT.isVector() || Level <= AfterLegalizeVectorOps)) {
4019  SDValue LogBase2 = BuildLogBase2(N1, DL);
4020  EVT ShiftVT = getShiftAmountTy(N0.getValueType());
4021  SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
4022  return DAG.getNode(ISD::SHL, DL, VT, N0, Trunc);
4023  }
4024 
4025  // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
4026  if (N1IsConst && !N1IsOpaqueConst && ConstValue1.isNegatedPowerOf2()) {
4027  unsigned Log2Val = (-ConstValue1).logBase2();
4028  // FIXME: If the input is something that is easily negated (e.g. a
4029  // single-use add), we should put the negate there.
4030  return DAG.getNode(ISD::SUB, DL, VT,
4031  DAG.getConstant(0, DL, VT),
4032  DAG.getNode(ISD::SHL, DL, VT, N0,
4033  DAG.getConstant(Log2Val, DL,
4034  getShiftAmountTy(N0.getValueType()))));
4035  }
4036 
4037  // Attempt to reuse an existing umul_lohi/smul_lohi node, but only if the
4038  // hi result is in use in case we hit this mid-legalization.
4039  for (unsigned LoHiOpc : {ISD::UMUL_LOHI, ISD::SMUL_LOHI}) {
4040  if (!LegalOperations || TLI.isOperationLegalOrCustom(LoHiOpc, VT)) {
4041  SDVTList LoHiVT = DAG.getVTList(VT, VT);
4042  // TODO: Can we match commutable operands with getNodeIfExists?
4043  if (SDNode *LoHi = DAG.getNodeIfExists(LoHiOpc, LoHiVT, {N0, N1}))
4044  if (LoHi->hasAnyUseOfValue(1))
4045  return SDValue(LoHi, 0);
4046  if (SDNode *LoHi = DAG.getNodeIfExists(LoHiOpc, LoHiVT, {N1, N0}))
4047  if (LoHi->hasAnyUseOfValue(1))
4048  return SDValue(LoHi, 0);
4049  }
4050  }
4051 
4052  // Try to transform:
4053  // (1) multiply-by-(power-of-2 +/- 1) into shift and add/sub.
4054  // mul x, (2^N + 1) --> add (shl x, N), x
4055  // mul x, (2^N - 1) --> sub (shl x, N), x
4056  // Examples: x * 33 --> (x << 5) + x
4057  // x * 15 --> (x << 4) - x
4058  // x * -33 --> -((x << 5) + x)
4059  // x * -15 --> -((x << 4) - x) ; this reduces --> x - (x << 4)
4060  // (2) multiply-by-(power-of-2 +/- power-of-2) into shifts and add/sub.
4061  // mul x, (2^N + 2^M) --> (add (shl x, N), (shl x, M))
4062  // mul x, (2^N - 2^M) --> (sub (shl x, N), (shl x, M))
4063  // Examples: x * 0x8800 --> (x << 15) + (x << 11)
4064  // x * 0xf800 --> (x << 16) - (x << 11)
4065  // x * -0x8800 --> -((x << 15) + (x << 11))
4066  // x * -0xf800 --> -((x << 16) - (x << 11)) ; (x << 11) - (x << 16)
4067  if (N1IsConst && TLI.decomposeMulByConstant(*DAG.getContext(), VT, N1)) {
4068  // TODO: We could handle more general decomposition of any constant by
4069  // having the target set a limit on number of ops and making a
4070  // callback to determine that sequence (similar to sqrt expansion).
4071  unsigned MathOp = ISD::DELETED_NODE;
4072  APInt MulC = ConstValue1.abs();
4073  // The constant `2` should be treated as (2^0 + 1).
4074  unsigned TZeros = MulC == 2 ? 0 : MulC.countTrailingZeros();
4075  MulC.lshrInPlace(TZeros);
4076  if ((MulC - 1).isPowerOf2())
4077  MathOp = ISD::ADD;
4078  else if ((MulC + 1).isPowerOf2())
4079  MathOp = ISD::SUB;
4080 
4081  if (MathOp != ISD::DELETED_NODE) {
4082  unsigned ShAmt =
4083  MathOp == ISD::ADD ? (MulC - 1).logBase2() : (MulC + 1).logBase2();
4084  ShAmt += TZeros;
4085  assert(ShAmt < VT.getScalarSizeInBits() &&
4086  "multiply-by-constant generated out of bounds shift");
4087  SDValue Shl =
4088  DAG.getNode(ISD::SHL, DL, VT, N0, DAG.getConstant(ShAmt, DL, VT));
4089  SDValue R =
4090  TZeros ? DAG.getNode(MathOp, DL, VT, Shl,
4091  DAG.getNode(ISD::SHL, DL, VT, N0,
4092  DAG.getConstant(TZeros, DL, VT)))
4093  : DAG.getNode(MathOp, DL, VT, Shl, N0);
4094  if (ConstValue1.isNegative())
4095  R = DAG.getNegative(R, DL, VT);
4096  return R;
4097  }
4098  }
4099 
4100  // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
4101  if (N0.getOpcode() == ISD::SHL) {
4102  SDValue N01 = N0.getOperand(1);
4103  if (SDValue C3 = DAG.FoldConstantArithmetic(ISD::SHL, DL, VT, {N1, N01}))
4104  return DAG.getNode(ISD::MUL, DL, VT, N0.getOperand(0), C3);
4105  }
4106 
4107  // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
4108  // use.
4109  {
4110  SDValue Sh, Y;
4111 
4112  // Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)).
4113  if (N0.getOpcode() == ISD::SHL &&
4114  isConstantOrConstantVector(N0.getOperand(1)) && N0->hasOneUse()) {
4115  Sh = N0; Y = N1;
4116  } else if (N1.getOpcode() == ISD::SHL &&
4117  isConstantOrConstantVector(N1.getOperand(1)) &&
4118  N1->hasOneUse()) {
4119  Sh = N1; Y = N0;
4120  }
4121 
4122  if (Sh.getNode()) {
4123  SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, Sh.getOperand(0), Y);
4124  return DAG.getNode(ISD::SHL, DL, VT, Mul, Sh.getOperand(1));
4125  }
4126  }
4127 
4128  // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
4129  if (DAG.isConstantIntBuildVectorOrConstantInt(N1) &&
4130  N0.getOpcode() == ISD::ADD &&
4131  DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)) &&
4132  isMulAddWithConstProfitable(N, N0, N1))
4133  return DAG.getNode(
4134  ISD::ADD, DL, VT,
4135  DAG.getNode(ISD::MUL, SDLoc(N0), VT, N0.getOperand(0), N1),
4136  DAG.getNode(ISD::MUL, SDLoc(N1), VT, N0.getOperand(1), N1));
4137 
4138  // Fold (mul (vscale * C0), C1) to (vscale * (C0 * C1)).
4139  ConstantSDNode *NC1 = isConstOrConstSplat(N1);
4140  if (N0.getOpcode() == ISD::VSCALE && NC1) {
4141  const APInt &C0 = N0.getConstantOperandAPInt(0);
4142  const APInt &C1 = NC1->getAPIntValue();
4143  return DAG.getVScale(DL, VT, C0 * C1);
4144  }
4145 
4146  // Fold (mul step_vector(C0), C1) to (step_vector(C0 * C1)).
4147  APInt MulVal;
4148  if (N0.getOpcode() == ISD::STEP_VECTOR &&
4149  ISD::isConstantSplatVector(N1.getNode(), MulVal)) {
4150  const APInt &C0 = N0.getConstantOperandAPInt(0);
4151  APInt NewStep = C0 * MulVal;
4152  return DAG.getStepVector(DL, VT, NewStep);
4153  }
4154 
4155  // Fold (mul x, 0/undef) -> 0 and
4156  // (mul x, 1) -> x
4157  // elementwise into and(x, mask).
4158  // We can replace vectors with '0' and '1' factors with a clearing mask.
4159  if (VT.isFixedLengthVector()) {
4160  unsigned NumElts = VT.getVectorNumElements();
4161  SmallBitVector ClearMask;
4162  ClearMask.reserve(NumElts);
4163  auto IsClearMask = [&ClearMask](ConstantSDNode *V) {
4164  if (!V || V->isZero()) {
4165  ClearMask.push_back(true);
4166  return true;
4167  }
4168  ClearMask.push_back(false);
4169  return V->isOne();
4170  };
4171  if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::AND, VT)) &&
4172  ISD::matchUnaryPredicate(N1, IsClearMask, /*AllowUndefs*/ true)) {
4173  assert(N1.getOpcode() == ISD::BUILD_VECTOR && "Unknown constant vector");
4174  EVT LegalSVT = N1.getOperand(0).getValueType();
4175  SDValue Zero = DAG.getConstant(0, DL, LegalSVT);
4176  SDValue AllOnes = DAG.getAllOnesConstant(DL, LegalSVT);
4177  SmallVector<SDValue, 16> Mask(NumElts, AllOnes);
4178  for (unsigned I = 0; I != NumElts; ++I)
4179  if (ClearMask[I])
4180  Mask[I] = Zero;
4181  return DAG.getNode(ISD::AND, DL, VT, N0, DAG.getBuildVector(VT, DL, Mask));
4182  }
4183  }
4184 
4185  // reassociate mul
4186  if (SDValue RMUL = reassociateOps(ISD::MUL, DL, N0, N1, N->getFlags()))
4187  return RMUL;
4188 
4189  // Simplify the operands using demanded-bits information.
4190  if (SimplifyDemandedBits(SDValue(N, 0)))
4191  return SDValue(N, 0);
4192 
4193  return SDValue();
4194 }
4195 
4196 /// Return true if divmod libcall is available.
4197 static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
4198  const TargetLowering &TLI) {
4199  RTLIB::Libcall LC;
4200  EVT NodeType = Node->getValueType(0);
4201  if (!NodeType.isSimple())
4202  return false;
4203  switch (NodeType.getSimpleVT().SimpleTy) {
4204  default: return false; // No libcall for vector types.
4205  case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
4206  case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
4207  case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
4208  case MVT::i64: LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
4209  case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break;
4210  }
4211 
4212  return TLI.getLibcallName(LC) != nullptr;
4213 }
4214 
4215 /// Issue divrem if both quotient and remainder are needed.
4216 SDValue DAGCombiner::useDivRem(SDNode *Node) {
4217  if (Node->use_empty())
4218  return SDValue(); // This is a dead node, leave it alone.
4219 
4220  unsigned Opcode = Node->getOpcode();
4221  bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM);
4222  unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
4223 
4224  // DivMod lib calls can still work on non-legal types if using lib-calls.
4225  EVT VT = Node->getValueType(0);
4226  if (VT.isVector() || !VT.isInteger())
4227  return SDValue();
4228 
4229  if (!TLI.isTypeLegal(VT) && !TLI.isOperationCustom(DivRemOpc, VT))
4230  return SDValue();
4231 
4232  // If DIVREM is going to get expanded into a libcall,
4233  // but there is no libcall available, then don't combine.
4234  if (!TLI.isOperationLegalOrCustom(DivRemOpc, VT) &&
4235  !isDivRemLibcallAvailable(Node, isSigned, TLI))
4236  return SDValue();
4237 
4238  // If div is legal, it's better to do the normal expansion
4239  unsigned OtherOpcode = 0;
4240  if ((Opcode == ISD::SDIV) || (Opcode == ISD::UDIV)) {
4241  OtherOpcode = isSigned ? ISD::SREM : ISD::UREM;
4242  if (TLI.isOperationLegalOrCustom(Opcode, VT))
4243  return SDValue();
4244  } else {
4245  OtherOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
4246  if (TLI.isOperationLegalOrCustom(OtherOpcode, VT))
4247  return SDValue();
4248  }
4249 
4250  SDValue Op0 = Node->getOperand(0);
4251  SDValue Op1 = Node->getOperand(1);
4252  SDValue combined;
4253  for (SDNode *User : Op0->uses()) {
4254  if (User == Node || User->getOpcode() == ISD::DELETED_NODE ||
4255  User->use_empty())
4256  continue;
4257  // Convert the other matching node(s), too;
4258  // otherwise, the DIVREM may get target-legalized into something
4259  // target-specific that we won't be able to recognize.
4260  unsigned UserOpc = User->getOpcode();
4261  if ((UserOpc == Opcode || UserOpc == OtherOpcode || UserOpc == DivRemOpc) &&
4262  User->getOperand(0) == Op0 &&
4263  User->getOperand(1) == Op1) {
4264  if (!combined) {
4265  if (UserOpc == OtherOpcode) {
4266  SDVTList VTs = DAG.getVTList(VT, VT);
4267  combined = DAG.getNode(DivRemOpc, SDLoc(Node), VTs, Op0, Op1);
4268  } else if (UserOpc == DivRemOpc) {
4269  combined = SDValue(User, 0);
4270  } else {
4271  assert(UserOpc == Opcode);
4272  continue;
4273  }
4274  }
4275  if (UserOpc == ISD::SDIV || UserOpc == ISD::UDIV)
4276  CombineTo(User, combined);
4277  else if (UserOpc == ISD::SREM || UserOpc == ISD::UREM)
4278  CombineTo(User, combined.getValue(1));
4279  }
4280  }
4281  return combined;
4282 }
4283 
4284 static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) {
4285  SDValue N0 = N->getOperand(0);
4286  SDValue N1 = N->getOperand(1);
4287  EVT VT = N->getValueType(0);
4288  SDLoc DL(N);
4289 
4290  unsigned Opc = N->getOpcode();
4291  bool IsDiv = (ISD::SDIV == Opc) || (ISD::UDIV == Opc);
4292  ConstantSDNode *N1C = isConstOrConstSplat(N1);
4293 
4294  // X / undef -> undef
4295  // X % undef -> undef
4296  // X / 0 -> undef
4297  // X % 0 -> undef
4298  // NOTE: This includes vectors where any divisor element is zero/undef.
4299  if (DAG.isUndef(Opc, {N0, N1}))
4300  return DAG.getUNDEF(VT);
4301 
4302  // undef / X -> 0
4303  // undef % X -> 0
4304  if (N0.isUndef())
4305  return DAG.getConstant(0, DL, VT);
4306 
4307  // 0 / X -> 0
4308  // 0 % X -> 0
4309  ConstantSDNode *N0C = isConstOrConstSplat(N0);
4310  if (N0C && N0C->isZero())
4311  return N0;
4312 
4313  // X / X -> 1
4314  // X % X -> 0
4315  if (N0 == N1)
4316  return DAG.getConstant(IsDiv ? 1 : 0, DL, VT);
4317 
4318  // X / 1 -> X
4319  // X % 1 -> 0
4320  // If this is a boolean op (single-bit element type), we can't have
4321  // division-by-zero or remainder-by-zero, so assume the divisor is 1.
4322  // TODO: Similarly, if we're zero-extending a boolean divisor, then assume
4323  // it's a 1.
4324  if ((N1C && N1C->isOne()) || (VT.getScalarType() == MVT::i1))
4325  return IsDiv ? N0 : DAG.getConstant(0, DL, VT);
4326 
4327  return SDValue();
4328 }
4329 
4330 SDValue DAGCombiner::visitSDIV(SDNode *N) {
4331  SDValue N0 = N->getOperand(0);
4332  SDValue N1 = N->getOperand(1);
4333  EVT VT = N->getValueType(0);
4334  EVT CCVT = getSetCCResultType(VT);
4335  SDLoc DL(N);
4336 
4337  // fold (sdiv c1, c2) -> c1/c2
4338  if (SDValue C = DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, {N0, N1}))
4339  return C;
4340 
4341  // fold vector ops
4342  if (VT.isVector())
4343  if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4344  return FoldedVOp;
4345 
4346  // fold (sdiv X, -1) -> 0-X
4347  ConstantSDNode *N1C = isConstOrConstSplat(N1);
4348  if (N1C && N1C->isAllOnes())
4349  return DAG.getNegative(N0, DL, VT);
4350 
4351  // fold (sdiv X, MIN_SIGNED) -> select(X == MIN_SIGNED, 1, 0)
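  // Only MIN_SIGNED itself has a large enough magnitude for the quotient to be
  // nonzero, and MIN_SIGNED / MIN_SIGNED == 1; every other dividend yields 0.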
4352  if (N1C && N1C->getAPIntValue().isMinSignedValue())
4353  return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
4354  DAG.getConstant(1, DL, VT),
4355  DAG.getConstant(0, DL, VT));
4356 
4357  if (SDValue V = simplifyDivRem(N, DAG))
4358  return V;
4359 
4360  if (SDValue NewSel = foldBinOpIntoSelect(N))
4361  return NewSel;
4362 
4363  // If we know the sign bits of both operands are zero, strength reduce to a
4364  // udiv instead. Handles (X&15) /s 4 -> X&15 >> 2
4365  if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
4366  return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1);
4367 
4368  if (SDValue V = visitSDIVLike(N0, N1, N)) {
4369  // If the corresponding remainder node exists, update its users with
4370  // (Dividend - (Quotient * Divisor).
4371  if (SDNode *RemNode = DAG.getNodeIfExists(ISD::SREM, N->getVTList(),
4372  { N0, N1 })) {
4373  SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
4374  SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
4375  AddToWorklist(Mul.getNode());
4376  AddToWorklist(Sub.getNode());
4377  CombineTo(RemNode, Sub);
4378  }
4379  return V;
4380  }
4381 
4382  // sdiv, srem -> sdivrem
4383  // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
4384  // true. Otherwise, we break the simplification logic in visitREM().
4385  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4386  if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
4387  if (SDValue DivRem = useDivRem(N))
4388  return DivRem;
4389 
4390  return SDValue();
4391 }
4392 
4393 static bool isDivisorPowerOfTwo(SDValue Divisor) {
4394  // Helper for determining whether a value is a power-2 constant scalar or a
4395  // vector of such elements.
4396  auto IsPowerOfTwo = [](ConstantSDNode *C) {
4397  if (C->isZero() || C->isOpaque())
4398  return false;
4399  if (C->getAPIntValue().isPowerOf2())
4400  return true;
4401  if (C->getAPIntValue().isNegatedPowerOf2())
4402  return true;
4403  return false;
4404  };
4405 
4406  return ISD::matchUnaryPredicate(Divisor, IsPowerOfTwo);
4407 }
4408 
4409 SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) {
4410  SDLoc DL(N);
4411  EVT VT = N->getValueType(0);
4412  EVT CCVT = getSetCCResultType(VT);
4413  unsigned BitWidth = VT.getScalarSizeInBits();
4