LLVM  10.0.0svn
DAGCombiner.cpp
Go to the documentation of this file.
1 //===- DAGCombiner.cpp - Implement a DAG node combiner --------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass combines dag nodes to form fewer, simpler DAG nodes. It can be run
10 // both before and after the DAG is legalized.
11 //
12 // This pass is not a substitute for the LLVM IR instcombine pass. This pass is
13 // primarily intended to handle simplification opportunities that are implicit
14 // in the LLVM IR and exposed by the various codegen lowering phases.
15 //
16 //===----------------------------------------------------------------------===//
17 
18 #include "llvm/ADT/APFloat.h"
19 #include "llvm/ADT/APInt.h"
20 #include "llvm/ADT/ArrayRef.h"
21 #include "llvm/ADT/DenseMap.h"
22 #include "llvm/ADT/IntervalMap.h"
23 #include "llvm/ADT/None.h"
24 #include "llvm/ADT/Optional.h"
25 #include "llvm/ADT/STLExtras.h"
26 #include "llvm/ADT/SetVector.h"
28 #include "llvm/ADT/SmallPtrSet.h"
29 #include "llvm/ADT/SmallSet.h"
30 #include "llvm/ADT/SmallVector.h"
31 #include "llvm/ADT/Statistic.h"
48 #include "llvm/IR/Attributes.h"
49 #include "llvm/IR/Constant.h"
50 #include "llvm/IR/DataLayout.h"
51 #include "llvm/IR/DerivedTypes.h"
52 #include "llvm/IR/Function.h"
53 #include "llvm/IR/LLVMContext.h"
54 #include "llvm/IR/Metadata.h"
55 #include "llvm/Support/Casting.h"
56 #include "llvm/Support/CodeGen.h"
58 #include "llvm/Support/Compiler.h"
59 #include "llvm/Support/Debug.h"
61 #include "llvm/Support/KnownBits.h"
67 #include <algorithm>
68 #include <cassert>
69 #include <cstdint>
70 #include <functional>
71 #include <iterator>
72 #include <string>
73 #include <tuple>
74 #include <utility>
75 
76 using namespace llvm;
77 
78 #define DEBUG_TYPE "dagcombine"
79 
80 STATISTIC(NodesCombined , "Number of dag nodes combined");
81 STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
82 STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
83 STATISTIC(OpsNarrowed , "Number of load/op/store narrowed");
84 STATISTIC(LdStFP2Int , "Number of fp load/store pairs transformed to int");
85 STATISTIC(SlicedLoads, "Number of load sliced");
86 STATISTIC(NumFPLogicOpsConv, "Number of logic ops converted to fp ops");
87 
88 static cl::opt<bool>
89 CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
90  cl::desc("Enable DAG combiner's use of IR alias analysis"));
91 
92 static cl::opt<bool>
93 UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true),
94  cl::desc("Enable DAG combiner's use of TBAA"));
95 
96 #ifndef NDEBUG
98 CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden,
99  cl::desc("Only use DAG-combiner alias analysis in this"
100  " function"));
101 #endif
102 
103 /// Hidden option to stress test load slicing, i.e., when this option
104 /// is enabled, load slicing bypasses most of its profitability guards.
105 static cl::opt<bool>
106 StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
107  cl::desc("Bypass the profitability model of load slicing"),
108  cl::init(false));
109 
110 static cl::opt<bool>
111  MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
112  cl::desc("DAG combiner may split indexing from loads"));
113 
115  "combiner-tokenfactor-inline-limit", cl::Hidden, cl::init(2048),
116  cl::desc("Limit the number of operands to inline for Token Factors"));
117 
118 namespace {
119 
120  class DAGCombiner {
121  SelectionDAG &DAG;
122  const TargetLowering &TLI;
124  CodeGenOpt::Level OptLevel;
125  bool LegalOperations = false;
126  bool LegalTypes = false;
127  bool ForCodeSize;
128 
129  /// Worklist of all of the nodes that need to be simplified.
130  ///
131  /// This must behave as a stack -- new nodes to process are pushed onto the
132  /// back and when processing we pop off of the back.
133  ///
134  /// The worklist will not contain duplicates but may contain null entries
135  /// due to nodes being deleted from the underlying DAG.
136  SmallVector<SDNode *, 64> Worklist;
137 
138  /// Mapping from an SDNode to its position on the worklist.
139  ///
140  /// This is used to find and remove nodes from the worklist (by nulling
141  /// them) when they are deleted from the underlying DAG. It relies on
142  /// stable indices of nodes within the worklist.
143  DenseMap<SDNode *, unsigned> WorklistMap;
144  /// This records all nodes attempted to add to the worklist since we
145  /// considered a new worklist entry. As we keep do not add duplicate nodes
146  /// in the worklist, this is different from the tail of the worklist.
147  SmallSetVector<SDNode *, 32> PruningList;
148 
149  /// Set of nodes which have been combined (at least once).
150  ///
151  /// This is used to allow us to reliably add any operands of a DAG node
152  /// which have not yet been combined to the worklist.
153  SmallPtrSet<SDNode *, 32> CombinedNodes;
154 
155  // AA - Used for DAG load/store alias analysis.
156  AliasAnalysis *AA;
157 
158  /// When an instruction is simplified, add all users of the instruction to
159  /// the work lists because they might get more simplified now.
160  void AddUsersToWorklist(SDNode *N) {
161  for (SDNode *Node : N->uses())
162  AddToWorklist(Node);
163  }
164 
165  // Prune potentially dangling nodes. This is called after
166  // any visit to a node, but should also be called during a visit after any
167  // failed combine which may have created a DAG node.
168  void clearAddedDanglingWorklistEntries() {
169  // Check any nodes added to the worklist to see if they are prunable.
170  while (!PruningList.empty()) {
171  auto *N = PruningList.pop_back_val();
172  if (N->use_empty())
173  recursivelyDeleteUnusedNodes(N);
174  }
175  }
176 
177  SDNode *getNextWorklistEntry() {
178  // Before we do any work, remove nodes that are not in use.
179  clearAddedDanglingWorklistEntries();
180  SDNode *N = nullptr;
181  // The Worklist holds the SDNodes in order, but it may contain null
182  // entries.
183  while (!N && !Worklist.empty()) {
184  N = Worklist.pop_back_val();
185  }
186 
187  if (N) {
188  bool GoodWorklistEntry = WorklistMap.erase(N);
189  (void)GoodWorklistEntry;
190  assert(GoodWorklistEntry &&
191  "Found a worklist entry without a corresponding map entry!");
192  }
193  return N;
194  }
195 
196  /// Call the node-specific routine that folds each particular type of node.
197  SDValue visit(SDNode *N);
198 
199  public:
200  DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOpt::Level OL)
201  : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes),
202  OptLevel(OL), AA(AA) {
203  ForCodeSize = DAG.getMachineFunction().getFunction().hasOptSize();
204 
205  MaximumLegalStoreInBits = 0;
206  for (MVT VT : MVT::all_valuetypes())
207  if (EVT(VT).isSimple() && VT != MVT::Other &&
208  TLI.isTypeLegal(EVT(VT)) &&
209  VT.getSizeInBits() >= MaximumLegalStoreInBits)
210  MaximumLegalStoreInBits = VT.getSizeInBits();
211  }
212 
213  void ConsiderForPruning(SDNode *N) {
214  // Mark this for potential pruning.
215  PruningList.insert(N);
216  }
217 
218  /// Add to the worklist making sure its instance is at the back (next to be
219  /// processed.)
220  void AddToWorklist(SDNode *N) {
222  "Deleted Node added to Worklist");
223 
224  // Skip handle nodes as they can't usefully be combined and confuse the
225  // zero-use deletion strategy.
226  if (N->getOpcode() == ISD::HANDLENODE)
227  return;
228 
229  ConsiderForPruning(N);
230 
231  if (WorklistMap.insert(std::make_pair(N, Worklist.size())).second)
232  Worklist.push_back(N);
233  }
234 
235  /// Remove all instances of N from the worklist.
236  void removeFromWorklist(SDNode *N) {
237  CombinedNodes.erase(N);
238  PruningList.remove(N);
239 
240  auto It = WorklistMap.find(N);
241  if (It == WorklistMap.end())
242  return; // Not in the worklist.
243 
244  // Null out the entry rather than erasing it to avoid a linear operation.
245  Worklist[It->second] = nullptr;
246  WorklistMap.erase(It);
247  }
248 
249  void deleteAndRecombine(SDNode *N);
250  bool recursivelyDeleteUnusedNodes(SDNode *N);
251 
252  /// Replaces all uses of the results of one DAG node with new values.
253  SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
254  bool AddTo = true);
255 
256  /// Replaces all uses of the results of one DAG node with new values.
257  SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
258  return CombineTo(N, &Res, 1, AddTo);
259  }
260 
261  /// Replaces all uses of the results of one DAG node with new values.
262  SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
263  bool AddTo = true) {
264  SDValue To[] = { Res0, Res1 };
265  return CombineTo(N, To, 2, AddTo);
266  }
267 
268  void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);
269 
270  private:
271  unsigned MaximumLegalStoreInBits;
272 
273  /// Check the specified integer node value to see if it can be simplified or
274  /// if things it uses can be simplified by bit propagation.
275  /// If so, return true.
276  bool SimplifyDemandedBits(SDValue Op) {
277  unsigned BitWidth = Op.getScalarValueSizeInBits();
279  return SimplifyDemandedBits(Op, DemandedBits);
280  }
281 
282  bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits) {
283  EVT VT = Op.getValueType();
284  unsigned NumElts = VT.isVector() ? VT.getVectorNumElements() : 1;
285  APInt DemandedElts = APInt::getAllOnesValue(NumElts);
286  return SimplifyDemandedBits(Op, DemandedBits, DemandedElts);
287  }
288 
289  /// Check the specified vector node value to see if it can be simplified or
290  /// if things it uses can be simplified as it only uses some of the
291  /// elements. If so, return true.
292  bool SimplifyDemandedVectorElts(SDValue Op) {
293  unsigned NumElts = Op.getValueType().getVectorNumElements();
294  APInt DemandedElts = APInt::getAllOnesValue(NumElts);
295  return SimplifyDemandedVectorElts(Op, DemandedElts);
296  }
297 
298  bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
299  const APInt &DemandedElts);
300  bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedElts,
301  bool AssumeSingleUse = false);
302 
303  bool CombineToPreIndexedLoadStore(SDNode *N);
304  bool CombineToPostIndexedLoadStore(SDNode *N);
305  SDValue SplitIndexingFromLoad(LoadSDNode *LD);
306  bool SliceUpLoad(SDNode *N);
307 
308  // Scalars have size 0 to distinguish from singleton vectors.
309  SDValue ForwardStoreValueToDirectLoad(LoadSDNode *LD);
310  bool getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val);
311  bool extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val);
312 
313  /// Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed
314  /// load.
315  ///
316  /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced.
317  /// \param InVecVT type of the input vector to EVE with bitcasts resolved.
318  /// \param EltNo index of the vector element to load.
319  /// \param OriginalLoad load that EVE came from to be replaced.
320  /// \returns EVE on success SDValue() on failure.
321  SDValue scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
322  SDValue EltNo,
323  LoadSDNode *OriginalLoad);
324  void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
325  SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
326  SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
327  SDValue ZExtPromoteOperand(SDValue Op, EVT PVT);
328  SDValue PromoteIntBinOp(SDValue Op);
329  SDValue PromoteIntShiftOp(SDValue Op);
330  SDValue PromoteExtend(SDValue Op);
331  bool PromoteLoad(SDValue Op);
332 
333  /// Call the node-specific routine that knows how to fold each
334  /// particular type of node. If that doesn't do anything, try the
335  /// target-specific DAG combines.
336  SDValue combine(SDNode *N);
337 
338  // Visitation implementation - Implement dag node combining for different
339  // node types. The semantics are as follows:
340  // Return Value:
341  // SDValue.getNode() == 0 - No change was made
342  // SDValue.getNode() == N - N was replaced, is dead and has been handled.
343  // otherwise - N should be replaced by the returned Operand.
344  //
345  SDValue visitTokenFactor(SDNode *N);
346  SDValue visitMERGE_VALUES(SDNode *N);
347  SDValue visitADD(SDNode *N);
348  SDValue visitADDLike(SDNode *N);
349  SDValue visitADDLikeCommutative(SDValue N0, SDValue N1, SDNode *LocReference);
350  SDValue visitSUB(SDNode *N);
351  SDValue visitADDSAT(SDNode *N);
352  SDValue visitSUBSAT(SDNode *N);
353  SDValue visitADDC(SDNode *N);
354  SDValue visitADDO(SDNode *N);
355  SDValue visitUADDOLike(SDValue N0, SDValue N1, SDNode *N);
356  SDValue visitSUBC(SDNode *N);
357  SDValue visitSUBO(SDNode *N);
358  SDValue visitADDE(SDNode *N);
359  SDValue visitADDCARRY(SDNode *N);
360  SDValue visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn, SDNode *N);
361  SDValue visitSUBE(SDNode *N);
362  SDValue visitSUBCARRY(SDNode *N);
363  SDValue visitMUL(SDNode *N);
364  SDValue useDivRem(SDNode *N);
365  SDValue visitSDIV(SDNode *N);
366  SDValue visitSDIVLike(SDValue N0, SDValue N1, SDNode *N);
367  SDValue visitUDIV(SDNode *N);
368  SDValue visitUDIVLike(SDValue N0, SDValue N1, SDNode *N);
369  SDValue visitREM(SDNode *N);
370  SDValue visitMULHU(SDNode *N);
371  SDValue visitMULHS(SDNode *N);
372  SDValue visitSMUL_LOHI(SDNode *N);
373  SDValue visitUMUL_LOHI(SDNode *N);
374  SDValue visitMULO(SDNode *N);
375  SDValue visitIMINMAX(SDNode *N);
376  SDValue visitAND(SDNode *N);
377  SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *N);
378  SDValue visitOR(SDNode *N);
379  SDValue visitORLike(SDValue N0, SDValue N1, SDNode *N);
380  SDValue visitXOR(SDNode *N);
381  SDValue SimplifyVBinOp(SDNode *N);
382  SDValue visitSHL(SDNode *N);
383  SDValue visitSRA(SDNode *N);
384  SDValue visitSRL(SDNode *N);
385  SDValue visitFunnelShift(SDNode *N);
386  SDValue visitRotate(SDNode *N);
387  SDValue visitABS(SDNode *N);
388  SDValue visitBSWAP(SDNode *N);
389  SDValue visitBITREVERSE(SDNode *N);
390  SDValue visitCTLZ(SDNode *N);
391  SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
392  SDValue visitCTTZ(SDNode *N);
393  SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
394  SDValue visitCTPOP(SDNode *N);
395  SDValue visitSELECT(SDNode *N);
396  SDValue visitVSELECT(SDNode *N);
397  SDValue visitSELECT_CC(SDNode *N);
398  SDValue visitSETCC(SDNode *N);
399  SDValue visitSETCCCARRY(SDNode *N);
400  SDValue visitSIGN_EXTEND(SDNode *N);
401  SDValue visitZERO_EXTEND(SDNode *N);
402  SDValue visitANY_EXTEND(SDNode *N);
403  SDValue visitAssertExt(SDNode *N);
404  SDValue visitSIGN_EXTEND_INREG(SDNode *N);
405  SDValue visitSIGN_EXTEND_VECTOR_INREG(SDNode *N);
406  SDValue visitZERO_EXTEND_VECTOR_INREG(SDNode *N);
407  SDValue visitTRUNCATE(SDNode *N);
408  SDValue visitBITCAST(SDNode *N);
409  SDValue visitBUILD_PAIR(SDNode *N);
410  SDValue visitFADD(SDNode *N);
411  SDValue visitFSUB(SDNode *N);
412  SDValue visitFMUL(SDNode *N);
413  SDValue visitFMA(SDNode *N);
414  SDValue visitFDIV(SDNode *N);
415  SDValue visitFREM(SDNode *N);
416  SDValue visitFSQRT(SDNode *N);
417  SDValue visitFCOPYSIGN(SDNode *N);
418  SDValue visitFPOW(SDNode *N);
419  SDValue visitSINT_TO_FP(SDNode *N);
420  SDValue visitUINT_TO_FP(SDNode *N);
421  SDValue visitFP_TO_SINT(SDNode *N);
422  SDValue visitFP_TO_UINT(SDNode *N);
423  SDValue visitFP_ROUND(SDNode *N);
424  SDValue visitFP_ROUND_INREG(SDNode *N);
425  SDValue visitFP_EXTEND(SDNode *N);
426  SDValue visitFNEG(SDNode *N);
427  SDValue visitFABS(SDNode *N);
428  SDValue visitFCEIL(SDNode *N);
429  SDValue visitFTRUNC(SDNode *N);
430  SDValue visitFFLOOR(SDNode *N);
431  SDValue visitFMINNUM(SDNode *N);
432  SDValue visitFMAXNUM(SDNode *N);
433  SDValue visitFMINIMUM(SDNode *N);
434  SDValue visitFMAXIMUM(SDNode *N);
435  SDValue visitBRCOND(SDNode *N);
436  SDValue visitBR_CC(SDNode *N);
437  SDValue visitLOAD(SDNode *N);
438 
439  SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain);
440  SDValue replaceStoreOfFPConstant(StoreSDNode *ST);
441 
442  SDValue visitSTORE(SDNode *N);
443  SDValue visitLIFETIME_END(SDNode *N);
444  SDValue visitINSERT_VECTOR_ELT(SDNode *N);
445  SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
446  SDValue visitBUILD_VECTOR(SDNode *N);
447  SDValue visitCONCAT_VECTORS(SDNode *N);
448  SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
449  SDValue visitVECTOR_SHUFFLE(SDNode *N);
450  SDValue visitSCALAR_TO_VECTOR(SDNode *N);
451  SDValue visitINSERT_SUBVECTOR(SDNode *N);
452  SDValue visitMLOAD(SDNode *N);
453  SDValue visitMSTORE(SDNode *N);
454  SDValue visitMGATHER(SDNode *N);
455  SDValue visitMSCATTER(SDNode *N);
456  SDValue visitFP_TO_FP16(SDNode *N);
457  SDValue visitFP16_TO_FP(SDNode *N);
458  SDValue visitVECREDUCE(SDNode *N);
459 
460  SDValue visitFADDForFMACombine(SDNode *N);
461  SDValue visitFSUBForFMACombine(SDNode *N);
462  SDValue visitFMULForFMADistributiveCombine(SDNode *N);
463 
464  SDValue XformToShuffleWithZero(SDNode *N);
465  bool reassociationCanBreakAddressingModePattern(unsigned Opc,
466  const SDLoc &DL, SDValue N0,
467  SDValue N1);
468  SDValue reassociateOpsCommutative(unsigned Opc, const SDLoc &DL, SDValue N0,
469  SDValue N1);
470  SDValue reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
471  SDValue N1, SDNodeFlags Flags);
472 
473  SDValue visitShiftByConstant(SDNode *N, ConstantSDNode *Amt);
474 
475  SDValue foldSelectOfConstants(SDNode *N);
476  SDValue foldVSelectOfConstants(SDNode *N);
477  SDValue foldBinOpIntoSelect(SDNode *BO);
478  bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
479  SDValue hoistLogicOpWithSameOpcodeHands(SDNode *N);
480  SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2);
481  SDValue SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
482  SDValue N2, SDValue N3, ISD::CondCode CC,
483  bool NotExtCompare = false);
484  SDValue convertSelectOfFPConstantsToLoadOffset(
485  const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
486  ISD::CondCode CC);
487  SDValue foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1,
488  SDValue N2, SDValue N3, ISD::CondCode CC);
489  SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
490  const SDLoc &DL);
491  SDValue unfoldMaskedMerge(SDNode *N);
492  SDValue unfoldExtremeBitClearingToShifts(SDNode *N);
493  SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
494  const SDLoc &DL, bool foldBooleans);
495  SDValue rebuildSetCC(SDValue N);
496 
497  bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
498  SDValue &CC) const;
499  bool isOneUseSetCC(SDValue N) const;
500 
501  SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
502  unsigned HiOp);
503  SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
504  SDValue CombineExtLoad(SDNode *N);
505  SDValue CombineZExtLogicopShiftLoad(SDNode *N);
506  SDValue combineRepeatedFPDivisors(SDNode *N);
507  SDValue combineInsertEltToShuffle(SDNode *N, unsigned InsIndex);
508  SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
509  SDValue BuildSDIV(SDNode *N);
510  SDValue BuildSDIVPow2(SDNode *N);
511  SDValue BuildUDIV(SDNode *N);
512  SDValue BuildLogBase2(SDValue V, const SDLoc &DL);
513  SDValue BuildReciprocalEstimate(SDValue Op, SDNodeFlags Flags);
514  SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags);
515  SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags);
516  SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Recip);
517  SDValue buildSqrtNROneConst(SDValue Arg, SDValue Est, unsigned Iterations,
518  SDNodeFlags Flags, bool Reciprocal);
519  SDValue buildSqrtNRTwoConst(SDValue Arg, SDValue Est, unsigned Iterations,
520  SDNodeFlags Flags, bool Reciprocal);
521  SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
522  bool DemandHighBits = true);
523  SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
524  SDNode *MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
525  SDValue InnerPos, SDValue InnerNeg,
526  unsigned PosOpcode, unsigned NegOpcode,
527  const SDLoc &DL);
528  SDNode *MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
529  SDValue MatchLoadCombine(SDNode *N);
530  SDValue MatchStoreCombine(StoreSDNode *N);
531  SDValue ReduceLoadWidth(SDNode *N);
532  SDValue ReduceLoadOpStoreWidth(SDNode *N);
534  SDValue TransformFPLoadStorePair(SDNode *N);
535  SDValue convertBuildVecZextToZext(SDNode *N);
536  SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
537  SDValue reduceBuildVecToShuffle(SDNode *N);
538  SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N,
539  ArrayRef<int> VectorMask, SDValue VecIn1,
540  SDValue VecIn2, unsigned LeftIdx,
541  bool DidSplitVec);
542  SDValue matchVSelectOpSizesWithSetCC(SDNode *Cast);
543 
544  /// Walk up chain skipping non-aliasing memory nodes,
545  /// looking for aliasing nodes and adding them to the Aliases vector.
546  void GatherAllAliases(SDNode *N, SDValue OriginalChain,
547  SmallVectorImpl<SDValue> &Aliases);
548 
549  /// Return true if there is any possibility that the two addresses overlap.
550  bool isAlias(SDNode *Op0, SDNode *Op1) const;
551 
552  /// Walk up chain skipping non-aliasing memory nodes, looking for a better
553  /// chain (aliasing node.)
554  SDValue FindBetterChain(SDNode *N, SDValue Chain);
555 
556  /// Try to replace a store and any possibly adjacent stores on
557  /// consecutive chains with better chains. Return true only if St is
558  /// replaced.
559  ///
560  /// Notice that other chains may still be replaced even if the function
561  /// returns false.
562  bool findBetterNeighborChains(StoreSDNode *St);
563 
564  // Helper for findBetterNeighborChains. Walk up store chain add additional
565  // chained stores that do not overlap and can be parallelized.
566  bool parallelizeChainedStores(StoreSDNode *St);
567 
568  /// Holds a pointer to an LSBaseSDNode as well as information on where it
569  /// is located in a sequence of memory operations connected by a chain.
570  struct MemOpLink {
571  // Ptr to the mem node.
572  LSBaseSDNode *MemNode;
573 
574  // Offset from the base ptr.
575  int64_t OffsetFromBase;
576 
577  MemOpLink(LSBaseSDNode *N, int64_t Offset)
578  : MemNode(N), OffsetFromBase(Offset) {}
579  };
580 
581  /// This is a helper function for visitMUL to check the profitability
582  /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
583  /// MulNode is the original multiply, AddNode is (add x, c1),
584  /// and ConstNode is c2.
585  bool isMulAddWithConstProfitable(SDNode *MulNode,
586  SDValue &AddNode,
587  SDValue &ConstNode);
588 
589  /// This is a helper function for visitAND and visitZERO_EXTEND. Returns
590  /// true if the (and (load x) c) pattern matches an extload. ExtVT returns
591  /// the type of the loaded value to be extended.
592  bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
593  EVT LoadResultTy, EVT &ExtVT);
594 
595  /// Helper function to calculate whether the given Load/Store can have its
596  /// width reduced to ExtVT.
597  bool isLegalNarrowLdSt(LSBaseSDNode *LDSTN, ISD::LoadExtType ExtType,
598  EVT &MemVT, unsigned ShAmt = 0);
599 
600  /// Used by BackwardsPropagateMask to find suitable loads.
601  bool SearchForAndLoads(SDNode *N, SmallVectorImpl<LoadSDNode*> &Loads,
602  SmallPtrSetImpl<SDNode*> &NodesWithConsts,
603  ConstantSDNode *Mask, SDNode *&NodeToMask);
604  /// Attempt to propagate a given AND node back to load leaves so that they
605  /// can be combined into narrow loads.
606  bool BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG);
607 
608  /// Helper function for MergeConsecutiveStores which merges the
609  /// component store chains.
610  SDValue getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
611  unsigned NumStores);
612 
613  /// This is a helper function for MergeConsecutiveStores. When the
614  /// source elements of the consecutive stores are all constants or
615  /// all extracted vector elements, try to merge them into one
616  /// larger store introducing bitcasts if necessary. \return True
617  /// if a merged store was created.
618  bool MergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
619  EVT MemVT, unsigned NumStores,
620  bool IsConstantSrc, bool UseVector,
621  bool UseTrunc);
622 
623  /// This is a helper function for MergeConsecutiveStores. Stores
624  /// that potentially may be merged with St are placed in
625  /// StoreNodes. RootNode is a chain predecessor to all store
626  /// candidates.
627  void getStoreMergeCandidates(StoreSDNode *St,
628  SmallVectorImpl<MemOpLink> &StoreNodes,
629  SDNode *&Root);
630 
631  /// Helper function for MergeConsecutiveStores. Checks if
632  /// candidate stores have indirect dependency through their
633  /// operands. RootNode is the predecessor to all stores calculated
634  /// by getStoreMergeCandidates and is used to prune the dependency check.
635  /// \return True if safe to merge.
636  bool checkMergeStoreCandidatesForDependencies(
637  SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
638  SDNode *RootNode);
639 
640  /// Merge consecutive store operations into a wide store.
641  /// This optimization uses wide integers or vectors when possible.
642  /// \return number of stores that were merged into a merged store (the
643  /// affected nodes are stored as a prefix in \p StoreNodes).
644  bool MergeConsecutiveStores(StoreSDNode *St);
645 
646  /// Try to transform a truncation where C is a constant:
647  /// (trunc (and X, C)) -> (and (trunc X), (trunc C))
648  ///
649  /// \p N needs to be a truncation and its first operand an AND. Other
650  /// requirements are checked by the function (e.g. that trunc is
651  /// single-use) and if missed an empty SDValue is returned.
652  SDValue distributeTruncateThroughAnd(SDNode *N);
653 
654  /// Helper function to determine whether the target supports operation
655  /// given by \p Opcode for type \p VT, that is, whether the operation
656  /// is legal or custom before legalizing operations, and whether is
657  /// legal (but not custom) after legalization.
658  bool hasOperation(unsigned Opcode, EVT VT) {
659  if (LegalOperations)
660  return TLI.isOperationLegal(Opcode, VT);
661  return TLI.isOperationLegalOrCustom(Opcode, VT);
662  }
663 
664  public:
665  /// Runs the dag combiner on all nodes in the work list
666  void Run(CombineLevel AtLevel);
667 
668  SelectionDAG &getDAG() const { return DAG; }
669 
670  /// Returns a type large enough to hold any valid shift amount - before type
671  /// legalization these can be huge.
672  EVT getShiftAmountTy(EVT LHSTy) {
673  assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
674  return TLI.getShiftAmountTy(LHSTy, DAG.getDataLayout(), LegalTypes);
675  }
676 
677  /// This method returns true if we are running before type legalization or
678  /// if the specified VT is legal.
679  bool isTypeLegal(const EVT &VT) {
680  if (!LegalTypes) return true;
681  return TLI.isTypeLegal(VT);
682  }
683 
684  /// Convenience wrapper around TargetLowering::getSetCCResultType
685  EVT getSetCCResultType(EVT VT) const {
686  return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
687  }
688 
689  void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
690  SDValue OrigLoad, SDValue ExtLoad,
692  };
693 
694 /// This class is a DAGUpdateListener that removes any deleted
695 /// nodes from the worklist.
696 class WorklistRemover : public SelectionDAG::DAGUpdateListener {
697  DAGCombiner &DC;
698 
699 public:
700  explicit WorklistRemover(DAGCombiner &dc)
701  : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
702 
703  void NodeDeleted(SDNode *N, SDNode *E) override {
704  DC.removeFromWorklist(N);
705  }
706 };
707 
708 class WorklistInserter : public SelectionDAG::DAGUpdateListener {
709  DAGCombiner &DC;
710 
711 public:
712  explicit WorklistInserter(DAGCombiner &dc)
713  : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
714 
715  // FIXME: Ideally we could add N to the worklist, but this causes exponential
716  // compile time costs in large DAGs, e.g. Halide.
717  void NodeInserted(SDNode *N) override { DC.ConsiderForPruning(N); }
718 };
719 
720 } // end anonymous namespace
721 
722 //===----------------------------------------------------------------------===//
723 // TargetLowering::DAGCombinerInfo implementation
724 //===----------------------------------------------------------------------===//
725 
727  ((DAGCombiner*)DC)->AddToWorklist(N);
728 }
729 
731 CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) {
732  return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
733 }
734 
736 CombineTo(SDNode *N, SDValue Res, bool AddTo) {
737  return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
738 }
739 
741 CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
742  return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
743 }
744 
747  return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
748 }
749 
750 //===----------------------------------------------------------------------===//
751 // Helper Functions
752 //===----------------------------------------------------------------------===//
753 
754 void DAGCombiner::deleteAndRecombine(SDNode *N) {
755  removeFromWorklist(N);
756 
757  // If the operands of this node are only used by the node, they will now be
758  // dead. Make sure to re-visit them and recursively delete dead nodes.
759  for (const SDValue &Op : N->ops())
760  // For an operand generating multiple values, one of the values may
761  // become dead allowing further simplification (e.g. split index
762  // arithmetic from an indexed load).
763  if (Op->hasOneUse() || Op->getNumValues() > 1)
764  AddToWorklist(Op.getNode());
765 
766  DAG.DeleteNode(N);
767 }
768 
769 /// Return 1 if we can compute the negated form of the specified expression for
770 /// the same cost as the expression itself, or 2 if we can compute the negated
771 /// form more cheaply than the expression itself.
772 static char isNegatibleForFree(SDValue Op, bool LegalOperations,
773  const TargetLowering &TLI,
774  const TargetOptions *Options,
775  bool ForCodeSize,
776  unsigned Depth = 0) {
777  // fneg is removable even if it has multiple uses.
778  if (Op.getOpcode() == ISD::FNEG)
779  return 2;
780 
781  // Don't allow anything with multiple uses unless we know it is free.
782  EVT VT = Op.getValueType();
783  const SDNodeFlags Flags = Op->getFlags();
784  if (!Op.hasOneUse() &&
785  !(Op.getOpcode() == ISD::FP_EXTEND &&
786  TLI.isFPExtFree(VT, Op.getOperand(0).getValueType())))
787  return 0;
788 
789  // Don't recurse exponentially.
790  if (Depth > 6)
791  return 0;
792 
793  switch (Op.getOpcode()) {
794  default: return false;
795  case ISD::ConstantFP: {
796  if (!LegalOperations)
797  return 1;
798 
799  // Don't invert constant FP values after legalization unless the target says
800  // the negated constant is legal.
801  return TLI.isOperationLegal(ISD::ConstantFP, VT) ||
802  TLI.isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT,
803  ForCodeSize);
804  }
805  case ISD::BUILD_VECTOR: {
806  // Only permit BUILD_VECTOR of constants.
807  if (llvm::any_of(Op->op_values(), [&](SDValue N) {
808  return !N.isUndef() && !isa<ConstantFPSDNode>(N);
809  }))
810  return 0;
811  if (!LegalOperations)
812  return 1;
813  if (TLI.isOperationLegal(ISD::ConstantFP, VT) &&
815  return 1;
816  return llvm::all_of(Op->op_values(), [&](SDValue N) {
817  return N.isUndef() ||
818  TLI.isFPImmLegal(neg(cast<ConstantFPSDNode>(N)->getValueAPF()), VT,
819  ForCodeSize);
820  });
821  }
822  case ISD::FADD:
823  if (!Options->UnsafeFPMath && !Flags.hasNoSignedZeros())
824  return 0;
825 
826  // After operation legalization, it might not be legal to create new FSUBs.
827  if (LegalOperations && !TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
828  return 0;
829 
830  // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
831  if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
832  Options, ForCodeSize, Depth + 1))
833  return V;
834  // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
835  return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
836  ForCodeSize, Depth + 1);
837  case ISD::FSUB:
838  // We can't turn -(A-B) into B-A when we honor signed zeros.
839  if (!Options->NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
840  return 0;
841 
842  // fold (fneg (fsub A, B)) -> (fsub B, A)
843  return 1;
844 
845  case ISD::FMUL:
846  case ISD::FDIV:
847  // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y))
848  if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
849  Options, ForCodeSize, Depth + 1))
850  return V;
851 
852  return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
853  ForCodeSize, Depth + 1);
854 
855  case ISD::FP_EXTEND:
856  case ISD::FP_ROUND:
857  case ISD::FSIN:
858  return isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, Options,
859  ForCodeSize, Depth + 1);
860  }
861 }
862 
863 /// If isNegatibleForFree returns true, return the newly negated expression.
865  bool LegalOperations, bool ForCodeSize,
866  unsigned Depth = 0) {
867  // fneg is removable even if it has multiple uses.
868  if (Op.getOpcode() == ISD::FNEG)
869  return Op.getOperand(0);
870 
871  assert(Depth <= 6 && "GetNegatedExpression doesn't match isNegatibleForFree");
872  const TargetOptions &Options = DAG.getTarget().Options;
873  const SDNodeFlags Flags = Op->getFlags();
874 
875  switch (Op.getOpcode()) {
876  default: llvm_unreachable("Unknown code");
877  case ISD::ConstantFP: {
878  APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
879  V.changeSign();
880  return DAG.getConstantFP(V, SDLoc(Op), Op.getValueType());
881  }
882  case ISD::BUILD_VECTOR: {
884  for (SDValue C : Op->op_values()) {
885  if (C.isUndef()) {
886  Ops.push_back(C);
887  continue;
888  }
889  APFloat V = cast<ConstantFPSDNode>(C)->getValueAPF();
890  V.changeSign();
891  Ops.push_back(DAG.getConstantFP(V, SDLoc(Op), C.getValueType()));
892  }
893  return DAG.getBuildVector(Op.getValueType(), SDLoc(Op), Ops);
894  }
895  case ISD::FADD:
896  assert(Options.UnsafeFPMath || Flags.hasNoSignedZeros());
897 
898  // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
899  if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
900  DAG.getTargetLoweringInfo(), &Options, ForCodeSize,
901  Depth + 1))
902  return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
903  GetNegatedExpression(Op.getOperand(0), DAG,
904  LegalOperations, ForCodeSize,
905  Depth + 1),
906  Op.getOperand(1), Flags);
907  // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
908  return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
909  GetNegatedExpression(Op.getOperand(1), DAG,
910  LegalOperations, ForCodeSize,
911  Depth + 1),
912  Op.getOperand(0), Flags);
913  case ISD::FSUB:
914  // fold (fneg (fsub 0, B)) -> B
915  if (ConstantFPSDNode *N0CFP =
916  isConstOrConstSplatFP(Op.getOperand(0), /*AllowUndefs*/ true))
917  if (N0CFP->isZero())
918  return Op.getOperand(1);
919 
920  // fold (fneg (fsub A, B)) -> (fsub B, A)
921  return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
922  Op.getOperand(1), Op.getOperand(0), Flags);
923 
924  case ISD::FMUL:
925  case ISD::FDIV:
926  // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
927  if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
928  DAG.getTargetLoweringInfo(), &Options, ForCodeSize,
929  Depth + 1))
930  return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
931  GetNegatedExpression(Op.getOperand(0), DAG,
932  LegalOperations, ForCodeSize,
933  Depth + 1),
934  Op.getOperand(1), Flags);
935 
936  // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
937  return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
938  Op.getOperand(0),
939  GetNegatedExpression(Op.getOperand(1), DAG,
940  LegalOperations, ForCodeSize,
941  Depth + 1), Flags);
942 
943  case ISD::FP_EXTEND:
944  case ISD::FSIN:
945  return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
946  GetNegatedExpression(Op.getOperand(0), DAG,
947  LegalOperations, ForCodeSize,
948  Depth + 1));
949  case ISD::FP_ROUND:
950  return DAG.getNode(ISD::FP_ROUND, SDLoc(Op), Op.getValueType(),
951  GetNegatedExpression(Op.getOperand(0), DAG,
952  LegalOperations, ForCodeSize,
953  Depth + 1),
954  Op.getOperand(1));
955  }
956 }
957 
958 // APInts must be the same size for most operations, this helper
959 // function zero extends the shorter of the pair so that they match.
960 // We provide an Offset so that we can create bitwidths that won't overflow.
961 static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) {
962  unsigned Bits = Offset + std::max(LHS.getBitWidth(), RHS.getBitWidth());
963  LHS = LHS.zextOrSelf(Bits);
964  RHS = RHS.zextOrSelf(Bits);
965 }
966 
967 // Return true if this node is a setcc, or is a select_cc
968 // that selects between the target values used for true and false, making it
969 // equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to
970 // the appropriate nodes based on the type of node we are checking. This
971 // simplifies life a bit for the callers.
972 bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
973  SDValue &CC) const {
974  if (N.getOpcode() == ISD::SETCC) {
975  LHS = N.getOperand(0);
976  RHS = N.getOperand(1);
977  CC = N.getOperand(2);
978  return true;
979  }
980 
981  if (N.getOpcode() != ISD::SELECT_CC ||
982  !TLI.isConstTrueVal(N.getOperand(2).getNode()) ||
983  !TLI.isConstFalseVal(N.getOperand(3).getNode()))
984  return false;
985 
986  if (TLI.getBooleanContents(N.getValueType()) ==
988  return false;
989 
990  LHS = N.getOperand(0);
991  RHS = N.getOperand(1);
992  CC = N.getOperand(4);
993  return true;
994 }
995 
996 /// Return true if this is a SetCC-equivalent operation with only one use.
997 /// If this is true, it allows the users to invert the operation for free when
998 /// it is profitable to do so.
999 bool DAGCombiner::isOneUseSetCC(SDValue N) const {
1000  SDValue N0, N1, N2;
1001  if (isSetCCEquivalent(N, N0, N1, N2) && N.getNode()->hasOneUse())
1002  return true;
1003  return false;
1004 }
1005 
1006 // Returns the SDNode if it is a constant float BuildVector
1007 // or constant float.
1009  if (isa<ConstantFPSDNode>(N))
1010  return N.getNode();
1012  return N.getNode();
1013  return nullptr;
1014 }
1015 
1016 // Determines if it is a constant integer or a build vector of constant
1017 // integers (and undefs).
1018 // Do not permit build vector implicit truncation.
1019 static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
1020  if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N))
1021  return !(Const->isOpaque() && NoOpaques);
1022  if (N.getOpcode() != ISD::BUILD_VECTOR)
1023  return false;
1024  unsigned BitWidth = N.getScalarValueSizeInBits();
1025  for (const SDValue &Op : N->op_values()) {
1026  if (Op.isUndef())
1027  continue;
1029  if (!Const || Const->getAPIntValue().getBitWidth() != BitWidth ||
1030  (Const->isOpaque() && NoOpaques))
1031  return false;
1032  }
1033  return true;
1034 }
1035 
1036 // Determines if a BUILD_VECTOR is composed of all-constants possibly mixed with
1037 // undef's.
1038 static bool isAnyConstantBuildVector(SDValue V, bool NoOpaques = false) {
1039  if (V.getOpcode() != ISD::BUILD_VECTOR)
1040  return false;
1041  return isConstantOrConstantVector(V, NoOpaques) ||
1043 }
1044 
1045 bool DAGCombiner::reassociationCanBreakAddressingModePattern(unsigned Opc,
1046  const SDLoc &DL,
1047  SDValue N0,
1048  SDValue N1) {
1049  // Currently this only tries to ensure we don't undo the GEP splits done by
1050  // CodeGenPrepare when shouldConsiderGEPOffsetSplit is true. To ensure this,
1051  // we check if the following transformation would be problematic:
1052  // (load/store (add, (add, x, offset1), offset2)) ->
1053  // (load/store (add, x, offset1+offset2)).
1054 
1055  if (Opc != ISD::ADD || N0.getOpcode() != ISD::ADD)
1056  return false;
1057 
1058  if (N0.hasOneUse())
1059  return false;
1060 
1061  auto *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
1062  auto *C2 = dyn_cast<ConstantSDNode>(N1);
1063  if (!C1 || !C2)
1064  return false;
1065 
1066  const APInt &C1APIntVal = C1->getAPIntValue();
1067  const APInt &C2APIntVal = C2->getAPIntValue();
1068  if (C1APIntVal.getBitWidth() > 64 || C2APIntVal.getBitWidth() > 64)
1069  return false;
1070 
1071  const APInt CombinedValueIntVal = C1APIntVal + C2APIntVal;
1072  if (CombinedValueIntVal.getBitWidth() > 64)
1073  return false;
1074  const int64_t CombinedValue = CombinedValueIntVal.getSExtValue();
1075 
1076  for (SDNode *Node : N0->uses()) {
1077  auto LoadStore = dyn_cast<MemSDNode>(Node);
1078  if (LoadStore) {
1079  // Is x[offset2] already not a legal addressing mode? If so then
1080  // reassociating the constants breaks nothing (we test offset2 because
1081  // that's the one we hope to fold into the load or store).
1083  AM.HasBaseReg = true;
1084  AM.BaseOffs = C2APIntVal.getSExtValue();
1085  EVT VT = LoadStore->getMemoryVT();
1086  unsigned AS = LoadStore->getAddressSpace();
1087  Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
1088  if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
1089  continue;
1090 
1091  // Would x[offset1+offset2] still be a legal addressing mode?
1092  AM.BaseOffs = CombinedValue;
1093  if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
1094  return true;
1095  }
1096  }
1097 
1098  return false;
1099 }
1100 
1101 // Helper for DAGCombiner::reassociateOps. Try to reassociate an expression
1102 // such as (Opc N0, N1), if \p N0 is the same kind of operation as \p Opc.
1103 SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL,
1104  SDValue N0, SDValue N1) {
1105  EVT VT = N0.getValueType();
1106 
1107  if (N0.getOpcode() != Opc)
1108  return SDValue();
1109 
1110  // Don't reassociate reductions.
1111  if (N0->getFlags().hasVectorReduction())
1112  return SDValue();
1113 
1115  if (SDNode *C2 = DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
1116  // Reassociate: (op (op x, c1), c2) -> (op x, (op c1, c2))
1117  if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, C1, C2))
1118  return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode);
1119  return SDValue();
1120  }
1121  if (N0.hasOneUse()) {
1122  // Reassociate: (op (op x, c1), y) -> (op (op x, y), c1)
1123  // iff (op x, c1) has one use
1124  SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1);
1125  if (!OpNode.getNode())
1126  return SDValue();
1127  AddToWorklist(OpNode.getNode());
1128  return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1));
1129  }
1130  }
1131  return SDValue();
1132 }
1133 
1134 // Try to reassociate commutative binops.
1135 SDValue DAGCombiner::reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
1136  SDValue N1, SDNodeFlags Flags) {
1137  assert(TLI.isCommutativeBinOp(Opc) && "Operation not commutative.");
1138  // Don't reassociate reductions.
1139  if (Flags.hasVectorReduction())
1140  return SDValue();
1141 
1142  // Floating-point reassociation is not allowed without loose FP math.
1143  if (N0.getValueType().isFloatingPoint() ||
1145  if (!Flags.hasAllowReassociation() || !Flags.hasNoSignedZeros())
1146  return SDValue();
1147 
1148  if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N0, N1))
1149  return Combined;
1150  if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N1, N0))
1151  return Combined;
1152  return SDValue();
1153 }
1154 
1155 SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
1156  bool AddTo) {
1157  assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
1158  ++NodesCombined;
1159  LLVM_DEBUG(dbgs() << "\nReplacing.1 "; N->dump(&DAG); dbgs() << "\nWith: ";
1160  To[0].getNode()->dump(&DAG);
1161  dbgs() << " and " << NumTo - 1 << " other values\n");
1162  for (unsigned i = 0, e = NumTo; i != e; ++i)
1163  assert((!To[i].getNode() ||
1164  N->getValueType(i) == To[i].getValueType()) &&
1165  "Cannot combine value to value of different type!");
1166 
1167  WorklistRemover DeadNodes(*this);
1168  DAG.ReplaceAllUsesWith(N, To);
1169  if (AddTo) {
1170  // Push the new nodes and any users onto the worklist
1171  for (unsigned i = 0, e = NumTo; i != e; ++i) {
1172  if (To[i].getNode()) {
1173  AddToWorklist(To[i].getNode());
1174  AddUsersToWorklist(To[i].getNode());
1175  }
1176  }
1177  }
1178 
1179  // Finally, if the node is now dead, remove it from the graph. The node
1180  // may not be dead if the replacement process recursively simplified to
1181  // something else needing this node.
1182  if (N->use_empty())
1183  deleteAndRecombine(N);
1184  return SDValue(N, 0);
1185 }
1186 
1187 void DAGCombiner::
1188 CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
1189  // Replace all uses. If any nodes become isomorphic to other nodes and
1190  // are deleted, make sure to remove them from our worklist.
1191  WorklistRemover DeadNodes(*this);
1192  DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);
1193 
1194  // Push the new node and any (possibly new) users onto the worklist.
1195  AddToWorklist(TLO.New.getNode());
1196  AddUsersToWorklist(TLO.New.getNode());
1197 
1198  // Finally, if the node is now dead, remove it from the graph. The node
1199  // may not be dead if the replacement process recursively simplified to
1200  // something else needing this node.
1201  if (TLO.Old.getNode()->use_empty())
1202  deleteAndRecombine(TLO.Old.getNode());
1203 }
1204 
1205 /// Check the specified integer node value to see if it can be simplified or if
1206 /// things it uses can be simplified by bit propagation. If so, return true.
1207 bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
1208  const APInt &DemandedElts) {
1209  TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1210  KnownBits Known;
1211  if (!TLI.SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO))
1212  return false;
1213 
1214  // Revisit the node.
1215  AddToWorklist(Op.getNode());
1216 
1217  // Replace the old value with the new one.
1218  ++NodesCombined;
1219  LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.getNode()->dump(&DAG);
1220  dbgs() << "\nWith: "; TLO.New.getNode()->dump(&DAG);
1221  dbgs() << '\n');
1222 
1223  CommitTargetLoweringOpt(TLO);
1224  return true;
1225 }
1226 
1227 /// Check the specified vector node value to see if it can be simplified or
1228 /// if things it uses can be simplified as it only uses some of the elements.
1229 /// If so, return true.
1230 bool DAGCombiner::SimplifyDemandedVectorElts(SDValue Op,
1231  const APInt &DemandedElts,
1232  bool AssumeSingleUse) {
1233  TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1234  APInt KnownUndef, KnownZero;
1235  if (!TLI.SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero,
1236  TLO, 0, AssumeSingleUse))
1237  return false;
1238 
1239  // Revisit the node.
1240  AddToWorklist(Op.getNode());
1241 
1242  // Replace the old value with the new one.
1243  ++NodesCombined;
1244  LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.getNode()->dump(&DAG);
1245  dbgs() << "\nWith: "; TLO.New.getNode()->dump(&DAG);
1246  dbgs() << '\n');
1247 
1248  CommitTargetLoweringOpt(TLO);
1249  return true;
1250 }
1251 
1252 void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
1253  SDLoc DL(Load);
1254  EVT VT = Load->getValueType(0);
1255  SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, SDValue(ExtLoad, 0));
1256 
1257  LLVM_DEBUG(dbgs() << "\nReplacing.9 "; Load->dump(&DAG); dbgs() << "\nWith: ";
1258  Trunc.getNode()->dump(&DAG); dbgs() << '\n');
1259  WorklistRemover DeadNodes(*this);
1260  DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
1261  DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));
1262  deleteAndRecombine(Load);
1263  AddToWorklist(Trunc.getNode());
1264 }
1265 
1266 SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
1267  Replace = false;
1268  SDLoc DL(Op);
1269  if (ISD::isUNINDEXEDLoad(Op.getNode())) {
1270  LoadSDNode *LD = cast<LoadSDNode>(Op);
1271  EVT MemVT = LD->getMemoryVT();
1273  : LD->getExtensionType();
1274  Replace = true;
1275  return DAG.getExtLoad(ExtType, DL, PVT,
1276  LD->getChain(), LD->getBasePtr(),
1277  MemVT, LD->getMemOperand());
1278  }
1279 
1280  unsigned Opc = Op.getOpcode();
1281  switch (Opc) {
1282  default: break;
1283  case ISD::AssertSext:
1284  if (SDValue Op0 = SExtPromoteOperand(Op.getOperand(0), PVT))
1285  return DAG.getNode(ISD::AssertSext, DL, PVT, Op0, Op.getOperand(1));
1286  break;
1287  case ISD::AssertZext:
1288  if (SDValue Op0 = ZExtPromoteOperand(Op.getOperand(0), PVT))
1289  return DAG.getNode(ISD::AssertZext, DL, PVT, Op0, Op.getOperand(1));
1290  break;
1291  case ISD::Constant: {
1292  unsigned ExtOpc =
1294  return DAG.getNode(ExtOpc, DL, PVT, Op);
1295  }
1296  }
1297 
1298  if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
1299  return SDValue();
1300  return DAG.getNode(ISD::ANY_EXTEND, DL, PVT, Op);
1301 }
1302 
1303 SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
1305  return SDValue();
1306  EVT OldVT = Op.getValueType();
1307  SDLoc DL(Op);
1308  bool Replace = false;
1309  SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1310  if (!NewOp.getNode())
1311  return SDValue();
1312  AddToWorklist(NewOp.getNode());
1313 
1314  if (Replace)
1315  ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1316  return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, NewOp.getValueType(), NewOp,
1317  DAG.getValueType(OldVT));
1318 }
1319 
1320 SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
1321  EVT OldVT = Op.getValueType();
1322  SDLoc DL(Op);
1323  bool Replace = false;
1324  SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1325  if (!NewOp.getNode())
1326  return SDValue();
1327  AddToWorklist(NewOp.getNode());
1328 
1329  if (Replace)
1330  ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1331  return DAG.getZeroExtendInReg(NewOp, DL, OldVT);
1332 }
1333 
1334 /// Promote the specified integer binary operation if the target indicates it is
1335 /// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1336 /// i32 since i16 instructions are longer.
1337 SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
1338  if (!LegalOperations)
1339  return SDValue();
1340 
1341  EVT VT = Op.getValueType();
1342  if (VT.isVector() || !VT.isInteger())
1343  return SDValue();
1344 
1345  // If operation type is 'undesirable', e.g. i16 on x86, consider
1346  // promoting it.
1347  unsigned Opc = Op.getOpcode();
1348  if (TLI.isTypeDesirableForOp(Opc, VT))
1349  return SDValue();
1350 
1351  EVT PVT = VT;
1352  // Consult target whether it is a good idea to promote this operation and
1353  // what's the right type to promote it to.
1354  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1355  assert(PVT != VT && "Don't know what type to promote to!");
1356 
1357  LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
1358 
1359  bool Replace0 = false;
1360  SDValue N0 = Op.getOperand(0);
1361  SDValue NN0 = PromoteOperand(N0, PVT, Replace0);
1362 
1363  bool Replace1 = false;
1364  SDValue N1 = Op.getOperand(1);
1365  SDValue NN1 = PromoteOperand(N1, PVT, Replace1);
1366  SDLoc DL(Op);
1367 
1368  SDValue RV =
1369  DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, NN0, NN1));
1370 
1371  // We are always replacing N0/N1's use in N and only need
1372  // additional replacements if there are additional uses.
1373  Replace0 &= !N0->hasOneUse();
1374  Replace1 &= (N0 != N1) && !N1->hasOneUse();
1375 
1376  // Combine Op here so it is preserved past replacements.
1377  CombineTo(Op.getNode(), RV);
1378 
1379  // If operands have a use ordering, make sure we deal with
1380  // predecessor first.
1381  if (Replace0 && Replace1 && N0.getNode()->isPredecessorOf(N1.getNode())) {
1382  std::swap(N0, N1);
1383  std::swap(NN0, NN1);
1384  }
1385 
1386  if (Replace0) {
1387  AddToWorklist(NN0.getNode());
1388  ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
1389  }
1390  if (Replace1) {
1391  AddToWorklist(NN1.getNode());
1392  ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());
1393  }
1394  return Op;
1395  }
1396  return SDValue();
1397 }
1398 
1399 /// Promote the specified integer shift operation if the target indicates it is
1400 /// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1401 /// i32 since i16 instructions are longer.
1402 SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
1403  if (!LegalOperations)
1404  return SDValue();
1405 
1406  EVT VT = Op.getValueType();
1407  if (VT.isVector() || !VT.isInteger())
1408  return SDValue();
1409 
1410  // If operation type is 'undesirable', e.g. i16 on x86, consider
1411  // promoting it.
1412  unsigned Opc = Op.getOpcode();
1413  if (TLI.isTypeDesirableForOp(Opc, VT))
1414  return SDValue();
1415 
1416  EVT PVT = VT;
1417  // Consult target whether it is a good idea to promote this operation and
1418  // what's the right type to promote it to.
1419  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1420  assert(PVT != VT && "Don't know what type to promote to!");
1421 
1422  LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
1423 
1424  bool Replace = false;
1425  SDValue N0 = Op.getOperand(0);
1426  SDValue N1 = Op.getOperand(1);
1427  if (Opc == ISD::SRA)
1428  N0 = SExtPromoteOperand(N0, PVT);
1429  else if (Opc == ISD::SRL)
1430  N0 = ZExtPromoteOperand(N0, PVT);
1431  else
1432  N0 = PromoteOperand(N0, PVT, Replace);
1433 
1434  if (!N0.getNode())
1435  return SDValue();
1436 
1437  SDLoc DL(Op);
1438  SDValue RV =
1439  DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, N0, N1));
1440 
1441  AddToWorklist(N0.getNode());
1442  if (Replace)
1443  ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());
1444 
1445  // Deal with Op being deleted.
1446  if (Op && Op.getOpcode() != ISD::DELETED_NODE)
1447  return RV;
1448  }
1449  return SDValue();
1450 }
1451 
1452 SDValue DAGCombiner::PromoteExtend(SDValue Op) {
1453  if (!LegalOperations)
1454  return SDValue();
1455 
1456  EVT VT = Op.getValueType();
1457  if (VT.isVector() || !VT.isInteger())
1458  return SDValue();
1459 
1460  // If operation type is 'undesirable', e.g. i16 on x86, consider
1461  // promoting it.
1462  unsigned Opc = Op.getOpcode();
1463  if (TLI.isTypeDesirableForOp(Opc, VT))
1464  return SDValue();
1465 
1466  EVT PVT = VT;
1467  // Consult target whether it is a good idea to promote this operation and
1468  // what's the right type to promote it to.
1469  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1470  assert(PVT != VT && "Don't know what type to promote to!");
1471  // fold (aext (aext x)) -> (aext x)
1472  // fold (aext (zext x)) -> (zext x)
1473  // fold (aext (sext x)) -> (sext x)
1474  LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
1475  return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0));
1476  }
1477  return SDValue();
1478 }
1479 
1480 bool DAGCombiner::PromoteLoad(SDValue Op) {
1481  if (!LegalOperations)
1482  return false;
1483 
1484  if (!ISD::isUNINDEXEDLoad(Op.getNode()))
1485  return false;
1486 
1487  EVT VT = Op.getValueType();
1488  if (VT.isVector() || !VT.isInteger())
1489  return false;
1490 
1491  // If operation type is 'undesirable', e.g. i16 on x86, consider
1492  // promoting it.
1493  unsigned Opc = Op.getOpcode();
1494  if (TLI.isTypeDesirableForOp(Opc, VT))
1495  return false;
1496 
1497  EVT PVT = VT;
1498  // Consult target whether it is a good idea to promote this operation and
1499  // what's the right type to promote it to.
1500  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1501  assert(PVT != VT && "Don't know what type to promote to!");
1502 
1503  SDLoc DL(Op);
1504  SDNode *N = Op.getNode();
1505  LoadSDNode *LD = cast<LoadSDNode>(N);
1506  EVT MemVT = LD->getMemoryVT();
1508  : LD->getExtensionType();
1509  SDValue NewLD = DAG.getExtLoad(ExtType, DL, PVT,
1510  LD->getChain(), LD->getBasePtr(),
1511  MemVT, LD->getMemOperand());
1512  SDValue Result = DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD);
1513 
1514  LLVM_DEBUG(dbgs() << "\nPromoting "; N->dump(&DAG); dbgs() << "\nTo: ";
1515  Result.getNode()->dump(&DAG); dbgs() << '\n');
1516  WorklistRemover DeadNodes(*this);
1517  DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
1518  DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
1519  deleteAndRecombine(N);
1520  AddToWorklist(Result.getNode());
1521  return true;
1522  }
1523  return false;
1524 }
1525 
1526 /// Recursively delete a node which has no uses and any operands for
1527 /// which it is the only use.
1528 ///
1529 /// Note that this both deletes the nodes and removes them from the worklist.
1530 /// It also adds any nodes who have had a user deleted to the worklist as they
1531 /// may now have only one use and subject to other combines.
1532 bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) {
1533  if (!N->use_empty())
1534  return false;
1535 
1537  Nodes.insert(N);
1538  do {
1539  N = Nodes.pop_back_val();
1540  if (!N)
1541  continue;
1542 
1543  if (N->use_empty()) {
1544  for (const SDValue &ChildN : N->op_values())
1545  Nodes.insert(ChildN.getNode());
1546 
1547  removeFromWorklist(N);
1548  DAG.DeleteNode(N);
1549  } else {
1550  AddToWorklist(N);
1551  }
1552  } while (!Nodes.empty());
1553  return true;
1554 }
1555 
1556 //===----------------------------------------------------------------------===//
1557 // Main DAG Combiner implementation
1558 //===----------------------------------------------------------------------===//
1559 
1560 void DAGCombiner::Run(CombineLevel AtLevel) {
1561  // set the instance variables, so that the various visit routines may use it.
1562  Level = AtLevel;
1563  LegalOperations = Level >= AfterLegalizeVectorOps;
1564  LegalTypes = Level >= AfterLegalizeTypes;
1565 
1566  WorklistInserter AddNodes(*this);
1567 
1568  // Add all the dag nodes to the worklist.
1569  for (SDNode &Node : DAG.allnodes())
1570  AddToWorklist(&Node);
1571 
1572  // Create a dummy node (which is not added to allnodes), that adds a reference
1573  // to the root node, preventing it from being deleted, and tracking any
1574  // changes of the root.
1575  HandleSDNode Dummy(DAG.getRoot());
1576 
1577  // While we have a valid worklist entry node, try to combine it.
1578  while (SDNode *N = getNextWorklistEntry()) {
1579  // If N has no uses, it is dead. Make sure to revisit all N's operands once
1580  // N is deleted from the DAG, since they too may now be dead or may have a
1581  // reduced number of uses, allowing other xforms.
1582  if (recursivelyDeleteUnusedNodes(N))
1583  continue;
1584 
1585  WorklistRemover DeadNodes(*this);
1586 
1587  // If this combine is running after legalizing the DAG, re-legalize any
1588  // nodes pulled off the worklist.
1589  if (Level == AfterLegalizeDAG) {
1590  SmallSetVector<SDNode *, 16> UpdatedNodes;
1591  bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes);
1592 
1593  for (SDNode *LN : UpdatedNodes) {
1594  AddToWorklist(LN);
1595  AddUsersToWorklist(LN);
1596  }
1597  if (!NIsValid)
1598  continue;
1599  }
1600 
1601  LLVM_DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG));
1602 
1603  // Add any operands of the new node which have not yet been combined to the
1604  // worklist as well. Because the worklist uniques things already, this
1605  // won't repeatedly process the same operand.
1606  CombinedNodes.insert(N);
1607  for (const SDValue &ChildN : N->op_values())
1608  if (!CombinedNodes.count(ChildN.getNode()))
1609  AddToWorklist(ChildN.getNode());
1610 
1611  SDValue RV = combine(N);
1612 
1613  if (!RV.getNode())
1614  continue;
1615 
1616  ++NodesCombined;
1617 
1618  // If we get back the same node we passed in, rather than a new node or
1619  // zero, we know that the node must have defined multiple values and
1620  // CombineTo was used. Since CombineTo takes care of the worklist
1621  // mechanics for us, we have no work to do in this case.
1622  if (RV.getNode() == N)
1623  continue;
1624 
1626  RV.getOpcode() != ISD::DELETED_NODE &&
1627  "Node was deleted but visit returned new node!");
1628 
1629  LLVM_DEBUG(dbgs() << " ... into: "; RV.getNode()->dump(&DAG));
1630 
1631  if (N->getNumValues() == RV.getNode()->getNumValues())
1632  DAG.ReplaceAllUsesWith(N, RV.getNode());
1633  else {
1634  assert(N->getValueType(0) == RV.getValueType() &&
1635  N->getNumValues() == 1 && "Type mismatch");
1636  DAG.ReplaceAllUsesWith(N, &RV);
1637  }
1638 
1639  // Push the new node and any users onto the worklist
1640  AddToWorklist(RV.getNode());
1641  AddUsersToWorklist(RV.getNode());
1642 
1643  // Finally, if the node is now dead, remove it from the graph. The node
1644  // may not be dead if the replacement process recursively simplified to
1645  // something else needing this node. This will also take care of adding any
1646  // operands which have lost a user to the worklist.
1647  recursivelyDeleteUnusedNodes(N);
1648  }
1649 
1650  // If the root changed (e.g. it was a dead load, update the root).
1651  DAG.setRoot(Dummy.getValue());
1652  DAG.RemoveDeadNodes();
1653 }
1654 
1655 SDValue DAGCombiner::visit(SDNode *N) {
1656  switch (N->getOpcode()) {
1657  default: break;
1658  case ISD::TokenFactor: return visitTokenFactor(N);
1659  case ISD::MERGE_VALUES: return visitMERGE_VALUES(N);
1660  case ISD::ADD: return visitADD(N);
1661  case ISD::SUB: return visitSUB(N);
1662  case ISD::SADDSAT:
1663  case ISD::UADDSAT: return visitADDSAT(N);
1664  case ISD::SSUBSAT:
1665  case ISD::USUBSAT: return visitSUBSAT(N);
1666  case ISD::ADDC: return visitADDC(N);
1667  case ISD::SADDO:
1668  case ISD::UADDO: return visitADDO(N);
1669  case ISD::SUBC: return visitSUBC(N);
1670  case ISD::SSUBO:
1671  case ISD::USUBO: return visitSUBO(N);
1672  case ISD::ADDE: return visitADDE(N);
1673  case ISD::ADDCARRY: return visitADDCARRY(N);
1674  case ISD::SUBE: return visitSUBE(N);
1675  case ISD::SUBCARRY: return visitSUBCARRY(N);
1676  case ISD::MUL: return visitMUL(N);
1677  case ISD::SDIV: return visitSDIV(N);
1678  case ISD::UDIV: return visitUDIV(N);
1679  case ISD::SREM:
1680  case ISD::UREM: return visitREM(N);
1681  case ISD::MULHU: return visitMULHU(N);
1682  case ISD::MULHS: return visitMULHS(N);
1683  case ISD::SMUL_LOHI: return visitSMUL_LOHI(N);
1684  case ISD::UMUL_LOHI: return visitUMUL_LOHI(N);
1685  case ISD::SMULO:
1686  case ISD::UMULO: return visitMULO(N);
1687  case ISD::SMIN:
1688  case ISD::SMAX:
1689  case ISD::UMIN:
1690  case ISD::UMAX: return visitIMINMAX(N);
1691  case ISD::AND: return visitAND(N);
1692  case ISD::OR: return visitOR(N);
1693  case ISD::XOR: return visitXOR(N);
1694  case ISD::SHL: return visitSHL(N);
1695  case ISD::SRA: return visitSRA(N);
1696  case ISD::SRL: return visitSRL(N);
1697  case ISD::ROTR:
1698  case ISD::ROTL: return visitRotate(N);
1699  case ISD::FSHL:
1700  case ISD::FSHR: return visitFunnelShift(N);
1701  case ISD::ABS: return visitABS(N);
1702  case ISD::BSWAP: return visitBSWAP(N);
1703  case ISD::BITREVERSE: return visitBITREVERSE(N);
1704  case ISD::CTLZ: return visitCTLZ(N);
1705  case ISD::CTLZ_ZERO_UNDEF: return visitCTLZ_ZERO_UNDEF(N);
1706  case ISD::CTTZ: return visitCTTZ(N);
1707  case ISD::CTTZ_ZERO_UNDEF: return visitCTTZ_ZERO_UNDEF(N);
1708  case ISD::CTPOP: return visitCTPOP(N);
1709  case ISD::SELECT: return visitSELECT(N);
1710  case ISD::VSELECT: return visitVSELECT(N);
1711  case ISD::SELECT_CC: return visitSELECT_CC(N);
1712  case ISD::SETCC: return visitSETCC(N);
1713  case ISD::SETCCCARRY: return visitSETCCCARRY(N);
1714  case ISD::SIGN_EXTEND: return visitSIGN_EXTEND(N);
1715  case ISD::ZERO_EXTEND: return visitZERO_EXTEND(N);
1716  case ISD::ANY_EXTEND: return visitANY_EXTEND(N);
1717  case ISD::AssertSext:
1718  case ISD::AssertZext: return visitAssertExt(N);
1719  case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N);
1720  case ISD::SIGN_EXTEND_VECTOR_INREG: return visitSIGN_EXTEND_VECTOR_INREG(N);
1721  case ISD::ZERO_EXTEND_VECTOR_INREG: return visitZERO_EXTEND_VECTOR_INREG(N);
1722  case ISD::TRUNCATE: return visitTRUNCATE(N);
1723  case ISD::BITCAST: return visitBITCAST(N);
1724  case ISD::BUILD_PAIR: return visitBUILD_PAIR(N);
1725  case ISD::FADD: return visitFADD(N);
1726  case ISD::FSUB: return visitFSUB(N);
1727  case ISD::FMUL: return visitFMUL(N);
1728  case ISD::FMA: return visitFMA(N);
1729  case ISD::FDIV: return visitFDIV(N);
1730  case ISD::FREM: return visitFREM(N);
1731  case ISD::FSQRT: return visitFSQRT(N);
1732  case ISD::FCOPYSIGN: return visitFCOPYSIGN(N);
1733  case ISD::FPOW: return visitFPOW(N);
1734  case ISD::SINT_TO_FP: return visitSINT_TO_FP(N);
1735  case ISD::UINT_TO_FP: return visitUINT_TO_FP(N);
1736  case ISD::FP_TO_SINT: return visitFP_TO_SINT(N);
1737  case ISD::FP_TO_UINT: return visitFP_TO_UINT(N);
1738  case ISD::FP_ROUND: return visitFP_ROUND(N);
1739  case ISD::FP_ROUND_INREG: return visitFP_ROUND_INREG(N);
1740  case ISD::FP_EXTEND: return visitFP_EXTEND(N);
1741  case ISD::FNEG: return visitFNEG(N);
1742  case ISD::FABS: return visitFABS(N);
1743  case ISD::FFLOOR: return visitFFLOOR(N);
1744  case ISD::FMINNUM: return visitFMINNUM(N);
1745  case ISD::FMAXNUM: return visitFMAXNUM(N);
1746  case ISD::FMINIMUM: return visitFMINIMUM(N);
1747  case ISD::FMAXIMUM: return visitFMAXIMUM(N);
1748  case ISD::FCEIL: return visitFCEIL(N);
1749  case ISD::FTRUNC: return visitFTRUNC(N);
1750  case ISD::BRCOND: return visitBRCOND(N);
1751  case ISD::BR_CC: return visitBR_CC(N);
1752  case ISD::LOAD: return visitLOAD(N);
1753  case ISD::STORE: return visitSTORE(N);
1754  case ISD::INSERT_VECTOR_ELT: return visitINSERT_VECTOR_ELT(N);
1755  case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
1756  case ISD::BUILD_VECTOR: return visitBUILD_VECTOR(N);
1757  case ISD::CONCAT_VECTORS: return visitCONCAT_VECTORS(N);
1758  case ISD::EXTRACT_SUBVECTOR: return visitEXTRACT_SUBVECTOR(N);
1759  case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N);
1760  case ISD::SCALAR_TO_VECTOR: return visitSCALAR_TO_VECTOR(N);
1761  case ISD::INSERT_SUBVECTOR: return visitINSERT_SUBVECTOR(N);
1762  case ISD::MGATHER: return visitMGATHER(N);
1763  case ISD::MLOAD: return visitMLOAD(N);
1764  case ISD::MSCATTER: return visitMSCATTER(N);
1765  case ISD::MSTORE: return visitMSTORE(N);
1766  case ISD::LIFETIME_END: return visitLIFETIME_END(N);
1767  case ISD::FP_TO_FP16: return visitFP_TO_FP16(N);
1768  case ISD::FP16_TO_FP: return visitFP16_TO_FP(N);
1769  case ISD::VECREDUCE_FADD:
1770  case ISD::VECREDUCE_FMUL:
1771  case ISD::VECREDUCE_ADD:
1772  case ISD::VECREDUCE_MUL:
1773  case ISD::VECREDUCE_AND:
1774  case ISD::VECREDUCE_OR:
1775  case ISD::VECREDUCE_XOR:
1776  case ISD::VECREDUCE_SMAX:
1777  case ISD::VECREDUCE_SMIN:
1778  case ISD::VECREDUCE_UMAX:
1779  case ISD::VECREDUCE_UMIN:
1780  case ISD::VECREDUCE_FMAX:
1781  case ISD::VECREDUCE_FMIN: return visitVECREDUCE(N);
1782  }
1783  return SDValue();
1784 }
1785 
1786 SDValue DAGCombiner::combine(SDNode *N) {
1787  SDValue RV = visit(N);
1788 
1789  // If nothing happened, try a target-specific DAG combine.
1790  if (!RV.getNode()) {
1792  "Node was deleted but visit returned NULL!");
1793 
1794  if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
1796 
1797  // Expose the DAG combiner to the target combiner impls.
1799  DagCombineInfo(DAG, Level, false, this);
1800 
1801  RV = TLI.PerformDAGCombine(N, DagCombineInfo);
1802  }
1803  }
1804 
1805  // If nothing happened still, try promoting the operation.
1806  if (!RV.getNode()) {
1807  switch (N->getOpcode()) {
1808  default: break;
1809  case ISD::ADD:
1810  case ISD::SUB:
1811  case ISD::MUL:
1812  case ISD::AND:
1813  case ISD::OR:
1814  case ISD::XOR:
1815  RV = PromoteIntBinOp(SDValue(N, 0));
1816  break;
1817  case ISD::SHL:
1818  case ISD::SRA:
1819  case ISD::SRL:
1820  RV = PromoteIntShiftOp(SDValue(N, 0));
1821  break;
1822  case ISD::SIGN_EXTEND:
1823  case ISD::ZERO_EXTEND:
1824  case ISD::ANY_EXTEND:
1825  RV = PromoteExtend(SDValue(N, 0));
1826  break;
1827  case ISD::LOAD:
1828  if (PromoteLoad(SDValue(N, 0)))
1829  RV = SDValue(N, 0);
1830  break;
1831  }
1832  }
1833 
1834  // If N is a commutative binary node, try to eliminate it if the commuted
1835  // version is already present in the DAG.
1836  if (!RV.getNode() && TLI.isCommutativeBinOp(N->getOpcode()) &&
1837  N->getNumValues() == 1) {
1838  SDValue N0 = N->getOperand(0);
1839  SDValue N1 = N->getOperand(1);
1840 
1841  // Constant operands are canonicalized to RHS.
1842  if (N0 != N1 && (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1))) {
1843  SDValue Ops[] = {N1, N0};
1844  SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
1845  N->getFlags());
1846  if (CSENode)
1847  return SDValue(CSENode, 0);
1848  }
1849  }
1850 
1851  return RV;
1852 }
1853 
1854 /// Given a node, return its input chain if it has one, otherwise return a null
1855 /// sd operand.
1857  if (unsigned NumOps = N->getNumOperands()) {
1858  if (N->getOperand(0).getValueType() == MVT::Other)
1859  return N->getOperand(0);
1860  if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
1861  return N->getOperand(NumOps-1);
1862  for (unsigned i = 1; i < NumOps-1; ++i)
1863  if (N->getOperand(i).getValueType() == MVT::Other)
1864  return N->getOperand(i);
1865  }
1866  return SDValue();
1867 }
1868 
1869 SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
1870  // If N has two operands, where one has an input chain equal to the other,
1871  // the 'other' chain is redundant.
1872  if (N->getNumOperands() == 2) {
1873  if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
1874  return N->getOperand(0);
1875  if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
1876  return N->getOperand(1);
1877  }
1878 
1879  // Don't simplify token factors if optnone.
1880  if (OptLevel == CodeGenOpt::None)
1881  return SDValue();
1882 
1883  // If the sole user is a token factor, we should make sure we have a
1884  // chance to merge them together. This prevents TF chains from inhibiting
1885  // optimizations.
1886  if (N->hasOneUse() && N->use_begin()->getOpcode() == ISD::TokenFactor)
1887  AddToWorklist(*(N->use_begin()));
1888 
1889  SmallVector<SDNode *, 8> TFs; // List of token factors to visit.
1890  SmallVector<SDValue, 8> Ops; // Ops for replacing token factor.
1891  SmallPtrSet<SDNode*, 16> SeenOps;
1892  bool Changed = false; // If we should replace this token factor.
1893 
1894  // Start out with this token factor.
1895  TFs.push_back(N);
1896 
1897  // Iterate through token factors. The TFs grows when new token factors are
1898  // encountered.
1899  for (unsigned i = 0; i < TFs.size(); ++i) {
1900  // Limit number of nodes to inline, to avoid quadratic compile times.
1901  // We have to add the outstanding Token Factors to Ops, otherwise we might
1902  // drop Ops from the resulting Token Factors.
1903  if (Ops.size() > TokenFactorInlineLimit) {
1904  for (unsigned j = i; j < TFs.size(); j++)
1905  Ops.emplace_back(TFs[j], 0);
1906  // Drop unprocessed Token Factors from TFs, so we do not add them to the
1907  // combiner worklist later.
1908  TFs.resize(i);
1909  break;
1910  }
1911 
1912  SDNode *TF = TFs[i];
1913  // Check each of the operands.
1914  for (const SDValue &Op : TF->op_values()) {
1915  switch (Op.getOpcode()) {
1916  case ISD::EntryToken:
1917  // Entry tokens don't need to be added to the list. They are
1918  // redundant.
1919  Changed = true;
1920  break;
1921 
1922  case ISD::TokenFactor:
1923  if (Op.hasOneUse() && !is_contained(TFs, Op.getNode())) {
1924  // Queue up for processing.
1925  TFs.push_back(Op.getNode());
1926  Changed = true;
1927  break;
1928  }
1930 
1931  default:
1932  // Only add if it isn't already in the list.
1933  if (SeenOps.insert(Op.getNode()).second)
1934  Ops.push_back(Op);
1935  else
1936  Changed = true;
1937  break;
1938  }
1939  }
1940  }
1941 
1942  // Re-visit inlined Token Factors, to clean them up in case they have been
1943  // removed. Skip the first Token Factor, as this is the current node.
1944  for (unsigned i = 1, e = TFs.size(); i < e; i++)
1945  AddToWorklist(TFs[i]);
1946 
1947  // Remove Nodes that are chained to another node in the list. Do so
1948  // by walking up chains breath-first stopping when we've seen
1949  // another operand. In general we must climb to the EntryNode, but we can exit
1950  // early if we find all remaining work is associated with just one operand as
1951  // no further pruning is possible.
1952 
1953  // List of nodes to search through and original Ops from which they originate.
1955  SmallVector<unsigned, 8> OpWorkCount; // Count of work for each Op.
1956  SmallPtrSet<SDNode *, 16> SeenChains;
1957  bool DidPruneOps = false;
1958 
1959  unsigned NumLeftToConsider = 0;
1960  for (const SDValue &Op : Ops) {
1961  Worklist.push_back(std::make_pair(Op.getNode(), NumLeftToConsider++));
1962  OpWorkCount.push_back(1);
1963  }
1964 
1965  auto AddToWorklist = [&](unsigned CurIdx, SDNode *Op, unsigned OpNumber) {
1966  // If this is an Op, we can remove the op from the list. Remark any
1967  // search associated with it as from the current OpNumber.
1968  if (SeenOps.count(Op) != 0) {
1969  Changed = true;
1970  DidPruneOps = true;
1971  unsigned OrigOpNumber = 0;
1972  while (OrigOpNumber < Ops.size() && Ops[OrigOpNumber].getNode() != Op)
1973  OrigOpNumber++;
1974  assert((OrigOpNumber != Ops.size()) &&
1975  "expected to find TokenFactor Operand");
1976  // Re-mark worklist from OrigOpNumber to OpNumber
1977  for (unsigned i = CurIdx + 1; i < Worklist.size(); ++i) {
1978  if (Worklist[i].second == OrigOpNumber) {
1979  Worklist[i].second = OpNumber;
1980  }
1981  }
1982  OpWorkCount[OpNumber] += OpWorkCount[OrigOpNumber];
1983  OpWorkCount[OrigOpNumber] = 0;
1984  NumLeftToConsider--;
1985  }
1986  // Add if it's a new chain
1987  if (SeenChains.insert(Op).second) {
1988  OpWorkCount[OpNumber]++;
1989  Worklist.push_back(std::make_pair(Op, OpNumber));
1990  }
1991  };
1992 
1993  for (unsigned i = 0; i < Worklist.size() && i < 1024; ++i) {
1994  // We need at least be consider at least 2 Ops to prune.
1995  if (NumLeftToConsider <= 1)
1996  break;
1997  auto CurNode = Worklist[i].first;
1998  auto CurOpNumber = Worklist[i].second;
1999  assert((OpWorkCount[CurOpNumber] > 0) &&
2000  "Node should not appear in worklist");
2001  switch (CurNode->getOpcode()) {
2002  case ISD::EntryToken:
2003  // Hitting EntryToken is the only way for the search to terminate without
2004  // hitting
2005  // another operand's search. Prevent us from marking this operand
2006  // considered.
2007  NumLeftToConsider++;
2008  break;
2009  case ISD::TokenFactor:
2010  for (const SDValue &Op : CurNode->op_values())
2011  AddToWorklist(i, Op.getNode(), CurOpNumber);
2012  break;
2013  case ISD::LIFETIME_START:
2014  case ISD::LIFETIME_END:
2015  case ISD::CopyFromReg:
2016  case ISD::CopyToReg:
2017  AddToWorklist(i, CurNode->getOperand(0).getNode(), CurOpNumber);
2018  break;
2019  default:
2020  if (auto *MemNode = dyn_cast<MemSDNode>(CurNode))
2021  AddToWorklist(i, MemNode->getChain().getNode(), CurOpNumber);
2022  break;
2023  }
2024  OpWorkCount[CurOpNumber]--;
2025  if (OpWorkCount[CurOpNumber] == 0)
2026  NumLeftToConsider--;
2027  }
2028 
2029  // If we've changed things around then replace token factor.
2030  if (Changed) {
2031  SDValue Result;
2032  if (Ops.empty()) {
2033  // The entry token is the only possible outcome.
2034  Result = DAG.getEntryNode();
2035  } else {
2036  if (DidPruneOps) {
2037  SmallVector<SDValue, 8> PrunedOps;
2038  //
2039  for (const SDValue &Op : Ops) {
2040  if (SeenChains.count(Op.getNode()) == 0)
2041  PrunedOps.push_back(Op);
2042  }
2043  Result = DAG.getTokenFactor(SDLoc(N), PrunedOps);
2044  } else {
2045  Result = DAG.getTokenFactor(SDLoc(N), Ops);
2046  }
2047  }
2048  return Result;
2049  }
2050  return SDValue();
2051 }
2052 
2053 /// MERGE_VALUES can always be eliminated.
2054 SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
2055  WorklistRemover DeadNodes(*this);
2056  // Replacing results may cause a different MERGE_VALUES to suddenly
2057  // be CSE'd with N, and carry its uses with it. Iterate until no
2058  // uses remain, to ensure that the node can be safely deleted.
2059  // First add the users of this node to the work list so that they
2060  // can be tried again once they have new operands.
2061  AddUsersToWorklist(N);
2062  do {
2063  // Do as a single replacement to avoid rewalking use lists.
2065  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
2066  Ops.push_back(N->getOperand(i));
2067  DAG.ReplaceAllUsesWith(N, Ops.data());
2068  } while (!N->use_empty());
2069  deleteAndRecombine(N);
2070  return SDValue(N, 0); // Return N so it doesn't get rechecked!
2071 }
2072 
2073 /// If \p N is a ConstantSDNode with isOpaque() == false return it casted to a
2074 /// ConstantSDNode pointer else nullptr.
2077  return Const != nullptr && !Const->isOpaque() ? Const : nullptr;
2078 }
2079 
2080 SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
2081  assert(TLI.isBinOp(BO->getOpcode()) && BO->getNumValues() == 1 &&
2082  "Unexpected binary operator");
2083 
2084  // Don't do this unless the old select is going away. We want to eliminate the
2085  // binary operator, not replace a binop with a select.
2086  // TODO: Handle ISD::SELECT_CC.
2087  unsigned SelOpNo = 0;
2088  SDValue Sel = BO->getOperand(0);
2089  if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
2090  SelOpNo = 1;
2091  Sel = BO->getOperand(1);
2092  }
2093 
2094  if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
2095  return SDValue();
2096 
2097  SDValue CT = Sel.getOperand(1);
2098  if (!isConstantOrConstantVector(CT, true) &&
2100  return SDValue();
2101 
2102  SDValue CF = Sel.getOperand(2);
2103  if (!isConstantOrConstantVector(CF, true) &&
2105  return SDValue();
2106 
2107  // Bail out if any constants are opaque because we can't constant fold those.
2108  // The exception is "and" and "or" with either 0 or -1 in which case we can
2109  // propagate non constant operands into select. I.e.:
2110  // and (select Cond, 0, -1), X --> select Cond, 0, X
2111  // or X, (select Cond, -1, 0) --> select Cond, -1, X
2112  auto BinOpcode = BO->getOpcode();
2113  bool CanFoldNonConst =
2114  (BinOpcode == ISD::AND || BinOpcode == ISD::OR) &&
2117 
2118  SDValue CBO = BO->getOperand(SelOpNo ^ 1);
2119  if (!CanFoldNonConst &&
2120  !isConstantOrConstantVector(CBO, true) &&
2122  return SDValue();
2123 
2124  EVT VT = Sel.getValueType();
2125 
2126  // In case of shift value and shift amount may have different VT. For instance
2127  // on x86 shift amount is i8 regardles of LHS type. Bail out if we have
2128  // swapped operands and value types do not match. NB: x86 is fine if operands
2129  // are not swapped with shift amount VT being not bigger than shifted value.
2130  // TODO: that is possible to check for a shift operation, correct VTs and
2131  // still perform optimization on x86 if needed.
2132  if (SelOpNo && VT != CBO.getValueType())
2133  return SDValue();
2134 
2135  // We have a select-of-constants followed by a binary operator with a
2136  // constant. Eliminate the binop by pulling the constant math into the select.
2137  // Example: add (select Cond, CT, CF), CBO --> select Cond, CT + CBO, CF + CBO
2138  SDLoc DL(Sel);
2139  SDValue NewCT = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CT)
2140  : DAG.getNode(BinOpcode, DL, VT, CT, CBO);
2141  if (!CanFoldNonConst && !NewCT.isUndef() &&
2142  !isConstantOrConstantVector(NewCT, true) &&
2144  return SDValue();
2145 
2146  SDValue NewCF = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CF)
2147  : DAG.getNode(BinOpcode, DL, VT, CF, CBO);
2148  if (!CanFoldNonConst && !NewCF.isUndef() &&
2149  !isConstantOrConstantVector(NewCF, true) &&
2151  return SDValue();
2152 
2153  SDValue SelectOp = DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF);
2154  SelectOp->setFlags(BO->getFlags());
2155  return SelectOp;
2156 }
2157 
2159  assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2160  "Expecting add or sub");
2161 
2162  // Match a constant operand and a zext operand for the math instruction:
2163  // add Z, C
2164  // sub C, Z
2165  bool IsAdd = N->getOpcode() == ISD::ADD;
2166  SDValue C = IsAdd ? N->getOperand(1) : N->getOperand(0);
2167  SDValue Z = IsAdd ? N->getOperand(0) : N->getOperand(1);
2168  auto *CN = dyn_cast<ConstantSDNode>(C);
2169  if (!CN || Z.getOpcode() != ISD::ZERO_EXTEND)
2170  return SDValue();
2171 
2172  // Match the zext operand as a setcc of a boolean.
2173  if (Z.getOperand(0).getOpcode() != ISD::SETCC ||
2174  Z.getOperand(0).getValueType() != MVT::i1)
2175  return SDValue();
2176 
2177  // Match the compare as: setcc (X & 1), 0, eq.
2178  SDValue SetCC = Z.getOperand(0);
2179  ISD::CondCode CC = cast<CondCodeSDNode>(SetCC->getOperand(2))->get();
2180  if (CC != ISD::SETEQ || !isNullConstant(SetCC.getOperand(1)) ||
2181  SetCC.getOperand(0).getOpcode() != ISD::AND ||
2182  !isOneConstant(SetCC.getOperand(0).getOperand(1)))
2183  return SDValue();
2184 
2185  // We are adding/subtracting a constant and an inverted low bit. Turn that
2186  // into a subtract/add of the low bit with incremented/decremented constant:
2187  // add (zext i1 (seteq (X & 1), 0)), C --> sub C+1, (zext (X & 1))
2188  // sub C, (zext i1 (seteq (X & 1), 0)) --> add C-1, (zext (X & 1))
2189  EVT VT = C.getValueType();
2190  SDLoc DL(N);
2191  SDValue LowBit = DAG.getZExtOrTrunc(SetCC.getOperand(0), DL, VT);
2192  SDValue C1 = IsAdd ? DAG.getConstant(CN->getAPIntValue() + 1, DL, VT) :
2193  DAG.getConstant(CN->getAPIntValue() - 1, DL, VT);
2194  return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, C1, LowBit);
2195 }
2196 
2197 /// Try to fold a 'not' shifted sign-bit with add/sub with constant operand into
2198 /// a shift and add with a different constant.
2200  assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2201  "Expecting add or sub");
2202 
2203  // We need a constant operand for the add/sub, and the other operand is a
2204  // logical shift right: add (srl), C or sub C, (srl).
2205  // TODO - support non-uniform vector amounts.
2206  bool IsAdd = N->getOpcode() == ISD::ADD;
2207  SDValue ConstantOp = IsAdd ? N->getOperand(1) : N->getOperand(0);
2208  SDValue ShiftOp = IsAdd ? N->getOperand(0) : N->getOperand(1);
2209  ConstantSDNode *C = isConstOrConstSplat(ConstantOp);
2210  if (!C || ShiftOp.getOpcode() != ISD::SRL)
2211  return SDValue();
2212 
2213  // The shift must be of a 'not' value.
2214  SDValue Not = ShiftOp.getOperand(0);
2215  if (!Not.hasOneUse() || !isBitwiseNot(Not))
2216  return SDValue();
2217 
2218  // The shift must be moving the sign bit to the least-significant-bit.
2219  EVT VT = ShiftOp.getValueType();
2220  SDValue ShAmt = ShiftOp.getOperand(1);
2221  ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
2222  if (!ShAmtC || ShAmtC->getAPIntValue() != (VT.getScalarSizeInBits() - 1))
2223  return SDValue();
2224 
2225  // Eliminate the 'not' by adjusting the shift and add/sub constant:
2226  // add (srl (not X), 31), C --> add (sra X, 31), (C + 1)
2227  // sub C, (srl (not X), 31) --> add (srl X, 31), (C - 1)
2228  SDLoc DL(N);
2229  auto ShOpcode = IsAdd ? ISD::SRA : ISD::SRL;
2230  SDValue NewShift = DAG.getNode(ShOpcode, DL, VT, Not.getOperand(0), ShAmt);
2231  APInt NewC = IsAdd ? C->getAPIntValue() + 1 : C->getAPIntValue() - 1;
2232  return DAG.getNode(ISD::ADD, DL, VT, NewShift, DAG.getConstant(NewC, DL, VT));
2233 }
2234 
2235 /// Try to fold a node that behaves like an ADD (note that N isn't necessarily
2236 /// an ISD::ADD here, it could for example be an ISD::OR if we know that there
2237 /// are no common bits set in the operands).
2238 SDValue DAGCombiner::visitADDLike(SDNode *N) {
2239  SDValue N0 = N->getOperand(0);
2240  SDValue N1 = N->getOperand(1);
2241  EVT VT = N0.getValueType();
2242  SDLoc DL(N);
2243 
2244  // fold vector ops
2245  if (VT.isVector()) {
2246  if (SDValue FoldedVOp = SimplifyVBinOp(N))
2247  return FoldedVOp;
2248 
2249  // fold (add x, 0) -> x, vector edition
2251  return N0;
2253  return N1;
2254  }
2255 
2256  // fold (add x, undef) -> undef
2257  if (N0.isUndef())
2258  return N0;
2259 
2260  if (N1.isUndef())
2261  return N1;
2262 
2264  // canonicalize constant to RHS
2266  return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
2267  // fold (add c1, c2) -> c1+c2
2268  return DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, N0.getNode(),
2269  N1.getNode());
2270  }
2271 
2272  // fold (add x, 0) -> x
2273  if (isNullConstant(N1))
2274  return N0;
2275 
2276  if (isConstantOrConstantVector(N1, /* NoOpaque */ true)) {
2277  // fold ((A-c1)+c2) -> (A+(c2-c1))
2278  if (N0.getOpcode() == ISD::SUB &&
2279  isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true)) {
2280  SDValue Sub = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, N1.getNode(),
2281  N0.getOperand(1).getNode());
2282  assert(Sub && "Constant folding failed");
2283  return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Sub);
2284  }
2285 
2286  // fold ((c1-A)+c2) -> (c1+c2)-A
2287  if (N0.getOpcode() == ISD::SUB &&
2288  isConstantOrConstantVector(N0.getOperand(0), /* NoOpaque */ true)) {
2289  SDValue Add = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, N1.getNode(),
2290  N0.getOperand(0).getNode());
2291  assert(Add && "Constant folding failed");
2292  return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
2293  }
2294 
2295  // add (sext i1 X), 1 -> zext (not i1 X)
2296  // We don't transform this pattern:
2297  // add (zext i1 X), -1 -> sext (not i1 X)
2298  // because most (?) targets generate better code for the zext form.
2299  if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
2300  isOneOrOneSplat(N1)) {
2301  SDValue X = N0.getOperand(0);
2302  if ((!LegalOperations ||
2303  (TLI.isOperationLegal(ISD::XOR, X.getValueType()) &&
2304  TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) &&
2305  X.getScalarValueSizeInBits() == 1) {
2306  SDValue Not = DAG.getNOT(DL, X, X.getValueType());
2307  return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Not);
2308  }
2309  }
2310 
2311  // Undo the add -> or combine to merge constant offsets from a frame index.
2312  if (N0.getOpcode() == ISD::OR &&
2313  isa<FrameIndexSDNode>(N0.getOperand(0)) &&
2314  isa<ConstantSDNode>(N0.getOperand(1)) &&
2315  DAG.haveNoCommonBitsSet(N0.getOperand(0), N0.getOperand(1))) {
2316  SDValue Add0 = DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(1));
2317  return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Add0);
2318  }
2319  }
2320 
2321  if (SDValue NewSel = foldBinOpIntoSelect(N))
2322  return NewSel;
2323 
2324  // reassociate add
2325  if (!reassociationCanBreakAddressingModePattern(ISD::ADD, DL, N0, N1)) {
2326  if (SDValue RADD = reassociateOps(ISD::ADD, DL, N0, N1, N->getFlags()))
2327  return RADD;
2328  }
2329  // fold ((0-A) + B) -> B-A
2330  if (N0.getOpcode() == ISD::SUB && isNullOrNullSplat(N0.getOperand(0)))
2331  return DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
2332 
2333  // fold (A + (0-B)) -> A-B
2334  if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
2335  return DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(1));
2336 
2337  // fold (A+(B-A)) -> B
2338  if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1))
2339  return N1.getOperand(0);
2340 
2341  // fold ((B-A)+A) -> B
2342  if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1))
2343  return N0.getOperand(0);
2344 
2345  // fold ((A-B)+(C-A)) -> (C-B)
2346  if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
2347  N0.getOperand(0) == N1.getOperand(1))
2348  return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2349  N0.getOperand(1));
2350 
2351  // fold ((A-B)+(B-C)) -> (A-C)
2352  if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
2353  N0.getOperand(1) == N1.getOperand(0))
2354  return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
2355  N1.getOperand(1));
2356 
2357  // fold (A+(B-(A+C))) to (B-C)
2358  if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
2359  N0 == N1.getOperand(1).getOperand(0))
2360  return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2361  N1.getOperand(1).getOperand(1));
2362 
2363  // fold (A+(B-(C+A))) to (B-C)
2364  if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
2365  N0 == N1.getOperand(1).getOperand(1))
2366  return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2367  N1.getOperand(1).getOperand(0));
2368 
2369  // fold (A+((B-A)+or-C)) to (B+or-C)
2370  if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) &&
2371  N1.getOperand(0).getOpcode() == ISD::SUB &&
2372  N0 == N1.getOperand(0).getOperand(1))
2373  return DAG.getNode(N1.getOpcode(), DL, VT, N1.getOperand(0).getOperand(0),
2374  N1.getOperand(1));
2375 
2376  // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
2377  if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB) {
2378  SDValue N00 = N0.getOperand(0);
2379  SDValue N01 = N0.getOperand(1);
2380  SDValue N10 = N1.getOperand(0);
2381  SDValue N11 = N1.getOperand(1);
2382 
2384  return DAG.getNode(ISD::SUB, DL, VT,
2385  DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10),
2386  DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11));
2387  }
2388 
2389  // fold (add (umax X, C), -C) --> (usubsat X, C)
2390  if (N0.getOpcode() == ISD::UMAX && hasOperation(ISD::USUBSAT, VT)) {
2391  auto MatchUSUBSAT = [](ConstantSDNode *Max, ConstantSDNode *Op) {
2392  return (!Max && !Op) ||
2393  (Max && Op && Max->getAPIntValue() == (-Op->getAPIntValue()));
2394  };
2395  if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchUSUBSAT,
2396  /*AllowUndefs*/ true))
2397  return DAG.getNode(ISD::USUBSAT, DL, VT, N0.getOperand(0),
2398  N0.getOperand(1));
2399  }
2400 
2401  if (SimplifyDemandedBits(SDValue(N, 0)))
2402  return SDValue(N, 0);
2403 
2404  if (isOneOrOneSplat(N1)) {
2405  // fold (add (xor a, -1), 1) -> (sub 0, a)
2406  if (isBitwiseNot(N0))
2407  return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
2408  N0.getOperand(0));
2409 
2410  // fold (add (add (xor a, -1), b), 1) -> (sub b, a)
2411  if (N0.getOpcode() == ISD::ADD ||
2412  N0.getOpcode() == ISD::UADDO ||
2413  N0.getOpcode() == ISD::SADDO) {
2414  SDValue A, Xor;
2415 
2416  if (isBitwiseNot(N0.getOperand(0))) {
2417  A = N0.getOperand(1);
2418  Xor = N0.getOperand(0);
2419  } else if (isBitwiseNot(N0.getOperand(1))) {
2420  A = N0.getOperand(0);
2421  Xor = N0.getOperand(1);
2422  }
2423 
2424  if (Xor)
2425  return DAG.getNode(ISD::SUB, DL, VT, A, Xor.getOperand(0));
2426  }
2427 
2428  // Look for:
2429  // add (add x, y), 1
2430  // And if the target does not like this form then turn into:
2431  // sub y, (xor x, -1)
2432  if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.hasOneUse() &&
2433  N0.getOpcode() == ISD::ADD) {
2434  SDValue Not = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
2435  DAG.getAllOnesConstant(DL, VT));
2436  return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(1), Not);
2437  }
2438  }
2439 
2440  // (x - y) + -1 -> add (xor y, -1), x
2441  if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
2443  SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1), N1);
2444  return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0));
2445  }
2446 
2447  if (SDValue Combined = visitADDLikeCommutative(N0, N1, N))
2448  return Combined;
2449 
2450  if (SDValue Combined = visitADDLikeCommutative(N1, N0, N))
2451  return Combined;
2452 
2453  return SDValue();
2454 }
2455 
2456 SDValue DAGCombiner::visitADD(SDNode *N) {
2457  SDValue N0 = N->getOperand(0);
2458  SDValue N1 = N->getOperand(1);
2459  EVT VT = N0.getValueType();
2460  SDLoc DL(N);
2461 
2462  if (SDValue Combined = visitADDLike(N))
2463  return Combined;
2464 
2465  if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
2466  return V;
2467 
2468  if (SDValue V = foldAddSubOfSignBit(N, DAG))
2469  return V;
2470 
2471  // fold (a+b) -> (a|b) iff a and b share no bits.
2472  if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
2473  DAG.haveNoCommonBitsSet(N0, N1))
2474  return DAG.getNode(ISD::OR, DL, VT, N0, N1);
2475 
2476  return SDValue();
2477 }
2478 
2479 SDValue DAGCombiner::visitADDSAT(SDNode *N) {
2480  unsigned Opcode = N->getOpcode();
2481  SDValue N0 = N->getOperand(0);
2482  SDValue N1 = N->getOperand(1);
2483  EVT VT = N0.getValueType();
2484  SDLoc DL(N);
2485 
2486  // fold vector ops
2487  if (VT.isVector()) {
2488  // TODO SimplifyVBinOp
2489 
2490  // fold (add_sat x, 0) -> x, vector edition
2492  return N0;
2494  return N1;
2495  }
2496 
2497  // fold (add_sat x, undef) -> -1
2498  if (N0.isUndef() || N1.isUndef())
2499  return DAG.getAllOnesConstant(DL, VT);
2500 
2502  // canonicalize constant to RHS
2504  return DAG.getNode(Opcode, DL, VT, N1, N0);
2505  // fold (add_sat c1, c2) -> c3
2506  return DAG.FoldConstantArithmetic(Opcode, DL, VT, N0.getNode(),
2507  N1.getNode());
2508  }
2509 
2510  // fold (add_sat x, 0) -> x
2511  if (isNullConstant(N1))
2512  return N0;
2513 
2514  // If it cannot overflow, transform into an add.
2515  if (Opcode == ISD::UADDSAT)
2516  if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2517  return DAG.getNode(ISD::ADD, DL, VT, N0, N1);
2518 
2519  return SDValue();
2520 }
2521 
2522 static SDValue getAsCarry(const TargetLowering &TLI, SDValue V) {
2523  bool Masked = false;
2524 
2525  // First, peel away TRUNCATE/ZERO_EXTEND/AND nodes due to legalization.
2526  while (true) {
2527  if (V.getOpcode() == ISD::TRUNCATE || V.getOpcode() == ISD::ZERO_EXTEND) {
2528  V = V.getOperand(0);
2529  continue;
2530  }
2531 
2532  if (V.getOpcode() == ISD::AND && isOneConstant(V.getOperand(1))) {
2533  Masked = true;
2534  V = V.getOperand(0);
2535  continue;
2536  }
2537 
2538  break;
2539  }
2540 
2541  // If this is not a carry, return.
2542  if (V.getResNo() != 1)
2543  return SDValue();
2544 
2545  if (V.getOpcode() != ISD::ADDCARRY && V.getOpcode() != ISD::SUBCARRY &&
2546  V.getOpcode() != ISD::UADDO && V.getOpcode() != ISD::USUBO)
2547  return SDValue();
2548 
2549  EVT VT = V.getNode()->getValueType(0);
2550  if (!TLI.isOperationLegalOrCustom(V.getOpcode(), VT))
2551  return SDValue();
2552 
2553  // If the result is masked, then no matter what kind of bool it is we can
2554  // return. If it isn't, then we need to make sure the bool type is either 0 or
2555  // 1 and not other values.
2556  if (Masked ||
2557  TLI.getBooleanContents(V.getValueType()) ==
2559  return V;
2560 
2561  return SDValue();
2562 }
2563 
2564 /// Given the operands of an add/sub operation, see if the 2nd operand is a
2565 /// masked 0/1 whose source operand is actually known to be 0/-1. If so, invert
2566 /// the opcode and bypass the mask operation.
2567 static SDValue foldAddSubMasked1(bool IsAdd, SDValue N0, SDValue N1,
2568  SelectionDAG &DAG, const SDLoc &DL) {
2569  if (N1.getOpcode() != ISD::AND || !isOneOrOneSplat(N1->getOperand(1)))
2570  return SDValue();
2571 
2572  EVT VT = N0.getValueType();
2573  if (DAG.ComputeNumSignBits(N1.getOperand(0)) != VT.getScalarSizeInBits())
2574  return SDValue();
2575 
2576  // add N0, (and (AssertSext X, i1), 1) --> sub N0, X
2577  // sub N0, (and (AssertSext X, i1), 1) --> add N0, X
2578  return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, N0, N1.getOperand(0));
2579 }
2580 
2581 /// Helper for doing combines based on N0 and N1 being added to each other.
2582 SDValue DAGCombiner::visitADDLikeCommutative(SDValue N0, SDValue N1,
2583  SDNode *LocReference) {
2584  EVT VT = N0.getValueType();
2585  SDLoc DL(LocReference);
2586 
2587  // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
2588  if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB &&
2590  return DAG.getNode(ISD::SUB, DL, VT, N0,
2591  DAG.getNode(ISD::SHL, DL, VT,
2592  N1.getOperand(0).getOperand(1),
2593  N1.getOperand(1)));
2594 
2595  if (SDValue V = foldAddSubMasked1(true, N0, N1, DAG, DL))
2596  return V;
2597 
2598  // Look for:
2599  // add (add x, 1), y
2600  // And if the target does not like this form then turn into:
2601  // sub y, (xor x, -1)
2602  if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.hasOneUse() &&
2603  N0.getOpcode() == ISD::ADD && isOneOrOneSplat(N0.getOperand(1))) {
2604  SDValue Not = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
2605  DAG.getAllOnesConstant(DL, VT));
2606  return DAG.getNode(ISD::SUB, DL, VT, N1, Not);
2607  }
2608 
2609  // Hoist one-use subtraction by non-opaque constant:
2610  // (x - C) + y -> (x + y) - C
2611  // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
2612  if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
2613  isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
2614  SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), N1);
2615  return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
2616  }
2617  // Hoist one-use subtraction from non-opaque constant:
2618  // (C - x) + y -> (y - x) + C
2619  if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
2620  isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
2621  SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
2622  return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(0));
2623  }
2624 
2625  // If the target's bool is represented as 0/1, prefer to make this 'sub 0/1'
2626  // rather than 'add 0/-1' (the zext should get folded).
2627  // add (sext i1 Y), X --> sub X, (zext i1 Y)
2628  if (N0.getOpcode() == ISD::SIGN_EXTEND &&
2629  N0.getOperand(0).getScalarValueSizeInBits() == 1 &&
2631  SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
2632  return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
2633  }
2634 
2635  // add X, (sextinreg Y i1) -> sub X, (and Y 1)
2636  if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
2637  VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
2638  if (TN->getVT() == MVT::i1) {
2639  SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
2640  DAG.getConstant(1, DL, VT));
2641  return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt);
2642  }
2643  }
2644 
2645  // (add X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
2646  if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1)) &&
2647  N1.getResNo() == 0)
2648  return DAG.getNode(ISD::ADDCARRY, DL, N1->getVTList(),
2649  N0, N1.getOperand(0), N1.getOperand(2));
2650 
2651  // (add X, Carry) -> (addcarry X, 0, Carry)
2653  if (SDValue Carry = getAsCarry(TLI, N1))
2654  return DAG.getNode(ISD::ADDCARRY, DL,
2655  DAG.getVTList(VT, Carry.getValueType()), N0,
2656  DAG.getConstant(0, DL, VT), Carry);
2657 
2658  return SDValue();
2659 }
2660 
2661 SDValue DAGCombiner::visitADDC(SDNode *N) {
2662  SDValue N0 = N->getOperand(0);
2663  SDValue N1 = N->getOperand(1);
2664  EVT VT = N0.getValueType();
2665  SDLoc DL(N);
2666 
2667  // If the flag result is dead, turn this into an ADD.
2668  if (!N->hasAnyUseOfValue(1))
2669  return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2670  DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2671 
2672  // canonicalize constant to RHS.
2675  if (N0C && !N1C)
2676  return DAG.getNode(ISD::ADDC, DL, N->getVTList(), N1, N0);
2677 
2678  // fold (addc x, 0) -> x + no carry out
2679  if (isNullConstant(N1))
2680  return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
2681  DL, MVT::Glue));
2682 
2683  // If it cannot overflow, transform into an add.
2684  if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2685  return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2686  DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2687 
2688  return SDValue();
2689 }
2690 
2691 static SDValue flipBoolean(SDValue V, const SDLoc &DL,
2692  SelectionDAG &DAG, const TargetLowering &TLI) {
2693  EVT VT = V.getValueType();
2694 
2695  SDValue Cst;
2696  switch (TLI.getBooleanContents(VT)) {
2699  Cst = DAG.getConstant(1, DL, VT);
2700  break;
2702  Cst = DAG.getAllOnesConstant(DL, VT);
2703  break;
2704  }
2705 
2706  return DAG.getNode(ISD::XOR, DL, VT, V, Cst);
2707 }
2708 
2709 /**
2710  * Flips a boolean if it is cheaper to compute. If the Force parameters is set,
2711  * then the flip also occurs if computing the inverse is the same cost.
2712  * This function returns an empty SDValue in case it cannot flip the boolean
2713  * without increasing the cost of the computation. If you want to flip a boolean
2714  * no matter what, use flipBoolean.
2715  */
2717  const TargetLowering &TLI,
2718  bool Force) {
2719  if (Force && isa<ConstantSDNode>(V))
2720  return flipBoolean(V, SDLoc(V), DAG, TLI);
2721 
2722  if (V.getOpcode() != ISD::XOR)
2723  return SDValue();
2724 
2725  ConstantSDNode *Const = isConstOrConstSplat(V.getOperand(1), false);
2726  if (!Const)
2727  return SDValue();
2728 
2729  EVT VT = V.getValueType();
2730 
2731  bool IsFlip = false;
2732  switch(TLI.getBooleanContents(VT)) {
2734  IsFlip = Const->isOne();
2735  break;
2737  IsFlip = Const->isAllOnesValue();
2738  break;
2740  IsFlip = (Const->getAPIntValue() & 0x01) == 1;
2741  break;
2742  }
2743 
2744  if (IsFlip)
2745  return V.getOperand(0);
2746  if (Force)
2747  return flipBoolean(V, SDLoc(V), DAG, TLI);
2748  return SDValue();
2749 }
2750 
2751 SDValue DAGCombiner::visitADDO(SDNode *N) {
2752  SDValue N0 = N->getOperand(0);
2753  SDValue N1 = N->getOperand(1);
2754  EVT VT = N0.getValueType();
2755  bool IsSigned = (ISD::SADDO == N->getOpcode());
2756 
2757  EVT CarryVT = N->getValueType(1);
2758  SDLoc DL(N);
2759 
2760  // If the flag result is dead, turn this into an ADD.
2761  if (!N->hasAnyUseOfValue(1))
2762  return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2763  DAG.getUNDEF(CarryVT));
2764 
2765  // canonicalize constant to RHS.
2768  return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
2769 
2770  // fold (addo x, 0) -> x + no carry out
2771  if (isNullOrNullSplat(N1))
2772  return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
2773 
2774  if (!IsSigned) {
2775  // If it cannot overflow, transform into an add.
2776  if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2777  return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2778  DAG.getConstant(0, DL, CarryVT));
2779 
2780  // fold (uaddo (xor a, -1), 1) -> (usub 0, a) and flip carry.
2781  if (isBitwiseNot(N0) && isOneOrOneSplat(N1)) {
2782  SDValue Sub = DAG.getNode(ISD::USUBO, DL, N->getVTList(),
2783  DAG.getConstant(0, DL, VT), N0.getOperand(0));
2784  return CombineTo(N, Sub,
2785  flipBoolean(Sub.getValue(1), DL, DAG, TLI));
2786  }
2787 
2788  if (SDValue Combined = visitUADDOLike(N0, N1, N))
2789  return Combined;
2790 
2791  if (SDValue Combined = visitUADDOLike(N1, N0, N))
2792  return Combined;
2793  }
2794 
2795  return SDValue();
2796 }
2797 
2798 SDValue DAGCombiner::visitUADDOLike(SDValue N0, SDValue N1, SDNode *N) {
2799  EVT VT = N0.getValueType();
2800  if (VT.isVector())
2801  return SDValue();
2802 
2803  // (uaddo X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
2804  // If Y + 1 cannot overflow.
2805  if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1))) {
2806  SDValue Y = N1.getOperand(0);
2807  SDValue One = DAG.getConstant(1, SDLoc(N), Y.getValueType());
2808  if (DAG.computeOverflowKind(Y, One) == SelectionDAG::OFK_Never)
2809  return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0, Y,
2810  N1.getOperand(2));
2811  }
2812 
2813  // (uaddo X, Carry) -> (addcarry X, 0, Carry)
2815  if (SDValue Carry = getAsCarry(TLI, N1))
2816  return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0,
2817  DAG.getConstant(0, SDLoc(N), VT), Carry);
2818 
2819  return SDValue();
2820 }
2821 
2822 SDValue DAGCombiner::visitADDE(SDNode *N) {
2823  SDValue N0 = N->getOperand(0);
2824  SDValue N1 = N->getOperand(1);
2825  SDValue CarryIn = N->getOperand(2);
2826 
2827  // canonicalize constant to RHS
2830  if (N0C && !N1C)
2831  return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
2832  N1, N0, CarryIn);
2833 
2834  // fold (adde x, y, false) -> (addc x, y)
2835  if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
2836  return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1);
2837 
2838  return SDValue();
2839 }
2840 
2841 SDValue DAGCombiner::visitADDCARRY(SDNode *N) {
2842  SDValue N0 = N->getOperand(0);
2843  SDValue N1 = N->getOperand(1);
2844  SDValue CarryIn = N->getOperand(2);
2845  SDLoc DL(N);
2846 
2847  // canonicalize constant to RHS
2850  if (N0C && !N1C)
2851  return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), N1, N0, CarryIn);
2852 
2853  // fold (addcarry x, y, false) -> (uaddo x, y)
2854  if (isNullConstant(CarryIn)) {
2855  if (!LegalOperations ||
2857  return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N0, N1);
2858  }
2859 
2860  // fold (addcarry 0, 0, X) -> (and (ext/trunc X), 1) and no carry.
2861  if (isNullConstant(N0) && isNullConstant(N1)) {
2862  EVT VT = N0.getValueType();
2863  EVT CarryVT = CarryIn.getValueType();
2864  SDValue CarryExt = DAG.getBoolExtOrTrunc(CarryIn, DL, VT, CarryVT);
2865  AddToWorklist(CarryExt.getNode());
2866  return CombineTo(N, DAG.getNode(ISD::AND, DL, VT, CarryExt,
2867  DAG.getConstant(1, DL, VT)),
2868  DAG.getConstant(0, DL, CarryVT));
2869  }
2870 
2871  if (SDValue Combined = visitADDCARRYLike(N0, N1, CarryIn, N))
2872  return Combined;
2873 
2874  if (SDValue Combined = visitADDCARRYLike(N1, N0, CarryIn, N))
2875  return Combined;
2876 
2877  return SDValue();
2878 }
2879 
2880 /**
2881  * If we are facing some sort of diamond carry propapagtion pattern try to
2882  * break it up to generate something like:
2883  * (addcarry X, 0, (addcarry A, B, Z):Carry)
2884  *
2885  * The end result is usually an increase in operation required, but because the
2886  * carry is now linearized, other tranforms can kick in and optimize the DAG.
2887  *
2888  * Patterns typically look something like
2889  * (uaddo A, B)
2890  * / \
2891  * Carry Sum
2892  * | \
2893  * | (addcarry *, 0, Z)
2894  * | /
2895  * \ Carry
2896  * | /
2897  * (addcarry X, *, *)
2898  *
2899  * But numerous variation exist. Our goal is to identify A, B, X and Z and
2900  * produce a combine with a single path for carry propagation.
2901  */
2903  SDValue X, SDValue Carry0, SDValue Carry1,
2904  SDNode *N) {
2905  if (Carry1.getResNo() != 1 || Carry0.getResNo() != 1)
2906  return SDValue();
2907  if (Carry1.getOpcode() != ISD::UADDO)
2908  return SDValue();
2909 
2910  SDValue Z;
2911 
2912  /**
2913  * First look for a suitable Z. It will present itself in the form of
2914  * (addcarry Y, 0, Z) or its equivalent (uaddo Y, 1) for Z=true
2915  */
2916  if (Carry0.getOpcode() == ISD::ADDCARRY &&
2917  isNullConstant(Carry0.getOperand(1))) {
2918  Z = Carry0.getOperand(2);
2919  } else if (Carry0.getOpcode() == ISD::UADDO &&
2920  isOneConstant(Carry0.getOperand(1))) {
2921  EVT VT = Combiner.getSetCCResultType(Carry0.getValueType());
2922  Z = DAG.getConstant(1, SDLoc(Carry0.getOperand(1)), VT);
2923  } else {
2924  // We couldn't find a suitable Z.
2925  return SDValue();
2926  }
2927 
2928 
2929  auto cancelDiamond = [&](SDValue A,SDValue B) {
2930  SDLoc DL(N);
2931  SDValue NewY = DAG.getNode(ISD::ADDCARRY, DL, Carry0->getVTList(), A, B, Z);
2932  Combiner.AddToWorklist(NewY.getNode());
2933  return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), X,
2934  DAG.getConstant(0, DL, X.getValueType()),
2935  NewY.getValue(1));
2936  };
2937 
2938  /**
2939  * (uaddo A, B)
2940  * |
2941  * Sum
2942  * |
2943  * (addcarry *, 0, Z)
2944  */
2945  if (Carry0.getOperand(0) == Carry1.getValue(0)) {
2946  return cancelDiamond(Carry1.getOperand(0), Carry1.getOperand(1));
2947  }
2948 
2949  /**
2950  * (addcarry A, 0, Z)
2951  * |
2952  * Sum
2953  * |
2954  * (uaddo *, B)
2955  */
2956  if (Carry1.getOperand(0) == Carry0.getValue(0)) {
2957  return cancelDiamond(Carry0.getOperand(0), Carry1.getOperand(1));
2958  }
2959 
2960  if (Carry1.getOperand(1) == Carry0.getValue(0)) {
2961  return cancelDiamond(Carry1.getOperand(0), Carry0.getOperand(0));
2962  }
2963 
2964  return SDValue();
2965 }
2966 
2967 SDValue DAGCombiner::visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
2968  SDNode *N) {
2969  // fold (addcarry (xor a, -1), b, c) -> (subcarry b, a, !c) and flip carry.
2970  if (isBitwiseNot(N0))
2971  if (SDValue NotC = extractBooleanFlip(CarryIn, DAG, TLI, true)) {
2972  SDLoc DL(N);
2973  SDValue Sub = DAG.getNode(ISD::SUBCARRY, DL, N->getVTList(), N1,
2974  N0.getOperand(0), NotC);
2975  return CombineTo(N, Sub,
2976  flipBoolean(Sub.getValue(1), DL, DAG, TLI));
2977  }
2978 
2979  // Iff the flag result is dead:
2980  // (addcarry (add|uaddo X, Y), 0, Carry) -> (addcarry X, Y, Carry)
2981  // Don't do this if the Carry comes from the uaddo. It won't remove the uaddo
2982  // or the dependency between the instructions.
2983  if ((N0.getOpcode() == ISD::ADD ||
2984  (N0.getOpcode() == ISD::UADDO && N0.getResNo() == 0 &&
2985  N0.getValue(1) != CarryIn)) &&
2986  isNullConstant(N1) && !N->hasAnyUseOfValue(1))
2987  return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(),
2988  N0.getOperand(0), N0.getOperand(1), CarryIn);
2989 
2990  /**
2991  * When one of the addcarry argument is itself a carry, we may be facing
2992  * a diamond carry propagation. In which case we try to transform the DAG
2993  * to ensure linear carry propagation if that is possible.
2994  */
2995  if (auto Y = getAsCarry(TLI, N1)) {
2996  // Because both are carries, Y and Z can be swapped.
2997  if (auto R = combineADDCARRYDiamond(*this, DAG, N0, Y, CarryIn, N))
2998  return R;
2999  if (auto R = combineADDCARRYDiamond(*this, DAG, N0, CarryIn, Y, N))
3000  return R;
3001  }
3002 
3003  return SDValue();
3004 }
3005 
3006 // Since it may not be valid to emit a fold to zero for vector initializers
3007 // check if we can before folding.
3008 static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT,
3009  SelectionDAG &DAG, bool LegalOperations) {
3010  if (!VT.isVector())
3011  return DAG.getConstant(0, DL, VT);
3012  if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
3013  return DAG.getConstant(0, DL, VT);
3014  return SDValue();
3015 }
3016 
3017 SDValue DAGCombiner::visitSUB(SDNode *N) {
3018  SDValue N0 = N->getOperand(0);
3019  SDValue N1 = N->getOperand(1);
3020  EVT VT = N0.getValueType();
3021  SDLoc DL(N);
3022 
3023  // fold vector ops
3024  if (VT.isVector()) {
3025  if (SDValue FoldedVOp = SimplifyVBinOp(N))
3026  return FoldedVOp;
3027 
3028  // fold (sub x, 0) -> x, vector edition
3030  return N0;
3031  }
3032 
3033  // fold (sub x, x) -> 0
3034  // FIXME: Refactor this and xor and other similar operations together.
3035  if (N0 == N1)
3036  return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
3039  // fold (sub c1, c2) -> c1-c2
3040  return DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, N0.getNode(),
3041  N1.getNode());
3042  }
3043 
3044  if (SDValue NewSel = foldBinOpIntoSelect(N))
3045  return NewSel;
3046 
3048 
3049  // fold (sub x, c) -> (add x, -c)
3050  if (N1C) {
3051  return DAG.getNode(ISD::ADD, DL, VT, N0,
3052  DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
3053  }
3054 
3055  if (isNullOrNullSplat(N0)) {
3056  unsigned BitWidth = VT.getScalarSizeInBits();
3057  // Right-shifting everything out but the sign bit followed by negation is
3058  // the same as flipping arithmetic/logical shift type without the negation:
3059  // -(X >>u 31) -> (X >>s 31)
3060  // -(X >>s 31) -> (X >>u 31)
3061  if (N1->getOpcode() == ISD::SRA || N1->getOpcode() == ISD::SRL) {
3062  ConstantSDNode *ShiftAmt = isConstOrConstSplat(N1.getOperand(1));
3063  if (ShiftAmt && ShiftAmt->getAPIntValue() == (BitWidth - 1)) {
3064  auto NewSh = N1->getOpcode() == ISD::SRA ? ISD::SRL : ISD::SRA;
3065  if (!LegalOperations || TLI.isOperationLegal(NewSh, VT))
3066  return DAG.getNode(NewSh, DL, VT, N1.getOperand(0), N1.getOperand(1));
3067  }
3068  }
3069 
3070  // 0 - X --> 0 if the sub is NUW.
3071  if (N->getFlags().hasNoUnsignedWrap())
3072  return N0;
3073 
3074  if (DAG.MaskedValueIsZero(N1, ~APInt::getSignMask(BitWidth))) {
3075  // N1 is either 0 or the minimum signed value. If the sub is NSW, then
3076  // N1 must be 0 because negating the minimum signed value is undefined.
3077  if (N->getFlags().hasNoSignedWrap())
3078  return N0;
3079 
3080  // 0 - X --> X if X is 0 or the minimum signed value.
3081  return N1;
3082  }
3083  }
3084 
3085  // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
3086  if (isAllOnesOrAllOnesSplat(N0))
3087  return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
3088 
3089  // fold (A - (0-B)) -> A+B
3090  if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
3091  return DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(1));
3092 
3093  // fold A-(A-B) -> B
3094  if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
3095  return N1.getOperand(1);
3096 
3097  // fold (A+B)-A -> B
3098  if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
3099  return N0.getOperand(1);
3100 
3101  // fold (A+B)-B -> A
3102  if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
3103  return N0.getOperand(0);
3104 
3105  // fold (A+C1)-C2 -> A+(C1-C2)
3106  if (N0.getOpcode() == ISD::ADD &&
3107  isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
3108  isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
3109  SDValue NewC = DAG.FoldConstantArithmetic(
3110  ISD::SUB, DL, VT, N0.getOperand(1).getNode(), N1.getNode());
3111  assert(NewC && "Constant folding failed");
3112  return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), NewC);
3113  }
3114 
3115  // fold C2-(A+C1) -> (C2-C1)-A
3116  if (N1.getOpcode() == ISD::ADD) {
3117  SDValue N11 = N1.getOperand(1);
3118  if (isConstantOrConstantVector(N0, /* NoOpaques */ true) &&
3119  isConstantOrConstantVector(N11, /* NoOpaques */ true)) {
3120  SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, N0.getNode(),
3121  N11.getNode());
3122  assert(NewC && "Constant folding failed");
3123  return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0));
3124  }
3125  }
3126 
3127  // fold (A-C1)-C2 -> A-(C1+C2)
3128  if (N0.getOpcode() == ISD::SUB &&
3129  isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
3130  isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
3131  SDValue NewC = DAG.FoldConstantArithmetic(
3132  ISD::ADD, DL, VT, N0.getOperand(1).getNode(), N1.getNode());
3133  assert(NewC && "Constant folding failed");
3134  return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), NewC);
3135  }
3136 
3137  // fold (c1-A)-c2 -> (c1-c2)-A
3138  if (N0.getOpcode() == ISD::SUB &&
3139  isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
3140  isConstantOrConstantVector(N0.getOperand(0), /* NoOpaques */ true)) {
3141  SDValue NewC = DAG.FoldConstantArithmetic(
3142  ISD::SUB, DL, VT, N0.getOperand(0).getNode(), N1.getNode());
3143  assert(NewC && "Constant folding failed");
3144  return DAG.getNode(ISD::SUB, DL, VT, NewC, N0.getOperand(1));
3145  }
3146 
3147  // fold ((A+(B+or-C))-B) -> A+or-C
3148  if (N0.getOpcode() == ISD::ADD &&
3149  (N0.getOperand(1).getOpcode() == ISD::SUB ||
3150  N0.getOperand(1).getOpcode() == ISD::ADD) &&
3151  N0.getOperand(1).getOperand(0) == N1)
3152  return DAG.getNode(N0.getOperand(1).getOpcode(), DL, VT, N0.getOperand(0),
3153  N0.getOperand(1).getOperand(1));
3154 
3155  // fold ((A+(C+B))-B) -> A+C
3156  if (N0.getOpcode() == ISD::ADD && N0.getOperand(1).getOpcode() == ISD::ADD &&
3157  N0.getOperand(1).getOperand(1) == N1)
3158  return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0),
3159  N0.getOperand(1).getOperand(0));
3160 
3161  // fold ((A-(B-C))-C) -> A-B
3162  if (N0.getOpcode() == ISD::SUB && N0.getOperand(1).getOpcode() == ISD::SUB &&
3163  N0.getOperand(1).getOperand(1) == N1)
3164  return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
3165  N0.getOperand(1).getOperand(0));
3166 
3167  // fold (A-(B-C)) -> A+(C-B)
3168  if (N1.getOpcode() == ISD::SUB && N1.hasOneUse())
3169  return DAG.getNode(ISD::ADD, DL, VT, N0,
3170  DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(1),
3171  N1.getOperand(0)));
3172 
3173  // fold (X - (-Y * Z)) -> (X + (Y * Z))
3174  if (N1.getOpcode() == ISD::MUL && N1.hasOneUse()) {
3175  if (N1.getOperand(0).getOpcode() == ISD::SUB &&
3177  SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
3178  N1.getOperand(0).getOperand(1),
3179  N1.getOperand(1));
3180  return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
3181  }
3182  if (N1.getOperand(1).getOpcode() == ISD::SUB &&
3184  SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
3185  N1.getOperand(0),
3186  N1.getOperand(1).getOperand(1));
3187  return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
3188  }
3189  }
3190 
3191  // If either operand of a sub is undef, the result is undef
3192  if (N0.isUndef())
3193  return N0;
3194  if (N1.isUndef())
3195  return N1;
3196 
3197  if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
3198  return V;
3199 
3200  if (SDValue V = foldAddSubOfSignBit(N, DAG))
3201  return V;
3202 
3203  if (SDValue V = foldAddSubMasked1(false, N0, N1, DAG, SDLoc(N)))
3204  return V;
3205 
3206  // (x - y) - 1 -> add (xor y, -1), x
3207  if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB && isOneOrOneSplat(N1)) {
3208  SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1),
3209  DAG.getAllOnesConstant(DL, VT));
3210  return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0));
3211  }
3212 
3213  // Look for:
3214  // sub y, (xor x, -1)
3215  // And if the target does not like this form then turn into:
3216  // add (add x, y), 1
3217  if (TLI.preferIncOfAddToSubOfNot(VT) && N1.hasOneUse() && isBitwiseNot(N1)) {
3218  SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(0));
3219  return DAG.getNode(ISD::ADD, DL, VT, Add, DAG.getConstant(1, DL, VT));
3220  }
3221 
3222  // Hoist one-use addition by non-opaque constant:
3223  // (x + C) - y -> (x - y) + C
3224  if (N0.hasOneUse() && N0.getOpcode() == ISD::ADD &&
3225  isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
3226  SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
3227  return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(1));
3228  }
3229  // y - (x + C) -> (y - x) - C
3230  if (N1.hasOneUse() && N1.getOpcode() == ISD::ADD &&
3231  isConstantOrConstantVector(N1.getOperand(1), /*NoOpaques=*/true)) {
3232  SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(0));
3233  return DAG.getNode(ISD::SUB, DL, VT, Sub, N1.getOperand(1));
3234  }
3235  // (x - C) - y -> (x - y) - C
3236  // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
3237  if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
3238  isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
3239  SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
3240  return DAG.getNode(ISD::SUB, DL, VT, Sub, N0.getOperand(1));
3241  }
3242  // (C - x) - y -> C - (x + y)
3243  if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
3244  isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
3245  SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1), N1);
3246  return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), Add);
3247  }
3248 
3249  // If the target's bool is represented as 0/-1, prefer to make this 'add 0/-1'
3250  // rather than 'sub 0/1' (the sext should get folded).
3251  // sub X, (zext i1 Y) --> add X, (sext i1 Y)
3252  if (N1.getOpcode() == ISD::ZERO_EXTEND &&
3253  N1.getOperand(0).getScalarValueSizeInBits() == 1 &&
3254  TLI.getBooleanContents(VT) ==
3256  SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N1.getOperand(0));
3257  return DAG.getNode(ISD::ADD, DL, VT, N0, SExt);
3258  }
3259 
3260  // fold Y = sra (X, size(X)-1); sub (xor (X, Y), Y) -> (abs X)
3261  if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
3262  if (N0.getOpcode() == ISD::XOR && N1.getOpcode() == ISD::SRA) {
3263  SDValue X0 = N0.getOperand(0), X1 = N0.getOperand(1);
3264  SDValue S0 = N1.getOperand(0);
3265  if ((X0 == S0 && X1 == N1) || (X0 == N1 && X1 == S0)) {
3266  unsigned OpSizeInBits = VT.getScalarSizeInBits();
3268  if (C->getAPIntValue() == (OpSizeInBits - 1))
3269  return DAG.getNode(ISD::ABS, SDLoc(N), VT, S0);
3270  }
3271  }
3272  }
3273 
3274  // If the relocation model supports it, consider symbol offsets.
3275  if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
3276  if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
3277  // fold (sub Sym, c) -> Sym-c
3278  if (N1C && GA->getOpcode() == ISD::GlobalAddress)
3279  return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT,
3280  GA->getOffset() -
3281  (uint64_t)N1C->getSExtValue());
3282  // fold (sub Sym+c1, Sym+c2) -> c1-c2
3283  if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
3284  if (GA->getGlobal() == GB->getGlobal())
3285  return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
3286  DL, VT);
3287  }
3288 
3289  // sub X, (sextinreg Y i1) -> add X, (and Y 1)
3290  if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
3291  VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
3292  if (TN->getVT() == MVT::i1) {
3293  SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
3294  DAG.getConstant(1, DL, VT));
3295  return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt);
3296  }
3297  }
3298 
3299  // Prefer an add for more folding potential and possibly better codegen:
3300  // sub N0, (lshr N10, width-1) --> add N0, (ashr N10, width-1)
3301  if (!LegalOperations && N1.getOpcode() == ISD::SRL && N1.hasOneUse()) {
3302  SDValue ShAmt = N1.getOperand(1);
3303  ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
3304  if (ShAmtC &&
3305  ShAmtC->getAPIntValue() == (N1.getScalarValueSizeInBits() - 1)) {
3306  SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0), ShAmt);
3307  return DAG.getNode(ISD::ADD, DL, VT, N0, SRA);
3308  }
3309  }
3310 
3311  return SDValue();
3312 }
3313 
3314 SDValue DAGCombiner::visitSUBSAT(SDNode *N) {
3315  SDValue N0 = N->getOperand(0);
3316  SDValue N1 = N->getOperand(1);
3317  EVT VT = N0.getValueType();
3318  SDLoc DL(N);
3319 
3320  // fold vector ops
3321  if (VT.isVector()) {
3322  // TODO SimplifyVBinOp
3323 
3324  // fold (sub_sat x, 0) -> x, vector edition
3326  return N0;
3327  }
3328 
3329  // fold (sub_sat x, undef) -> 0
3330  if (N0.isUndef() || N1.isUndef())
3331  return DAG.getConstant(0, DL, VT);
3332 
3333  // fold (sub_sat x, x) -> 0
3334  if (N0 == N1)
3335  return DAG.getConstant(0, DL, VT);
3336 
3339  // fold (sub_sat c1, c2) -> c3
3340  return DAG.FoldConstantArithmetic(N->getOpcode(), DL, VT, N0.getNode(),
3341  N1.getNode());
3342  }
3343 
3344  // fold (sub_sat x, 0) -> x
3345  if (isNullConstant(N1))
3346  return N0;
3347 
3348  return SDValue();
3349 }
3350 
3351 SDValue DAGCombiner::visitSUBC(SDNode *N) {
3352  SDValue N0 = N->getOperand(0);
3353  SDValue N1 = N->getOperand(1);
3354  EVT VT = N0.getValueType();
3355  SDLoc DL(N);
3356 
3357  // If the flag result is dead, turn this into an SUB.
3358  if (!N->hasAnyUseOfValue(1))
3359  return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
3360  DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3361 
3362  // fold (subc x, x) -> 0 + no borrow
3363  if (N0 == N1)
3364  return CombineTo(N, DAG.getConstant(0, DL, VT),
3365  DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3366 
3367  // fold (subc x, 0) -> x + no borrow
3368  if (isNullConstant(N1))
3369  return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3370 
3371  // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
3372  if (isAllOnesConstant(N0))
3373  return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
3374  DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3375 
3376  return SDValue();
3377 }
3378 
3379 SDValue DAGCombiner::visitSUBO(SDNode *N) {
3380  SDValue N0 = N->getOperand(0);
3381  SDValue N1 = N->getOperand(1);
3382  EVT VT = N0.getValueType();
3383  bool IsSigned = (ISD::SSUBO == N->getOpcode());
3384 
3385  EVT CarryVT = N->getValueType(1);
3386  SDLoc DL(N);
3387 
3388  // If the flag result is dead, turn this into an SUB.
3389  if (!N->hasAnyUseOfValue(1))
3390  return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
3391  DAG.getUNDEF(CarryVT));
3392 
3393  // fold (subo x, x) -> 0 + no borrow
3394  if (N0 == N1)
3395  return CombineTo(N, DAG.getConstant(0, DL, VT),
3396  DAG.getConstant(0, DL, CarryVT));
3397 
3399 
3400  // fold (subox, c) -> (addo x, -c)
3401  if (IsSigned && N1C && !N1C->getAPIntValue().isMinSignedValue()) {
3402  return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0,
3403  DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
3404  }
3405 
3406  // fold (subo x, 0) -> x + no borrow
3407  if (isNullOrNullSplat(N1))
3408  return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
3409 
3410  // Canonicalize (usubo -1, x) -> ~x, i.e. (xor x, -1) + no borrow
3411  if (!IsSigned && isAllOnesOrAllOnesSplat(N0))
3412  return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
3413  DAG.getConstant(0, DL, CarryVT));
3414 
3415  return SDValue();
3416 }
3417 
3418 SDValue DAGCombiner::visitSUBE(SDNode *N) {
3419  SDValue N0 = N->getOperand(0);
3420  SDValue N1 = N->getOperand(1);
3421  SDValue CarryIn = N->getOperand(2);
3422 
3423  // fold (sube x, y, false) -> (subc x, y)
3424  if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
3425  return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1);
3426 
3427  return SDValue();
3428 }
3429 
3430 SDValue DAGCombiner::visitSUBCARRY(SDNode *N) {
3431  SDValue N0 = N->getOperand(0);
3432  SDValue N1 = N->getOperand(1);
3433  SDValue CarryIn = N->getOperand(2);
3434 
3435  // fold (subcarry x, y, false) -> (usubo x, y)
3436  if (isNullConstant(CarryIn)) {
3437  if (!LegalOperations ||
3439  return DAG.getNode(ISD::USUBO, SDLoc(N), N->getVTList(), N0, N1);
3440  }
3441 
3442  return SDValue();
3443 }
3444 
3445 SDValue DAGCombiner::visitMUL(SDNode *N) {
3446  SDValue N0 = N->getOperand(0);
3447  SDValue N1 = N->getOperand(1);
3448  EVT VT = N0.getValueType();
3449 
3450  // fold (mul x, undef) -> 0
3451  if (N0.isUndef() || N1.isUndef())
3452  return DAG.getConstant(0, SDLoc(N), VT);
3453 
3454  bool N0IsConst = false;
3455  bool N1IsConst = false;
3456  bool N1IsOpaqueConst = false;
3457  bool N0IsOpaqueConst = false;
3458  APInt ConstValue0, ConstValue1;
3459  // fold vector ops
3460  if (VT.isVector()) {
3461  if (SDValue FoldedVOp = SimplifyVBinOp(N))
3462  return FoldedVOp;
3463 
3464  N0IsConst = ISD::isConstantSplatVector(N0.getNode(), ConstValue0);
3465  N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1);
3466  assert((!N0IsConst ||
3467  ConstValue0.getBitWidth() == VT.getScalarSizeInBits()) &&
3468  "Splat APInt should be element width");
3469  assert((!N1IsConst ||
3470  ConstValue1.getBitWidth() == VT.getScalarSizeInBits()) &&
3471  "Splat APInt should be element width");
3472  } else {
3473  N0IsConst = isa<ConstantSDNode>(N0);
3474  if (N0IsConst) {
3475  ConstValue0 = cast<ConstantSDNode>(N0)->getAPIntValue();
3476  N0IsOpaqueConst = cast<ConstantSDNode>(N0)->isOpaque();
3477  }
3478  N1IsConst = isa<ConstantSDNode>(N1);
3479  if (N1IsConst) {
3480  ConstValue1 = cast<ConstantSDNode>(N1)->getAPIntValue();
3481  N1IsOpaqueConst = cast<ConstantSDNode>(N1)->isOpaque();
3482  }
3483  }
3484 
3485  // fold (mul c1, c2) -> c1*c2
3486  if (N0IsConst && N1IsConst && !N0IsOpaqueConst && !N1IsOpaqueConst)
3487  return DAG.FoldConstantArithmetic(ISD::MUL, SDLoc(N), VT,
3488  N0.getNode(), N1.getNode());
3489 
3490  // canonicalize constant to RHS (vector doesn't have to splat)
3493  return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0);
3494  // fold (mul x, 0) -> 0
3495  if (N1IsConst && ConstValue1.isNullValue())
3496  return N1;
3497  // fold (mul x, 1) -> x
3498  if (N1IsConst && ConstValue1.isOneValue())
3499  return N0;
3500 
3501  if (SDValue NewSel = foldBinOpIntoSelect(N))
3502  return NewSel;
3503 
3504  // fold (mul x, -1) -> 0-x
3505  if (N1IsConst && ConstValue1.isAllOnesValue()) {
3506  SDLoc DL(N);
3507  return DAG.getNode(ISD::SUB, DL, VT,
3508  DAG.getConstant(0, DL, VT), N0);
3509  }
3510  // fold (mul x, (1 << c)) -> x << c
3511  if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
3512  DAG.isKnownToBeAPowerOfTwo(N1) &&
3513  (!VT.isVector() || Level <= AfterLegalizeVectorOps)) {
3514  SDLoc DL(N);
3515  SDValue LogBase2 = BuildLogBase2(N1, DL);
3516  EVT ShiftVT = getShiftAmountTy(N0.getValueType());
3517  SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
3518  return DAG.getNode(ISD::SHL, DL, VT, N0, Trunc);
3519  }
3520  // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
3521  if (N1IsConst && !N1IsOpaqueConst && (-ConstValue1).isPowerOf2()) {
3522  unsigned Log2Val = (-ConstValue1).logBase2();
3523  SDLoc DL(N);
3524  // FIXME: If the input is something that is easily negated (e.g. a
3525  // single-use add), we should put the negate there.
3526  return DAG.getNode(ISD::SUB, DL, VT,
3527  DAG.getConstant(0, DL, VT),
3528  DAG.getNode(ISD::SHL, DL, VT, N0,
3529  DAG.getConstant(Log2Val, DL,
3530  getShiftAmountTy(N0.getValueType()))));
3531  }
3532 
3533  // Try to transform multiply-by-(power-of-2 +/- 1) into shift and add/sub.
3534  // mul x, (2^N + 1) --> add (shl x, N), x
3535  // mul x, (2^N - 1) --> sub (shl x, N), x
3536  // Examples: x * 33 --> (x << 5) + x
3537  // x * 15 --> (x << 4) - x
3538  // x * -33 --> -((x << 5) + x)
3539  // x * -15 --> -((x << 4) - x) ; this reduces --> x - (x << 4)
3540  if (N1IsConst && TLI.decomposeMulByConstant(VT, N1)) {
3541  // TODO: We could handle more general decomposition of any constant by
3542  // having the target set a limit on number of ops and making a
3543  // callback to determine that sequence (similar to sqrt expansion).
3544  unsigned MathOp = ISD::DELETED_NODE;
3545  APInt MulC = ConstValue1.abs();
3546  if ((MulC - 1).isPowerOf2())
3547  MathOp = ISD::ADD;
3548  else if ((MulC + 1).isPowerOf2())
3549  MathOp = ISD::SUB;
3550 
3551  if (MathOp != ISD::DELETED_NODE) {
3552  unsigned ShAmt =
3553  MathOp == ISD::ADD ? (MulC - 1).logBase2() : (MulC + 1).logBase2();
3554  assert(ShAmt < VT.getScalarSizeInBits() &&
3555  "multiply-by-constant generated out of bounds shift");
3556  SDLoc DL(N);
3557  SDValue Shl =
3558  DAG.getNode(ISD::SHL, DL, VT, N0, DAG.getConstant(ShAmt, DL, VT));
3559  SDValue R = DAG.getNode(MathOp, DL, VT, Shl, N0);
3560  if (ConstValue1.isNegative())
3561  R = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), R);
3562  return R;
3563  }
3564  }
3565 
3566  // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
3567  if (N0.getOpcode() == ISD::SHL &&
3568  isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
3569  isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
3570  SDValue C3 = DAG.getNode(ISD::SHL, SDLoc(N), VT, N1, N0.getOperand(1));
3572  return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), C3);
3573  }
3574 
3575  // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
3576  // use.
3577  {
3578  SDValue Sh(nullptr, 0), Y(nullptr, 0);
3579 
3580  // Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)).
3581  if (N0.getOpcode() == ISD::SHL &&
3583  N0.getNode()->hasOneUse()) {
3584  Sh = N0; Y = N1;
3585  } else if (N1.getOpcode() == ISD::SHL &&
3587  N1.getNode()->hasOneUse()) {
3588  Sh = N1; Y = N0;
3589  }
3590 
3591  if (Sh.getNode()) {
3592  SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT, Sh.getOperand(0), Y);
3593  return DAG.getNode(ISD::SHL, SDLoc(N), VT, Mul, Sh.getOperand(1));
3594  }
3595  }
3596 
3597  // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
3599  N0.getOpcode() == ISD::ADD &&
3601  isMulAddWithConstProfitable(N, N0, N1))
3602  return DAG.getNode(ISD::ADD, SDLoc(N), VT,
3603  DAG.getNode(ISD::MUL, SDLoc(N0), VT,
3604  N0.getOperand(0), N1),
3605  DAG.getNode(ISD::MUL, SDLoc(N1), VT,
3606  N0.getOperand(1), N1));
3607 
3608  // reassociate mul
3609  if (SDValue RMUL = reassociateOps(ISD::MUL, SDLoc(N), N0, N1, N->getFlags()))
3610  return RMUL;
3611 
3612  return SDValue();
3613 }
3614 
3615 /// Return true if divmod libcall is available.
3616 static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
3617  const TargetLowering &TLI) {
3618  RTLIB::Libcall LC;
3619  EVT NodeType = Node->getValueType(0);
3620  if (!NodeType.isSimple())
3621  return false;
3622  switch (NodeType.getSimpleVT().SimpleTy) {
3623  default: return false; // No libcall for vector types.
3624  case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
3625  case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
3626  case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
3627  case MVT::i64: LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
3628  case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break;
3629  }
3630 
3631  return TLI.getLibcallName(LC) != nullptr;
3632 }
3633 
3634 /// Issue divrem if both quotient and remainder are needed.
3635 SDValue DAGCombiner::useDivRem(SDNode *Node) {
3636  if (Node->use_empty())
3637  return SDValue(); // This is a dead node, leave it alone.
3638 
3639  unsigned Opcode = Node->getOpcode();
3640  bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM);
3641  unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
3642 
3643  // DivMod lib calls can still work on non-legal types if using lib-calls.
3644  EVT VT = Node->getValueType(0);
3645  if (VT.isVector() || !VT.isInteger())
3646  return SDValue();
3647 
3648  if (!TLI.isTypeLegal(VT) && !TLI.isOperationCustom(DivRemOpc, VT))
3649  return SDValue();
3650 
3651  // If DIVREM is going to get expanded into a libcall,
3652  // but there is no libcall available, then don't combine.
3653  if (!TLI.isOperationLegalOrCustom(DivRemOpc, VT) &&
3654  !isDivRemLibcallAvailable(Node, isSigned, TLI))
3655  return SDValue();
3656 
3657  // If div is legal, it's better to do the normal expansion
3658  unsigned OtherOpcode = 0;
3659  if ((Opcode == ISD::SDIV) || (Opcode == ISD::UDIV)) {
3660  OtherOpcode = isSigned ? ISD::SREM : ISD::UREM;
3661  if (TLI.isOperationLegalOrCustom(Opcode, VT))
3662  return SDValue();
3663  } else {
3664  OtherOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
3665  if (TLI.isOperationLegalOrCustom(OtherOpcode, VT))
3666  return SDValue();
3667  }
3668 
3669  SDValue Op0 = Node->getOperand(0);
3670  SDValue Op1 = Node->getOperand(1);
3671  SDValue combined;
3672  for (SDNode::use_iterator UI = Op0.getNode()->use_begin(),
3673  UE = Op0.getNode()->use_end(); UI != UE; ++UI) {
3674  SDNode *User = *UI;
3675  if (User == Node || User->getOpcode() == ISD::DELETED_NODE ||
3676  User->use_empty())
3677  continue;
3678  // Convert the other matching node(s), too;
3679  // otherwise, the DIVREM may get target-legalized into something
3680  // target-specific that we won't be able to recognize.
3681  unsigned UserOpc = User->getOpcode();
3682  if ((UserOpc == Opcode || UserOpc == OtherOpcode || UserOpc == DivRemOpc) &&
3683  User->getOperand(0) == Op0 &&
3684  User->getOperand(1) == Op1) {
3685  if (!combined) {
3686  if (UserOpc == OtherOpcode) {
3687  SDVTList VTs = DAG.getVTList(VT, VT);
3688  combined = DAG.getNode(DivRemOpc, SDLoc(Node), VTs, Op0, Op1);
3689  } else if (UserOpc == DivRemOpc) {
3690  combined = SDValue(User, 0);
3691  } else {
3692  assert(UserOpc == Opcode);
3693  continue;
3694  }
3695  }
3696  if (UserOpc == ISD::SDIV || UserOpc == ISD::UDIV)
3697  CombineTo(User, combined);
3698  else if (UserOpc == ISD::SREM || UserOpc == ISD::UREM)
3699  CombineTo(User, combined.getValue(1));
3700  }
3701  }
3702  return combined;
3703 }
3704 
3706  SDValue N0 = N->getOperand(0);
3707  SDValue N1 = N->getOperand(1);
3708  EVT VT = N->getValueType(0);
3709  SDLoc DL(N);
3710 
3711  unsigned Opc = N->getOpcode();
3712  bool IsDiv = (ISD::SDIV == Opc) || (ISD::UDIV == Opc);
3714 
3715  // X / undef -> undef
3716  // X % undef -> undef
3717  // X / 0 -> undef
3718  // X % 0 -> undef
3719  // NOTE: This includes vectors where any divisor element is zero/undef.
3720  if (DAG.isUndef(Opc, {N0, N1}))
3721  return DAG.getUNDEF(VT);
3722 
3723  // undef / X -> 0
3724  // undef % X -> 0
3725  if (N0.isUndef())
3726  return DAG.getConstant(0, DL, VT);
3727 
3728  // 0 / X -> 0
3729  // 0 % X -> 0
3731  if (N0C && N0C->isNullValue())
3732  return N0;
3733 
3734  // X / X -> 1
3735  // X % X -> 0
3736  if (N0 == N1)
3737  return DAG.getConstant(IsDiv ? 1 : 0, DL, VT);
3738 
3739  // X / 1 -> X
3740  // X % 1 -> 0
3741  // If this is a boolean op (single-bit element type), we can't have
3742  // division-by-zero or remainder-by-zero, so assume the divisor is 1.
3743  // TODO: Similarly, if we're zero-extending a boolean divisor, then assume
3744  // it's a 1.
3745  if ((N1C && N1C->isOne()) || (VT.getScalarType() == MVT::i1))
3746  return IsDiv ? N0 : DAG.getConstant(0, DL, VT);
3747 
3748  return SDValue();
3749 }
3750 
3751 SDValue DAGCombiner::visitSDIV(SDNode *N) {
3752  SDValue N0 = N->getOperand(0);
3753  SDValue N1 = N->getOperand(1);
3754  EVT VT = N->getValueType(0);
3755  EVT CCVT = getSetCCResultType(VT);
3756 
3757  // fold vector ops
3758  if (VT.isVector())
3759  if (SDValue FoldedVOp = SimplifyVBinOp(N))
3760  return FoldedVOp;
3761 
3762  SDLoc DL(N);
3763 
3764  // fold (sdiv c1, c2) -> c1/c2
3767  if (N0C && N1C && !N0C->isOpaque() && !N1C->isOpaque())
3768  return DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, N0C, N1C);
3769  // fold (sdiv X, -1) -> 0-X
3770  if (N1C && N1C->isAllOnesValue())
3771  return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0);
3772  // fold (sdiv X, MIN_SIGNED) -> select(X == MIN_SIGNED, 1, 0)
3773  if (N1C && N1C->getAPIntValue().isMinSignedValue())
3774  return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
3775  DAG.getConstant(1, DL, VT),
3776  DAG.getConstant(0, DL, VT));
3777 
3778  if (SDValue V = simplifyDivRem(N, DAG))
3779  return V;
3780 
3781  if (SDValue NewSel = foldBinOpIntoSelect(N))
3782  return NewSel;
3783 
3784  // If we know the sign bits of both operands are zero, strength reduce to a
3785  // udiv instead. Handles (X&15) /s 4 -> X&15 >> 2
3786  if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
3787  return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1);
3788 
3789  if (SDValue V = visitSDIVLike(N0, N1, N)) {
3790  // If the corresponding remainder node exists, update its users with
3791  // (Dividend - (Quotient * Divisor).
3792  if (SDNode *RemNode = DAG.getNodeIfExists(ISD::SREM, N->getVTList(),
3793  { N0, N1 })) {
3794  SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
3795  SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
3796  AddToWorklist(Mul.getNode());
3797  AddToWorklist(Sub.getNode());
3798  CombineTo(RemNode, Sub);
3799  }
3800  return V;
3801  }
3802 
3803  // sdiv, srem -> sdivrem
3804  // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
3805  // true. Otherwise, we break the simplification logic in visitREM().
3807  if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
3808  if (SDValue DivRem = useDivRem(N))
3809  return DivRem;
3810 
3811  return SDValue();
3812 }
3813 
3814 SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) {
3815  SDLoc DL(N);
3816  EVT VT = N->getValueType(0);
3817  EVT CCVT = getSetCCResultType(VT);
3818  unsigned BitWidth = VT.getScalarSizeInBits();
3819 
3820  // Helper for determining whether a value is a power-2 constant scalar or a
3821  // vector of such elements.
3822  auto IsPowerOfTwo = [](ConstantSDNode *C) {
3823  if (C->isNullValue() || C->isOpaque())
3824  return false;
3825  if (C->getAPIntValue().isPowerOf2())
3826  return true;
3827  if ((-C->getAPIntValue()).isPowerOf2())
3828  return true;
3829  return false;
3830  };
3831 
3832  // fold (sdiv X, pow2) -> simple ops after legalize
3833  // FIXME: We check for the exact bit here because the generic lowering gives
3834  // better results in that case. The target-specific lowering should learn how
3835  // to handle exact sdivs efficiently.
3836  if (!N->getFlags().hasExact() && ISD::matchUnaryPredicate(N1, IsPowerOfTwo)) {
3837  // Target-specific implementation of sdiv x, pow2.
3838  if (SDValue Res = BuildSDIVPow2(N))
3839  return Res;
3840 
3841  // Create constants that are functions of the shift amount value.
3842  EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
3843  SDValue Bits = DAG.getConstant(BitWidth, DL, ShiftAmtTy);
3844  SDValue C1 = DAG.getNode(ISD::CTTZ, DL, VT, N1);
3845  C1 = DAG.getZExtOrTrunc(C1, DL, ShiftAmtTy);
3846  SDValue Inexact = DAG.getNode(ISD::SUB, DL, ShiftAmtTy, Bits, C1);
3847  if (!isConstantOrConstantVector(Inexact))
3848  return SDValue();
3849 
3850  // Splat the sign bit into the register
3851  SDValue Sign = DAG.getNode(ISD::SRA, DL, VT, N0,
3852  DAG.getConstant(BitWidth - 1, DL, ShiftAmtTy));
3853  AddToWorklist(Sign.getNode());
3854 
3855  // Add (N0 < 0) ? abs2 - 1 : 0;
3856  SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, Sign, Inexact);
3857  AddToWorklist(Srl.getNode());
3858  SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Srl);
3859  AddToWorklist(Add.getNode());
3860  SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Add, C1);
3861  AddToWorklist(Sra.getNode());
3862 
3863  // Special case: (sdiv X, 1) -> X
3864  // Special Case: (sdiv X, -1) -> 0-X
3865  SDValue One = DAG.getConstant(1, DL, VT);
3866  SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
3867  SDValue IsOne = DAG.getSetCC(DL, CCVT, N1, One, ISD::SETEQ);
3868  SDValue IsAllOnes = DAG.getSetCC(DL, CCVT, N1, AllOnes, ISD::SETEQ);
3869  SDValue IsOneOrAllOnes = DAG.getNode(ISD::OR, DL, CCVT, IsOne, IsAllOnes);
3870  Sra = DAG.getSelect(DL, VT, IsOneOrAllOnes, N0, Sra);
3871 
3872  // If dividing by a positive value, we're done. Otherwise, the result must
3873  // be negated.
3874  SDValue Zero = DAG.getConstant(0, DL, VT);
3875  SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, Zero, Sra);
3876 
3877  // FIXME: Use SELECT_CC once we improve SELECT_CC constant-folding.
3878  SDValue IsNeg = DAG.getSetCC(DL, CCVT, N1, Zero, ISD::SETLT);
3879  SDValue Res = DAG.getSelect(DL, VT, IsNeg, Sub, Sra);
3880  return Res;
3881  }
3882 
3883  // If integer divide is expensive and we satisfy the requirements, emit an
3884  // alternate sequence. Targets may check function attributes for size/speed
3885  // trade-offs.
3887  if (isConstantOrConstantVector(N1) &&
3888  !TLI.isIntDivCheap(N->getValueType(0), Attr))
3889  if (SDValue Op = BuildSDIV(N))
3890  return Op;
3891 
3892  return SDValue();
3893 }
3894 
3895 SDValue DAGCombiner::visitUDIV(SDNode *N) {
3896  SDValue N0 = N->getOperand(0);
3897  SDValue N1 = N->getOperand(1);
3898  EVT VT = N->getValueType(0);
3899  EVT CCVT = getSetCCResultType(VT);
3900 
3901  // fold vector ops
3902  if (VT.isVector())
3903  if (SDValue FoldedVOp = SimplifyVBinOp(N))
3904  return FoldedVOp;
3905 
3906  SDLoc DL(N);
3907 
3908  // fold (udiv c1, c2) -> c1/c2
3911  if (N0C && N1C)
3912  if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT,
3913  N0C, N1C))
3914  return Folded;
3915  // fold (udiv X, -1) -> select(X == -1, 1, 0)
3916  if (N1C && N1C->getAPIntValue().isAllOnesValue())
3917  return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
3918  DAG.getConstant(1, DL, VT),
3919  DAG.getConstant(0, DL, VT));
3920 
3921  if (SDValue V = simplifyDivRem(N, DAG))
3922  return V;
3923 
3924  if (SDValue NewSel = foldBinOpIntoSelect(N))
3925  return NewSel;
3926 
3927  if (SDValue V = visitUDIVLike(N0, N1, N)) {
3928  // If the corresponding remainder node exists, update its users with
3929  // (Dividend - (Quotient * Divisor).
3930  if (SDNode *RemNode = DAG.getNodeIfExists(ISD::UREM, N->getVTList(),
3931  { N0, N1 })) {
3932  SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
3933  SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
3934  AddToWorklist(Mul.getNode());
3935  AddToWorklist(Sub.getNode());
3936  CombineTo(RemNode, Sub);
3937  }
3938  return V;
3939  }
3940 
3941  // sdiv, srem -> sdivrem
3942  // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
3943  // true. Otherwise, we break the simplification logic in visitREM().
3945  if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
3946  if (SDValue DivRem = useDivRem(N))
3947  return DivRem;
3948 
3949  return SDValue();
3950 }
3951 
3952 SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
3953  SDLoc DL(N);
3954  EVT VT = N->getValueType(0);
3955 
3956  // fold (udiv x, (1 << c)) -> x >>u c
3957  if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
3958  DAG.isKnownToBeAPowerOfTwo(N1)) {
3959  SDValue LogBase2 = BuildLogBase2(N1, DL);
3960  AddToWorklist(LogBase2.getNode());
3961 
3962  EVT ShiftVT = getShiftAmountTy(N0.getValueType());
3963  SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
3964  AddToWorklist(Trunc.getNode());
3965  return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
3966  }
3967 
3968  // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
3969  if (N1.getOpcode() == ISD::SHL) {
3970  SDValue N10 = N1.getOperand(0);
3971  if (isConstantOrConstantVector(N10, /*NoOpaques*/ true) &&
3972  DAG.isKnownToBeAPowerOfTwo(N10)) {
3973  SDValue LogBase2 = BuildLogBase2(N10, DL);
3974  AddToWorklist(LogBase2.getNode());
3975 
3976  EVT ADDVT = N1.getOperand(1).getValueType();
3977  SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ADDVT);
3978  AddToWorklist(Trunc.getNode());
3979  SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT, N1.getOperand(1), Trunc);
3980  AddToWorklist(Add.getNode());
3981  return DAG.getNode(ISD::SRL, DL, VT, N0, Add);
3982  }
3983  }
3984 
3985  // fold (udiv x, c) -> alternate
3987  if (isConstantOrConstantVector(N1) &&
3988  !TLI.isIntDivCheap(N->getValueType(0), Attr))
3989  if (SDValue Op = BuildUDIV(N))
3990  return Op;
3991 
3992  return SDValue();
3993 }
3994 
3995 // handles ISD::SREM and ISD::UREM
3996 SDValue DAGCombiner::visitREM(SDNode *N) {
3997  unsigned Opcode = N->getOpcode();
3998  SDValue N0 = N->getOperand(0);
3999  SDValue N1 = N->getOperand(1);
4000  EVT VT = N->getValueType(0);
4001  EVT CCVT = getSetCCResultType(VT);
4002 
4003  bool isSigned = (Opcode == ISD::SREM);
4004  SDLoc DL(N);
4005 
4006  // fold (rem c1, c2) -> c1%c2
4009  if (N0C && N1C)
4010  if (SDValue Folded = DAG.FoldConstantArithmetic(Opcode, DL, VT, N0C, N1C))
4011  return Folded;
4012  // fold (urem X, -1) -> select(X == -1, 0, x)
4013  if (!isSigned && N1C && N1C->getAPIntValue().isAllOnesValue())
4014  return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
4015  DAG.getConstant(0, DL, VT), N0);
4016 
4017  if (SDValue V = simplifyDivRem(N, DAG))
4018  return V;
4019 
4020  if (SDValue NewSel = foldBinOpIntoSelect(N))
4021  return NewSel;
4022 
4023  if (isSigned) {
4024  // If we know the sign bits of both operands are zero, strength reduce to a
4025  // urem instead. Handles (X & 0x0FFFFFFF) %s 16 -> X&15
4026  if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
4027  return DAG.getNode(ISD::UREM, DL, VT, N0, N1);
4028  } else {
4029  SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
4030  if (DAG.isKnownToBeAPowerOfTwo(N1)) {
4031  // fold (urem x, pow2) -> (and x, pow2-1)
4032  SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
4033  AddToWorklist(Add.getNode());
4034  return DAG.getNode(ISD::AND, DL, VT, N0, Add);
4035  }
4036  if (N1.getOpcode() == ISD::SHL &&
4037  DAG.isKnownToBeAPowerOfTwo(N1.getOperand(0))) {
4038  // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
4039  SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
4040  AddToWorklist(Add.getNode());
4041  return DAG.getNode(ISD::AND, DL, VT, N0, Add);
4042  }
4043  }
4044 
4046 
4047  // If X/C can be simplified by the division-by-constant logic, lower
4048  // X%C to the equivalent of X-X/C*C.
4049  // Reuse the SDIVLike/UDIVLike combines - to avoid mangling nodes, the
4050  // speculative DIV must not cause a DIVREM conversion. We guard against this
4051  // by skipping the simplification if isIntDivCheap(). When div is not cheap,
4052  // combine will not return a DIVREM. Regardless, checking cheapness here
4053  // makes sense since the simplification results in fatter code.
4054  if (DAG.isKnownNeverZero(N1) && !TLI.isIntDivCheap(VT, Attr)) {
4055  SDValue OptimizedDiv =
4056  isSigned ? visitSDIVLike(N0, N1, N) : visitUDIVLike(N0, N1, N);
4057  if (OptimizedDiv.getNode()) {
4058  // If the equivalent Div node also exists, update its users.
4059  unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
4060  if (SDNode *DivNode = DAG.getNodeIfExists(DivOpcode, N->getVTList(),
4061  { N0, N1 }))
4062  CombineTo(DivNode, OptimizedDiv);
4063  SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, OptimizedDiv, N1);
4064  SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
4065  AddToWorklist(OptimizedDiv.getNode());
4066  AddToWorklist(Mul.getNode());
4067  return Sub;
4068  }
4069  }
4070 
4071  // sdiv, srem -> sdivrem
4072  if (SDValue DivRem = useDivRem(N))
4073  return DivRem.getValue(1);
4074 
4075  return SDValue();
4076 }
4077 
4078 SDValue DAGCombiner::visitMULHS(SDNode *N) {
4079  SDValue N0 = N->getOperand(0);
4080  SDValue N1 = N->getOperand(1);
4081  EVT VT = N->getValueType(0);
4082  SDLoc DL(N);
4083 
4084  if (VT.isVector()) {
4085  // fold (mulhs x, 0) -> 0
4087  return N1;
4089  return N0;
4090  }
4091 
4092  // fold (mulhs x, 0) -> 0
4093  if (isNullConstant(N1))
4094  return N1;
4095  // fold (mulhs x, 1) -> (sra x, size(x)-1)
4096  if (isOneConstant(N1))
4097  return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0,
4098  DAG.getConstant(N0.getValueSizeInBits() - 1, DL,
4099  getShiftAmountTy(N0.getValueType())));
4100 
4101  // fold (mulhs x, undef) -> 0
4102  if (N0.isUndef() || N1.isUndef())
4103  return DAG.getConstant(0, DL, VT);
4104 
4105  // If the type twice as wide is legal, transform the mulhs to a wider multiply
4106  // plus a shift.
4107  if (VT.isSimple() && !VT.isVector()) {
4108  MVT Simple = VT.getSimpleVT();
4109  unsigned SimpleSize = Simple.getSizeInBits();
4110  EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
4111  if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
4112  N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
4113  N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
4114  N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
4115  N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
4116  DAG.getConstant(SimpleSize, DL,
4117  getShiftAmountTy(N1.getValueType())));
4118  return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
4119  }
4120  }
4121 
4122  return SDValue();
4123 }
4124 
4125 SDValue DAGCombiner::visitMULHU(SDNode *N) {
4126  SDValue N0 = N->getOperand(0);
4127  SDValue N1 = N->getOperand(1);
4128  EVT VT = N->getValueType(0);
4129  SDLoc DL(N);
4130 
4131  if (VT.isVector()) {
4132  // fold (mulhu x, 0) -> 0
4134  return N1;
4136  return N0;
4137  }
4138 
4139  // fold (mulhu x, 0) -> 0
4140  if (isNullConstant(N1))
4141  return N1;
4142  // fold (mulhu x, 1) -> 0
4143  if (isOneConstant(N1))
4144  return DAG.getConstant(0, DL, N0.getValueType());
4145  // fold (mulhu x, undef) -> 0
4146  if (N0.isUndef() || N1.isUndef())
4147  return DAG.getConstant(0, DL, VT);
4148 
4149  // fold (mulhu x, (1 << c)) -> x >> (bitwidth - c)
4150  if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
4151  DAG.isKnownToBeAPowerOfTwo(N1) && hasOperation(ISD::SRL, VT)) {
4152  unsigned NumEltBits = VT.getScalarSizeInBits();
4153  SDValue LogBase2 = BuildLogBase2(N1, DL);
4154  SDValue SRLAmt = DAG.getNode(
4155  ISD::SUB, DL, VT, DAG.getConstant(NumEltBits, DL, VT), LogBase2);
4156  EVT ShiftVT = getShiftAmountTy(N0.getValueType());
4157  SDValue Trunc = DAG.getZExtOrTrunc(SRLAmt, DL, ShiftVT);
4158  return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
4159  }
4160 
4161  // If the type twice as wide is legal, transform the mulhu to a wider multiply
4162  // plus a shift.
4163  if (VT.isSimple() && !VT.isVector()) {
4164  MVT Simple = VT.getSimpleVT();
4165  unsigned SimpleSize = Simple.getSizeInBits();
4166  EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
4167  if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
4168  N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
4169  N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
4170  N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
4171  N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
4172  DAG.getConstant(SimpleSize, DL,
4173  getShiftAmountTy(N1.getValueType())));
4174  return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
4175  }
4176  }
4177 
4178  return SDValue();
4179 }
4180 
4181 /// Perform optimizations common to nodes that compute two values. LoOp and HiOp
4182 /// give the opcodes for the two computations that are being performed. Return
4183 /// true if a simplification was made.
4184 SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
4185  unsigned HiOp) {
4186  // If the high half is not needed, just compute the low half.
4187  bool HiExists = N->hasAnyUseOfValue(1);
4188  if (!HiExists && (!LegalOperations ||
4189  TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) {
4190  SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
4191  return CombineTo(N, Res, Res);
4192  }
4193 
4194  // If the low half is not needed, just compute the high half.
4195  bool LoExists = N->hasAnyUseOfValue(0);
4196  if (!LoExists && (!LegalOperations ||
4197  TLI.isOperationLegalOrCustom(HiOp, N->getValueType(1)))) {
4198  SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
4199  return CombineTo(N, Res, Res);
4200  }
4201 
4202  // If both halves are used, return as it is.
4203  if (LoExists && HiExists)
4204  return SDValue();
4205 
4206  // If the two computed results can be simplified separately, separate them.
4207  if (LoExists) {
4208  SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
4209  AddToWorklist(Lo.getNode());
4210  SDValue LoOpt = combine(Lo.getNode());
4211  if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
4212  (!LegalOperations ||
4213  TLI.isOperationLegalOrCustom(LoOpt.getOpcode(), LoOpt.getValueType())))
4214  return CombineTo(N, LoOpt, LoOpt);
4215  }
4216 
4217  if (HiExists) {
4218  SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
4219  AddToWorklist(Hi.getNode());
4220  SDValue HiOpt = combine(Hi.getNode());
4221  if (HiOpt.getNode() && HiOpt != Hi &&
4222  (!LegalOperations ||
4223  TLI.isOperationLegalOrCustom(HiOpt.getOpcode(), HiOpt.getValueType())))
4224  return CombineTo(N, HiOpt, HiOpt);
4225  }
4226 
4227  return SDValue();
4228 }
4229 
4230 SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
4231  if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS))
4232  return Res;
4233 
4234  EVT VT = N->getValueType(0);
4235  SDLoc DL(N);
4236 
4237  // If the type is twice as wide is legal, transform the mulhu to a wider
4238  // multiply plus a shift.
4239  if (VT.isSimple() && !VT.isVector()) {
4240  MVT Simple = VT.getSimpleVT();
4241  unsigned SimpleSize = Simple.getSizeInBits();
4242  EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
4243  if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
4244  SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(0));
4245  SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(1));
4246  Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
4247  // Compute the high part as N1.
4248  Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
4249  DAG.getConstant(SimpleSize, DL,
4250  getShiftAmountTy(Lo.getValueType())));
4251  Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
4252  // Compute the low part as N0.
4253  Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
4254  return CombineTo(N, Lo, Hi);
4255  }
4256  }
4257 
4258  return SDValue();
4259 }
4260 
4261 SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
4262  if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU))
4263  return Res;
4264 
4265  EVT VT = N->getValueType(0);
4266  SDLoc DL(N);
4267 
4268  // If the type is twice as wide is legal, transform the mulhu to a wider
4269  // multiply plus a shift.
4270  if (VT.isSimple() && !VT.isVector()) {
4271  MVT Simple = VT.getSimpleVT();
4272  unsigned SimpleSize = Simple.getSizeInBits();
4273  EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
4274  if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
4275  SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(0));
4276  SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(1));
4277  Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
4278  // Compute the high part as N1.
4279  Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
4280  DAG.getConstant(SimpleSize, DL,
4281  getShiftAmountTy(Lo.getValueType())));
4282  Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
4283  // Compute the low part as N0.
4284  Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
4285  return CombineTo(N, Lo, Hi);
4286  }
4287  }
4288 
4289  return SDValue();
4290 }
4291 
4292 SDValue DAGCombiner::visitMULO(SDNode *N) {
4293  bool IsSigned = (ISD::SMULO == N->getOpcode());
4294 
4295  // (mulo x, 2) -> (addo x, x)
4297  if (C2->getAPIntValue() == 2)
4298  return DAG.getNode(IsSigned ? ISD::SADDO : ISD::UADDO, SDLoc(N),
4299  N->getVTList(), N->getOperand(0), N->getOperand(0));
4300 
4301  return SDValue();
4302 }
4303 
4304 SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
4305  SDValue N0 = N->getOperand(0);
4306  SDValue N1 = N->getOperand(1);
4307  EVT VT = N0.getValueType();
4308 
4309  // fold vector ops
4310  if (VT.isVector())
4311  if (SDValue FoldedVOp = SimplifyVBinOp(N))
4312  return FoldedVOp;
4313 
4314  // fold operation with constant operands.
4317  if (N0C && N1C)
4318  return DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N), VT, N0C, N1C);
4319 
4320  // canonicalize constant to RHS
4323  return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
4324 
4325  // Is sign bits are zero, flip between UMIN/UMAX and SMIN/SMAX.
4326  // Only do this if the current op isn't legal and the flipped is.
4327  unsigned Opcode = N->getOpcode();
4328  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4329  if (!TLI.isOperationLegal(Opcode, VT) &&
4330  (N0.isUndef() || DAG.SignBitIsZero(N0)) &&
4331  (N1.isUndef() || DAG.SignBitIsZero(N1))) {
4332  unsigned AltOpcode;
4333  switch (Opcode) {
4334  case ISD::SMIN: AltOpcode = ISD::UMIN; break;
4335  case ISD::SMAX: AltOpcode = ISD::UMAX; break;
4336  case ISD::UMIN: AltOpcode = ISD::SMIN; break;
4337  case ISD::UMAX: AltOpcode = ISD::SMAX; break;
4338  default: llvm_unreachable("Unknown MINMAX opcode");
4339  }
4340  if (TLI.isOperationLegal(AltOpcode, VT))
4341  return DAG.getNode(AltOpcode, SDLoc(N), VT, N0, N1);
4342  }
4343 
4344  return SDValue();
4345 }
4346 
4347 /// If this is a bitwise logic instruction and both operands have the same
4348 /// opcode, try to sink the other opcode after the logic instruction.
4349 SDValue DAGCombiner::hoistLogicOpWithSameOpcodeHands(SDNode *N) {
4350  SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
4351  EVT VT = N0.getValueType();
4352  unsigned LogicOpcode = N->getOpcode();
4353  unsigned HandOpcode = N0.getOpcode();
4354  assert((LogicOpcode == ISD::AND || LogicOpcode == ISD::OR ||
4355  LogicOpcode == ISD::XOR) && "Expected logic opcode");
4356  assert(HandOpcode == N1.getOpcode() && "Bad input!");
4357 
4358  // Bail early if none of these transforms apply.
4359  if (N0.getNumOperands() == 0)
4360  return SDValue();
4361 
4362  // FIXME: We should check number of uses of the operands to not increase
4363  // the instruction count for all transforms.
4364 
4365  // Handle size-changing casts.
4366  SDValue X = N0.getOperand(0);
4367  SDValue Y = N1.getOperand(0);
4368  EVT XVT = X.getValueType();
4369  SDLoc DL(N);
4370  if (HandOpcode == ISD::ANY_EXTEND || HandOpcode == ISD::ZERO_EXTEND ||
4371  HandOpcode == ISD::SIGN_EXTEND) {
4372  // If both operands have other uses, this transform would create extra
4373  // instructions without eliminating anything.
4374  if (!N0.hasOneUse() && !N1.hasOneUse())
4375  return SDValue();
4376  // We need matching integer source types.
4377  if (XVT != Y.getValueType())
4378  return SDValue();
4379  // Don't create an illegal op during or after legalization. Don't ever
4380  // create an unsupported vector op.
4381  if ((VT.isVector() || LegalOperations) &&
4382  !TLI.isOperationLegalOrCustom(LogicOpcode, XVT))
4383  return SDValue();
4384  // Avoid infinite looping with PromoteIntBinOp.
4385  // TODO: Should we apply desirable/legal constraints to all opcodes?
4386  if (HandOpcode == ISD::ANY_EXTEND && LegalTypes &&
4387  !TLI.isTypeDesirableForOp(LogicOpcode, XVT))
4388  return SDValue();
4389  // logic_op (hand_op X), (hand_op Y) --> hand_op (logic_op X, Y)
4390  SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4391  return DAG.getNode(HandOpcode, DL, VT, Logic);
4392  }
4393 
4394  // logic_op (truncate x), (truncate y) --> truncate (logic_op x, y)
4395  if (HandOpcode == ISD::TRUNCATE) {
4396  // If both operands have other uses, this transform would create extra
4397  // instructions without eliminating anything.
4398  if (!N0.hasOneUse() && !N1.hasOneUse())
4399  return SDValue();
4400  // We need matching source types.
4401  if (XVT != Y.getValueType())
4402  return SDValue();
4403  // Don't create an illegal op during or after legalization.
4404  if (LegalOperations && !TLI.isOperationLegal(LogicOpcode, XVT))
4405  return SDValue();
4406  // Be extra careful sinking truncate. If it's free, there's no benefit in
4407  // widening a binop. Also, don't create a logic op on an illegal type.
4408  if (TLI.isZExtFree(VT, XVT) && TLI.isTruncateFree(XVT, VT))
4409  return SDValue();
4410  if (!TLI.isTypeLegal(XVT))
4411  return SDValue();
4412  SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4413  return DAG.getNode(HandOpcode, DL, VT, Logic);
4414  }
4415 
4416  // For binops SHL/SRL/SRA/AND:
4417  // logic_op (OP x, z), (OP y, z) --> OP (logic_op x, y), z
4418  if ((HandOpcode == ISD::SHL || HandOpcode == ISD::SRL ||
4419  HandOpcode == ISD::SRA || HandOpcode == ISD::AND) &&
4420  N0.getOperand(1) == N1.getOperand(1)) {
4421  // If either operand has other uses, this transform is not an improvement.
4422  if (!N0.hasOneUse() || !N1.hasOneUse())
4423  return SDValue();
4424  SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4425  return DAG.getNode(HandOpcode, DL, VT, Logic, N0.getOperand(1));
4426  }
4427 
4428  // Unary ops: logic_op (bswap x), (bswap y) --> bswap (logic_op x, y)
4429  if (HandOpcode == ISD::BSWAP) {
4430  // If either operand has other uses, this transform is not an improvement.
4431  if (!N0.hasOneUse() || !N1.hasOneUse())
4432  return SDValue();
4433  SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4434  return DAG.getNode(HandOpcode, DL, VT, Logic);
4435  }
4436 
4437  // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
4438  // Only perform this optimization up until type legalization, before
4439  // LegalizeVectorOprs. LegalizeVectorOprs promotes vector operations by
4440  // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and
4441  // we don't want to undo this promotion.
4442  // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
4443  // on scalars.
4444  if ((HandOpcode == ISD::BITCAST || HandOpcode == ISD::SCALAR_TO_VECTOR) &&
4445  Level <= AfterLegalizeTypes) {
4446  // Input types must be integer and the same.
4447  if (XVT.isInteger() && XVT == Y.getValueType()) {
4448  SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4449  return DAG.getNode(HandOpcode, DL, VT, Logic);
4450  }
4451  }
4452 
4453  // Xor/and/or are indifferent to the swizzle operation (shuffle of one value).
4454  // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B))
4455  // If both shuffles use the same mask, and both shuffle within a single
4456  // vector, then it is worthwhile to move the swizzle after the operation.
4457  // The type-legalizer generates this pattern when loading illegal
4458  // vector types from memory. In many cases this allows additional shuffle
4459  // optimizations.
4460  // There are other cases where moving the shuffle after the xor/and/or
4461  // is profitable even if shuffles don't perform a swizzle.
4462  // If both shuffles use the same mask, and both shuffles have the same first
4463  // or second operand, then it might still be profitable to move the shuffle
4464  // after the xor/and/or operation.
4465  if (HandOpcode == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) {
4466  auto *SVN0 = cast<ShuffleVectorSDNode>(N0);
4467  auto *SVN1 = cast<ShuffleVectorSDNode>(N1);
4468  assert(X.getValueType() == Y.getValueType() &&
4469  "Inputs to shuffles are not the same type");
4470 
4471  // Check that both shuffles use the same mask. The masks are known to be of
4472  // the same length because the result vector type is the same.
4473  // Check also that shuffles have only one use to avoid introducing extra
4474  // instructions.
4475  if (!SVN0->hasOneUse() || !SVN1->hasOneUse() ||
4476  !SVN0->getMask().equals(SVN1->getMask()))
4477  return SDValue();
4478 
4479  // Don't try to fold this node if it requires introducing a
4480  // build vector of all zeros that might be illegal at this stage.
4481  SDValue ShOp = N0.getOperand(1);
4482  if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
4483  ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
4484 
4485  // (logic_op (shuf (A, C), shuf (B, C))) --> shuf (logic_op (A, B), C)
4486  if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) {
4487  SDValue Logic = DAG.getNode(LogicOpcode, DL, VT,
4488  N0.getOperand(0), N1.getOperand(0));
4489  return DAG.getVectorShuffle(VT, DL, Logic, ShOp, SVN0->getMask());
4490  }
4491 
4492  // Don't try to fold this node if it requires introducing a
4493  // build vector of all zeros that might be illegal at this stage.
4494  ShOp = N0.getOperand(0);
4495  if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
4496  ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
4497 
4498  // (logic_op (shuf (C, A), shuf (C, B))) --> shuf (C, logic_op (A, B))
4499  if (N0.getOperand(0) == N1.getOperand(0) && ShOp.getNode()) {
4500  SDValue Logic = DAG.getNode(LogicOpcode, DL, VT, N0.getOperand(1),
4501  N1.getOperand(1));
4502  return DAG.getVectorShuffle(VT, DL, ShOp, Logic, SVN0->getMask());
4503  }
4504  }
4505 
4506  return SDValue();
4507 }
4508 
4509 /// Try to make (and/or setcc (LL, LR), setcc (RL, RR)) more efficient.
4510 SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
4511  const SDLoc &DL) {
4512  SDValue LL, LR, RL, RR, N0CC, N1CC;
4513  if (!isSetCCEquivalent(N0, LL, LR, N0CC) ||
4514  !isSetCCEquivalent(N1, RL, RR, N1CC))
4515  return SDValue();
4516 
4517  assert(N0.getValueType() == N1.getValueType() &&
4518  "Unexpected operand types for bitwise logic op");
4519  assert(LL.getValueType() == LR.getValueType() &&
4520  RL.getValueType() == RR.getValueType() &&
4521  "Unexpected operand types for setcc");
4522 
4523  // If we're here post-legalization or the logic op type is not i1, the logic
4524  // op type must match a setcc result type. Also, all folds require new
4525  // operations on the left and right operands, so those types must match.
4526  EVT VT = N0.getValueType();
4527  EVT OpVT = LL.getValueType();
4528  if (LegalOperations || VT.getScalarType() != MVT::i1)
4529  if (VT != getSetCCResultType(OpVT))
4530  return SDValue();
4531  if (OpVT != RL.getValueType())
4532  return SDValue();
4533 
4534  ISD::CondCode CC0 = cast<CondCodeSDNode>(N0CC)->get();
4535  ISD::CondCode CC1 = cast<CondCodeSDNode>(N1CC)->get();
4536  bool IsInteger = OpVT.isInteger();
4537  if (LR == RR && CC0 == CC1 && IsInteger) {
4538  bool IsZero = isNullOrNullSplat(LR);
4539  bool IsNeg1 = isAllOnesOrAllOnesSplat(LR);
4540 
4541  // All bits clear?
4542  bool AndEqZero = IsAnd && CC1 == ISD::SETEQ && IsZero;
4543  // All sign bits clear?
4544  bool AndGtNeg1 = IsAnd && CC1 == ISD::SETGT && IsNeg1;
4545  // Any bits set?
4546  bool OrNeZero = !IsAnd && CC1 == ISD::SETNE && IsZero;
4547  // Any sign bits set?
4548  bool OrLtZero = !IsAnd && CC1 == ISD::SETLT && IsZero;
4549 
4550  // (and (seteq X, 0), (seteq Y, 0)) --> (seteq (or X, Y), 0)
4551  // (and (setgt X, -1), (setgt Y, -1)) --> (setgt (or X, Y), -1)
4552  // (or (setne X, 0), (setne Y, 0)) --> (setne (or X, Y), 0)
4553  // (or (setlt X, 0), (setlt Y, 0)) --> (setlt (or X, Y), 0)
4554  if (AndEqZero || AndGtNeg1 || OrNeZero || OrLtZero) {
4555  SDValue Or = DAG.getNode(ISD::OR, SDLoc(N0), OpVT, LL, RL);
4556  AddToWorklist(Or.getNode());
4557  return DAG.getSetCC(DL, VT, Or, LR, CC1);
4558  }
4559 
4560  // All bits set?
4561  bool AndEqNeg1 = IsAnd && CC1 == ISD::SETEQ && IsNeg1;
4562  // All sign bits set?
4563  bool AndLtZero = IsAnd && CC1 == ISD::SETLT && IsZero;
4564  // Any bits clear?
4565  bool OrNeNeg1 = !IsAnd && CC1 == ISD::SETNE && IsNeg1;
4566  // Any sign bits clear?
4567  bool OrGtNeg1 = !IsAnd && CC1 == ISD::SETGT && IsNeg1;
4568 
4569  // (and (seteq X, -1), (seteq Y, -1)) --> (seteq (and X, Y), -1)
4570  // (and (setlt X, 0), (setlt Y, 0)) --> (setlt (and X, Y), 0)
4571  // (or (setne X, -1), (setne Y, -1)) --> (setne (and X, Y), -1)
4572  // (or (setgt X, -1), (setgt Y -1)) --> (setgt (and X, Y), -1)
4573  if (AndEqNeg1 || AndLtZero || OrNeNeg1 || OrGtNeg1) {
4574  SDValue And = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, LL, RL);
4575  AddToWorklist(And.getNode());
4576  return DAG.getSetCC(DL, VT, And, LR, CC1);
4577  }
4578  }
4579 
4580  // TODO: What is the 'or' equivalent of this fold?
4581  // (and (setne X, 0), (setne X, -1)) --> (setuge (add X, 1), 2)
4582  if (IsAnd && LL == RL && CC0 == CC1 && OpVT.getScalarSizeInBits() > 1 &&
4583  IsInteger && CC0 == ISD::SETNE &&
4584  ((isNullConstant(LR) && isAllOnesConstant(RR)) ||
4585  (isAllOnesConstant(LR) && isNullConstant(RR)))) {
4586  SDValue One = DAG.getConstant(1, DL, OpVT);
4587  SDValue Two = DAG.getConstant(2, DL, OpVT);
4588  SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N0), OpVT, LL, One);
4589  AddToWorklist(Add.getNode());
4590  return DAG.getSetCC(DL, VT, Add, Two, ISD::SETUGE);
4591  }
4592 
4593  // Try more general transforms if the predicates match and the only user of
4594  // the compares is the 'and' or 'or'.
4595  if (IsInteger && TLI.convertSetCCLogicToBitwiseLogic(OpVT) && CC0 == CC1 &&
4596  N0.hasOneUse() && N1.hasOneUse()) {
4597  // and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0
4598  // or (setne A, B), (setne C, D) --> setne (or (xor A, B), (xor C, D)), 0
4599  if ((IsAnd && CC1 == ISD::SETEQ) || (!IsAnd && CC1 == ISD::SETNE)) {
4600  SDValue XorL = DAG.getNode(ISD::XOR, SDLoc(N0), OpVT, LL, LR);
4601  SDValue XorR = DAG.getNode(ISD::XOR, SDLoc(N1), OpVT, RL, RR);
4602  SDValue Or = DAG.getNode(ISD::OR, DL, OpVT, XorL, XorR);
4603  SDValue Zero = DAG.getConstant(0, DL, OpVT);
4604  return DAG.getSetCC(DL, VT, Or, Zero, CC1);
4605  }
4606 
4607  // Turn compare of constants whose difference is 1 bit into add+and+setcc.
4608  // TODO - support non-uniform vector amounts.
4609  if ((IsAnd && CC1 == ISD::SETNE) || (!IsAnd && CC1 == ISD::SETEQ)) {
4610  // Match a shared variable operand and 2 non-opaque constant operands.
4613  if (LL == RL && C0 && C1 && !C0->isOpaque() && !C1->isOpaque()) {
4614  // Canonicalize larger constant as C0.
4615  if (C1->getAPIntValue().ugt(C0->getAPIntValue()))
4616  std::swap(C0, C1);
4617 
4618  // The difference of the constants must be a single bit.
4619  const APInt &C0Val = C0->getAPIntValue();
4620  const APInt &C1Val = C1->getAPIntValue();
4621  if ((C0Val - C1Val).isPowerOf2()) {
4622  // and/or (setcc X, C0, ne), (setcc X, C1, ne/eq) -->
4623  // setcc ((add X, -C1), ~(C0 - C1)), 0, ne/eq
4624  SDValue OffsetC = DAG.getConstant(-C1Val, DL, OpVT);
4625  SDValue Add = DAG.getNode(ISD::ADD, DL, OpVT, LL, OffsetC);
4626  SDValue MaskC = DAG.getConstant(~(C0Val - C1Val), DL, OpVT);
4627  SDValue And = DAG.getNode(ISD::AND, DL, OpVT, Add, MaskC);
4628  SDValue Zero = DAG.getConstant(0, DL, OpVT);
4629  return DAG.getSetCC(DL, VT, And, Zero, CC0);
4630  }
4631  }
4632  }
4633  }
4634 
4635  // Canonicalize equivalent operands to LL == RL.
4636  if (LL == RR && LR == RL) {
4637  CC1 = ISD::getSetCCSwappedOperands(CC1);
4638  std::swap(RL, RR);
4639  }
4640 
4641  // (and (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
4642  // (or (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
4643  if (LL == RL && LR == RR) {
4644  ISD::CondCode NewCC = IsAnd ? ISD::getSetCCAndOperation(CC0, CC1, IsInteger)
4645  : ISD::getSetCCOrOperation(CC0, CC1, IsInteger);
4646  if (NewCC != ISD::SETCC_INVALID &&
4647  (!LegalOperations ||
4648  (TLI.isCondCodeLegal(NewCC, LL.getSimpleValueType()) &&
4649  TLI.isOperationLegal(ISD::SETCC, OpVT))))
4650  return DAG.getSetCC(DL, VT, LL, LR, NewCC);
4651  }
4652 
4653  return SDValue();
4654 }
4655 
4656 /// This contains all DAGCombine rules which reduce two values combined by
4657 /// an And operation to a single value. This makes them reusable in the context
4658 /// of visitSELECT(). Rules involving constants are not included as
4659 /// visitSELECT() already handles those cases.
4660 SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, SDNode *N) {
4661  EVT VT = N1.getValueType();
4662  SDLoc DL(N);
4663 
4664  // fold (and x, undef) -> 0
4665  if (N0.isUndef() || N1.isUndef())
4666  return DAG.getConstant(0, DL, VT);
4667 
4668  if (SDValue V = foldLogicOfSetCCs(true, N0, N1, DL))
4669  return V;
4670 
4671  if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
4672  VT.getSizeInBits() <= 64) {
4673  if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
4674  if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
4675  // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal
4676  // immediate for an add, but it is legal if its top c2 bits are set,
4677  // transform the ADD so the immediate doesn't need to be materialized
4678  // in a register.
4679  APInt ADDC = ADDI->getAPIntValue();
4680  APInt SRLC = SRLI->getAPIntValue();
4681  if (ADDC.getMinSignedBits() <= 64 &&
4682  SRLC.ult(VT.getSizeInBits()) &&
4683  !TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
4685  SRLC.getZExtValue());
4686  if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
4687  ADDC |= Mask;
4688  if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
4689  SDLoc DL0(N0);
4690  SDValue NewAdd =
4691  DAG.getNode(ISD::ADD, DL0, VT,
4692  N0.getOperand(0), DAG.getConstant(ADDC, DL, VT));
4693  CombineTo(N0.getNode(), NewAdd);
4694  // Return N so it doesn't get rechecked!
4695  return SDValue(N, 0);
4696  }
4697  }
4698  }
4699  }
4700  }
4701  }
4702 
4703  // Reduce bit extract of low half of an integer to the narrower type.
4704  // (and (srl i64:x, K), KMask) ->
4705  // (i64 zero_extend (and (srl (i32 (trunc i64:x)), K)), KMask)
4706  if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
4707  if (ConstantSDNode *CAnd = dyn_cast<ConstantSDNode>(N1)) {
4708  if (ConstantSDNode *CShift = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
4709  unsigned Size = VT.getSizeInBits();
4710  const APInt &AndMask = CAnd->getAPIntValue();
4711  unsigned ShiftBits = CShift->getZExtValue();
4712 
4713  // Bail out, this node will probably disappear anyway.
4714  if (ShiftBits == 0)
4715  return SDValue();
4716 
4717  unsigned MaskBits = AndMask.countTrailingOnes();
4718  EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), Size / 2);
4719 
4720  if (AndMask.isMask() &&
4721  // Required bits must not span the two halves of the integer and
4722  // must fit in the half size type.
4723  (ShiftBits + MaskBits <= Size / 2) &&
4724  TLI.isNarrowingProfitable(VT, HalfVT) &&
4725  TLI.isTypeDesirableForOp(ISD::AND, HalfVT) &&
4726  TLI.isTypeDesirableForOp(ISD::SRL, HalfVT) &&
4727  TLI.isTruncateFree(VT, HalfVT) &&
4728  TLI.isZExtFree(HalfVT, VT)) {
4729  // The isNarrowingProfitable is to avoid regressions on PPC and
4730  // AArch64 which match a few 64-bit bit insert / bit extract patterns
4731  // on downstream users of this. Those patterns could probably be
4732  // extended to handle extensions mixed in.
4733 
4734  SDValue SL(N0);
4735  assert(MaskBits <= Size);
4736 
4737  // Extracting the highest bit of the low half.
4738  EVT ShiftVT = TLI.getShiftAmountTy(HalfVT, DAG.getDataLayout());
4739  SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, HalfVT,
4740  N0.getOperand(0));
4741 
4742  SDValue NewMask = DAG.getConstant(AndMask.trunc(Size / 2), SL, HalfVT);
4743  SDValue ShiftK = DAG.getConstant(ShiftBits, SL, ShiftVT);
4744  SDValue Shift = DAG.getNode(ISD::SRL, SL, HalfVT, Trunc, ShiftK);
4745  SDValue And = DAG.getNode(ISD::AND, SL, HalfVT, Shift, NewMask);
4746  return DAG.getNode(ISD::ZERO_EXTEND, SL, VT, And);
4747  }
4748  }
4749  }
4750  }
4751 
4752  return SDValue();
4753 }
4754 
4755 bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
4756  EVT LoadResultTy, EVT &ExtVT) {
4757  if (!AndC->getAPIntValue().isMask())
4758  return false;
4759 
4760  unsigned ActiveBits = AndC->getAPIntValue().countTrailingOnes();
4761 
4762  ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
4763  EVT LoadedVT = LoadN->getMemoryVT();
4764 
4765  if (ExtVT == LoadedVT &&
4766  (!LegalOperations ||
4767  TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))) {
4768  // ZEXTLOAD will match without needing to change the size of the value being
4769  // loaded.
4770  return true;
4771  }
4772 
4773  // Do not change the width of a volatile load.
4774  if (LoadN->isVolatile())
4775  return false;
4776 
4777  // Do not generate loads of non-round integer types since these can
4778  // be expensive (and would be wrong if the type is not byte sized).
4779  if (!LoadedVT.bitsGT(ExtVT) || !ExtVT.isRound())
4780  return false;
4781 
4782  if (LegalOperations &&
4783  !TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))
4784  return false;
4785 
4786  if (!TLI.shouldReduceLoadWidth(LoadN, ISD::ZEXTLOAD, ExtVT))
4787  return false;
4788 
4789  return true;
4790 }
4791 
4792 bool DAGCombiner::isLegalNarrowLdSt(LSBaseSDNode *LDST,
4793  ISD::LoadExtType ExtType, EVT &MemVT,
4794  unsigned ShAmt) {
4795  if (!LDST)
4796  return false;
4797  // Only allow byte offsets.
4798  if (ShAmt % 8)
4799  return false;
4800 
4801  // Do not generate loads of non-round integer types since these can
4802  // be expensive (and would be wrong if the type is not byte sized).
4803  if (!MemVT.isRound())
4804  return false;
4805 
4806  // Don't change the width of a volatile load.
4807  if (LDST->isVolatile())
4808  return false;
4809 
4810  // Verify that we are actually reducing a load width here.
4811  if (LDST->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits())
4812  return false;
4813 
4814  // Ensure that this isn't going to produce an unsupported unaligned access.
4815  if (ShAmt &&
4816  !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
4817  LDST->getAddressSpace(), ShAmt / 8,
4818  LDST->getMemOperand()->getFlags()))
4819  return false;
4820 
4821  // It's not possible to generate a constant of extended or untyped type.
4822  EVT PtrType = LDST->getBasePtr().getValueType();
4823  if (PtrType == MVT::Untyped || PtrType.isExtended())
4824  return false;
4825 
4826  if (isa<LoadSDNode>(LDST)) {
4827  LoadSDNode *Load = cast<LoadSDNode>(LDST);
4828  // Don't transform one with multiple uses, this would require adding a new
4829  // load.
4830  if (!SDValue(Load, 0).hasOneUse())
4831  return false;
4832 
4833  if (LegalOperations &&
4834  !TLI.isLoadExtLegal(ExtType, Load->getValueType(0), MemVT))
4835  return false;
4836 
4837  // For the transform to be legal, the load must produce only two values
4838  // (the value loaded and the chain). Don't transform a pre-increment
4839  // load, for example, which produces an extra value. Otherwise the
4840  // transformation is not equivalent, and the downstream logic to replace
4841  // uses gets things wrong.
4842  if (Load->getNumValues() > 2)
4843  return false;
4844 
4845  // If the load that we're shrinking is an extload and we're not just
4846