1 //===- DAGCombiner.cpp - Implement a DAG node combiner --------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass combines dag nodes to form fewer, simpler DAG nodes. It can be run
10 // both before and after the DAG is legalized.
11 //
12 // This pass is not a substitute for the LLVM IR instcombine pass. This pass is
13 // primarily intended to handle simplification opportunities that are implicit
14 // in the LLVM IR and exposed by the various codegen lowering phases.
15 //
16 //===----------------------------------------------------------------------===//
17 
18 #include "llvm/ADT/APFloat.h"
19 #include "llvm/ADT/APInt.h"
20 #include "llvm/ADT/ArrayRef.h"
21 #include "llvm/ADT/DenseMap.h"
22 #include "llvm/ADT/IntervalMap.h"
23 #include "llvm/ADT/None.h"
24 #include "llvm/ADT/Optional.h"
25 #include "llvm/ADT/STLExtras.h"
26 #include "llvm/ADT/SetVector.h"
27 #include "llvm/ADT/SmallPtrSet.h"
28 #include "llvm/ADT/SmallSet.h"
29 #include "llvm/ADT/SmallVector.h"
30 #include "llvm/ADT/Statistic.h"
47 #include "llvm/IR/Attributes.h"
48 #include "llvm/IR/Constant.h"
49 #include "llvm/IR/DataLayout.h"
50 #include "llvm/IR/DerivedTypes.h"
51 #include "llvm/IR/Function.h"
52 #include "llvm/IR/LLVMContext.h"
53 #include "llvm/IR/Metadata.h"
54 #include "llvm/Support/Casting.h"
55 #include "llvm/Support/CodeGen.h"
57 #include "llvm/Support/Compiler.h"
58 #include "llvm/Support/Debug.h"
60 #include "llvm/Support/KnownBits.h"
66 #include <algorithm>
67 #include <cassert>
68 #include <cstdint>
69 #include <functional>
70 #include <iterator>
71 #include <string>
72 #include <tuple>
73 #include <utility>
74 
75 using namespace llvm;
76 
77 #define DEBUG_TYPE "dagcombine"
78 
79 STATISTIC(NodesCombined , "Number of dag nodes combined");
80 STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
81 STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
82 STATISTIC(OpsNarrowed , "Number of load/op/store narrowed");
83 STATISTIC(LdStFP2Int , "Number of fp load/store pairs transformed to int");
84 STATISTIC(SlicedLoads, "Number of load sliced");
85 STATISTIC(NumFPLogicOpsConv, "Number of logic ops converted to fp ops");
86 
87 static cl::opt<bool>
88 CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
89  cl::desc("Enable DAG combiner's use of IR alias analysis"));
90 
91 static cl::opt<bool>
92 UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true),
93  cl::desc("Enable DAG combiner's use of TBAA"));
94 
95 #ifndef NDEBUG
96 static cl::opt<std::string>
97 CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden,
98  cl::desc("Only use DAG-combiner alias analysis in this"
99  " function"));
100 #endif
101 
102 /// Hidden option to stress test load slicing, i.e., when this option
103 /// is enabled, load slicing bypasses most of its profitability guards.
104 static cl::opt<bool>
105 StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
106  cl::desc("Bypass the profitability model of load slicing"),
107  cl::init(false));
108 
109 static cl::opt<bool>
110  MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
111  cl::desc("DAG combiner may split indexing from loads"));
112 
113 static cl::opt<bool>
114  EnableStoreMerging("combiner-store-merging", cl::Hidden, cl::init(true),
115  cl::desc("DAG combiner enable merging multiple stores "
116  "into a wider store"));
117 
118 static cl::opt<unsigned> TokenFactorInlineLimit(
119  "combiner-tokenfactor-inline-limit", cl::Hidden, cl::init(2048),
120  cl::desc("Limit the number of operands to inline for Token Factors"));
121 
122 static cl::opt<unsigned> StoreMergeDependenceLimit(
123  "combiner-store-merge-dependence-limit", cl::Hidden, cl::init(10),
124  cl::desc("Limit the number of times for the same StoreNode and RootNode "
125  "to bail out in store merging dependence check"));
126 
127 namespace {
128 
129  class DAGCombiner {
130  SelectionDAG &DAG;
131  const TargetLowering &TLI;
132  CombineLevel Level;
133  CodeGenOpt::Level OptLevel;
134  bool LegalOperations = false;
135  bool LegalTypes = false;
136  bool ForCodeSize;
137 
138  /// Worklist of all of the nodes that need to be simplified.
139  ///
140  /// This must behave as a stack -- new nodes to process are pushed onto the
141  /// back and when processing we pop off of the back.
142  ///
143  /// The worklist will not contain duplicates but may contain null entries
144  /// due to nodes being deleted from the underlying DAG.
145  SmallVector<SDNode *, 64> Worklist;
146 
147  /// Mapping from an SDNode to its position on the worklist.
148  ///
149  /// This is used to find and remove nodes from the worklist (by nulling
150  /// them) when they are deleted from the underlying DAG. It relies on
151  /// stable indices of nodes within the worklist.
152  DenseMap<SDNode *, unsigned> WorklistMap;
153  /// This records all nodes attempted to be added to the worklist since we
154  /// last considered a new worklist entry. Because we do not add duplicate
155  /// nodes to the worklist, this can differ from the tail of the worklist.
156  SmallSetVector<SDNode *, 32> PruningList;
157 
158  /// Set of nodes which have been combined (at least once).
159  ///
160  /// This is used to allow us to reliably add any operands of a DAG node
161  /// which have not yet been combined to the worklist.
162  SmallPtrSet<SDNode *, 32> CombinedNodes;
163 
164  /// Map from candidate StoreNode to the pair of RootNode and count.
165  /// The count is used to track how many times we have seen the StoreNode
166  /// with the same RootNode bail out in dependence check. If we have seen
167  /// the bail out for the same pair many times over a limit, we won't
168  /// consider the StoreNode with the same RootNode as a store merging
169  /// candidate again.
170  DenseMap<SDNode *, std::pair<SDNode *, unsigned>> StoreRootCountMap;
171 
172  // AA - Used for DAG load/store alias analysis.
173  AliasAnalysis *AA;
174 
175  /// When an instruction is simplified, add all users of the instruction to
176  /// the work lists because they might get more simplified now.
177  void AddUsersToWorklist(SDNode *N) {
178  for (SDNode *Node : N->uses())
179  AddToWorklist(Node);
180  }
181 
182  // Prune potentially dangling nodes. This is called after
183  // any visit to a node, but should also be called during a visit after any
184  // failed combine which may have created a DAG node.
185  void clearAddedDanglingWorklistEntries() {
186  // Check any nodes added to the worklist to see if they are prunable.
187  while (!PruningList.empty()) {
188  auto *N = PruningList.pop_back_val();
189  if (N->use_empty())
190  recursivelyDeleteUnusedNodes(N);
191  }
192  }
193 
194  SDNode *getNextWorklistEntry() {
195  // Before we do any work, remove nodes that are not in use.
196  clearAddedDanglingWorklistEntries();
197  SDNode *N = nullptr;
198  // The Worklist holds the SDNodes in order, but it may contain null
199  // entries.
200  while (!N && !Worklist.empty()) {
201  N = Worklist.pop_back_val();
202  }
203 
204  if (N) {
205  bool GoodWorklistEntry = WorklistMap.erase(N);
206  (void)GoodWorklistEntry;
207  assert(GoodWorklistEntry &&
208  "Found a worklist entry without a corresponding map entry!");
209  }
210  return N;
211  }
212 
213  /// Call the node-specific routine that folds each particular type of node.
214  SDValue visit(SDNode *N);
215 
216  public:
217  DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOpt::Level OL)
218  : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes),
219  OptLevel(OL), AA(AA) {
220  ForCodeSize = DAG.getMachineFunction().getFunction().hasOptSize();
221 
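 // Record the widest store size (in bits) that is legal for this target;
 // this bounds the store sizes considered when merging consecutive stores.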
222  MaximumLegalStoreInBits = 0;
223  for (MVT VT : MVT::all_valuetypes())
224  if (EVT(VT).isSimple() && VT != MVT::Other &&
225  TLI.isTypeLegal(EVT(VT)) &&
226  VT.getSizeInBits() >= MaximumLegalStoreInBits)
227  MaximumLegalStoreInBits = VT.getSizeInBits();
228  }
229 
230  void ConsiderForPruning(SDNode *N) {
231  // Mark this for potential pruning.
232  PruningList.insert(N);
233  }
234 
235  /// Add to the worklist, making sure its instance is at the back (next to
236  /// be processed).
237  void AddToWorklist(SDNode *N) {
238  assert(N->getOpcode() != ISD::DELETED_NODE &&
239  "Deleted Node added to Worklist");
240 
241  // Skip handle nodes as they can't usefully be combined and confuse the
242  // zero-use deletion strategy.
243  if (N->getOpcode() == ISD::HANDLENODE)
244  return;
245 
246  ConsiderForPruning(N);
247 
248  if (WorklistMap.insert(std::make_pair(N, Worklist.size())).second)
249  Worklist.push_back(N);
250  }
251 
252  /// Remove all instances of N from the worklist.
253  void removeFromWorklist(SDNode *N) {
254  CombinedNodes.erase(N);
255  PruningList.remove(N);
256  StoreRootCountMap.erase(N);
257 
258  auto It = WorklistMap.find(N);
259  if (It == WorklistMap.end())
260  return; // Not in the worklist.
261 
262  // Null out the entry rather than erasing it to avoid a linear operation.
263  Worklist[It->second] = nullptr;
264  WorklistMap.erase(It);
265  }
266 
267  void deleteAndRecombine(SDNode *N);
268  bool recursivelyDeleteUnusedNodes(SDNode *N);
269 
270  /// Replaces all uses of the results of one DAG node with new values.
271  SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
272  bool AddTo = true);
273 
274  /// Replaces all uses of the results of one DAG node with new values.
275  SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
276  return CombineTo(N, &Res, 1, AddTo);
277  }
278 
279  /// Replaces all uses of the results of one DAG node with new values.
280  SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
281  bool AddTo = true) {
282  SDValue To[] = { Res0, Res1 };
283  return CombineTo(N, To, 2, AddTo);
284  }
285 
286  void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);
287 
288  private:
289  unsigned MaximumLegalStoreInBits;
290 
291  /// Check the specified integer node value to see if it can be simplified or
292  /// if things it uses can be simplified by bit propagation.
293  /// If so, return true.
294  bool SimplifyDemandedBits(SDValue Op) {
295  unsigned BitWidth = Op.getScalarValueSizeInBits();
296  APInt DemandedBits = APInt::getAllOnesValue(BitWidth);
297  return SimplifyDemandedBits(Op, DemandedBits);
298  }
299 
300  bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits) {
301  EVT VT = Op.getValueType();
302  unsigned NumElts = VT.isVector() ? VT.getVectorNumElements() : 1;
303  APInt DemandedElts = APInt::getAllOnesValue(NumElts);
304  return SimplifyDemandedBits(Op, DemandedBits, DemandedElts);
305  }
306 
307  /// Check the specified vector node value to see if it can be simplified or
308  /// if things it uses can be simplified as it only uses some of the
309  /// elements. If so, return true.
310  bool SimplifyDemandedVectorElts(SDValue Op) {
311  unsigned NumElts = Op.getValueType().getVectorNumElements();
312  APInt DemandedElts = APInt::getAllOnesValue(NumElts);
313  return SimplifyDemandedVectorElts(Op, DemandedElts);
314  }
315 
316  bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
317  const APInt &DemandedElts);
318  bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedElts,
319  bool AssumeSingleUse = false);
320 
321  bool CombineToPreIndexedLoadStore(SDNode *N);
322  bool CombineToPostIndexedLoadStore(SDNode *N);
323  SDValue SplitIndexingFromLoad(LoadSDNode *LD);
324  bool SliceUpLoad(SDNode *N);
325 
326  // Scalars have size 0 to distinguish from singleton vectors.
327  SDValue ForwardStoreValueToDirectLoad(LoadSDNode *LD);
328  bool getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val);
329  bool extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val);
330 
331  /// Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed
332  /// load.
333  ///
334  /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced.
335  /// \param InVecVT type of the input vector to EVE with bitcasts resolved.
336  /// \param EltNo index of the vector element to load.
337  /// \param OriginalLoad load that EVE came from to be replaced.
338  /// \returns EVE on success, SDValue() on failure.
339  SDValue scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
340  SDValue EltNo,
341  LoadSDNode *OriginalLoad);
342  void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
343  SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
344  SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
345  SDValue ZExtPromoteOperand(SDValue Op, EVT PVT);
346  SDValue PromoteIntBinOp(SDValue Op);
347  SDValue PromoteIntShiftOp(SDValue Op);
348  SDValue PromoteExtend(SDValue Op);
349  bool PromoteLoad(SDValue Op);
350 
351  /// Call the node-specific routine that knows how to fold each
352  /// particular type of node. If that doesn't do anything, try the
353  /// target-specific DAG combines.
354  SDValue combine(SDNode *N);
355 
356  // Visitation implementation - Implement dag node combining for different
357  // node types. The semantics are as follows:
358  // Return Value:
359  // SDValue.getNode() == 0 - No change was made
360  // SDValue.getNode() == N - N was replaced, is dead and has been handled.
361  // otherwise - N should be replaced by the returned Operand.
362  //
363  SDValue visitTokenFactor(SDNode *N);
364  SDValue visitMERGE_VALUES(SDNode *N);
365  SDValue visitADD(SDNode *N);
366  SDValue visitADDLike(SDNode *N);
367  SDValue visitADDLikeCommutative(SDValue N0, SDValue N1, SDNode *LocReference);
368  SDValue visitSUB(SDNode *N);
369  SDValue visitADDSAT(SDNode *N);
370  SDValue visitSUBSAT(SDNode *N);
371  SDValue visitADDC(SDNode *N);
372  SDValue visitADDO(SDNode *N);
373  SDValue visitUADDOLike(SDValue N0, SDValue N1, SDNode *N);
374  SDValue visitSUBC(SDNode *N);
375  SDValue visitSUBO(SDNode *N);
376  SDValue visitADDE(SDNode *N);
377  SDValue visitADDCARRY(SDNode *N);
378  SDValue visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn, SDNode *N);
379  SDValue visitSUBE(SDNode *N);
380  SDValue visitSUBCARRY(SDNode *N);
381  SDValue visitMUL(SDNode *N);
382  SDValue visitMULFIX(SDNode *N);
383  SDValue useDivRem(SDNode *N);
384  SDValue visitSDIV(SDNode *N);
385  SDValue visitSDIVLike(SDValue N0, SDValue N1, SDNode *N);
386  SDValue visitUDIV(SDNode *N);
387  SDValue visitUDIVLike(SDValue N0, SDValue N1, SDNode *N);
388  SDValue visitREM(SDNode *N);
389  SDValue visitMULHU(SDNode *N);
390  SDValue visitMULHS(SDNode *N);
391  SDValue visitSMUL_LOHI(SDNode *N);
392  SDValue visitUMUL_LOHI(SDNode *N);
393  SDValue visitMULO(SDNode *N);
394  SDValue visitIMINMAX(SDNode *N);
395  SDValue visitAND(SDNode *N);
396  SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *N);
397  SDValue visitOR(SDNode *N);
398  SDValue visitORLike(SDValue N0, SDValue N1, SDNode *N);
399  SDValue visitXOR(SDNode *N);
400  SDValue SimplifyVBinOp(SDNode *N);
401  SDValue visitSHL(SDNode *N);
402  SDValue visitSRA(SDNode *N);
403  SDValue visitSRL(SDNode *N);
404  SDValue visitFunnelShift(SDNode *N);
405  SDValue visitRotate(SDNode *N);
406  SDValue visitABS(SDNode *N);
407  SDValue visitBSWAP(SDNode *N);
408  SDValue visitBITREVERSE(SDNode *N);
409  SDValue visitCTLZ(SDNode *N);
410  SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
411  SDValue visitCTTZ(SDNode *N);
412  SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
413  SDValue visitCTPOP(SDNode *N);
414  SDValue visitSELECT(SDNode *N);
415  SDValue visitVSELECT(SDNode *N);
416  SDValue visitSELECT_CC(SDNode *N);
417  SDValue visitSETCC(SDNode *N);
418  SDValue visitSETCCCARRY(SDNode *N);
419  SDValue visitSIGN_EXTEND(SDNode *N);
420  SDValue visitZERO_EXTEND(SDNode *N);
421  SDValue visitANY_EXTEND(SDNode *N);
422  SDValue visitAssertExt(SDNode *N);
423  SDValue visitSIGN_EXTEND_INREG(SDNode *N);
424  SDValue visitSIGN_EXTEND_VECTOR_INREG(SDNode *N);
425  SDValue visitZERO_EXTEND_VECTOR_INREG(SDNode *N);
426  SDValue visitTRUNCATE(SDNode *N);
427  SDValue visitBITCAST(SDNode *N);
428  SDValue visitBUILD_PAIR(SDNode *N);
429  SDValue visitFADD(SDNode *N);
430  SDValue visitFSUB(SDNode *N);
431  SDValue visitFMUL(SDNode *N);
432  SDValue visitFMA(SDNode *N);
433  SDValue visitFDIV(SDNode *N);
434  SDValue visitFREM(SDNode *N);
435  SDValue visitFSQRT(SDNode *N);
436  SDValue visitFCOPYSIGN(SDNode *N);
437  SDValue visitFPOW(SDNode *N);
438  SDValue visitSINT_TO_FP(SDNode *N);
439  SDValue visitUINT_TO_FP(SDNode *N);
440  SDValue visitFP_TO_SINT(SDNode *N);
441  SDValue visitFP_TO_UINT(SDNode *N);
442  SDValue visitFP_ROUND(SDNode *N);
443  SDValue visitFP_EXTEND(SDNode *N);
444  SDValue visitFNEG(SDNode *N);
445  SDValue visitFABS(SDNode *N);
446  SDValue visitFCEIL(SDNode *N);
447  SDValue visitFTRUNC(SDNode *N);
448  SDValue visitFFLOOR(SDNode *N);
449  SDValue visitFMINNUM(SDNode *N);
450  SDValue visitFMAXNUM(SDNode *N);
451  SDValue visitFMINIMUM(SDNode *N);
452  SDValue visitFMAXIMUM(SDNode *N);
453  SDValue visitBRCOND(SDNode *N);
454  SDValue visitBR_CC(SDNode *N);
455  SDValue visitLOAD(SDNode *N);
456 
457  SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain);
458  SDValue replaceStoreOfFPConstant(StoreSDNode *ST);
459 
460  SDValue visitSTORE(SDNode *N);
461  SDValue visitLIFETIME_END(SDNode *N);
462  SDValue visitINSERT_VECTOR_ELT(SDNode *N);
463  SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
464  SDValue visitBUILD_VECTOR(SDNode *N);
465  SDValue visitCONCAT_VECTORS(SDNode *N);
466  SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
467  SDValue visitVECTOR_SHUFFLE(SDNode *N);
468  SDValue visitSCALAR_TO_VECTOR(SDNode *N);
469  SDValue visitINSERT_SUBVECTOR(SDNode *N);
470  SDValue visitMLOAD(SDNode *N);
471  SDValue visitMSTORE(SDNode *N);
472  SDValue visitMGATHER(SDNode *N);
473  SDValue visitMSCATTER(SDNode *N);
474  SDValue visitFP_TO_FP16(SDNode *N);
475  SDValue visitFP16_TO_FP(SDNode *N);
476  SDValue visitVECREDUCE(SDNode *N);
477 
478  SDValue visitFADDForFMACombine(SDNode *N);
479  SDValue visitFSUBForFMACombine(SDNode *N);
480  SDValue visitFMULForFMADistributiveCombine(SDNode *N);
481 
482  SDValue XformToShuffleWithZero(SDNode *N);
483  bool reassociationCanBreakAddressingModePattern(unsigned Opc,
484  const SDLoc &DL, SDValue N0,
485  SDValue N1);
486  SDValue reassociateOpsCommutative(unsigned Opc, const SDLoc &DL, SDValue N0,
487  SDValue N1);
488  SDValue reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
489  SDValue N1, SDNodeFlags Flags);
490 
491  SDValue visitShiftByConstant(SDNode *N);
492 
493  SDValue foldSelectOfConstants(SDNode *N);
494  SDValue foldVSelectOfConstants(SDNode *N);
495  SDValue foldBinOpIntoSelect(SDNode *BO);
496  bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
497  SDValue hoistLogicOpWithSameOpcodeHands(SDNode *N);
498  SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2);
499  SDValue SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
500  SDValue N2, SDValue N3, ISD::CondCode CC,
501  bool NotExtCompare = false);
502  SDValue convertSelectOfFPConstantsToLoadOffset(
503  const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
504  ISD::CondCode CC);
505  SDValue foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1,
506  SDValue N2, SDValue N3, ISD::CondCode CC);
507  SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
508  const SDLoc &DL);
509  SDValue unfoldMaskedMerge(SDNode *N);
510  SDValue unfoldExtremeBitClearingToShifts(SDNode *N);
511  SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
512  const SDLoc &DL, bool foldBooleans);
513  SDValue rebuildSetCC(SDValue N);
514 
515  bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
516  SDValue &CC) const;
517  bool isOneUseSetCC(SDValue N) const;
518  bool isCheaperToUseNegatedFPOps(SDValue X, SDValue Y);
519 
520  SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
521  unsigned HiOp);
522  SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
523  SDValue CombineExtLoad(SDNode *N);
524  SDValue CombineZExtLogicopShiftLoad(SDNode *N);
525  SDValue combineRepeatedFPDivisors(SDNode *N);
526  SDValue combineInsertEltToShuffle(SDNode *N, unsigned InsIndex);
527  SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
528  SDValue BuildSDIV(SDNode *N);
529  SDValue BuildSDIVPow2(SDNode *N);
530  SDValue BuildUDIV(SDNode *N);
531  SDValue BuildLogBase2(SDValue V, const SDLoc &DL);
532  SDValue BuildDivEstimate(SDValue N, SDValue Op, SDNodeFlags Flags);
533  SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags);
534  SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags);
535  SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Recip);
536  SDValue buildSqrtNROneConst(SDValue Arg, SDValue Est, unsigned Iterations,
537  SDNodeFlags Flags, bool Reciprocal);
538  SDValue buildSqrtNRTwoConst(SDValue Arg, SDValue Est, unsigned Iterations,
539  SDNodeFlags Flags, bool Reciprocal);
540  SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
541  bool DemandHighBits = true);
542  SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
543  SDValue MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
544  SDValue InnerPos, SDValue InnerNeg,
545  unsigned PosOpcode, unsigned NegOpcode,
546  const SDLoc &DL);
547  SDValue MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
548  SDValue MatchLoadCombine(SDNode *N);
549  SDValue MatchStoreCombine(StoreSDNode *N);
550  SDValue ReduceLoadWidth(SDNode *N);
551  SDValue ReduceLoadOpStoreWidth(SDNode *N);
553  SDValue TransformFPLoadStorePair(SDNode *N);
554  SDValue convertBuildVecZextToZext(SDNode *N);
555  SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
556  SDValue reduceBuildVecToShuffle(SDNode *N);
557  SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N,
558  ArrayRef<int> VectorMask, SDValue VecIn1,
559  SDValue VecIn2, unsigned LeftIdx,
560  bool DidSplitVec);
561  SDValue matchVSelectOpSizesWithSetCC(SDNode *Cast);
562 
563  /// Walk up chain skipping non-aliasing memory nodes,
564  /// looking for aliasing nodes and adding them to the Aliases vector.
565  void GatherAllAliases(SDNode *N, SDValue OriginalChain,
566  SmallVectorImpl<SDValue> &Aliases);
567 
568  /// Return true if there is any possibility that the two addresses overlap.
569  bool isAlias(SDNode *Op0, SDNode *Op1) const;
570 
571  /// Walk up chain skipping non-aliasing memory nodes, looking for a better
572  /// chain (aliasing node.)
573  SDValue FindBetterChain(SDNode *N, SDValue Chain);
574 
575  /// Try to replace a store and any possibly adjacent stores on
576  /// consecutive chains with better chains. Return true only if St is
577  /// replaced.
578  ///
579  /// Notice that other chains may still be replaced even if the function
580  /// returns false.
581  bool findBetterNeighborChains(StoreSDNode *St);
582 
583  // Helper for findBetterNeighborChains. Walk up the store chain, adding
584  // additional chained stores that do not overlap and can be parallelized.
585  bool parallelizeChainedStores(StoreSDNode *St);
586 
587  /// Holds a pointer to an LSBaseSDNode as well as information on where it
588  /// is located in a sequence of memory operations connected by a chain.
589  struct MemOpLink {
590  // Ptr to the mem node.
591  LSBaseSDNode *MemNode;
592 
593  // Offset from the base ptr.
594  int64_t OffsetFromBase;
595 
596  MemOpLink(LSBaseSDNode *N, int64_t Offset)
597  : MemNode(N), OffsetFromBase(Offset) {}
598  };
599 
600  /// This is a helper function for visitMUL to check the profitability
601  /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
602  /// MulNode is the original multiply, AddNode is (add x, c1),
603  /// and ConstNode is c2.
604  bool isMulAddWithConstProfitable(SDNode *MulNode,
605  SDValue &AddNode,
606  SDValue &ConstNode);
607 
608  /// This is a helper function for visitAND and visitZERO_EXTEND. Returns
609  /// true if the (and (load x) c) pattern matches an extload. ExtVT returns
610  /// the type of the loaded value to be extended.
611  bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
612  EVT LoadResultTy, EVT &ExtVT);
613 
614  /// Helper function to calculate whether the given Load/Store can have its
615  /// width reduced to ExtVT.
616  bool isLegalNarrowLdSt(LSBaseSDNode *LDSTN, ISD::LoadExtType ExtType,
617  EVT &MemVT, unsigned ShAmt = 0);
618 
619  /// Used by BackwardsPropagateMask to find suitable loads.
620  bool SearchForAndLoads(SDNode *N, SmallVectorImpl<LoadSDNode*> &Loads,
621  SmallPtrSetImpl<SDNode*> &NodesWithConsts,
622  ConstantSDNode *Mask, SDNode *&NodeToMask);
623  /// Attempt to propagate a given AND node back to load leaves so that they
624  /// can be combined into narrow loads.
625  bool BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG);
626 
627  /// Helper function for MergeConsecutiveStores which merges the
628  /// component store chains.
629  SDValue getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
630  unsigned NumStores);
631 
632  /// This is a helper function for MergeConsecutiveStores. When the
633  /// source elements of the consecutive stores are all constants or
634  /// all extracted vector elements, try to merge them into one
635  /// larger store introducing bitcasts if necessary. \return True
636  /// if a merged store was created.
637  bool MergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
638  EVT MemVT, unsigned NumStores,
639  bool IsConstantSrc, bool UseVector,
640  bool UseTrunc);
641 
642  /// This is a helper function for MergeConsecutiveStores. Stores
643  /// that potentially may be merged with St are placed in
644  /// StoreNodes. RootNode is a chain predecessor to all store
645  /// candidates.
646  void getStoreMergeCandidates(StoreSDNode *St,
647  SmallVectorImpl<MemOpLink> &StoreNodes,
648  SDNode *&Root);
649 
650  /// Helper function for MergeConsecutiveStores. Checks if
651  /// candidate stores have indirect dependency through their
652  /// operands. RootNode is the predecessor to all stores calculated
653  /// by getStoreMergeCandidates and is used to prune the dependency check.
654  /// \return True if safe to merge.
655  bool checkMergeStoreCandidatesForDependencies(
656  SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
657  SDNode *RootNode);
658 
659  /// Merge consecutive store operations into a wide store.
660  /// This optimization uses wide integers or vectors when possible.
661  /// \return True if any stores were merged into a wider store.
663  bool MergeConsecutiveStores(StoreSDNode *St);
664 
665  /// Try to transform a truncation where C is a constant:
666  /// (trunc (and X, C)) -> (and (trunc X), (trunc C))
667  ///
668  /// \p N needs to be a truncation and its first operand an AND. Other
669  /// requirements are checked by the function (e.g. that trunc is
670  /// single-use) and if missed an empty SDValue is returned.
671  SDValue distributeTruncateThroughAnd(SDNode *N);
672 
673  /// Helper function to determine whether the target supports operation
674  /// given by \p Opcode for type \p VT, that is, whether the operation
675  /// is legal or custom before legalizing operations, and whether it is
676  /// legal (but not custom) after legalization.
677  bool hasOperation(unsigned Opcode, EVT VT) {
678  if (LegalOperations)
679  return TLI.isOperationLegal(Opcode, VT);
680  return TLI.isOperationLegalOrCustom(Opcode, VT);
681  }
682 
683  public:
684  /// Runs the dag combiner on all nodes in the work list
685  void Run(CombineLevel AtLevel);
686 
687  SelectionDAG &getDAG() const { return DAG; }
688 
689  /// Returns a type large enough to hold any valid shift amount - before type
690  /// legalization these can be huge.
691  EVT getShiftAmountTy(EVT LHSTy) {
692  assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
693  return TLI.getShiftAmountTy(LHSTy, DAG.getDataLayout(), LegalTypes);
694  }
695 
696  /// This method returns true if we are running before type legalization or
697  /// if the specified VT is legal.
698  bool isTypeLegal(const EVT &VT) {
699  if (!LegalTypes) return true;
700  return TLI.isTypeLegal(VT);
701  }
702 
703  /// Convenience wrapper around TargetLowering::getSetCCResultType
704  EVT getSetCCResultType(EVT VT) const {
705  return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
706  }
707 
708  void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
709  SDValue OrigLoad, SDValue ExtLoad,
710  ISD::NodeType ExtType);
711  };
712 
713 /// This class is a DAGUpdateListener that removes any deleted
714 /// nodes from the worklist.
715 class WorklistRemover : public SelectionDAG::DAGUpdateListener {
716  DAGCombiner &DC;
717 
718 public:
719  explicit WorklistRemover(DAGCombiner &dc)
720  : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
721 
722  void NodeDeleted(SDNode *N, SDNode *E) override {
723  DC.removeFromWorklist(N);
724  }
725 };
726 
727 class WorklistInserter : public SelectionDAG::DAGUpdateListener {
728  DAGCombiner &DC;
729 
730 public:
731  explicit WorklistInserter(DAGCombiner &dc)
732  : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
733 
734  // FIXME: Ideally we could add N to the worklist, but this causes exponential
735  // compile time costs in large DAGs, e.g. Halide.
736  void NodeInserted(SDNode *N) override { DC.ConsiderForPruning(N); }
737 };
738 
739 } // end anonymous namespace
740 
741 //===----------------------------------------------------------------------===//
742 // TargetLowering::DAGCombinerInfo implementation
743 //===----------------------------------------------------------------------===//
744 
745 void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) {
746  ((DAGCombiner*)DC)->AddToWorklist(N);
747 }
748 
749 SDValue TargetLowering::DAGCombinerInfo::
750 CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) {
751  return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
752 }
753 
754 SDValue TargetLowering::DAGCombinerInfo::
755 CombineTo(SDNode *N, SDValue Res, bool AddTo) {
756  return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
757 }
758 
759 SDValue TargetLowering::DAGCombinerInfo::
760 CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
761  return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
762 }
763 
764 bool TargetLowering::DAGCombinerInfo::
765 recursivelyDeleteUnusedNodes(SDNode *N) {
766  return ((DAGCombiner*)DC)->recursivelyDeleteUnusedNodes(N);
767 }
768 
769 void TargetLowering::DAGCombinerInfo::
770 CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
771  return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
772 }
773 
774 //===----------------------------------------------------------------------===//
775 // Helper Functions
776 //===----------------------------------------------------------------------===//
777 
778 void DAGCombiner::deleteAndRecombine(SDNode *N) {
779  removeFromWorklist(N);
780 
781  // If the operands of this node are only used by the node, they will now be
782  // dead. Make sure to re-visit them and recursively delete dead nodes.
783  for (const SDValue &Op : N->ops())
784  // For an operand generating multiple values, one of the values may
785  // become dead allowing further simplification (e.g. split index
786  // arithmetic from an indexed load).
787  if (Op->hasOneUse() || Op->getNumValues() > 1)
788  AddToWorklist(Op.getNode());
789 
790  DAG.DeleteNode(N);
791 }
792 
793 // APInts must be the same size for most operations; this helper
794 // function zero extends the shorter of the pair so that they match.
795 // We provide an Offset so that we can create bitwidths that won't overflow.
796 static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) {
797  unsigned Bits = Offset + std::max(LHS.getBitWidth(), RHS.getBitWidth());
798  LHS = LHS.zextOrSelf(Bits);
799  RHS = RHS.zextOrSelf(Bits);
800 }
801 
802 // Return true if this node is a setcc, or is a select_cc
803 // that selects between the target values used for true and false, making it
804 // equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to
805 // the appropriate nodes based on the type of node we are checking. This
806 // simplifies life a bit for the callers.
807 bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
808  SDValue &CC) const {
809  if (N.getOpcode() == ISD::SETCC) {
810  LHS = N.getOperand(0);
811  RHS = N.getOperand(1);
812  CC = N.getOperand(2);
813  return true;
814  }
815 
816  if (N.getOpcode() != ISD::SELECT_CC ||
817  !TLI.isConstTrueVal(N.getOperand(2).getNode()) ||
818  !TLI.isConstFalseVal(N.getOperand(3).getNode()))
819  return false;
820 
821  if (TLI.getBooleanContents(N.getValueType()) ==
822  TargetLowering::UndefinedBooleanContent)
823  return false;
824 
825  LHS = N.getOperand(0);
826  RHS = N.getOperand(1);
827  CC = N.getOperand(4);
828  return true;
829 }
830 
831 /// Return true if this is a SetCC-equivalent operation with only one use.
832 /// If this is true, it allows the users to invert the operation for free when
833 /// it is profitable to do so.
834 bool DAGCombiner::isOneUseSetCC(SDValue N) const {
835  SDValue N0, N1, N2;
836  if (isSetCCEquivalent(N, N0, N1, N2) && N.getNode()->hasOneUse())
837  return true;
838  return false;
839 }
840 
841 // Returns the SDNode if it is a constant float BuildVector
842 // or constant float.
843 static SDNode *isConstantFPBuildVectorOrConstantFP(SDValue N) {
844  if (isa<ConstantFPSDNode>(N))
845  return N.getNode();
846  if (ISD::isBuildVectorOfConstantFPSDNodes(N.getNode()))
847  return N.getNode();
848  return nullptr;
849 }
850 
851 // Determines if it is a constant integer or a build vector of constant
852 // integers (and undefs).
853 // Do not permit build vector implicit truncation.
854 static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
855  if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N))
856  return !(Const->isOpaque() && NoOpaques);
857  if (N.getOpcode() != ISD::BUILD_VECTOR)
858  return false;
859  unsigned BitWidth = N.getScalarValueSizeInBits();
860  for (const SDValue &Op : N->op_values()) {
861  if (Op.isUndef())
862  continue;
863  ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Op);
864  if (!Const || Const->getAPIntValue().getBitWidth() != BitWidth ||
865  (Const->isOpaque() && NoOpaques))
866  return false;
867  }
868  return true;
869 }
870 
871 // Determines if a BUILD_VECTOR is composed of all-constants possibly mixed with
872 // undef's.
873 static bool isAnyConstantBuildVector(SDValue V, bool NoOpaques = false) {
874  if (V.getOpcode() != ISD::BUILD_VECTOR)
875  return false;
876  return isConstantOrConstantVector(V, NoOpaques) ||
877  ISD::isBuildVectorOfConstantFPSDNodes(V.getNode());
878 }
879 
880 bool DAGCombiner::reassociationCanBreakAddressingModePattern(unsigned Opc,
881  const SDLoc &DL,
882  SDValue N0,
883  SDValue N1) {
884  // Currently this only tries to ensure we don't undo the GEP splits done by
885  // CodeGenPrepare when shouldConsiderGEPOffsetSplit is true. To ensure this,
886  // we check if the following transformation would be problematic:
887  // (load/store (add, (add, x, offset1), offset2)) ->
888  // (load/store (add, x, offset1+offset2)).
889 
890  if (Opc != ISD::ADD || N0.getOpcode() != ISD::ADD)
891  return false;
892 
893  if (N0.hasOneUse())
894  return false;
895 
896  auto *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
897  auto *C2 = dyn_cast<ConstantSDNode>(N1);
898  if (!C1 || !C2)
899  return false;
900 
901  const APInt &C1APIntVal = C1->getAPIntValue();
902  const APInt &C2APIntVal = C2->getAPIntValue();
903  if (C1APIntVal.getBitWidth() > 64 || C2APIntVal.getBitWidth() > 64)
904  return false;
905 
906  const APInt CombinedValueIntVal = C1APIntVal + C2APIntVal;
907  if (CombinedValueIntVal.getBitWidth() > 64)
908  return false;
909  const int64_t CombinedValue = CombinedValueIntVal.getSExtValue();
910 
911  for (SDNode *Node : N0->uses()) {
912  auto LoadStore = dyn_cast<MemSDNode>(Node);
913  if (LoadStore) {
914  // Is x[offset2] already not a legal addressing mode? If so then
915  // reassociating the constants breaks nothing (we test offset2 because
916  // that's the one we hope to fold into the load or store).
917  TargetLoweringBase::AddrMode AM;
918  AM.HasBaseReg = true;
919  AM.BaseOffs = C2APIntVal.getSExtValue();
920  EVT VT = LoadStore->getMemoryVT();
921  unsigned AS = LoadStore->getAddressSpace();
922  Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
923  if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
924  continue;
925 
926  // Would x[offset1+offset2] still be a legal addressing mode?
927  AM.BaseOffs = CombinedValue;
928  if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
929  return true;
930  }
931  }
932 
933  return false;
934 }
935 
936 // Helper for DAGCombiner::reassociateOps. Try to reassociate an expression
937 // such as (Opc N0, N1), if \p N0 is the same kind of operation as \p Opc.
938 SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL,
939  SDValue N0, SDValue N1) {
940  EVT VT = N0.getValueType();
941 
942  if (N0.getOpcode() != Opc)
943  return SDValue();
944 
945  // Don't reassociate reductions.
946  if (N0->getFlags().hasVectorReduction())
947  return SDValue();
948 
949  if (SDNode *C1 = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) {
950  if (SDNode *C2 = DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
951  // Reassociate: (op (op x, c1), c2) -> (op x, (op c1, c2))
952  if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, C1, C2))
953  return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode);
954  return SDValue();
955  }
956  if (N0.hasOneUse()) {
957  // Reassociate: (op (op x, c1), y) -> (op (op x, y), c1)
958  // iff (op x, c1) has one use
959  SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1);
960  if (!OpNode.getNode())
961  return SDValue();
962  return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1));
963  }
964  }
965  return SDValue();
966 }
967 
968 // Try to reassociate commutative binops.
969 SDValue DAGCombiner::reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
970  SDValue N1, SDNodeFlags Flags) {
971  assert(TLI.isCommutativeBinOp(Opc) && "Operation not commutative.");
972  // Don't reassociate reductions.
973  if (Flags.hasVectorReduction())
974  return SDValue();
975 
976  // Floating-point reassociation is not allowed without loose FP math.
977  if (N0.getValueType().isFloatingPoint() ||
978  N1.getValueType().isFloatingPoint())
979  if (!Flags.hasAllowReassociation() || !Flags.hasNoSignedZeros())
980  return SDValue();
981 
982  if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N0, N1))
983  return Combined;
984  if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N1, N0))
985  return Combined;
986  return SDValue();
987 }
988 
989 SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
990  bool AddTo) {
991  assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
992  ++NodesCombined;
993  LLVM_DEBUG(dbgs() << "\nReplacing.1 "; N->dump(&DAG); dbgs() << "\nWith: ";
994  To[0].getNode()->dump(&DAG);
995  dbgs() << " and " << NumTo - 1 << " other values\n");
996  for (unsigned i = 0, e = NumTo; i != e; ++i)
997  assert((!To[i].getNode() ||
998  N->getValueType(i) == To[i].getValueType()) &&
999  "Cannot combine value to value of different type!");
1000 
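 // Nodes that become dead during the replacement must also be dropped from
 // our worklist; the WorklistRemover listener takes care of that.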
1001  WorklistRemover DeadNodes(*this);
1002  DAG.ReplaceAllUsesWith(N, To);
1003  if (AddTo) {
1004  // Push the new nodes and any users onto the worklist
1005  for (unsigned i = 0, e = NumTo; i != e; ++i) {
1006  if (To[i].getNode()) {
1007  AddToWorklist(To[i].getNode());
1008  AddUsersToWorklist(To[i].getNode());
1009  }
1010  }
1011  }
1012 
1013  // Finally, if the node is now dead, remove it from the graph. The node
1014  // may not be dead if the replacement process recursively simplified to
1015  // something else needing this node.
1016  if (N->use_empty())
1017  deleteAndRecombine(N);
1018  return SDValue(N, 0);
1019 }
1020 
1021 void DAGCombiner::
1022 CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
1023  // Replace all uses. If any nodes become isomorphic to other nodes and
1024  // are deleted, make sure to remove them from our worklist.
1025  WorklistRemover DeadNodes(*this);
1026  DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);
1027 
1028  // Push the new node and any (possibly new) users onto the worklist.
1029  AddToWorklist(TLO.New.getNode());
1030  AddUsersToWorklist(TLO.New.getNode());
1031 
1032  // Finally, if the node is now dead, remove it from the graph. The node
1033  // may not be dead if the replacement process recursively simplified to
1034  // something else needing this node.
1035  if (TLO.Old.getNode()->use_empty())
1036  deleteAndRecombine(TLO.Old.getNode());
1037 }
1038 
1039 /// Check the specified integer node value to see if it can be simplified or if
1040 /// things it uses can be simplified by bit propagation. If so, return true.
1041 bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
1042  const APInt &DemandedElts) {
1043  TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1044  KnownBits Known;
1045  if (!TLI.SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO))
1046  return false;
1047 
1048  // Revisit the node.
1049  AddToWorklist(Op.getNode());
1050 
1051  // Replace the old value with the new one.
1052  ++NodesCombined;
1053  LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.getNode()->dump(&DAG);
1054  dbgs() << "\nWith: "; TLO.New.getNode()->dump(&DAG);
1055  dbgs() << '\n');
1056 
1057  CommitTargetLoweringOpt(TLO);
1058  return true;
1059 }
1060 
1061 /// Check the specified vector node value to see if it can be simplified or
1062 /// if things it uses can be simplified as it only uses some of the elements.
1063 /// If so, return true.
1064 bool DAGCombiner::SimplifyDemandedVectorElts(SDValue Op,
1065  const APInt &DemandedElts,
1066  bool AssumeSingleUse) {
1067  TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1068  APInt KnownUndef, KnownZero;
1069  if (!TLI.SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero,
1070  TLO, 0, AssumeSingleUse))
1071  return false;
1072 
1073  // Revisit the node.
1074  AddToWorklist(Op.getNode());
1075 
1076  // Replace the old value with the new one.
1077  ++NodesCombined;
1078  LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.getNode()->dump(&DAG);
1079  dbgs() << "\nWith: "; TLO.New.getNode()->dump(&DAG);
1080  dbgs() << '\n');
1081 
1082  CommitTargetLoweringOpt(TLO);
1083  return true;
1084 }
1085 
1086 void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
1087  SDLoc DL(Load);
1088  EVT VT = Load->getValueType(0);
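 // The promoted load produces a wider value; truncate it back to the
 // original type so all existing users of Load can be rewired to it.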
1089  SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, SDValue(ExtLoad, 0));
1090 
1091  LLVM_DEBUG(dbgs() << "\nReplacing.9 "; Load->dump(&DAG); dbgs() << "\nWith: ";
1092  Trunc.getNode()->dump(&DAG); dbgs() << '\n');
1093  WorklistRemover DeadNodes(*this);
1094  DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
1095  DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));
1096  deleteAndRecombine(Load);
1097  AddToWorklist(Trunc.getNode());
1098 }
1099 
1100 SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
1101  Replace = false;
1102  SDLoc DL(Op);
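 // A load is promoted by rebuilding it as an extending load of the wider
 // promoted type; the caller is told to replace the original load.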
1103  if (ISD::isUNINDEXEDLoad(Op.getNode())) {
1104  LoadSDNode *LD = cast<LoadSDNode>(Op);
1105  EVT MemVT = LD->getMemoryVT();
1106  ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
1107  : LD->getExtensionType();
1108  Replace = true;
1109  return DAG.getExtLoad(ExtType, DL, PVT,
1110  LD->getChain(), LD->getBasePtr(),
1111  MemVT, LD->getMemOperand());
1112  }
1113 
1114  unsigned Opc = Op.getOpcode();
1115  switch (Opc) {
1116  default: break;
1117  case ISD::AssertSext:
1118  if (SDValue Op0 = SExtPromoteOperand(Op.getOperand(0), PVT))
1119  return DAG.getNode(ISD::AssertSext, DL, PVT, Op0, Op.getOperand(1));
1120  break;
1121  case ISD::AssertZext:
1122  if (SDValue Op0 = ZExtPromoteOperand(Op.getOperand(0), PVT))
1123  return DAG.getNode(ISD::AssertZext, DL, PVT, Op0, Op.getOperand(1));
1124  break;
1125  case ISD::Constant: {
1126  unsigned ExtOpc =
1127  Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
1128  return DAG.getNode(ExtOpc, DL, PVT, Op);
1129  }
1130  }
1131 
1132  if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
1133  return SDValue();
1134  return DAG.getNode(ISD::ANY_EXTEND, DL, PVT, Op);
1135 }
1136 
1137 SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
1138  if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT))
1139  return SDValue();
1140  EVT OldVT = Op.getValueType();
1141  SDLoc DL(Op);
1142  bool Replace = false;
1143  SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1144  if (!NewOp.getNode())
1145  return SDValue();
1146  AddToWorklist(NewOp.getNode());
1147 
1148  if (Replace)
1149  ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1150  return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, NewOp.getValueType(), NewOp,
1151  DAG.getValueType(OldVT));
1152 }
1153 
1154 SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
1155  EVT OldVT = Op.getValueType();
1156  SDLoc DL(Op);
1157  bool Replace = false;
1158  SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1159  if (!NewOp.getNode())
1160  return SDValue();
1161  AddToWorklist(NewOp.getNode());
1162 
1163  if (Replace)
1164  ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1165  return DAG.getZeroExtendInReg(NewOp, DL, OldVT);
1166 }
1167 
1168 /// Promote the specified integer binary operation if the target indicates it is
1169 /// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1170 /// i32 since i16 instructions are longer.
1171 SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
1172  if (!LegalOperations)
1173  return SDValue();
1174 
1175  EVT VT = Op.getValueType();
1176  if (VT.isVector() || !VT.isInteger())
1177  return SDValue();
1178 
1179  // If operation type is 'undesirable', e.g. i16 on x86, consider
1180  // promoting it.
1181  unsigned Opc = Op.getOpcode();
1182  if (TLI.isTypeDesirableForOp(Opc, VT))
1183  return SDValue();
1184 
1185  EVT PVT = VT;
1186  // Consult target whether it is a good idea to promote this operation and
1187  // what's the right type to promote it to.
1188  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1189  assert(PVT != VT && "Don't know what type to promote to!");
1190 
1191  LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
1192 
1193  bool Replace0 = false;
1194  SDValue N0 = Op.getOperand(0);
1195  SDValue NN0 = PromoteOperand(N0, PVT, Replace0);
1196 
1197  bool Replace1 = false;
1198  SDValue N1 = Op.getOperand(1);
1199  SDValue NN1 = PromoteOperand(N1, PVT, Replace1);
1200  SDLoc DL(Op);
1201 
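 // Perform the arithmetic in the wider type, then truncate the result back
 // to the original type.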
1202  SDValue RV =
1203  DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, NN0, NN1));
1204 
1205  // We are always replacing N0/N1's use in N and only need
1206  // additional replacements if there are additional uses.
1207  Replace0 &= !N0->hasOneUse();
1208  Replace1 &= (N0 != N1) && !N1->hasOneUse();
1209 
1210  // Combine Op here so it is preserved past replacements.
1211  CombineTo(Op.getNode(), RV);
1212 
1213  // If operands have a use ordering, make sure we deal with
1214  // predecessor first.
1215  if (Replace0 && Replace1 && N0.getNode()->isPredecessorOf(N1.getNode())) {
1216  std::swap(N0, N1);
1217  std::swap(NN0, NN1);
1218  }
1219 
1220  if (Replace0) {
1221  AddToWorklist(NN0.getNode());
1222  ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
1223  }
1224  if (Replace1) {
1225  AddToWorklist(NN1.getNode());
1226  ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());
1227  }
1228  return Op;
1229  }
1230  return SDValue();
1231 }
1232 
1233 /// Promote the specified integer shift operation if the target indicates it is
1234 /// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1235 /// i32 since i16 instructions are longer.
1236 SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
1237  if (!LegalOperations)
1238  return SDValue();
1239 
1240  EVT VT = Op.getValueType();
1241  if (VT.isVector() || !VT.isInteger())
1242  return SDValue();
1243 
1244  // If operation type is 'undesirable', e.g. i16 on x86, consider
1245  // promoting it.
1246  unsigned Opc = Op.getOpcode();
1247  if (TLI.isTypeDesirableForOp(Opc, VT))
1248  return SDValue();
1249 
1250  EVT PVT = VT;
1251  // Consult target whether it is a good idea to promote this operation and
1252  // what's the right type to promote it to.
1253  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1254  assert(PVT != VT && "Don't know what type to promote to!");
1255 
1256  LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
1257 
1258  bool Replace = false;
1259  SDValue N0 = Op.getOperand(0);
1260  SDValue N1 = Op.getOperand(1);
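 // The value being shifted must be extended to match the shift kind:
 // arithmetic shifts need sign bits, logical right shifts need zeros.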
1261  if (Opc == ISD::SRA)
1262  N0 = SExtPromoteOperand(N0, PVT);
1263  else if (Opc == ISD::SRL)
1264  N0 = ZExtPromoteOperand(N0, PVT);
1265  else
1266  N0 = PromoteOperand(N0, PVT, Replace);
1267 
1268  if (!N0.getNode())
1269  return SDValue();
1270 
1271  SDLoc DL(Op);
1272  SDValue RV =
1273  DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, N0, N1));
1274 
1275  if (Replace)
1276  ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());
1277 
1278  // Deal with Op being deleted.
1279  if (Op && Op.getOpcode() != ISD::DELETED_NODE)
1280  return RV;
1281  }
1282  return SDValue();
1283 }
1284 
1285 SDValue DAGCombiner::PromoteExtend(SDValue Op) {
1286  if (!LegalOperations)
1287  return SDValue();
1288 
1289  EVT VT = Op.getValueType();
1290  if (VT.isVector() || !VT.isInteger())
1291  return SDValue();
1292 
1293  // If operation type is 'undesirable', e.g. i16 on x86, consider
1294  // promoting it.
1295  unsigned Opc = Op.getOpcode();
1296  if (TLI.isTypeDesirableForOp(Opc, VT))
1297  return SDValue();
1298 
1299  EVT PVT = VT;
1300  // Consult target whether it is a good idea to promote this operation and
1301  // what's the right type to promote it to.
1302  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1303  assert(PVT != VT && "Don't know what type to promote to!");
1304  // fold (aext (aext x)) -> (aext x)
1305  // fold (aext (zext x)) -> (zext x)
1306  // fold (aext (sext x)) -> (sext x)
1307  LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
1308  return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0));
1309  }
1310  return SDValue();
1311 }
1312 
1313 bool DAGCombiner::PromoteLoad(SDValue Op) {
1314  if (!LegalOperations)
1315  return false;
1316 
1317  if (!ISD::isUNINDEXEDLoad(Op.getNode()))
1318  return false;
1319 
1320  EVT VT = Op.getValueType();
1321  if (VT.isVector() || !VT.isInteger())
1322  return false;
1323 
1324  // If operation type is 'undesirable', e.g. i16 on x86, consider
1325  // promoting it.
1326  unsigned Opc = Op.getOpcode();
1327  if (TLI.isTypeDesirableForOp(Opc, VT))
1328  return false;
1329 
1330  EVT PVT = VT;
1331  // Consult target whether it is a good idea to promote this operation and
1332  // what's the right type to promote it to.
1333  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1334  assert(PVT != VT && "Don't know what type to promote to!");
1335 
1336  SDLoc DL(Op);
1337  SDNode *N = Op.getNode();
1338  LoadSDNode *LD = cast<LoadSDNode>(N);
1339  EVT MemVT = LD->getMemoryVT();
1340  ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
1341  : LD->getExtensionType();
1342  SDValue NewLD = DAG.getExtLoad(ExtType, DL, PVT,
1343  LD->getChain(), LD->getBasePtr(),
1344  MemVT, LD->getMemOperand());
1345  SDValue Result = DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD);
1346 
1347  LLVM_DEBUG(dbgs() << "\nPromoting "; N->dump(&DAG); dbgs() << "\nTo: ";
1348  Result.getNode()->dump(&DAG); dbgs() << '\n');
1349  WorklistRemover DeadNodes(*this);
1350  DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
1351  DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
1352  deleteAndRecombine(N);
1353  AddToWorklist(Result.getNode());
1354  return true;
1355  }
1356  return false;
1357 }
1358 
1359 /// Recursively delete a node which has no uses and any operands for
1360 /// which it is the only use.
1361 ///
1362 /// Note that this both deletes the nodes and removes them from the worklist.
1363 /// It also adds any nodes that have had a user deleted to the worklist, as
1364 /// they may now have only one use and be subject to other combines.
1365 bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) {
1366  if (!N->use_empty())
1367  return false;
1368 
1369  SmallSetVector<SDNode *, 16> Nodes;
1370  Nodes.insert(N);
1371  do {
1372  N = Nodes.pop_back_val();
1373  if (!N)
1374  continue;
1375 
1376  if (N->use_empty()) {
1377  for (const SDValue &ChildN : N->op_values())
1378  Nodes.insert(ChildN.getNode());
1379 
1380  removeFromWorklist(N);
1381  DAG.DeleteNode(N);
1382  } else {
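 // This node lost a user but is still live; revisit it, since it may now
 // be combinable in a new way.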
1383  AddToWorklist(N);
1384  }
1385  } while (!Nodes.empty());
1386  return true;
1387 }
1388 
1389 //===----------------------------------------------------------------------===//
1390 // Main DAG Combiner implementation
1391 //===----------------------------------------------------------------------===//
1392 
1393 void DAGCombiner::Run(CombineLevel AtLevel) {
1394  // Set the instance variables so that the various visit routines may use them.
1395  Level = AtLevel;
1396  LegalOperations = Level >= AfterLegalizeVectorOps;
1397  LegalTypes = Level >= AfterLegalizeTypes;
1398 
1399  WorklistInserter AddNodes(*this);
1400 
1401  // Add all the dag nodes to the worklist.
1402  for (SDNode &Node : DAG.allnodes())
1403  AddToWorklist(&Node);
1404 
1405  // Create a dummy node (which is not added to allnodes), that adds a reference
1406  // to the root node, preventing it from being deleted, and tracking any
1407  // changes of the root.
1408  HandleSDNode Dummy(DAG.getRoot());
1409 
1410  // While we have a valid worklist entry node, try to combine it.
1411  while (SDNode *N = getNextWorklistEntry()) {
1412  // If N has no uses, it is dead. Make sure to revisit all N's operands once
1413  // N is deleted from the DAG, since they too may now be dead or may have a
1414  // reduced number of uses, allowing other xforms.
1415  if (recursivelyDeleteUnusedNodes(N))
1416  continue;
1417 
1418  WorklistRemover DeadNodes(*this);
1419 
1420  // If this combine is running after legalizing the DAG, re-legalize any
1421  // nodes pulled off the worklist.
1422  if (Level == AfterLegalizeDAG) {
1423  SmallSetVector<SDNode *, 16> UpdatedNodes;
1424  bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes);
1425 
1426  for (SDNode *LN : UpdatedNodes) {
1427  AddUsersToWorklist(LN);
1428  AddToWorklist(LN);
1429  }
1430  if (!NIsValid)
1431  continue;
1432  }
1433 
1434  LLVM_DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG));
1435 
1436  // Add any operands of the new node which have not yet been combined to the
1437  // worklist as well. Because the worklist uniques things already, this
1438  // won't repeatedly process the same operand.
1439  CombinedNodes.insert(N);
1440  for (const SDValue &ChildN : N->op_values())
1441  if (!CombinedNodes.count(ChildN.getNode()))
1442  AddToWorklist(ChildN.getNode());
1443 
1444  SDValue RV = combine(N);
1445 
1446  if (!RV.getNode())
1447  continue;
1448 
1449  ++NodesCombined;
1450 
1451  // If we get back the same node we passed in, rather than a new node or
1452  // zero, we know that the node must have defined multiple values and
1453  // CombineTo was used. Since CombineTo takes care of the worklist
1454  // mechanics for us, we have no work to do in this case.
1455  if (RV.getNode() == N)
1456  continue;
1457 
1458  assert(N->getOpcode() != ISD::DELETED_NODE &&
1459  RV.getOpcode() != ISD::DELETED_NODE &&
1460  "Node was deleted but visit returned new node!");
1461 
1462  LLVM_DEBUG(dbgs() << " ... into: "; RV.getNode()->dump(&DAG));
1463 
1464  if (N->getNumValues() == RV.getNode()->getNumValues())
1465  DAG.ReplaceAllUsesWith(N, RV.getNode());
1466  else {
1467  assert(N->getValueType(0) == RV.getValueType() &&
1468  N->getNumValues() == 1 && "Type mismatch");
1469  DAG.ReplaceAllUsesWith(N, &RV);
1470  }
1471 
1472  // Push the new node and any users onto the worklist
1473  AddToWorklist(RV.getNode());
1474  AddUsersToWorklist(RV.getNode());
1475 
1476  // Finally, if the node is now dead, remove it from the graph. The node
1477  // may not be dead if the replacement process recursively simplified to
1478  // something else needing this node. This will also take care of adding any
1479  // operands which have lost a user to the worklist.
1480  recursivelyDeleteUnusedNodes(N);
1481  }
1482 
1483  // If the root changed (e.g. it was a dead load), update the root.
1484  DAG.setRoot(Dummy.getValue());
1485  DAG.RemoveDeadNodes();
1486 }
1487 
1488 SDValue DAGCombiner::visit(SDNode *N) {
1489  switch (N->getOpcode()) {
1490  default: break;
1491  case ISD::TokenFactor: return visitTokenFactor(N);
1492  case ISD::MERGE_VALUES: return visitMERGE_VALUES(N);
1493  case ISD::ADD: return visitADD(N);
1494  case ISD::SUB: return visitSUB(N);
1495  case ISD::SADDSAT:
1496  case ISD::UADDSAT: return visitADDSAT(N);
1497  case ISD::SSUBSAT:
1498  case ISD::USUBSAT: return visitSUBSAT(N);
1499  case ISD::ADDC: return visitADDC(N);
1500  case ISD::SADDO:
1501  case ISD::UADDO: return visitADDO(N);
1502  case ISD::SUBC: return visitSUBC(N);
1503  case ISD::SSUBO:
1504  case ISD::USUBO: return visitSUBO(N);
1505  case ISD::ADDE: return visitADDE(N);
1506  case ISD::ADDCARRY: return visitADDCARRY(N);
1507  case ISD::SUBE: return visitSUBE(N);
1508  case ISD::SUBCARRY: return visitSUBCARRY(N);
1509  case ISD::SMULFIX:
1510  case ISD::SMULFIXSAT:
1511  case ISD::UMULFIX:
1512  case ISD::UMULFIXSAT: return visitMULFIX(N);
1513  case ISD::MUL: return visitMUL(N);
1514  case ISD::SDIV: return visitSDIV(N);
1515  case ISD::UDIV: return visitUDIV(N);
1516  case ISD::SREM:
1517  case ISD::UREM: return visitREM(N);
1518  case ISD::MULHU: return visitMULHU(N);
1519  case ISD::MULHS: return visitMULHS(N);
1520  case ISD::SMUL_LOHI: return visitSMUL_LOHI(N);
1521  case ISD::UMUL_LOHI: return visitUMUL_LOHI(N);
1522  case ISD::SMULO:
1523  case ISD::UMULO: return visitMULO(N);
1524  case ISD::SMIN:
1525  case ISD::SMAX:
1526  case ISD::UMIN:
1527  case ISD::UMAX: return visitIMINMAX(N);
1528  case ISD::AND: return visitAND(N);
1529  case ISD::OR: return visitOR(N);
1530  case ISD::XOR: return visitXOR(N);
1531  case ISD::SHL: return visitSHL(N);
1532  case ISD::SRA: return visitSRA(N);
1533  case ISD::SRL: return visitSRL(N);
1534  case ISD::ROTR:
1535  case ISD::ROTL: return visitRotate(N);
1536  case ISD::FSHL:
1537  case ISD::FSHR: return visitFunnelShift(N);
1538  case ISD::ABS: return visitABS(N);
1539  case ISD::BSWAP: return visitBSWAP(N);
1540  case ISD::BITREVERSE: return visitBITREVERSE(N);
1541  case ISD::CTLZ: return visitCTLZ(N);
1542  case ISD::CTLZ_ZERO_UNDEF: return visitCTLZ_ZERO_UNDEF(N);
1543  case ISD::CTTZ: return visitCTTZ(N);
1544  case ISD::CTTZ_ZERO_UNDEF: return visitCTTZ_ZERO_UNDEF(N);
1545  case ISD::CTPOP: return visitCTPOP(N);
1546  case ISD::SELECT: return visitSELECT(N);
1547  case ISD::VSELECT: return visitVSELECT(N);
1548  case ISD::SELECT_CC: return visitSELECT_CC(N);
1549  case ISD::SETCC: return visitSETCC(N);
1550  case ISD::SETCCCARRY: return visitSETCCCARRY(N);
1551  case ISD::SIGN_EXTEND: return visitSIGN_EXTEND(N);
1552  case ISD::ZERO_EXTEND: return visitZERO_EXTEND(N);
1553  case ISD::ANY_EXTEND: return visitANY_EXTEND(N);
1554  case ISD::AssertSext:
1555  case ISD::AssertZext: return visitAssertExt(N);
1556  case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N);
1557  case ISD::SIGN_EXTEND_VECTOR_INREG: return visitSIGN_EXTEND_VECTOR_INREG(N);
1558  case ISD::ZERO_EXTEND_VECTOR_INREG: return visitZERO_EXTEND_VECTOR_INREG(N);
1559  case ISD::TRUNCATE: return visitTRUNCATE(N);
1560  case ISD::BITCAST: return visitBITCAST(N);
1561  case ISD::BUILD_PAIR: return visitBUILD_PAIR(N);
1562  case ISD::FADD: return visitFADD(N);
1563  case ISD::FSUB: return visitFSUB(N);
1564  case ISD::FMUL: return visitFMUL(N);
1565  case ISD::FMA: return visitFMA(N);
1566  case ISD::FDIV: return visitFDIV(N);
1567  case ISD::FREM: return visitFREM(N);
1568  case ISD::FSQRT: return visitFSQRT(N);
1569  case ISD::FCOPYSIGN: return visitFCOPYSIGN(N);
1570  case ISD::FPOW: return visitFPOW(N);
1571  case ISD::SINT_TO_FP: return visitSINT_TO_FP(N);
1572  case ISD::UINT_TO_FP: return visitUINT_TO_FP(N);
1573  case ISD::FP_TO_SINT: return visitFP_TO_SINT(N);
1574  case ISD::FP_TO_UINT: return visitFP_TO_UINT(N);
1575  case ISD::FP_ROUND: return visitFP_ROUND(N);
1576  case ISD::FP_EXTEND: return visitFP_EXTEND(N);
1577  case ISD::FNEG: return visitFNEG(N);
1578  case ISD::FABS: return visitFABS(N);
1579  case ISD::FFLOOR: return visitFFLOOR(N);
1580  case ISD::FMINNUM: return visitFMINNUM(N);
1581  case ISD::FMAXNUM: return visitFMAXNUM(N);
1582  case ISD::FMINIMUM: return visitFMINIMUM(N);
1583  case ISD::FMAXIMUM: return visitFMAXIMUM(N);
1584  case ISD::FCEIL: return visitFCEIL(N);
1585  case ISD::FTRUNC: return visitFTRUNC(N);
1586  case ISD::BRCOND: return visitBRCOND(N);
1587  case ISD::BR_CC: return visitBR_CC(N);
1588  case ISD::LOAD: return visitLOAD(N);
1589  case ISD::STORE: return visitSTORE(N);
1590  case ISD::INSERT_VECTOR_ELT: return visitINSERT_VECTOR_ELT(N);
1591  case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
1592  case ISD::BUILD_VECTOR: return visitBUILD_VECTOR(N);
1593  case ISD::CONCAT_VECTORS: return visitCONCAT_VECTORS(N);
1594  case ISD::EXTRACT_SUBVECTOR: return visitEXTRACT_SUBVECTOR(N);
1595  case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N);
1596  case ISD::SCALAR_TO_VECTOR: return visitSCALAR_TO_VECTOR(N);
1597  case ISD::INSERT_SUBVECTOR: return visitINSERT_SUBVECTOR(N);
1598  case ISD::MGATHER: return visitMGATHER(N);
1599  case ISD::MLOAD: return visitMLOAD(N);
1600  case ISD::MSCATTER: return visitMSCATTER(N);
1601  case ISD::MSTORE: return visitMSTORE(N);
1602  case ISD::LIFETIME_END: return visitLIFETIME_END(N);
1603  case ISD::FP_TO_FP16: return visitFP_TO_FP16(N);
1604  case ISD::FP16_TO_FP: return visitFP16_TO_FP(N);
1605  case ISD::VECREDUCE_FADD:
1606  case ISD::VECREDUCE_FMUL:
1607  case ISD::VECREDUCE_ADD:
1608  case ISD::VECREDUCE_MUL:
1609  case ISD::VECREDUCE_AND:
1610  case ISD::VECREDUCE_OR:
1611  case ISD::VECREDUCE_XOR:
1612  case ISD::VECREDUCE_SMAX:
1613  case ISD::VECREDUCE_SMIN:
1614  case ISD::VECREDUCE_UMAX:
1615  case ISD::VECREDUCE_UMIN:
1616  case ISD::VECREDUCE_FMAX:
1617  case ISD::VECREDUCE_FMIN: return visitVECREDUCE(N);
1618  }
1619  return SDValue();
1620 }
1621 
1622 SDValue DAGCombiner::combine(SDNode *N) {
1623  SDValue RV = visit(N);
1624 
1625  // If nothing happened, try a target-specific DAG combine.
1626  if (!RV.getNode()) {
1627  assert(N->getOpcode() != ISD::DELETED_NODE &&
1628  "Node was deleted but visit returned NULL!");
1629 
1630  if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
1631  TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {
1632 
1633  // Expose the DAG combiner to the target combiner impls.
1634  TargetLowering::DAGCombinerInfo
1635  DagCombineInfo(DAG, Level, false, this);
1636 
1637  RV = TLI.PerformDAGCombine(N, DagCombineInfo);
1638  }
1639  }
1640 
1641  // If still nothing happened, try promoting the operation.
1642  if (!RV.getNode()) {
1643  switch (N->getOpcode()) {
1644  default: break;
1645  case ISD::ADD:
1646  case ISD::SUB:
1647  case ISD::MUL:
1648  case ISD::AND:
1649  case ISD::OR:
1650  case ISD::XOR:
1651  RV = PromoteIntBinOp(SDValue(N, 0));
1652  break;
1653  case ISD::SHL:
1654  case ISD::SRA:
1655  case ISD::SRL:
1656  RV = PromoteIntShiftOp(SDValue(N, 0));
1657  break;
1658  case ISD::SIGN_EXTEND:
1659  case ISD::ZERO_EXTEND:
1660  case ISD::ANY_EXTEND:
1661  RV = PromoteExtend(SDValue(N, 0));
1662  break;
1663  case ISD::LOAD:
1664  if (PromoteLoad(SDValue(N, 0)))
1665  RV = SDValue(N, 0);
1666  break;
1667  }
1668  }
1669 
1670  // If N is a commutative binary node, try to eliminate it if the commuted
1671  // version is already present in the DAG.
1672  if (!RV.getNode() && TLI.isCommutativeBinOp(N->getOpcode()) &&
1673  N->getNumValues() == 1) {
1674  SDValue N0 = N->getOperand(0);
1675  SDValue N1 = N->getOperand(1);
1676 
1677  // Constant operands are canonicalized to RHS.
1678  if (N0 != N1 && (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1))) {
1679  SDValue Ops[] = {N1, N0};
1680  SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
1681  N->getFlags());
1682  if (CSENode)
1683  return SDValue(CSENode, 0);
1684  }
1685  }
1686 
1687  return RV;
1688 }
1689 
1690 /// Given a node, return its input chain if it has one, otherwise return a null
1691 /// sd operand.
1692 static SDValue getInputChainForNode(SDNode *N) {
1693  if (unsigned NumOps = N->getNumOperands()) {
1694  if (N->getOperand(0).getValueType() == MVT::Other)
1695  return N->getOperand(0);
1696  if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
1697  return N->getOperand(NumOps-1);
1698  for (unsigned i = 1; i < NumOps-1; ++i)
1699  if (N->getOperand(i).getValueType() == MVT::Other)
1700  return N->getOperand(i);
1701  }
1702  return SDValue();
1703 }
1704 
1705 SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
1706  // If N has two operands, where one has an input chain equal to the other,
1707  // the 'other' chain is redundant.
1708  if (N->getNumOperands() == 2) {
1709  if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
1710  return N->getOperand(0);
1711  if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
1712  return N->getOperand(1);
1713  }
1714 
1715  // Don't simplify token factors if optnone.
1716  if (OptLevel == CodeGenOpt::None)
1717  return SDValue();
1718 
1719  // If the sole user is a token factor, we should make sure we have a
1720  // chance to merge them together. This prevents TF chains from inhibiting
1721  // optimizations.
1722  if (N->hasOneUse() && N->use_begin()->getOpcode() == ISD::TokenFactor)
1723  AddToWorklist(*(N->use_begin()));
1724 
1725  SmallVector<SDNode *, 8> TFs; // List of token factors to visit.
1726  SmallVector<SDValue, 8> Ops; // Ops for replacing token factor.
1727  SmallPtrSet<SDNode*, 16> SeenOps;
1728  bool Changed = false; // If we should replace this token factor.
1729 
1730  // Start out with this token factor.
1731  TFs.push_back(N);
1732 
1733  // Iterate through token factors. The TFs list grows when new token factors are
1734  // encountered.
1735  for (unsigned i = 0; i < TFs.size(); ++i) {
1736  // Limit number of nodes to inline, to avoid quadratic compile times.
1737  // We have to add the outstanding Token Factors to Ops, otherwise we might
1738  // drop Ops from the resulting Token Factors.
1739  if (Ops.size() > TokenFactorInlineLimit) {
1740  for (unsigned j = i; j < TFs.size(); j++)
1741  Ops.emplace_back(TFs[j], 0);
1742  // Drop unprocessed Token Factors from TFs, so we do not add them to the
1743  // combiner worklist later.
1744  TFs.resize(i);
1745  break;
1746  }
1747 
1748  SDNode *TF = TFs[i];
1749  // Check each of the operands.
1750  for (const SDValue &Op : TF->op_values()) {
1751  switch (Op.getOpcode()) {
1752  case ISD::EntryToken:
1753  // Entry tokens don't need to be added to the list. They are
1754  // redundant.
1755  Changed = true;
1756  break;
1757 
1758  case ISD::TokenFactor:
1759  if (Op.hasOneUse() && !is_contained(TFs, Op.getNode())) {
1760  // Queue up for processing.
1761  TFs.push_back(Op.getNode());
1762  Changed = true;
1763  break;
1764  }
1765  LLVM_FALLTHROUGH;
1766 
1767  default:
1768  // Only add if it isn't already in the list.
1769  if (SeenOps.insert(Op.getNode()).second)
1770  Ops.push_back(Op);
1771  else
1772  Changed = true;
1773  break;
1774  }
1775  }
1776  }
1777 
1778  // Re-visit inlined Token Factors, to clean them up in case they have been
1779  // removed. Skip the first Token Factor, as this is the current node.
1780  for (unsigned i = 1, e = TFs.size(); i < e; i++)
1781  AddToWorklist(TFs[i]);
1782 
1783  // Remove Nodes that are chained to another node in the list. Do so
1784  // by walking up chains breadth-first, stopping when we've seen
1785  // another operand. In general we must climb to the EntryNode, but we can exit
1786  // early if we find all remaining work is associated with just one operand as
1787  // no further pruning is possible.
1788 
1789  // List of nodes to search through and original Ops from which they originate.
1790  SmallVector<std::pair<SDNode *, unsigned>, 8> Worklist;
1791  SmallVector<unsigned, 8> OpWorkCount; // Count of work for each Op.
1792  SmallPtrSet<SDNode *, 16> SeenChains;
1793  bool DidPruneOps = false;
1794 
1795  unsigned NumLeftToConsider = 0;
1796  for (const SDValue &Op : Ops) {
1797  Worklist.push_back(std::make_pair(Op.getNode(), NumLeftToConsider++));
1798  OpWorkCount.push_back(1);
1799  }
1800 
1801  auto AddToWorklist = [&](unsigned CurIdx, SDNode *Op, unsigned OpNumber) {
1802  // If this is an Op, we can remove the op from the list. Re-mark any
1803  // search associated with it as from the current OpNumber.
1804  if (SeenOps.count(Op) != 0) {
1805  Changed = true;
1806  DidPruneOps = true;
1807  unsigned OrigOpNumber = 0;
1808  while (OrigOpNumber < Ops.size() && Ops[OrigOpNumber].getNode() != Op)
1809  OrigOpNumber++;
1810  assert((OrigOpNumber != Ops.size()) &&
1811  "expected to find TokenFactor Operand");
1812  // Re-mark worklist from OrigOpNumber to OpNumber
1813  for (unsigned i = CurIdx + 1; i < Worklist.size(); ++i) {
1814  if (Worklist[i].second == OrigOpNumber) {
1815  Worklist[i].second = OpNumber;
1816  }
1817  }
1818  OpWorkCount[OpNumber] += OpWorkCount[OrigOpNumber];
1819  OpWorkCount[OrigOpNumber] = 0;
1820  NumLeftToConsider--;
1821  }
1822  // Add if it's a new chain
1823  if (SeenChains.insert(Op).second) {
1824  OpWorkCount[OpNumber]++;
1825  Worklist.push_back(std::make_pair(Op, OpNumber));
1826  }
1827  };
1828 
1829  for (unsigned i = 0; i < Worklist.size() && i < 1024; ++i) {
1830  // We need to consider at least 2 Ops to prune.
1831  if (NumLeftToConsider <= 1)
1832  break;
1833  auto CurNode = Worklist[i].first;
1834  auto CurOpNumber = Worklist[i].second;
1835  assert((OpWorkCount[CurOpNumber] > 0) &&
1836  "Node should not appear in worklist");
1837  switch (CurNode->getOpcode()) {
1838  case ISD::EntryToken:
1839  // Hitting EntryToken is the only way for the search to terminate
1840  // without hitting another operand's search.
1841  // Prevent us from marking this operand
1842  // considered.
1843  NumLeftToConsider++;
1844  break;
1845  case ISD::TokenFactor:
1846  for (const SDValue &Op : CurNode->op_values())
1847  AddToWorklist(i, Op.getNode(), CurOpNumber);
1848  break;
1849  case ISD::LIFETIME_START:
1850  case ISD::LIFETIME_END:
1851  case ISD::CopyFromReg:
1852  case ISD::CopyToReg:
1853  AddToWorklist(i, CurNode->getOperand(0).getNode(), CurOpNumber);
1854  break;
1855  default:
1856  if (auto *MemNode = dyn_cast<MemSDNode>(CurNode))
1857  AddToWorklist(i, MemNode->getChain().getNode(), CurOpNumber);
1858  break;
1859  }
1860  OpWorkCount[CurOpNumber]--;
1861  if (OpWorkCount[CurOpNumber] == 0)
1862  NumLeftToConsider--;
1863  }
1864 
1865  // If we've changed things around then replace token factor.
1866  if (Changed) {
1867  SDValue Result;
1868  if (Ops.empty()) {
1869  // The entry token is the only possible outcome.
1870  Result = DAG.getEntryNode();
1871  } else {
1872  if (DidPruneOps) {
1873  SmallVector<SDValue, 8> PrunedOps;
1874  // Keep only the chain operands that were not pruned away.
1875  for (const SDValue &Op : Ops) {
1876  if (SeenChains.count(Op.getNode()) == 0)
1877  PrunedOps.push_back(Op);
1878  }
1879  Result = DAG.getTokenFactor(SDLoc(N), PrunedOps);
1880  } else {
1881  Result = DAG.getTokenFactor(SDLoc(N), Ops);
1882  }
1883  }
1884  return Result;
1885  }
1886  return SDValue();
1887 }
1888 
1889 /// MERGE_VALUES can always be eliminated.
1890 SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
1891  WorklistRemover DeadNodes(*this);
1892  // Replacing results may cause a different MERGE_VALUES to suddenly
1893  // be CSE'd with N, and carry its uses with it. Iterate until no
1894  // uses remain, to ensure that the node can be safely deleted.
1895  // First add the users of this node to the work list so that they
1896  // can be tried again once they have new operands.
1897  AddUsersToWorklist(N);
1898  do {
1899  // Do as a single replacement to avoid rewalking use lists.
1900  SmallVector<SDValue, 8> Ops;
1901  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
1902  Ops.push_back(N->getOperand(i));
1903  DAG.ReplaceAllUsesWith(N, Ops.data());
1904  } while (!N->use_empty());
1905  deleteAndRecombine(N);
1906  return SDValue(N, 0); // Return N so it doesn't get rechecked!
1907 }
1908 
1909 /// If \p N is a ConstantSDNode with isOpaque() == false return it casted to a
1910 /// ConstantSDNode pointer else nullptr.
1911 static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) {
1912  ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N);
1913  return Const != nullptr && !Const->isOpaque() ? Const : nullptr;
1914 }
1915 
1916 SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
1917  assert(TLI.isBinOp(BO->getOpcode()) && BO->getNumValues() == 1 &&
1918  "Unexpected binary operator");
1919 
1920  // Don't do this unless the old select is going away. We want to eliminate the
1921  // binary operator, not replace a binop with a select.
1922  // TODO: Handle ISD::SELECT_CC.
1923  unsigned SelOpNo = 0;
1924  SDValue Sel = BO->getOperand(0);
1925  if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
1926  SelOpNo = 1;
1927  Sel = BO->getOperand(1);
1928  }
1929 
1930  if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
1931  return SDValue();
1932 
1933  SDValue CT = Sel.getOperand(1);
1934  if (!isConstantOrConstantVector(CT, true) &&
1935  !isConstantFPBuildVectorOrConstantFP(CT))
1936  return SDValue();
1937 
1938  SDValue CF = Sel.getOperand(2);
1939  if (!isConstantOrConstantVector(CF, true) &&
1940  !isConstantFPBuildVectorOrConstantFP(CF))
1941  return SDValue();
1942 
1943  // Bail out if any constants are opaque because we can't constant fold those.
1944  // The exception is "and" and "or" with either 0 or -1 in which case we can
1945  // propagate non constant operands into select. I.e.:
1946  // and (select Cond, 0, -1), X --> select Cond, 0, X
1947  // or X, (select Cond, -1, 0) --> select Cond, -1, X
1948  auto BinOpcode = BO->getOpcode();
1949  bool CanFoldNonConst =
1950  (BinOpcode == ISD::AND || BinOpcode == ISD::OR) &&
1951  (isNullOrNullSplat(CT) || isAllOnesOrAllOnesSplat(CT)) &&
1952  (isNullOrNullSplat(CF) || isAllOnesOrAllOnesSplat(CF));
1953 
1954  SDValue CBO = BO->getOperand(SelOpNo ^ 1);
1955  if (!CanFoldNonConst &&
1956  !isConstantOrConstantVector(CBO, true) &&
1957  !isConstantFPBuildVectorOrConstantFP(CBO))
1958  return SDValue();
1959 
1960  EVT VT = Sel.getValueType();
1961 
1962  // In the case of a shift, the value and the shift amount may have different
1963  // VTs. For instance, on x86 the shift amount is i8 regardless of the LHS
1964  // type. Bail out if we have swapped operands and the value types do not
1965  // match. NB: x86 is fine if the operands are not swapped and the shift
1966  // amount VT is no bigger than that of the shifted value. TODO: it should be
1967  // possible to check for a shift, correct the VTs and still optimize on x86.
1968  if (SelOpNo && VT != CBO.getValueType())
1969  return SDValue();
1970 
1971  // We have a select-of-constants followed by a binary operator with a
1972  // constant. Eliminate the binop by pulling the constant math into the select.
1973  // Example: add (select Cond, CT, CF), CBO --> select Cond, CT + CBO, CF + CBO
1974  SDLoc DL(Sel);
1975  SDValue NewCT = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CT)
1976  : DAG.getNode(BinOpcode, DL, VT, CT, CBO);
1977  if (!CanFoldNonConst && !NewCT.isUndef() &&
1978  !isConstantOrConstantVector(NewCT, true) &&
1979  !isConstantFPBuildVectorOrConstantFP(NewCT))
1980  return SDValue();
1981 
1982  SDValue NewCF = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CF)
1983  : DAG.getNode(BinOpcode, DL, VT, CF, CBO);
1984  if (!CanFoldNonConst && !NewCF.isUndef() &&
1985  !isConstantOrConstantVector(NewCF, true) &&
1986  !isConstantFPBuildVectorOrConstantFP(NewCF))
1987  return SDValue();
1988 
1989  SDValue SelectOp = DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF);
1990  SelectOp->setFlags(BO->getFlags());
1991  return SelectOp;
1992 }
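// Illustrative sketch, not part of DAGCombiner: the scalar identity behind
// foldBinOpIntoSelect for an ADD, written with plain ints. The helper names
// below are invented for illustration only.
static constexpr int binopAfterSelect(bool Cond, int CT, int CF, int CBO) {
  return (Cond ? CT : CF) + CBO; // original shape: binop applied to the select
}
static constexpr int selectOfFoldedConstants(bool Cond, int CT, int CF,
                                             int CBO) {
  return Cond ? (CT + CBO) : (CF + CBO); // rewritten: constants pre-combined
}
static_assert(binopAfterSelect(true, 7, -3, 5) ==
                  selectOfFoldedConstants(true, 7, -3, 5),
              "select fold must preserve the true arm");
static_assert(binopAfterSelect(false, 7, -3, 5) ==
                  selectOfFoldedConstants(false, 7, -3, 5),
              "select fold must preserve the false arm");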
1993 
1994 static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, SelectionDAG &DAG) {
1995  assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
1996  "Expecting add or sub");
1997 
1998  // Match a constant operand and a zext operand for the math instruction:
1999  // add Z, C
2000  // sub C, Z
2001  bool IsAdd = N->getOpcode() == ISD::ADD;
2002  SDValue C = IsAdd ? N->getOperand(1) : N->getOperand(0);
2003  SDValue Z = IsAdd ? N->getOperand(0) : N->getOperand(1);
2004  auto *CN = dyn_cast<ConstantSDNode>(C);
2005  if (!CN || Z.getOpcode() != ISD::ZERO_EXTEND)
2006  return SDValue();
2007 
2008  // Match the zext operand as a setcc of a boolean.
2009  if (Z.getOperand(0).getOpcode() != ISD::SETCC ||
2010  Z.getOperand(0).getValueType() != MVT::i1)
2011  return SDValue();
2012 
2013  // Match the compare as: setcc (X & 1), 0, eq.
2014  SDValue SetCC = Z.getOperand(0);
2015  ISD::CondCode CC = cast<CondCodeSDNode>(SetCC->getOperand(2))->get();
2016  if (CC != ISD::SETEQ || !isNullConstant(SetCC.getOperand(1)) ||
2017  SetCC.getOperand(0).getOpcode() != ISD::AND ||
2018  !isOneConstant(SetCC.getOperand(0).getOperand(1)))
2019  return SDValue();
2020 
2021  // We are adding/subtracting a constant and an inverted low bit. Turn that
2022  // into a subtract/add of the low bit with incremented/decremented constant:
2023  // add (zext i1 (seteq (X & 1), 0)), C --> sub C+1, (zext (X & 1))
2024  // sub C, (zext i1 (seteq (X & 1), 0)) --> add C-1, (zext (X & 1))
2025  EVT VT = C.getValueType();
2026  SDLoc DL(N);
2027  SDValue LowBit = DAG.getZExtOrTrunc(SetCC.getOperand(0), DL, VT);
2028  SDValue C1 = IsAdd ? DAG.getConstant(CN->getAPIntValue() + 1, DL, VT) :
2029  DAG.getConstant(CN->getAPIntValue() - 1, DL, VT);
2030  return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, C1, LowBit);
2031 }
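// Illustrative sketch, not part of DAGCombiner: the scalar identity behind the
// fold above, written with wrapping unsigned arithmetic (the DAG fold holds
// modulo 2^N in the same way). The helper name is invented for illustration.
//   add (zext i1 (seteq (X & 1), 0)), C  ==  sub C+1, (zext (X & 1))
//   sub C, (zext i1 (seteq (X & 1), 0))  ==  add C-1, (zext (X & 1))
static constexpr bool addSubBoolOfMaskedValHolds(uint32_t X, uint32_t C) {
  uint32_t InvLowBit = ((X & 1u) == 0u) ? 1u : 0u; // zext i1 (seteq (X & 1), 0)
  uint32_t LowBit = X & 1u;                        // zext (X & 1)
  return (C + InvLowBit) == ((C + 1u) - LowBit) &&
         (C - InvLowBit) == ((C - 1u) + LowBit);
}
static_assert(addSubBoolOfMaskedValHolds(0u, 42u), "even X");
static_assert(addSubBoolOfMaskedValHolds(13u, 42u), "odd X");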
2032 
2033 /// Try to fold a 'not' shifted sign-bit with add/sub with constant operand into
2034 /// a shift and add with a different constant.
2035 static SDValue foldAddSubOfSignBit(SDNode *N, SelectionDAG &DAG) {
2036  assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2037  "Expecting add or sub");
2038 
2039  // We need a constant operand for the add/sub, and the other operand is a
2040  // logical shift right: add (srl), C or sub C, (srl).
2041  // TODO - support non-uniform vector amounts.
2042  bool IsAdd = N->getOpcode() == ISD::ADD;
2043  SDValue ConstantOp = IsAdd ? N->getOperand(1) : N->getOperand(0);
2044  SDValue ShiftOp = IsAdd ? N->getOperand(0) : N->getOperand(1);
2045  ConstantSDNode *C = isConstOrConstSplat(ConstantOp);
2046  if (!C || ShiftOp.getOpcode() != ISD::SRL)
2047  return SDValue();
2048 
2049  // The shift must be of a 'not' value.
2050  SDValue Not = ShiftOp.getOperand(0);
2051  if (!Not.hasOneUse() || !isBitwiseNot(Not))
2052  return SDValue();
2053 
2054  // The shift must be moving the sign bit to the least-significant-bit.
2055  EVT VT = ShiftOp.getValueType();
2056  SDValue ShAmt = ShiftOp.getOperand(1);
2057  ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
2058  if (!ShAmtC || ShAmtC->getAPIntValue() != (VT.getScalarSizeInBits() - 1))
2059  return SDValue();
2060 
2061  // Eliminate the 'not' by adjusting the shift and add/sub constant:
2062  // add (srl (not X), 31), C --> add (sra X, 31), (C + 1)
2063  // sub C, (srl (not X), 31) --> add (srl X, 31), (C - 1)
2064  SDLoc DL(N);
2065  auto ShOpcode = IsAdd ? ISD::SRA : ISD::SRL;
2066  SDValue NewShift = DAG.getNode(ShOpcode, DL, VT, Not.getOperand(0), ShAmt);
2067  APInt NewC = IsAdd ? C->getAPIntValue() + 1 : C->getAPIntValue() - 1;
2068  return DAG.getNode(ISD::ADD, DL, VT, NewShift, DAG.getConstant(NewC, DL, VT));
2069 }
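// Illustrative sketch, not part of DAGCombiner: the 32-bit identity behind
// foldAddSubOfSignBit, modeled with unsigned arithmetic so the shifts stay
// portable. The helper name is invented for illustration.
//   add (srl (not X), 31), C  ==  add (sra X, 31), C + 1
//   sub C, (srl (not X), 31)  ==  add (srl X, 31), C - 1
static constexpr bool addSubOfSignBitHolds(uint32_t X, uint32_t C) {
  uint32_t NotSign = (~X) >> 31;               // srl (not X), 31
  uint32_t Sra = (X >> 31) ? 0xFFFFFFFFu : 0u; // sra X, 31 (all sign bits)
  uint32_t Srl = X >> 31;                      // srl X, 31
  return (NotSign + C) == (Sra + (C + 1u)) &&  // the ADD form
         (C - NotSign) == (Srl + (C - 1u));    // the SUB form
}
static_assert(addSubOfSignBitHolds(0x80000000u, 100u), "negative X");
static_assert(addSubOfSignBitHolds(0x12345678u, 100u), "non-negative X");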
2070 
2071 /// Try to fold a node that behaves like an ADD (note that N isn't necessarily
2072 /// an ISD::ADD here, it could for example be an ISD::OR if we know that there
2073 /// are no common bits set in the operands).
2074 SDValue DAGCombiner::visitADDLike(SDNode *N) {
2075  SDValue N0 = N->getOperand(0);
2076  SDValue N1 = N->getOperand(1);
2077  EVT VT = N0.getValueType();
2078  SDLoc DL(N);
2079 
2080  // fold vector ops
2081  if (VT.isVector()) {
2082  if (SDValue FoldedVOp = SimplifyVBinOp(N))
2083  return FoldedVOp;
2084 
2085  // fold (add x, 0) -> x, vector edition
2086  if (ISD::isBuildVectorAllZeros(N1.getNode()))
2087  return N0;
2088  if (ISD::isBuildVectorAllZeros(N0.getNode()))
2089  return N1;
2090  }
2091 
2092  // fold (add x, undef) -> undef
2093  if (N0.isUndef())
2094  return N0;
2095 
2096  if (N1.isUndef())
2097  return N1;
2098 
2099  if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
2100  // canonicalize constant to RHS
2101  if (!DAG.isConstantIntBuildVectorOrConstantInt(N1))
2102  return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
2103  // fold (add c1, c2) -> c1+c2
2104  return DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, N0.getNode(),
2105  N1.getNode());
2106  }
2107 
2108  // fold (add x, 0) -> x
2109  if (isNullConstant(N1))
2110  return N0;
2111 
2112  if (isConstantOrConstantVector(N1, /* NoOpaque */ true)) {
2113  // fold ((A-c1)+c2) -> (A+(c2-c1))
2114  if (N0.getOpcode() == ISD::SUB &&
2115  isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true)) {
2116  SDValue Sub = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, N1.getNode(),
2117  N0.getOperand(1).getNode());
2118  assert(Sub && "Constant folding failed");
2119  return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Sub);
2120  }
2121 
2122  // fold ((c1-A)+c2) -> (c1+c2)-A
2123  if (N0.getOpcode() == ISD::SUB &&
2124  isConstantOrConstantVector(N0.getOperand(0), /* NoOpaque */ true)) {
2125  SDValue Add = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, N1.getNode(),
2126  N0.getOperand(0).getNode());
2127  assert(Add && "Constant folding failed");
2128  return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
2129  }
2130 
2131  // add (sext i1 X), 1 -> zext (not i1 X)
2132  // We don't transform this pattern:
2133  // add (zext i1 X), -1 -> sext (not i1 X)
2134  // because most (?) targets generate better code for the zext form.
2135  if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
2136  isOneOrOneSplat(N1)) {
2137  SDValue X = N0.getOperand(0);
2138  if ((!LegalOperations ||
2139  (TLI.isOperationLegal(ISD::XOR, X.getValueType()) &&
2140  TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) &&
2141  X.getScalarValueSizeInBits() == 1) {
2142  SDValue Not = DAG.getNOT(DL, X, X.getValueType());
2143  return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Not);
2144  }
2145  }
2146 
2147  // Undo the add -> or combine to merge constant offsets from a frame index.
2148  if (N0.getOpcode() == ISD::OR &&
2149  isa<FrameIndexSDNode>(N0.getOperand(0)) &&
2150  isa<ConstantSDNode>(N0.getOperand(1)) &&
2151  DAG.haveNoCommonBitsSet(N0.getOperand(0), N0.getOperand(1))) {
2152  SDValue Add0 = DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(1));
2153  return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Add0);
2154  }
2155  }
2156 
2157  if (SDValue NewSel = foldBinOpIntoSelect(N))
2158  return NewSel;
2159 
2160  // reassociate add
2161  if (!reassociationCanBreakAddressingModePattern(ISD::ADD, DL, N0, N1)) {
2162  if (SDValue RADD = reassociateOps(ISD::ADD, DL, N0, N1, N->getFlags()))
2163  return RADD;
2164  }
2165  // fold ((0-A) + B) -> B-A
2166  if (N0.getOpcode() == ISD::SUB && isNullOrNullSplat(N0.getOperand(0)))
2167  return DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
2168 
2169  // fold (A + (0-B)) -> A-B
2170  if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
2171  return DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(1));
2172 
2173  // fold (A+(B-A)) -> B
2174  if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1))
2175  return N1.getOperand(0);
2176 
2177  // fold ((B-A)+A) -> B
2178  if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1))
2179  return N0.getOperand(0);
2180 
2181  // fold ((A-B)+(C-A)) -> (C-B)
2182  if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
2183  N0.getOperand(0) == N1.getOperand(1))
2184  return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2185  N0.getOperand(1));
2186 
2187  // fold ((A-B)+(B-C)) -> (A-C)
2188  if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
2189  N0.getOperand(1) == N1.getOperand(0))
2190  return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
2191  N1.getOperand(1));
2192 
2193  // fold (A+(B-(A+C))) to (B-C)
2194  if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
2195  N0 == N1.getOperand(1).getOperand(0))
2196  return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2197  N1.getOperand(1).getOperand(1));
2198 
2199  // fold (A+(B-(C+A))) to (B-C)
2200  if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
2201  N0 == N1.getOperand(1).getOperand(1))
2202  return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2203  N1.getOperand(1).getOperand(0));
2204 
2205  // fold (A+((B-A)+or-C)) to (B+or-C)
2206  if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) &&
2207  N1.getOperand(0).getOpcode() == ISD::SUB &&
2208  N0 == N1.getOperand(0).getOperand(1))
2209  return DAG.getNode(N1.getOpcode(), DL, VT, N1.getOperand(0).getOperand(0),
2210  N1.getOperand(1));
2211 
2212  // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
2213  if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB) {
2214  SDValue N00 = N0.getOperand(0);
2215  SDValue N01 = N0.getOperand(1);
2216  SDValue N10 = N1.getOperand(0);
2217  SDValue N11 = N1.getOperand(1);
2218 
2219  if (isConstantOrConstantVector(N00) || isConstantOrConstantVector(N10))
2220  return DAG.getNode(ISD::SUB, DL, VT,
2221  DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10),
2222  DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11));
2223  }
2224 
2225  // fold (add (umax X, C), -C) --> (usubsat X, C)
2226  if (N0.getOpcode() == ISD::UMAX && hasOperation(ISD::USUBSAT, VT)) {
2227  auto MatchUSUBSAT = [](ConstantSDNode *Max, ConstantSDNode *Op) {
2228  return (!Max && !Op) ||
2229  (Max && Op && Max->getAPIntValue() == (-Op->getAPIntValue()));
2230  };
2231  if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchUSUBSAT,
2232  /*AllowUndefs*/ true))
2233  return DAG.getNode(ISD::USUBSAT, DL, VT, N0.getOperand(0),
2234  N0.getOperand(1));
2235  }
2236 
2237  if (SimplifyDemandedBits(SDValue(N, 0)))
2238  return SDValue(N, 0);
2239 
2240  if (isOneOrOneSplat(N1)) {
2241  // fold (add (xor a, -1), 1) -> (sub 0, a)
2242  if (isBitwiseNot(N0))
2243  return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
2244  N0.getOperand(0));
2245 
2246  // fold (add (add (xor a, -1), b), 1) -> (sub b, a)
2247  if (N0.getOpcode() == ISD::ADD ||
2248  N0.getOpcode() == ISD::UADDO ||
2249  N0.getOpcode() == ISD::SADDO) {
2250  SDValue A, Xor;
2251 
2252  if (isBitwiseNot(N0.getOperand(0))) {
2253  A = N0.getOperand(1);
2254  Xor = N0.getOperand(0);
2255  } else if (isBitwiseNot(N0.getOperand(1))) {
2256  A = N0.getOperand(0);
2257  Xor = N0.getOperand(1);
2258  }
2259 
2260  if (Xor)
2261  return DAG.getNode(ISD::SUB, DL, VT, A, Xor.getOperand(0));
2262  }
2263 
2264  // Look for:
2265  // add (add x, y), 1
2266  // And if the target does not like this form then turn into:
2267  // sub y, (xor x, -1)
2268  if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.hasOneUse() &&
2269  N0.getOpcode() == ISD::ADD) {
2270  SDValue Not = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
2271  DAG.getAllOnesConstant(DL, VT));
2272  return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(1), Not);
2273  }
2274  }
2275 
2276  // (x - y) + -1 -> add (xor y, -1), x
2277  if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
2278  isAllOnesOrAllOnesSplat(N1)) {
2279  SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1), N1);
2280  return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0));
2281  }
2282 
2283  if (SDValue Combined = visitADDLikeCommutative(N0, N1, N))
2284  return Combined;
2285 
2286  if (SDValue Combined = visitADDLikeCommutative(N1, N0, N))
2287  return Combined;
2288 
2289  return SDValue();
2290 }
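// Illustrative sketch, not part of DAGCombiner: two of the reassociation
// identities used by visitADDLike above, checked with wrapping unsigned
// arithmetic. The helper name is invented for illustration.
static constexpr bool addLikeIdentitiesHold(uint32_t A, uint32_t B, uint32_t C1,
                                            uint32_t C2) {
  return ((A - C1) + C2) == (A + (C2 - C1)) && // fold ((A-c1)+c2) -> (A+(c2-c1))
         (A + (B - A)) == B;                   // fold (A+(B-A)) -> B
}
static_assert(addLikeIdentitiesHold(10u, 3u, 7u, 2u), "small values");
static_assert(addLikeIdentitiesHold(0u, 0xFFFFFFFFu, 5u, 1u), "wrapping values");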
2291 
2292 SDValue DAGCombiner::visitADD(SDNode *N) {
2293  SDValue N0 = N->getOperand(0);
2294  SDValue N1 = N->getOperand(1);
2295  EVT VT = N0.getValueType();
2296  SDLoc DL(N);
2297 
2298  if (SDValue Combined = visitADDLike(N))
2299  return Combined;
2300 
2301  if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
2302  return V;
2303 
2304  if (SDValue V = foldAddSubOfSignBit(N, DAG))
2305  return V;
2306 
2307  // fold (a+b) -> (a|b) iff a and b share no bits.
2308  if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
2309  DAG.haveNoCommonBitsSet(N0, N1))
2310  return DAG.getNode(ISD::OR, DL, VT, N0, N1);
2311 
2312  return SDValue();
2313 }
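// Illustrative sketch, not part of DAGCombiner: the identity behind the final
// fold in visitADD. When two operands share no set bits there can be no carry
// out of any bit position, so addition and bitwise OR agree. The helper name
// is invented for illustration.
static constexpr bool addIsOrWhenDisjoint(uint32_t A, uint32_t B) {
  return (A & B) != 0u || (A + B) == (A | B);
}
static_assert(addIsOrWhenDisjoint(0xF0F0F0F0u, 0x0F0F0F0Fu), "disjoint bits");
static_assert(addIsOrWhenDisjoint(0x0000FFFFu, 0xFFFF0000u), "disjoint halves");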
2314 
2315 SDValue DAGCombiner::visitADDSAT(SDNode *N) {
2316  unsigned Opcode = N->getOpcode();
2317  SDValue N0 = N->getOperand(0);
2318  SDValue N1 = N->getOperand(1);
2319  EVT VT = N0.getValueType();
2320  SDLoc DL(N);
2321 
2322  // fold vector ops
2323  if (VT.isVector()) {
2324  // TODO SimplifyVBinOp
2325 
2326  // fold (add_sat x, 0) -> x, vector edition
2327  if (ISD::isBuildVectorAllZeros(N1.getNode()))
2328  return N0;
2329  if (ISD::isBuildVectorAllZeros(N0.getNode()))
2330  return N1;
2331  }
2332 
2333  // fold (add_sat x, undef) -> -1
2334  if (N0.isUndef() || N1.isUndef())
2335  return DAG.getAllOnesConstant(DL, VT);
2336 
2337  if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
2338  // canonicalize constant to RHS
2339  if (!DAG.isConstantIntBuildVectorOrConstantInt(N1))
2340  return DAG.getNode(Opcode, DL, VT, N1, N0);
2341  // fold (add_sat c1, c2) -> c3
2342  return DAG.FoldConstantArithmetic(Opcode, DL, VT, N0.getNode(),
2343  N1.getNode());
2344  }
2345 
2346  // fold (add_sat x, 0) -> x
2347  if (isNullConstant(N1))
2348  return N0;
2349 
2350  // If it cannot overflow, transform into an add.
2351  if (Opcode == ISD::UADDSAT)
2352  if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2353  return DAG.getNode(ISD::ADD, DL, VT, N0, N1);
2354 
2355  return SDValue();
2356 }
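// Illustrative sketch, not part of DAGCombiner: why a UADDSAT that provably
// cannot overflow may be rewritten as a plain ADD, modeled on 8-bit lanes
// (operands are assumed to fit in 8 bits). The helper name and lane width are
// invented for illustration.
static constexpr unsigned uaddsat8(unsigned A, unsigned B) {
  unsigned Sum = (A + B) & 0xFFu;
  return Sum < A ? 0xFFu : Sum; // clamp to the maximum on wraparound
}
static_assert(uaddsat8(0x70u, 0x20u) == 0x70u + 0x20u,
              "no overflow: saturating add equals plain add");
static_assert(uaddsat8(0xF0u, 0x20u) == 0xFFu, "overflow saturates instead");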
2357 
2358 static SDValue getAsCarry(const TargetLowering &TLI, SDValue V) {
2359  bool Masked = false;
2360 
2361  // First, peel away TRUNCATE/ZERO_EXTEND/AND nodes due to legalization.
2362  while (true) {
2363  if (V.getOpcode() == ISD::TRUNCATE || V.getOpcode() == ISD::ZERO_EXTEND) {
2364  V = V.getOperand(0);
2365  continue;
2366  }
2367 
2368  if (V.getOpcode() == ISD::AND && isOneConstant(V.getOperand(1))) {
2369  Masked = true;
2370  V = V.getOperand(0);
2371  continue;
2372  }
2373 
2374  break;
2375  }
2376 
2377  // If this is not a carry, return.
2378  if (V.getResNo() != 1)
2379  return SDValue();
2380 
2381  if (V.getOpcode() != ISD::ADDCARRY && V.getOpcode() != ISD::SUBCARRY &&
2382  V.getOpcode() != ISD::UADDO && V.getOpcode() != ISD::USUBO)
2383  return SDValue();
2384 
2385  EVT VT = V.getNode()->getValueType(0);
2386  if (!TLI.isOperationLegalOrCustom(V.getOpcode(), VT))
2387  return SDValue();
2388 
2389  // If the result is masked, then no matter what kind of bool it is we can
2390  // return. If it isn't, then we need to make sure the bool type is either 0 or
2391  // 1 and not other values.
2392  if (Masked ||
2393  TLI.getBooleanContents(V.getValueType()) ==
2394  TargetLowering::ZeroOrOneBooleanContent)
2395  return V;
2396 
2397  return SDValue();
2398 }
2399 
2400 /// Given the operands of an add/sub operation, see if the 2nd operand is a
2401 /// masked 0/1 whose source operand is actually known to be 0/-1. If so, invert
2402 /// the opcode and bypass the mask operation.
2403 static SDValue foldAddSubMasked1(bool IsAdd, SDValue N0, SDValue N1,
2404  SelectionDAG &DAG, const SDLoc &DL) {
2405  if (N1.getOpcode() != ISD::AND || !isOneOrOneSplat(N1->getOperand(1)))
2406  return SDValue();
2407 
2408  EVT VT = N0.getValueType();
2409  if (DAG.ComputeNumSignBits(N1.getOperand(0)) != VT.getScalarSizeInBits())
2410  return SDValue();
2411 
2412  // add N0, (and (AssertSext X, i1), 1) --> sub N0, X
2413  // sub N0, (and (AssertSext X, i1), 1) --> add N0, X
2414  return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, N0, N1.getOperand(0));
2415 }
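// Illustrative sketch, not part of DAGCombiner: the identity foldAddSubMasked1
// exploits. If Y is known to be 0 or all-ones (the 0/-1 bool it looks for),
// then (Y & 1) equals -Y modulo 2^N, so adding the masked bit and subtracting
// Y are interchangeable. The helper name is invented for illustration.
static constexpr bool addSubMasked1Holds(uint32_t Y, uint32_t N0) {
  return (Y != 0u && Y != 0xFFFFFFFFu) ||
         ((N0 + (Y & 1u)) == (N0 - Y) && (N0 - (Y & 1u)) == (N0 + Y));
}
static_assert(addSubMasked1Holds(0u, 1234u), "Y == 0");
static_assert(addSubMasked1Holds(0xFFFFFFFFu, 1234u), "Y == -1 (all ones)");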
2416 
2417 /// Helper for doing combines based on N0 and N1 being added to each other.
2418 SDValue DAGCombiner::visitADDLikeCommutative(SDValue N0, SDValue N1,
2419  SDNode *LocReference) {
2420  EVT VT = N0.getValueType();
2421  SDLoc DL(LocReference);
2422 
2423  // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
2424  if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB &&
2425  isNullOrNullSplat(N1.getOperand(0).getOperand(0)))
2426  return DAG.getNode(ISD::SUB, DL, VT, N0,
2427  DAG.getNode(ISD::SHL, DL, VT,
2428  N1.getOperand(0).getOperand(1),
2429  N1.getOperand(1)));
2430 
2431  if (SDValue V = foldAddSubMasked1(true, N0, N1, DAG, DL))
2432  return V;
2433 
2434  // Look for:
2435  // add (add x, 1), y
2436  // And if the target does not like this form then turn into:
2437  // sub y, (xor x, -1)
2438  if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.hasOneUse() &&
2439  N0.getOpcode() == ISD::ADD && isOneOrOneSplat(N0.getOperand(1))) {
2440  SDValue Not = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
2441  DAG.getAllOnesConstant(DL, VT));
2442  return DAG.getNode(ISD::SUB, DL, VT, N1, Not);
2443  }
2444 
2445  // Hoist one-use subtraction by non-opaque constant:
2446  // (x - C) + y -> (x + y) - C
2447  // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
2448  if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
2449  isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
2450  SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), N1);
2451  return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
2452  }
2453  // Hoist one-use subtraction from non-opaque constant:
2454  // (C - x) + y -> (y - x) + C
2455  if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
2456  isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
2457  SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
2458  return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(0));
2459  }
2460 
2461  // If the target's bool is represented as 0/1, prefer to make this 'sub 0/1'
2462  // rather than 'add 0/-1' (the zext should get folded).
2463  // add (sext i1 Y), X --> sub X, (zext i1 Y)
2464  if (N0.getOpcode() == ISD::SIGN_EXTEND &&
2465  N0.getOperand(0).getScalarValueSizeInBits() == 1 &&
2466  TLI.getBooleanContents(VT) == TargetLowering::ZeroOrOneBooleanContent) {
2467  SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
2468  return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
2469  }
2470 
2471  // add X, (sextinreg Y i1) -> sub X, (and Y 1)
2472  if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
2473  VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
2474  if (TN->getVT() == MVT::i1) {
2475  SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
2476  DAG.getConstant(1, DL, VT));
2477  return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt);
2478  }
2479  }
2480 
2481  // (add X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
2482  if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1)) &&
2483  N1.getResNo() == 0)
2484  return DAG.getNode(ISD::ADDCARRY, DL, N1->getVTList(),
2485  N0, N1.getOperand(0), N1.getOperand(2));
2486 
2487  // (add X, Carry) -> (addcarry X, 0, Carry)
2488  if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
2489  if (SDValue Carry = getAsCarry(TLI, N1))
2490  return DAG.getNode(ISD::ADDCARRY, DL,
2491  DAG.getVTList(VT, Carry.getValueType()), N0,
2492  DAG.getConstant(0, DL, VT), Carry);
2493 
2494  return SDValue();
2495 }
2496 
2497 SDValue DAGCombiner::visitADDC(SDNode *N) {
2498  SDValue N0 = N->getOperand(0);
2499  SDValue N1 = N->getOperand(1);
2500  EVT VT = N0.getValueType();
2501  SDLoc DL(N);
2502 
2503  // If the flag result is dead, turn this into an ADD.
2504  if (!N->hasAnyUseOfValue(1))
2505  return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2506  DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2507 
2508  // canonicalize constant to RHS.
2509  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2510  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2511  if (N0C && !N1C)
2512  return DAG.getNode(ISD::ADDC, DL, N->getVTList(), N1, N0);
2513 
2514  // fold (addc x, 0) -> x + no carry out
2515  if (isNullConstant(N1))
2516  return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
2517  DL, MVT::Glue));
2518 
2519  // If it cannot overflow, transform into an add.
2520  if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2521  return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2522  DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2523 
2524  return SDValue();
2525 }
2526 
2527 static SDValue flipBoolean(SDValue V, const SDLoc &DL,
2528  SelectionDAG &DAG, const TargetLowering &TLI) {
2529  EVT VT = V.getValueType();
2530 
2531  SDValue Cst;
2532  switch (TLI.getBooleanContents(VT)) {
2533  case TargetLowering::ZeroOrOneBooleanContent:
2534  case TargetLowering::UndefinedBooleanContent:
2535  Cst = DAG.getConstant(1, DL, VT);
2536  break;
2537  case TargetLowering::ZeroOrNegativeOneBooleanContent:
2538  Cst = DAG.getAllOnesConstant(DL, VT);
2539  break;
2540  }
2541 
2542  return DAG.getNode(ISD::XOR, DL, VT, V, Cst);
2543 }
2544 
2545 /**
2546  * Flips a boolean if it is cheaper to compute. If the Force parameter is set,
2547  * then the flip also occurs if computing the inverse is the same cost.
2548  * This function returns an empty SDValue in case it cannot flip the boolean
2549  * without increasing the cost of the computation. If you want to flip a boolean
2550  * no matter what, use flipBoolean.
2551  */
2552 static SDValue extractBooleanFlip(SDValue V, SelectionDAG &DAG,
2553  const TargetLowering &TLI,
2554  bool Force) {
2555  if (Force && isa<ConstantSDNode>(V))
2556  return flipBoolean(V, SDLoc(V), DAG, TLI);
2557 
2558  if (V.getOpcode() != ISD::XOR)
2559  return SDValue();
2560 
2561  ConstantSDNode *Const = isConstOrConstSplat(V.getOperand(1), false);
2562  if (!Const)
2563  return SDValue();
2564 
2565  EVT VT = V.getValueType();
2566 
2567  bool IsFlip = false;
2568  switch(TLI.getBooleanContents(VT)) {
2569  case TargetLowering::ZeroOrOneBooleanContent:
2570  IsFlip = Const->isOne();
2571  break;
2572  case TargetLowering::ZeroOrNegativeOneBooleanContent:
2573  IsFlip = Const->isAllOnesValue();
2574  break;
2575  case TargetLowering::UndefinedBooleanContent:
2576  IsFlip = (Const->getAPIntValue() & 0x01) == 1;
2577  break;
2578  }
2579 
2580  if (IsFlip)
2581  return V.getOperand(0);
2582  if (Force)
2583  return flipBoolean(V, SDLoc(V), DAG, TLI);
2584  return SDValue();
2585 }
2586 
2587 SDValue DAGCombiner::visitADDO(SDNode *N) {
2588  SDValue N0 = N->getOperand(0);
2589  SDValue N1 = N->getOperand(1);
2590  EVT VT = N0.getValueType();
2591  bool IsSigned = (ISD::SADDO == N->getOpcode());
2592 
2593  EVT CarryVT = N->getValueType(1);
2594  SDLoc DL(N);
2595 
2596  // If the flag result is dead, turn this into an ADD.
2597  if (!N->hasAnyUseOfValue(1))
2598  return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2599  DAG.getUNDEF(CarryVT));
2600 
2601  // canonicalize constant to RHS.
2602  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
2603  !DAG.isConstantIntBuildVectorOrConstantInt(N1))
2604  return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
2605 
2606  // fold (addo x, 0) -> x + no carry out
2607  if (isNullOrNullSplat(N1))
2608  return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
2609 
2610  if (!IsSigned) {
2611  // If it cannot overflow, transform into an add.
2612  if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2613  return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2614  DAG.getConstant(0, DL, CarryVT));
2615 
2616  // fold (uaddo (xor a, -1), 1) -> (usub 0, a) and flip carry.
2617  if (isBitwiseNot(N0) && isOneOrOneSplat(N1)) {
2618  SDValue Sub = DAG.getNode(ISD::USUBO, DL, N->getVTList(),
2619  DAG.getConstant(0, DL, VT), N0.getOperand(0));
2620  return CombineTo(N, Sub,
2621  flipBoolean(Sub.getValue(1), DL, DAG, TLI));
2622  }
2623 
2624  if (SDValue Combined = visitUADDOLike(N0, N1, N))
2625  return Combined;
2626 
2627  if (SDValue Combined = visitUADDOLike(N1, N0, N))
2628  return Combined;
2629  }
2630 
2631  return SDValue();
2632 }
2633 
2634 SDValue DAGCombiner::visitUADDOLike(SDValue N0, SDValue N1, SDNode *N) {
2635  EVT VT = N0.getValueType();
2636  if (VT.isVector())
2637  return SDValue();
2638 
2639  // (uaddo X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
2640  // If Y + 1 cannot overflow.
2641  if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1))) {
2642  SDValue Y = N1.getOperand(0);
2643  SDValue One = DAG.getConstant(1, SDLoc(N), Y.getValueType());
2644  if (DAG.computeOverflowKind(Y, One) == SelectionDAG::OFK_Never)
2645  return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0, Y,
2646  N1.getOperand(2));
2647  }
2648 
2649  // (uaddo X, Carry) -> (addcarry X, 0, Carry)
2650  if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
2651  if (SDValue Carry = getAsCarry(TLI, N1))
2652  return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0,
2653  DAG.getConstant(0, SDLoc(N), VT), Carry);
2654 
2655  return SDValue();
2656 }
2657 
2658 SDValue DAGCombiner::visitADDE(SDNode *N) {
2659  SDValue N0 = N->getOperand(0);
2660  SDValue N1 = N->getOperand(1);
2661  SDValue CarryIn = N->getOperand(2);
2662 
2663  // canonicalize constant to RHS
2664  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2665  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2666  if (N0C && !N1C)
2667  return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
2668  N1, N0, CarryIn);
2669 
2670  // fold (adde x, y, false) -> (addc x, y)
2671  if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
2672  return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1);
2673 
2674  return SDValue();
2675 }
2676 
2677 SDValue DAGCombiner::visitADDCARRY(SDNode *N) {
2678  SDValue N0 = N->getOperand(0);
2679  SDValue N1 = N->getOperand(1);
2680  SDValue CarryIn = N->getOperand(2);
2681  SDLoc DL(N);
2682 
2683  // canonicalize constant to RHS
2684  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2685  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2686  if (N0C && !N1C)
2687  return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), N1, N0, CarryIn);
2688 
2689  // fold (addcarry x, y, false) -> (uaddo x, y)
2690  if (isNullConstant(CarryIn)) {
2691  if (!LegalOperations ||
2692  TLI.isOperationLegalOrCustom(ISD::UADDO, N0.getValueType()))
2693  return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N0, N1);
2694  }
2695 
2696  // fold (addcarry 0, 0, X) -> (and (ext/trunc X), 1) and no carry.
2697  if (isNullConstant(N0) && isNullConstant(N1)) {
2698  EVT VT = N0.getValueType();
2699  EVT CarryVT = CarryIn.getValueType();
2700  SDValue CarryExt = DAG.getBoolExtOrTrunc(CarryIn, DL, VT, CarryVT);
2701  AddToWorklist(CarryExt.getNode());
2702  return CombineTo(N, DAG.getNode(ISD::AND, DL, VT, CarryExt,
2703  DAG.getConstant(1, DL, VT)),
2704  DAG.getConstant(0, DL, CarryVT));
2705  }
2706 
2707  if (SDValue Combined = visitADDCARRYLike(N0, N1, CarryIn, N))
2708  return Combined;
2709 
2710  if (SDValue Combined = visitADDCARRYLike(N1, N0, CarryIn, N))
2711  return Combined;
2712 
2713  return SDValue();
2714 }
2715 
2716 /**
2717  * If we are facing some sort of diamond carry propagation pattern try to
2718  * break it up to generate something like:
2719  *   (addcarry X, 0, (addcarry A, B, Z):Carry)
2720  *
2721  * The end result is usually an increase in operations required, but because
2722  * the carry is now linearized, other transforms can kick in and optimize the DAG.
2723  *
2724  * Patterns typically look something like
2725  *            (uaddo A, B)
2726  *             /       \
2727  *          Carry      Sum
2728  *            |          \
2729  *            |    (addcarry *, 0, Z)
2730  *            |       /
2731  *             \   Carry
2732  *              |  /
2733  *    (addcarry X, *, *)
2734  *
2735  * But numerous variations exist. Our goal is to identify A, B, X and Z and
2736  * produce a combine with a single path for carry propagation.
2737  */
2738 static SDValue combineADDCARRYDiamond(DAGCombiner &Combiner, SelectionDAG &DAG,
2739  SDValue X, SDValue Carry0, SDValue Carry1,
2740  SDNode *N) {
2741  if (Carry1.getResNo() != 1 || Carry0.getResNo() != 1)
2742  return SDValue();
2743  if (Carry1.getOpcode() != ISD::UADDO)
2744  return SDValue();
2745 
2746  SDValue Z;
2747 
2748  /**
2749  * First look for a suitable Z. It will present itself in the form of
2750  * (addcarry Y, 0, Z) or its equivalent (uaddo Y, 1) for Z=true
2751  */
2752  if (Carry0.getOpcode() == ISD::ADDCARRY &&
2753  isNullConstant(Carry0.getOperand(1))) {
2754  Z = Carry0.getOperand(2);
2755  } else if (Carry0.getOpcode() == ISD::UADDO &&
2756  isOneConstant(Carry0.getOperand(1))) {
2757  EVT VT = Combiner.getSetCCResultType(Carry0.getValueType());
2758  Z = DAG.getConstant(1, SDLoc(Carry0.getOperand(1)), VT);
2759  } else {
2760  // We couldn't find a suitable Z.
2761  return SDValue();
2762  }
2763 
2764 
2765  auto cancelDiamond = [&](SDValue A,SDValue B) {
2766  SDLoc DL(N);
2767  SDValue NewY = DAG.getNode(ISD::ADDCARRY, DL, Carry0->getVTList(), A, B, Z);
2768  Combiner.AddToWorklist(NewY.getNode());
2769  return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), X,
2770  DAG.getConstant(0, DL, X.getValueType()),
2771  NewY.getValue(1));
2772  };
2773 
2774  /**
2775  * (uaddo A, B)
2776  * |
2777  * Sum
2778  * |
2779  * (addcarry *, 0, Z)
2780  */
2781  if (Carry0.getOperand(0) == Carry1.getValue(0)) {
2782  return cancelDiamond(Carry1.getOperand(0), Carry1.getOperand(1));
2783  }
2784 
2785  /**
2786  * (addcarry A, 0, Z)
2787  * |
2788  * Sum
2789  * |
2790  * (uaddo *, B)
2791  */
2792  if (Carry1.getOperand(0) == Carry0.getValue(0)) {
2793  return cancelDiamond(Carry0.getOperand(0), Carry1.getOperand(1));
2794  }
2795 
2796  if (Carry1.getOperand(1) == Carry0.getValue(0)) {
2797  return cancelDiamond(Carry1.getOperand(0), Carry0.getOperand(0));
2798  }
2799 
2800  return SDValue();
2801 }
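// Illustrative sketch, not part of DAGCombiner: the diamond above and its
// linearized replacement produce the same final sum. Modeled here with 8-bit
// lanes held in 'unsigned' so the carries are easy to read off; all helper
// names are invented for illustration.
static constexpr unsigned lo8(unsigned V) { return V & 0xFFu; }
static constexpr unsigned carry8(unsigned V) { return V >> 8; }
static constexpr bool diamondMatchesLinearized(unsigned A, unsigned B,
                                               unsigned X, unsigned Z) {
  // Original diamond: (uaddo A, B) feeding (addcarry Sum, 0, Z), with both
  // carries consumed by the final addcarry of X.
  unsigned UAddO = A + B;             // Sum1 : Carry1
  unsigned AddCarry = lo8(UAddO) + Z; // Sum2 : Carry2
  unsigned Original = lo8(X + carry8(UAddO) + carry8(AddCarry));
  // Rewritten form: (addcarry X, 0, (addcarry A, B, Z):Carry).
  unsigned Rewritten = lo8(X + carry8(A + B + Z));
  return Original == Rewritten;
}
static_assert(diamondMatchesLinearized(0xFFu, 0x01u, 0x10u, 1u),
              "carry produced by the uaddo");
static_assert(diamondMatchesLinearized(0x80u, 0x7Fu, 0x10u, 1u),
              "carry produced by the addcarry");
static_assert(diamondMatchesLinearized(0x12u, 0x34u, 0x56u, 0u), "no carries");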
2802 
2803 SDValue DAGCombiner::visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
2804  SDNode *N) {
2805  // fold (addcarry (xor a, -1), b, c) -> (subcarry b, a, !c) and flip carry.
2806  if (isBitwiseNot(N0))
2807  if (SDValue NotC = extractBooleanFlip(CarryIn, DAG, TLI, true)) {
2808  SDLoc DL(N);
2809  SDValue Sub = DAG.getNode(ISD::SUBCARRY, DL, N->getVTList(), N1,
2810  N0.getOperand(0), NotC);
2811  return CombineTo(N, Sub,
2812  flipBoolean(Sub.getValue(1), DL, DAG, TLI));
2813  }
2814 
2815  // Iff the flag result is dead:
2816  // (addcarry (add|uaddo X, Y), 0, Carry) -> (addcarry X, Y, Carry)
2817  // Don't do this if the Carry comes from the uaddo. It won't remove the uaddo
2818  // or the dependency between the instructions.
2819  if ((N0.getOpcode() == ISD::ADD ||
2820  (N0.getOpcode() == ISD::UADDO && N0.getResNo() == 0 &&
2821  N0.getValue(1) != CarryIn)) &&
2822  isNullConstant(N1) && !N->hasAnyUseOfValue(1))
2823  return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(),
2824  N0.getOperand(0), N0.getOperand(1), CarryIn);
2825 
2826  * When one of the addcarry arguments is itself a carry, we may be facing
2827  * a diamond carry propagation, in which case we try to transform the DAG
2828  * a diamond carry propagation. In which case we try to transform the DAG
2829  * to ensure linear carry propagation if that is possible.
2830  */
2831  if (auto Y = getAsCarry(TLI, N1)) {
2832  // Because both are carries, Y and Z can be swapped.
2833  if (auto R = combineADDCARRYDiamond(*this, DAG, N0, Y, CarryIn, N))
2834  return R;
2835  if (auto R = combineADDCARRYDiamond(*this, DAG, N0, CarryIn, Y, N))
2836  return R;
2837  }
2838 
2839  return SDValue();
2840 }
2841 
2842 // Since it may not be valid to emit a fold to zero for vector initializers,
2843 // check if we can before folding.
2844 static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT,
2845  SelectionDAG &DAG, bool LegalOperations) {
2846  if (!VT.isVector())
2847  return DAG.getConstant(0, DL, VT);
2848  if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
2849  return DAG.getConstant(0, DL, VT);
2850  return SDValue();
2851 }
2852 
2853 SDValue DAGCombiner::visitSUB(SDNode *N) {
2854  SDValue N0 = N->getOperand(0);
2855  SDValue N1 = N->getOperand(1);
2856  EVT VT = N0.getValueType();
2857  SDLoc DL(N);
2858 
2859  // fold vector ops
2860  if (VT.isVector()) {
2861  if (SDValue FoldedVOp = SimplifyVBinOp(N))
2862  return FoldedVOp;
2863 
2864  // fold (sub x, 0) -> x, vector edition
2865  if (ISD::isBuildVectorAllZeros(N1.getNode()))
2866  return N0;
2867  }
2868 
2869  // fold (sub x, x) -> 0
2870  // FIXME: Refactor this and xor and other similar operations together.
2871  if (N0 == N1)
2872  return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
2873  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
2874  DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
2875  // fold (sub c1, c2) -> c1-c2
2876  return DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, N0.getNode(),
2877  N1.getNode());
2878  }
2879 
2880  if (SDValue NewSel = foldBinOpIntoSelect(N))
2881  return NewSel;
2882 
2883  ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
2884 
2885  // fold (sub x, c) -> (add x, -c)
2886  if (N1C) {
2887  return DAG.getNode(ISD::ADD, DL, VT, N0,
2888  DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
2889  }
2890 
2891  if (isNullOrNullSplat(N0)) {
2892  unsigned BitWidth = VT.getScalarSizeInBits();
2893  // Right-shifting everything out but the sign bit followed by negation is
2894  // the same as flipping arithmetic/logical shift type without the negation:
2895  // -(X >>u 31) -> (X >>s 31)
2896  // -(X >>s 31) -> (X >>u 31)
2897  if (N1->getOpcode() == ISD::SRA || N1->getOpcode() == ISD::SRL) {
2898  ConstantSDNode *ShiftAmt = isConstOrConstSplat(N1.getOperand(1));
2899  if (ShiftAmt && ShiftAmt->getAPIntValue() == (BitWidth - 1)) {
2900  auto NewSh = N1->getOpcode() == ISD::SRA ? ISD::SRL : ISD::SRA;
2901  if (!LegalOperations || TLI.isOperationLegal(NewSh, VT))
2902  return DAG.getNode(NewSh, DL, VT, N1.getOperand(0), N1.getOperand(1));
2903  }
2904  }
2905 
2906  // 0 - X --> 0 if the sub is NUW.
2907  if (N->getFlags().hasNoUnsignedWrap())
2908  return N0;
2909 
2910  if (DAG.MaskedValueIsZero(N1, ~APInt::getSignMask(BitWidth))) {
2911  // N1 is either 0 or the minimum signed value. If the sub is NSW, then
2912  // N1 must be 0 because negating the minimum signed value is undefined.
2913  if (N->getFlags().hasNoSignedWrap())
2914  return N0;
2915 
2916  // 0 - X --> X if X is 0 or the minimum signed value.
2917  return N1;
2918  }
2919  }
2920 
2921  // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
2922  if (isAllOnesOrAllOnesSplat(N0))
2923  return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
2924 
2925  // fold (A - (0-B)) -> A+B
2926  if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
2927  return DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(1));
2928 
2929  // fold A-(A-B) -> B
2930  if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
2931  return N1.getOperand(1);
2932 
2933  // fold (A+B)-A -> B
2934  if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
2935  return N0.getOperand(1);
2936 
2937  // fold (A+B)-B -> A
2938  if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
2939  return N0.getOperand(0);
2940 
2941  // fold (A+C1)-C2 -> A+(C1-C2)
2942  if (N0.getOpcode() == ISD::ADD &&
2943  isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
2944  isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
2945  SDValue NewC = DAG.FoldConstantArithmetic(
2946  ISD::SUB, DL, VT, N0.getOperand(1).getNode(), N1.getNode());
2947  assert(NewC && "Constant folding failed");
2948  return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), NewC);
2949  }
2950 
2951  // fold C2-(A+C1) -> (C2-C1)-A
2952  if (N1.getOpcode() == ISD::ADD) {
2953  SDValue N11 = N1.getOperand(1);
2954  if (isConstantOrConstantVector(N0, /* NoOpaques */ true) &&
2955  isConstantOrConstantVector(N11, /* NoOpaques */ true)) {
2956  SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, N0.getNode(),
2957  N11.getNode());
2958  assert(NewC && "Constant folding failed");
2959  return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0));
2960  }
2961  }
2962 
2963  // fold (A-C1)-C2 -> A-(C1+C2)
2964  if (N0.getOpcode() == ISD::SUB &&
2965  isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
2966  isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
2967  SDValue NewC = DAG.FoldConstantArithmetic(
2968  ISD::ADD, DL, VT, N0.getOperand(1).getNode(), N1.getNode());
2969  assert(NewC && "Constant folding failed");
2970  return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), NewC);
2971  }
2972 
2973  // fold (c1-A)-c2 -> (c1-c2)-A
2974  if (N0.getOpcode() == ISD::SUB &&
2975  isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
2976  isConstantOrConstantVector(N0.getOperand(0), /* NoOpaques */ true)) {
2977  SDValue NewC = DAG.FoldConstantArithmetic(
2978  ISD::SUB, DL, VT, N0.getOperand(0).getNode(), N1.getNode());
2979  assert(NewC && "Constant folding failed");
2980  return DAG.getNode(ISD::SUB, DL, VT, NewC, N0.getOperand(1));
2981  }
2982 
2983  // fold ((A+(B+or-C))-B) -> A+or-C
2984  if (N0.getOpcode() == ISD::ADD &&
2985  (N0.getOperand(1).getOpcode() == ISD::SUB ||
2986  N0.getOperand(1).getOpcode() == ISD::ADD) &&
2987  N0.getOperand(1).getOperand(0) == N1)
2988  return DAG.getNode(N0.getOperand(1).getOpcode(), DL, VT, N0.getOperand(0),
2989  N0.getOperand(1).getOperand(1));
2990 
2991  // fold ((A+(C+B))-B) -> A+C
2992  if (N0.getOpcode() == ISD::ADD && N0.getOperand(1).getOpcode() == ISD::ADD &&
2993  N0.getOperand(1).getOperand(1) == N1)
2994  return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0),
2995  N0.getOperand(1).getOperand(0));
2996 
2997  // fold ((A-(B-C))-C) -> A-B
2998  if (N0.getOpcode() == ISD::SUB && N0.getOperand(1).getOpcode() == ISD::SUB &&
2999  N0.getOperand(1).getOperand(1) == N1)
3000  return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
3001  N0.getOperand(1).getOperand(0));
3002 
3003  // fold (A-(B-C)) -> A+(C-B)
3004  if (N1.getOpcode() == ISD::SUB && N1.hasOneUse())
3005  return DAG.getNode(ISD::ADD, DL, VT, N0,
3006  DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(1),
3007  N1.getOperand(0)));
3008 
3009  // fold (X - (-Y * Z)) -> (X + (Y * Z))
3010  if (N1.getOpcode() == ISD::MUL && N1.hasOneUse()) {
3011  if (N1.getOperand(0).getOpcode() == ISD::SUB &&
3012  isNullOrNullSplat(N1.getOperand(0).getOperand(0))) {
3013  SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
3014  N1.getOperand(0).getOperand(1),
3015  N1.getOperand(1));
3016  return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
3017  }
3018  if (N1.getOperand(1).getOpcode() == ISD::SUB &&
3019  isNullOrNullSplat(N1.getOperand(1).getOperand(0))) {
3020  SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
3021  N1.getOperand(0),
3022  N1.getOperand(1).getOperand(1));
3023  return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
3024  }
3025  }
3026 
3027  // If either operand of a sub is undef, the result is undef
3028  if (N0.isUndef())
3029  return N0;
3030  if (N1.isUndef())
3031  return N1;
3032 
3033  if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
3034  return V;
3035 
3036  if (SDValue V = foldAddSubOfSignBit(N, DAG))
3037  return V;
3038 
3039  if (SDValue V = foldAddSubMasked1(false, N0, N1, DAG, SDLoc(N)))
3040  return V;
3041 
3042  // (x - y) - 1 -> add (xor y, -1), x
3043  if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB && isOneOrOneSplat(N1)) {
3044  SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1),
3045  DAG.getAllOnesConstant(DL, VT));
3046  return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0));
3047  }
3048 
3049  // Look for:
3050  // sub y, (xor x, -1)
3051  // And if the target does not like this form then turn into:
3052  // add (add x, y), 1
3053  if (TLI.preferIncOfAddToSubOfNot(VT) && N1.hasOneUse() && isBitwiseNot(N1)) {
3054  SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(0));
3055  return DAG.getNode(ISD::ADD, DL, VT, Add, DAG.getConstant(1, DL, VT));
3056  }
3057 
3058  // Hoist one-use addition by non-opaque constant:
3059  // (x + C) - y -> (x - y) + C
3060  if (N0.hasOneUse() && N0.getOpcode() == ISD::ADD &&
3061  isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
3062  SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
3063  return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(1));
3064  }
3065  // y - (x + C) -> (y - x) - C
3066  if (N1.hasOneUse() && N1.getOpcode() == ISD::ADD &&
3067  isConstantOrConstantVector(N1.getOperand(1), /*NoOpaques=*/true)) {
3068  SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(0));
3069  return DAG.getNode(ISD::SUB, DL, VT, Sub, N1.getOperand(1));
3070  }
3071  // (x - C) - y -> (x - y) - C
3072  // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
3073  if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
3074  isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
3075  SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
3076  return DAG.getNode(ISD::SUB, DL, VT, Sub, N0.getOperand(1));
3077  }
3078  // (C - x) - y -> C - (x + y)
3079  if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
3080  isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
3081  SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1), N1);
3082  return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), Add);
3083  }
3084 
3085  // If the target's bool is represented as 0/-1, prefer to make this 'add 0/-1'
3086  // rather than 'sub 0/1' (the sext should get folded).
3087  // sub X, (zext i1 Y) --> add X, (sext i1 Y)
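  // ((zext i1 Y) is 0 or 1 and (sext i1 Y) is 0 or -1, so subtracting the
  //  former is the same as adding the latter.)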
3088  if (N1.getOpcode() == ISD::ZERO_EXTEND &&
3089  N1.getOperand(0).getScalarValueSizeInBits() == 1 &&
3090  TLI.getBooleanContents(VT) ==
3091  TargetLowering::ZeroOrNegativeOneBooleanContent) {
3092  SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N1.getOperand(0));
3093  return DAG.getNode(ISD::ADD, DL, VT, N0, SExt);
3094  }
3095 
3096  // fold Y = sra (X, size(X)-1); sub (xor (X, Y), Y) -> (abs X)
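  // (Y is 0 when X is non-negative and -1 otherwise, so (X ^ Y) - Y is X
  //  itself when Y == 0 and ~X + 1 == -X when Y == -1: branchless abs.)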
3097  if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
3098  if (N0.getOpcode() == ISD::XOR && N1.getOpcode() == ISD::SRA) {
3099  SDValue X0 = N0.getOperand(0), X1 = N0.getOperand(1);
3100  SDValue S0 = N1.getOperand(0);
3101  if ((X0 == S0 && X1 == N1) || (X0 == N1 && X1 == S0)) {
3102  unsigned OpSizeInBits = VT.getScalarSizeInBits();
3103  if (ConstantSDNode *C = isConstOrConstSplat(N1.getOperand(1)))
3104  if (C->getAPIntValue() == (OpSizeInBits - 1))
3105  return DAG.getNode(ISD::ABS, SDLoc(N), VT, S0);
3106  }
3107  }
3108  }
3109 
3110  // If the relocation model supports it, consider symbol offsets.
3111  if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
3112  if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
3113  // fold (sub Sym, c) -> Sym-c
3114  if (N1C && GA->getOpcode() == ISD::GlobalAddress)
3115  return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT,
3116  GA->getOffset() -
3117  (uint64_t)N1C->getSExtValue());
3118  // fold (sub Sym+c1, Sym+c2) -> c1-c2
3119  if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
3120  if (GA->getGlobal() == GB->getGlobal())
3121  return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
3122  DL, VT);
3123  }
3124 
3125  // sub X, (sextinreg Y i1) -> add X, (and Y 1)
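  // ((sextinreg Y i1) is 0 or -1, so the subtraction adds 0 or 1, which is
  //  exactly (and Y 1).)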
3126  if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
3127  VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
3128  if (TN->getVT() == MVT::i1) {
3129  SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
3130  DAG.getConstant(1, DL, VT));
3131  return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt);
3132  }
3133  }
3134 
3135  // Prefer an add for more folding potential and possibly better codegen:
3136  // sub N0, (lshr N10, width-1) --> add N0, (ashr N10, width-1)
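  // (lshr by width-1 yields 0 or 1, ashr yields 0 or -1; subtracting the
  //  former equals adding the latter.)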
3137  if (!LegalOperations && N1.getOpcode() == ISD::SRL && N1.hasOneUse()) {
3138  SDValue ShAmt = N1.getOperand(1);
3139  ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
3140  if (ShAmtC &&
3141  ShAmtC->getAPIntValue() == (N1.getScalarValueSizeInBits() - 1)) {
3142  SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0), ShAmt);
3143  return DAG.getNode(ISD::ADD, DL, VT, N0, SRA);
3144  }
3145  }
3146 
3147  if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT)) {
3148  // (sub Carry, X) -> (addcarry (sub 0, X), 0, Carry)
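  // (Carry - X == (0 - X) + 0 + Carry, with Carry as the carry-in.)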
3149  if (SDValue Carry = getAsCarry(TLI, N0)) {
3150  SDValue X = N1;
3151  SDValue Zero = DAG.getConstant(0, DL, VT);
3152  SDValue NegX = DAG.getNode(ISD::SUB, DL, VT, Zero, X);
3153  return DAG.getNode(ISD::ADDCARRY, DL,
3154  DAG.getVTList(VT, Carry.getValueType()), NegX, Zero,
3155  Carry);
3156  }
3157  }
3158 
3159  return SDValue();
3160 }
3161 
3162 SDValue DAGCombiner::visitSUBSAT(SDNode *N) {
3163  SDValue N0 = N->getOperand(0);
3164  SDValue N1 = N->getOperand(1);
3165  EVT VT = N0.getValueType();
3166  SDLoc DL(N);
3167 
3168  // fold vector ops
3169  if (VT.isVector()) {
3170  // TODO SimplifyVBinOp
3171 
3172  // fold (sub_sat x, 0) -> x, vector edition
3173  if (ISD::isBuildVectorAllZeros(N1.getNode()))
3174  return N0;
3175  }
3176 
3177  // fold (sub_sat x, undef) -> 0
3178  if (N0.isUndef() || N1.isUndef())
3179  return DAG.getConstant(0, DL, VT);
3180 
3181  // fold (sub_sat x, x) -> 0
3182  if (N0 == N1)
3183  return DAG.getConstant(0, DL, VT);
3184 
3185  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3186  DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
3187  // fold (sub_sat c1, c2) -> c3
3188  return DAG.FoldConstantArithmetic(N->getOpcode(), DL, VT, N0.getNode(),
3189  N1.getNode());
3190  }
3191 
3192  // fold (sub_sat x, 0) -> x
3193  if (isNullConstant(N1))
3194  return N0;
3195 
3196  return SDValue();
3197 }
3198 
3199 SDValue DAGCombiner::visitSUBC(SDNode *N) {
3200  SDValue N0 = N->getOperand(0);
3201  SDValue N1 = N->getOperand(1);
3202  EVT VT = N0.getValueType();
3203  SDLoc DL(N);
3204 
3205  // If the flag result is dead, turn this into an SUB.
3206  if (!N->hasAnyUseOfValue(1))
3207  return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
3208  DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3209 
3210  // fold (subc x, x) -> 0 + no borrow
3211  if (N0 == N1)
3212  return CombineTo(N, DAG.getConstant(0, DL, VT),
3213  DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3214 
3215  // fold (subc x, 0) -> x + no borrow
3216  if (isNullConstant(N1))
3217  return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3218 
3219  // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
3220  if (isAllOnesConstant(N0))
3221  return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
3222  DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3223 
3224  return SDValue();
3225 }
3226 
3227 SDValue DAGCombiner::visitSUBO(SDNode *N) {
3228  SDValue N0 = N->getOperand(0);
3229  SDValue N1 = N->getOperand(1);
3230  EVT VT = N0.getValueType();
3231  bool IsSigned = (ISD::SSUBO == N->getOpcode());
3232 
3233  EVT CarryVT = N->getValueType(1);
3234  SDLoc DL(N);
3235 
3236  // If the flag result is dead, turn this into an SUB.
3237  if (!N->hasAnyUseOfValue(1))
3238  return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
3239  DAG.getUNDEF(CarryVT));
3240 
3241  // fold (subo x, x) -> 0 + no borrow
3242  if (N0 == N1)
3243  return CombineTo(N, DAG.getConstant(0, DL, VT),
3244  DAG.getConstant(0, DL, CarryVT));
3245 
3246  ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
3247 
3248  // fold (subo x, c) -> (addo x, -c)
3249  if (IsSigned && N1C && !N1C->getAPIntValue().isMinSignedValue()) {
3250  return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0,
3251  DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
3252  }
3253 
3254  // fold (subo x, 0) -> x + no borrow
3255  if (isNullOrNullSplat(N1))
3256  return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
3257 
3258  // Canonicalize (usubo -1, x) -> ~x, i.e. (xor x, -1) + no borrow
3259  if (!IsSigned && isAllOnesOrAllOnesSplat(N0))
3260  return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
3261  DAG.getConstant(0, DL, CarryVT));
3262 
3263  return SDValue();
3264 }
3265 
3266 SDValue DAGCombiner::visitSUBE(SDNode *N) {
3267  SDValue N0 = N->getOperand(0);
3268  SDValue N1 = N->getOperand(1);
3269  SDValue CarryIn = N->getOperand(2);
3270 
3271  // fold (sube x, y, false) -> (subc x, y)
3272  if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
3273  return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1);
3274 
3275  return SDValue();
3276 }
3277 
3278 SDValue DAGCombiner::visitSUBCARRY(SDNode *N) {
3279  SDValue N0 = N->getOperand(0);
3280  SDValue N1 = N->getOperand(1);
3281  SDValue CarryIn = N->getOperand(2);
3282 
3283  // fold (subcarry x, y, false) -> (usubo x, y)
3284  if (isNullConstant(CarryIn)) {
3285  if (!LegalOperations ||
3286  TLI.isOperationLegalOrCustom(ISD::USUBO, N->getValueType(0)))
3287  return DAG.getNode(ISD::USUBO, SDLoc(N), N->getVTList(), N0, N1);
3288  }
3289 
3290  return SDValue();
3291 }
3292 
3293 // Notice that "mulfix" can be any of SMULFIX, SMULFIXSAT, UMULFIX and
3294 // UMULFIXSAT here.
3295 SDValue DAGCombiner::visitMULFIX(SDNode *N) {
3296  SDValue N0 = N->getOperand(0);
3297  SDValue N1 = N->getOperand(1);
3298  SDValue Scale = N->getOperand(2);
3299  EVT VT = N0.getValueType();
3300 
3301  // fold (mulfix x, undef, scale) -> 0
3302  if (N0.isUndef() || N1.isUndef())
3303  return DAG.getConstant(0, SDLoc(N), VT);
3304 
3305  // Canonicalize constant to RHS (vector doesn't have to splat)
3306  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3307  !DAG.isConstantIntBuildVectorOrConstantInt(N1))
3308  return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0, Scale);
3309 
3310  // fold (mulfix x, 0, scale) -> 0
3311  if (isNullConstant(N1))
3312  return DAG.getConstant(0, SDLoc(N), VT);
3313 
3314  return SDValue();
3315 }
3316 
3317 SDValue DAGCombiner::visitMUL(SDNode *N) {
3318  SDValue N0 = N->getOperand(0);
3319  SDValue N1 = N->getOperand(1);
3320  EVT VT = N0.getValueType();
3321 
3322  // fold (mul x, undef) -> 0
3323  if (N0.isUndef() || N1.isUndef())
3324  return DAG.getConstant(0, SDLoc(N), VT);
3325 
3326  bool N0IsConst = false;
3327  bool N1IsConst = false;
3328  bool N1IsOpaqueConst = false;
3329  bool N0IsOpaqueConst = false;
3330  APInt ConstValue0, ConstValue1;
3331  // fold vector ops
3332  if (VT.isVector()) {
3333  if (SDValue FoldedVOp = SimplifyVBinOp(N))
3334  return FoldedVOp;
3335 
3336  N0IsConst = ISD::isConstantSplatVector(N0.getNode(), ConstValue0);
3337  N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1);
3338  assert((!N0IsConst ||
3339  ConstValue0.getBitWidth() == VT.getScalarSizeInBits()) &&
3340  "Splat APInt should be element width");
3341  assert((!N1IsConst ||
3342  ConstValue1.getBitWidth() == VT.getScalarSizeInBits()) &&
3343  "Splat APInt should be element width");
3344  } else {
3345  N0IsConst = isa<ConstantSDNode>(N0);
3346  if (N0IsConst) {
3347  ConstValue0 = cast<ConstantSDNode>(N0)->getAPIntValue();
3348  N0IsOpaqueConst = cast<ConstantSDNode>(N0)->isOpaque();
3349  }
3350  N1IsConst = isa<ConstantSDNode>(N1);
3351  if (N1IsConst) {
3352  ConstValue1 = cast<ConstantSDNode>(N1)->getAPIntValue();
3353  N1IsOpaqueConst = cast<ConstantSDNode>(N1)->isOpaque();
3354  }
3355  }
3356 
3357  // fold (mul c1, c2) -> c1*c2
3358  if (N0IsConst && N1IsConst && !N0IsOpaqueConst && !N1IsOpaqueConst)
3359  return DAG.FoldConstantArithmetic(ISD::MUL, SDLoc(N), VT,
3360  N0.getNode(), N1.getNode());
3361 
3362  // canonicalize constant to RHS (vector doesn't have to splat)
3363  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3364  !DAG.isConstantIntBuildVectorOrConstantInt(N1))
3365  return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0);
3366  // fold (mul x, 0) -> 0
3367  if (N1IsConst && ConstValue1.isNullValue())
3368  return N1;
3369  // fold (mul x, 1) -> x
3370  if (N1IsConst && ConstValue1.isOneValue())
3371  return N0;
3372 
3373  if (SDValue NewSel = foldBinOpIntoSelect(N))
3374  return NewSel;
3375 
3376  // fold (mul x, -1) -> 0-x
3377  if (N1IsConst && ConstValue1.isAllOnesValue()) {
3378  SDLoc DL(N);
3379  return DAG.getNode(ISD::SUB, DL, VT,
3380  DAG.getConstant(0, DL, VT), N0);
3381  }
3382  // fold (mul x, (1 << c)) -> x << c
3383  if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
3384  DAG.isKnownToBeAPowerOfTwo(N1) &&
3385  (!VT.isVector() || Level <= AfterLegalizeVectorOps)) {
3386  SDLoc DL(N);
3387  SDValue LogBase2 = BuildLogBase2(N1, DL);
3388  EVT ShiftVT = getShiftAmountTy(N0.getValueType());
3389  SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
3390  return DAG.getNode(ISD::SHL, DL, VT, N0, Trunc);
3391  }
3392  // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
3393  if (N1IsConst && !N1IsOpaqueConst && (-ConstValue1).isPowerOf2()) {
3394  unsigned Log2Val = (-ConstValue1).logBase2();
3395  SDLoc DL(N);
3396  // FIXME: If the input is something that is easily negated (e.g. a
3397  // single-use add), we should put the negate there.
3398  return DAG.getNode(ISD::SUB, DL, VT,
3399  DAG.getConstant(0, DL, VT),
3400  DAG.getNode(ISD::SHL, DL, VT, N0,
3401  DAG.getConstant(Log2Val, DL,
3402  getShiftAmountTy(N0.getValueType()))));
3403  }
3404 
3405  // Try to transform multiply-by-(power-of-2 +/- 1) into shift and add/sub.
3406  // mul x, (2^N + 1) --> add (shl x, N), x
3407  // mul x, (2^N - 1) --> sub (shl x, N), x
3408  // Examples: x * 33 --> (x << 5) + x
3409  // x * 15 --> (x << 4) - x
3410  // x * -33 --> -((x << 5) + x)
3411  // x * -15 --> -((x << 4) - x) ; this reduces --> x - (x << 4)
3412  if (N1IsConst && TLI.decomposeMulByConstant(*DAG.getContext(), VT, N1)) {
3413  // TODO: We could handle more general decomposition of any constant by
3414  // having the target set a limit on number of ops and making a
3415  // callback to determine that sequence (similar to sqrt expansion).
3416  unsigned MathOp = ISD::DELETED_NODE;
3417  APInt MulC = ConstValue1.abs();
3418  if ((MulC - 1).isPowerOf2())
3419  MathOp = ISD::ADD;
3420  else if ((MulC + 1).isPowerOf2())
3421  MathOp = ISD::SUB;
3422 
3423  if (MathOp != ISD::DELETED_NODE) {
3424  unsigned ShAmt =
3425  MathOp == ISD::ADD ? (MulC - 1).logBase2() : (MulC + 1).logBase2();
3426  assert(ShAmt < VT.getScalarSizeInBits() &&
3427  "multiply-by-constant generated out of bounds shift");
3428  SDLoc DL(N);
3429  SDValue Shl =
3430  DAG.getNode(ISD::SHL, DL, VT, N0, DAG.getConstant(ShAmt, DL, VT));
3431  SDValue R = DAG.getNode(MathOp, DL, VT, Shl, N0);
3432  if (ConstValue1.isNegative())
3433  R = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), R);
3434  return R;
3435  }
3436  }
3437 
3438  // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
3439  if (N0.getOpcode() == ISD::SHL &&
3440  isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
3441  isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
3442  SDValue C3 = DAG.getNode(ISD::SHL, SDLoc(N), VT, N1, N0.getOperand(1));
3443  if (isConstantOrConstantVector(C3))
3444  return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), C3);
3445  }
3446 
3447  // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
3448  // use.
3449  {
3450  SDValue Sh(nullptr, 0), Y(nullptr, 0);
3451 
3452  // Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)).
3453  if (N0.getOpcode() == ISD::SHL &&
3454  isConstantOrConstantVector(N0.getOperand(1)) &&
3455  N0.getNode()->hasOneUse()) {
3456  Sh = N0; Y = N1;
3457  } else if (N1.getOpcode() == ISD::SHL &&
3458  isConstantOrConstantVector(N1.getOperand(1)) &&
3459  N1.getNode()->hasOneUse()) {
3460  Sh = N1; Y = N0;
3461  }
3462 
3463  if (Sh.getNode()) {
3464  SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT, Sh.getOperand(0), Y);
3465  return DAG.getNode(ISD::SHL, SDLoc(N), VT, Mul, Sh.getOperand(1));
3466  }
3467  }
3468 
3469  // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
3470  if (isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
3471  N0.getOpcode() == ISD::ADD &&
3472  isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true) &&
3473  isMulAddWithConstProfitable(N, N0, N1))
3474  return DAG.getNode(ISD::ADD, SDLoc(N), VT,
3475  DAG.getNode(ISD::MUL, SDLoc(N0), VT,
3476  N0.getOperand(0), N1),
3477  DAG.getNode(ISD::MUL, SDLoc(N1), VT,
3478  N0.getOperand(1), N1));
3479 
3480  // reassociate mul
3481  if (SDValue RMUL = reassociateOps(ISD::MUL, SDLoc(N), N0, N1, N->getFlags()))
3482  return RMUL;
3483 
3484  return SDValue();
3485 }
3486 
3487 /// Return true if divmod libcall is available.
3488 static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
3489  const TargetLowering &TLI) {
3490  RTLIB::Libcall LC;
3491  EVT NodeType = Node->getValueType(0);
3492  if (!NodeType.isSimple())
3493  return false;
3494  switch (NodeType.getSimpleVT().SimpleTy) {
3495  default: return false; // No libcall for vector types.
3496  case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
3497  case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
3498  case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
3499  case MVT::i64: LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
3500  case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break;
3501  }
3502 
3503  return TLI.getLibcallName(LC) != nullptr;
3504 }
3505 
3506 /// Issue divrem if both quotient and remainder are needed.
3507 SDValue DAGCombiner::useDivRem(SDNode *Node) {
3508  if (Node->use_empty())
3509  return SDValue(); // This is a dead node, leave it alone.
3510 
3511  unsigned Opcode = Node->getOpcode();
3512  bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM);
3513  unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
3514 
3515  // DivMod lib calls can still work on non-legal types if using lib-calls.
3516  EVT VT = Node->getValueType(0);
3517  if (VT.isVector() || !VT.isInteger())
3518  return SDValue();
3519 
3520  if (!TLI.isTypeLegal(VT) && !TLI.isOperationCustom(DivRemOpc, VT))
3521  return SDValue();
3522 
3523  // If DIVREM is going to get expanded into a libcall,
3524  // but there is no libcall available, then don't combine.
3525  if (!TLI.isOperationLegalOrCustom(DivRemOpc, VT) &&
3526  !isDivRemLibcallAvailable(Node, isSigned, TLI))
3527  return SDValue();
3528 
3529  // If div is legal, it's better to do the normal expansion
3530  unsigned OtherOpcode = 0;
3531  if ((Opcode == ISD::SDIV) || (Opcode == ISD::UDIV)) {
3532  OtherOpcode = isSigned ? ISD::SREM : ISD::UREM;
3533  if (TLI.isOperationLegalOrCustom(Opcode, VT))
3534  return SDValue();
3535  } else {
3536  OtherOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
3537  if (TLI.isOperationLegalOrCustom(OtherOpcode, VT))
3538  return SDValue();
3539  }
3540 
3541  SDValue Op0 = Node->getOperand(0);
3542  SDValue Op1 = Node->getOperand(1);
3543  SDValue combined;
3544  for (SDNode::use_iterator UI = Op0.getNode()->use_begin(),
3545  UE = Op0.getNode()->use_end(); UI != UE; ++UI) {
3546  SDNode *User = *UI;
3547  if (User == Node || User->getOpcode() == ISD::DELETED_NODE ||
3548  User->use_empty())
3549  continue;
3550  // Convert the other matching node(s), too;
3551  // otherwise, the DIVREM may get target-legalized into something
3552  // target-specific that we won't be able to recognize.
3553  unsigned UserOpc = User->getOpcode();
3554  if ((UserOpc == Opcode || UserOpc == OtherOpcode || UserOpc == DivRemOpc) &&
3555  User->getOperand(0) == Op0 &&
3556  User->getOperand(1) == Op1) {
3557  if (!combined) {
3558  if (UserOpc == OtherOpcode) {
3559  SDVTList VTs = DAG.getVTList(VT, VT);
3560  combined = DAG.getNode(DivRemOpc, SDLoc(Node), VTs, Op0, Op1);
3561  } else if (UserOpc == DivRemOpc) {
3562  combined = SDValue(User, 0);
3563  } else {
3564  assert(UserOpc == Opcode);
3565  continue;
3566  }
3567  }
3568  if (UserOpc == ISD::SDIV || UserOpc == ISD::UDIV)
3569  CombineTo(User, combined);
3570  else if (UserOpc == ISD::SREM || UserOpc == ISD::UREM)
3571  CombineTo(User, combined.getValue(1));
3572  }
3573  }
3574  return combined;
3575 }
3576 
3577 static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) {
3578  SDValue N0 = N->getOperand(0);
3579  SDValue N1 = N->getOperand(1);
3580  EVT VT = N->getValueType(0);
3581  SDLoc DL(N);
3582 
3583  unsigned Opc = N->getOpcode();
3584  bool IsDiv = (ISD::SDIV == Opc) || (ISD::UDIV == Opc);
3585  ConstantSDNode *N1C = isConstOrConstSplat(N1);
3586 
3587  // X / undef -> undef
3588  // X % undef -> undef
3589  // X / 0 -> undef
3590  // X % 0 -> undef
3591  // NOTE: This includes vectors where any divisor element is zero/undef.
3592  if (DAG.isUndef(Opc, {N0, N1}))
3593  return DAG.getUNDEF(VT);
3594 
3595  // undef / X -> 0
3596  // undef % X -> 0
3597  if (N0.isUndef())
3598  return DAG.getConstant(0, DL, VT);
3599 
3600  // 0 / X -> 0
3601  // 0 % X -> 0
3602  ConstantSDNode *N0C = isConstOrConstSplat(N0);
3603  if (N0C && N0C->isNullValue())
3604  return N0;
3605 
3606  // X / X -> 1
3607  // X % X -> 0
3608  if (N0 == N1)
3609  return DAG.getConstant(IsDiv ? 1 : 0, DL, VT);
3610 
3611  // X / 1 -> X
3612  // X % 1 -> 0
3613  // If this is a boolean op (single-bit element type), we can't have
3614  // division-by-zero or remainder-by-zero, so assume the divisor is 1.
3615  // TODO: Similarly, if we're zero-extending a boolean divisor, then assume
3616  // it's a 1.
3617  if ((N1C && N1C->isOne()) || (VT.getScalarType() == MVT::i1))
3618  return IsDiv ? N0 : DAG.getConstant(0, DL, VT);
3619 
3620  return SDValue();
3621 }
3622 
3623 SDValue DAGCombiner::visitSDIV(SDNode *N) {
3624  SDValue N0 = N->getOperand(0);
3625  SDValue N1 = N->getOperand(1);
3626  EVT VT = N->getValueType(0);
3627  EVT CCVT = getSetCCResultType(VT);
3628 
3629  // fold vector ops
3630  if (VT.isVector())
3631  if (SDValue FoldedVOp = SimplifyVBinOp(N))
3632  return FoldedVOp;
3633 
3634  SDLoc DL(N);
3635 
3636  // fold (sdiv c1, c2) -> c1/c2
3637  ConstantSDNode *N0C = isConstOrConstSplat(N0);
3638  ConstantSDNode *N1C = isConstOrConstSplat(N1);
3639  if (N0C && N1C && !N0C->isOpaque() && !N1C->isOpaque())
3640  return DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, N0C, N1C);
3641  // fold (sdiv X, -1) -> 0-X
3642  if (N1C && N1C->isAllOnesValue())
3643  return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0);
3644  // fold (sdiv X, MIN_SIGNED) -> select(X == MIN_SIGNED, 1, 0)
3645  if (N1C && N1C->getAPIntValue().isMinSignedValue())
3646  return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
3647  DAG.getConstant(1, DL, VT),
3648  DAG.getConstant(0, DL, VT));
3649 
3650  if (SDValue V = simplifyDivRem(N, DAG))
3651  return V;
3652 
3653  if (SDValue NewSel = foldBinOpIntoSelect(N))
3654  return NewSel;
3655 
3656  // If we know the sign bits of both operands are zero, strength reduce to a
3657  // udiv instead. Handles (X&15) /s 4 -> X&15 >> 2
3658  if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
3659  return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1);
3660 
3661  if (SDValue V = visitSDIVLike(N0, N1, N)) {
3662  // If the corresponding remainder node exists, update its users with
3663  // (Dividend - (Quotient * Divisor)).
3664  if (SDNode *RemNode = DAG.getNodeIfExists(ISD::SREM, N->getVTList(),
3665  { N0, N1 })) {
3666  SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
3667  SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
3668  AddToWorklist(Mul.getNode());
3669  AddToWorklist(Sub.getNode());
3670  CombineTo(RemNode, Sub);
3671  }
3672  return V;
3673  }
3674 
3675  // sdiv, srem -> sdivrem
3676  // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
3677  // true. Otherwise, we break the simplification logic in visitREM().
3678  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
3679  if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
3680  if (SDValue DivRem = useDivRem(N))
3681  return DivRem;
3682 
3683  return SDValue();
3684 }
3685 
3686 SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) {
3687  SDLoc DL(N);
3688  EVT VT = N->getValueType(0);
3689  EVT CCVT = getSetCCResultType(VT);
3690  unsigned BitWidth = VT.getScalarSizeInBits();
3691 
3692  // Helper for determining whether a value is a power-2 constant scalar or a
3693  // vector of such elements.
3694  auto IsPowerOfTwo = [](ConstantSDNode *C) {
3695  if (C->isNullValue() || C->isOpaque())
3696  return false;
3697  if (C->getAPIntValue().isPowerOf2())
3698  return true;
3699  if ((-C->getAPIntValue()).isPowerOf2())
3700  return true;
3701  return false;
3702  };
3703 
3704  // fold (sdiv X, pow2) -> simple ops after legalize
3705  // FIXME: We check for the exact bit here because the generic lowering gives
3706  // better results in that case. The target-specific lowering should learn how
3707  // to handle exact sdivs efficiently.
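  // For example, for i32 'sdiv X, 8' the generic expansion below computes
  // C1 = 3, Inexact = 29, Sign = sra X, 31 (0 or -1), Srl = srl Sign, 29
  // (0 or 7), Add = X + Srl, and Sra = sra Add, 3, which rounds the signed
  // quotient toward zero as required.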
3708  if (!N->getFlags().hasExact() && ISD::matchUnaryPredicate(N1, IsPowerOfTwo)) {
3709  // Target-specific implementation of sdiv x, pow2.
3710  if (SDValue Res = BuildSDIVPow2(N))
3711  return Res;
3712 
3713  // Create constants that are functions of the shift amount value.
3714  EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
3715  SDValue Bits = DAG.getConstant(BitWidth, DL, ShiftAmtTy);
3716  SDValue C1 = DAG.getNode(ISD::CTTZ, DL, VT, N1);
3717  C1 = DAG.getZExtOrTrunc(C1, DL, ShiftAmtTy);
3718  SDValue Inexact = DAG.getNode(ISD::SUB, DL, ShiftAmtTy, Bits, C1);
3719  if (!isConstantOrConstantVector(Inexact))
3720  return SDValue();
3721 
3722  // Splat the sign bit into the register
3723  SDValue Sign = DAG.getNode(ISD::SRA, DL, VT, N0,
3724  DAG.getConstant(BitWidth - 1, DL, ShiftAmtTy));
3725  AddToWorklist(Sign.getNode());
3726 
3727  // Add (N0 < 0) ? abs2 - 1 : 0;
3728  SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, Sign, Inexact);
3729  AddToWorklist(Srl.getNode());
3730  SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Srl);
3731  AddToWorklist(Add.getNode());
3732  SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Add, C1);
3733  AddToWorklist(Sra.getNode());
3734 
3735  // Special case: (sdiv X, 1) -> X
3736  // Special Case: (sdiv X, -1) -> 0-X
3737  SDValue One = DAG.getConstant(1, DL, VT);
3738  SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
3739  SDValue IsOne = DAG.getSetCC(DL, CCVT, N1, One, ISD::SETEQ);
3740  SDValue IsAllOnes = DAG.getSetCC(DL, CCVT, N1, AllOnes, ISD::SETEQ);
3741  SDValue IsOneOrAllOnes = DAG.getNode(ISD::OR, DL, CCVT, IsOne, IsAllOnes);
3742  Sra = DAG.getSelect(DL, VT, IsOneOrAllOnes, N0, Sra);
3743 
3744  // If dividing by a positive value, we're done. Otherwise, the result must
3745  // be negated.
3746  SDValue Zero = DAG.getConstant(0, DL, VT);
3747  SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, Zero, Sra);
3748 
3749  // FIXME: Use SELECT_CC once we improve SELECT_CC constant-folding.
3750  SDValue IsNeg = DAG.getSetCC(DL, CCVT, N1, Zero, ISD::SETLT);
3751  SDValue Res = DAG.getSelect(DL, VT, IsNeg, Sub, Sra);
3752  return Res;
3753  }
3754 
3755  // If integer divide is expensive and we satisfy the requirements, emit an
3756  // alternate sequence. Targets may check function attributes for size/speed
3757  // trade-offs.
3758  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
3759  if (isConstantOrConstantVector(N1) &&
3760  !TLI.isIntDivCheap(N->getValueType(0), Attr))
3761  if (SDValue Op = BuildSDIV(N))
3762  return Op;
3763 
3764  return SDValue();
3765 }
3766 
3767 SDValue DAGCombiner::visitUDIV(SDNode *N) {
3768  SDValue N0 = N->getOperand(0);
3769  SDValue N1 = N->getOperand(1);
3770  EVT VT = N->getValueType(0);
3771  EVT CCVT = getSetCCResultType(VT);
3772 
3773  // fold vector ops
3774  if (VT.isVector())
3775  if (SDValue FoldedVOp = SimplifyVBinOp(N))
3776  return FoldedVOp;
3777 
3778  SDLoc DL(N);
3779 
3780  // fold (udiv c1, c2) -> c1/c2
3781  ConstantSDNode *N0C = isConstOrConstSplat(N0);
3782  ConstantSDNode *N1C = isConstOrConstSplat(N1);
3783  if (N0C && N1C)
3784  if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT,
3785  N0C, N1C))
3786  return Folded;
3787  // fold (udiv X, -1) -> select(X == -1, 1, 0)
3788  if (N1C && N1C->getAPIntValue().isAllOnesValue())
3789  return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
3790  DAG.getConstant(1, DL, VT),
3791  DAG.getConstant(0, DL, VT));
3792 
3793  if (SDValue V = simplifyDivRem(N, DAG))
3794  return V;
3795 
3796  if (SDValue NewSel = foldBinOpIntoSelect(N))
3797  return NewSel;
3798 
3799  if (SDValue V = visitUDIVLike(N0, N1, N)) {
3800  // If the corresponding remainder node exists, update its users with
3801  // (Dividend - (Quotient * Divisor)).
3802  if (SDNode *RemNode = DAG.getNodeIfExists(ISD::UREM, N->getVTList(),
3803  { N0, N1 })) {
3804  SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
3805  SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
3806  AddToWorklist(Mul.getNode());
3807  AddToWorklist(Sub.getNode());
3808  CombineTo(RemNode, Sub);
3809  }
3810  return V;
3811  }
3812 
3813  // udiv, urem -> udivrem
3814  // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
3815  // true. Otherwise, we break the simplification logic in visitREM().
3816  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
3817  if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
3818  if (SDValue DivRem = useDivRem(N))
3819  return DivRem;
3820 
3821  return SDValue();
3822 }
3823 
3824 SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
3825  SDLoc DL(N);
3826  EVT VT = N->getValueType(0);
3827 
3828  // fold (udiv x, (1 << c)) -> x >>u c
3829  if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
3830  DAG.isKnownToBeAPowerOfTwo(N1)) {
3831  SDValue LogBase2 = BuildLogBase2(N1, DL);
3832  AddToWorklist(LogBase2.getNode());
3833 
3834  EVT ShiftVT = getShiftAmountTy(N0.getValueType());
3835  SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
3836  AddToWorklist(Trunc.getNode());
3837  return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
3838  }
3839 
3840  // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
3841  if (N1.getOpcode() == ISD::SHL) {
3842  SDValue N10 = N1.getOperand(0);
3843  if (isConstantOrConstantVector(N10, /*NoOpaques*/ true) &&
3844  DAG.isKnownToBeAPowerOfTwo(N10)) {
3845  SDValue LogBase2 = BuildLogBase2(N10, DL);
3846  AddToWorklist(LogBase2.getNode());
3847 
3848  EVT ADDVT = N1.getOperand(1).getValueType();
3849  SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ADDVT);
3850  AddToWorklist(Trunc.getNode());
3851  SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT, N1.getOperand(1), Trunc);
3852  AddToWorklist(Add.getNode());
3853  return DAG.getNode(ISD::SRL, DL, VT, N0, Add);
3854  }
3855  }
3856 
3857  // fold (udiv x, c) -> alternate
3858  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
3859  if (isConstantOrConstantVector(N1) &&
3860  !TLI.isIntDivCheap(N->getValueType(0), Attr))
3861  if (SDValue Op = BuildUDIV(N))
3862  return Op;
3863 
3864  return SDValue();
3865 }
3866 
3867 // handles ISD::SREM and ISD::UREM
3868 SDValue DAGCombiner::visitREM(SDNode *N) {
3869  unsigned Opcode = N->getOpcode();
3870  SDValue N0 = N->getOperand(0);
3871  SDValue N1 = N->getOperand(1);
3872  EVT VT = N->getValueType(0);
3873  EVT CCVT = getSetCCResultType(VT);
3874 
3875  bool isSigned = (Opcode == ISD::SREM);
3876  SDLoc DL(N);
3877 
3878  // fold (rem c1, c2) -> c1%c2
3879  ConstantSDNode *N0C = isConstOrConstSplat(N0);
3880  ConstantSDNode *N1C = isConstOrConstSplat(N1);
3881  if (N0C && N1C)
3882  if (SDValue Folded = DAG.FoldConstantArithmetic(Opcode, DL, VT, N0C, N1C))
3883  return Folded;
3884  // fold (urem X, -1) -> select(X == -1, 0, x)
3885  if (!isSigned && N1C && N1C->getAPIntValue().isAllOnesValue())
3886  return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
3887  DAG.getConstant(0, DL, VT), N0);
3888 
3889  if (SDValue V = simplifyDivRem(N, DAG))
3890  return V;
3891 
3892  if (SDValue NewSel = foldBinOpIntoSelect(N))
3893  return NewSel;
3894 
3895  if (isSigned) {
3896  // If we know the sign bits of both operands are zero, strength reduce to a
3897  // urem instead. Handles (X & 0x0FFFFFFF) %s 16 -> X&15
3898  if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
3899  return DAG.getNode(ISD::UREM, DL, VT, N0, N1);
3900  } else {
3901  SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
3902  if (DAG.isKnownToBeAPowerOfTwo(N1)) {
3903  // fold (urem x, pow2) -> (and x, pow2-1)
3904  SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
3905  AddToWorklist(Add.getNode());
3906  return DAG.getNode(ISD::AND, DL, VT, N0, Add);
3907  }
3908  if (N1.getOpcode() == ISD::SHL &&
3909  DAG.isKnownToBeAPowerOfTwo(N1.getOperand(0))) {
3910  // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
3911  SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
3912  AddToWorklist(Add.getNode());
3913  return DAG.getNode(ISD::AND, DL, VT, N0, Add);
3914  }
3915  }
3916 
3917  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
3918 
3919  // If X/C can be simplified by the division-by-constant logic, lower
3920  // X%C to the equivalent of X-X/C*C.
3921  // Reuse the SDIVLike/UDIVLike combines - to avoid mangling nodes, the
3922  // speculative DIV must not cause a DIVREM conversion. We guard against this
3923  // by skipping the simplification if isIntDivCheap(). When div is not cheap,
3924  // combine will not return a DIVREM. Regardless, checking cheapness here
3925  // makes sense since the simplification results in fatter code.
3926  if (DAG.isKnownNeverZero(N1) && !TLI.isIntDivCheap(VT, Attr)) {
3927  SDValue OptimizedDiv =
3928  isSigned ? visitSDIVLike(N0, N1, N) : visitUDIVLike(N0, N1, N);
3929  if (OptimizedDiv.getNode()) {
3930  // If the equivalent Div node also exists, update its users.
3931  unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
3932  if (SDNode *DivNode = DAG.getNodeIfExists(DivOpcode, N->getVTList(),
3933  { N0, N1 }))
3934  CombineTo(DivNode, OptimizedDiv);
3935  SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, OptimizedDiv, N1);
3936  SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
3937  AddToWorklist(OptimizedDiv.getNode());
3938  AddToWorklist(Mul.getNode());
3939  return Sub;
3940  }
3941  }
3942 
3943  // sdiv/udiv, srem/urem -> sdivrem/udivrem
3944  if (SDValue DivRem = useDivRem(N))
3945  return DivRem.getValue(1);
3946 
3947  return SDValue();
3948 }
3949 
3950 SDValue DAGCombiner::visitMULHS(SDNode *N) {
3951  SDValue N0 = N->getOperand(0);
3952  SDValue N1 = N->getOperand(1);
3953  EVT VT = N->getValueType(0);
3954  SDLoc DL(N);
3955 
3956  if (VT.isVector()) {
3957  // fold (mulhs x, 0) -> 0
3958  // do not return N0/N1, because undef node may exist.
3959  if (ISD::isBuildVectorAllZeros(N0.getNode()) ||
3960  ISD::isBuildVectorAllZeros(N1.getNode()))
3961  return DAG.getConstant(0, DL, VT);
3962  }
3963 
3964  // fold (mulhs x, 0) -> 0
3965  if (isNullConstant(N1))
3966  return N1;
3967  // fold (mulhs x, 1) -> (sra x, size(x)-1)
3968  if (isOneConstant(N1))
3969  return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0,
3970  DAG.getConstant(N0.getScalarValueSizeInBits() - 1, DL,
3971  getShiftAmountTy(N0.getValueType())));
3972 
3973  // fold (mulhs x, undef) -> 0
3974  if (N0.isUndef() || N1.isUndef())
3975  return DAG.getConstant(0, DL, VT);
3976 
3977  // If the type twice as wide is legal, transform the mulhs to a wider multiply
3978  // plus a shift.
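  // For example, for i32 when i64 MUL is legal: sign-extend both operands to
  // i64, multiply, shift the 64-bit product right by 32, and truncate the
  // result back to i32.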
3979  if (VT.isSimple() && !VT.isVector()) {
3980  MVT Simple = VT.getSimpleVT();
3981  unsigned SimpleSize = Simple.getSizeInBits();
3982  EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
3983  if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
3984  N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
3985  N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
3986  N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
3987  N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
3988  DAG.getConstant(SimpleSize, DL,
3989  getShiftAmountTy(N1.getValueType())));
3990  return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
3991  }
3992  }
3993 
3994  return SDValue();
3995 }
3996 
3997 SDValue DAGCombiner::visitMULHU(SDNode *N) {
3998  SDValue N0 = N->getOperand(0);
3999  SDValue N1 = N->getOperand(1);
4000  EVT VT = N->getValueType(0);
4001  SDLoc DL(N);
4002 
4003  if (VT.isVector()) {
4004  // fold (mulhu x, 0) -> 0
4005  // do not return N0/N1, because undef node may exist.
4006  if (ISD::isBuildVectorAllZeros(N0.getNode()) ||
4007  ISD::isBuildVectorAllZeros(N1.getNode()))
4008  return DAG.getConstant(0, DL, VT);
4009  }
4010 
4011  // fold (mulhu x, 0) -> 0
4012  if (isNullConstant(N1))
4013  return N1;
4014  // fold (mulhu x, 1) -> 0
4015  if (isOneConstant(N1))
4016  return DAG.getConstant(0, DL, N0.getValueType());
4017  // fold (mulhu x, undef) -> 0
4018  if (N0.isUndef() || N1.isUndef())
4019  return DAG.getConstant(0, DL, VT);
4020 
4021  // fold (mulhu x, (1 << c)) -> x >> (bitwidth - c)
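  // (The high half of x * 2^c is x shifted right logically by bitwidth - c.)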
4022  if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
4023  DAG.isKnownToBeAPowerOfTwo(N1) && hasOperation(ISD::SRL, VT)) {
4024  unsigned NumEltBits = VT.getScalarSizeInBits();
4025  SDValue LogBase2 = BuildLogBase2(N1, DL);
4026  SDValue SRLAmt = DAG.getNode(
4027  ISD::SUB, DL, VT, DAG.getConstant(NumEltBits, DL, VT), LogBase2);
4028  EVT ShiftVT = getShiftAmountTy(N0.getValueType());
4029  SDValue Trunc = DAG.getZExtOrTrunc(SRLAmt, DL, ShiftVT);
4030  return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
4031  }
4032 
4033  // If the type twice as wide is legal, transform the mulhu to a wider multiply
4034  // plus a shift.
4035  if (VT.isSimple() && !VT.isVector()) {
4036  MVT Simple = VT.getSimpleVT();
4037  unsigned SimpleSize = Simple.getSizeInBits();
4038  EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
4039  if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
4040  N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
4041  N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
4042  N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
4043  N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
4044  DAG.getConstant(SimpleSize, DL,
4045  getShiftAmountTy(N1.getValueType())));
4046  return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
4047  }
4048  }
4049 
4050  return SDValue();
4051 }
4052 
4053 /// Perform optimizations common to nodes that compute two values. LoOp and HiOp
4054 /// give the opcodes for the two computations that are being performed. Return
4055 /// the combined result if a simplification was made.
4056 SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
4057  unsigned HiOp) {
4058  // If the high half is not needed, just compute the low half.
4059  bool HiExists = N->hasAnyUseOfValue(1);
4060  if (!HiExists && (!LegalOperations ||
4061  TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) {
4062  SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
4063  return CombineTo(N, Res, Res);
4064  }
4065 
4066  // If the low half is not needed, just compute the high half.
4067  bool LoExists = N->hasAnyUseOfValue(0);
4068  if (!LoExists && (!LegalOperations ||
4069  TLI.isOperationLegalOrCustom(HiOp, N->getValueType(1)))) {
4070  SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
4071  return CombineTo(N, Res, Res);
4072  }
4073 
4074  // If both halves are used, return as it is.
4075  if (LoExists && HiExists)
4076  return SDValue();
4077 
4078  // If the two computed results can be simplified separately, separate them.
4079  if (LoExists) {
4080  SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
4081  AddToWorklist(Lo.getNode());
4082  SDValue LoOpt = combine(Lo.getNode());
4083  if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
4084  (!LegalOperations ||
4085  TLI.isOperationLegalOrCustom(LoOpt.getOpcode(), LoOpt.getValueType())))
4086  return CombineTo(N, LoOpt, LoOpt);
4087  }
4088 
4089  if (HiExists) {
4090  SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
4091  AddToWorklist(Hi.getNode());
4092  SDValue HiOpt = combine(Hi.getNode());
4093  if (HiOpt.getNode() && HiOpt != Hi &&
4094  (!LegalOperations ||
4095  TLI.isOperationLegalOrCustom(HiOpt.getOpcode(), HiOpt.getValueType())))
4096  return CombineTo(N, HiOpt, HiOpt);
4097  }
4098 
4099  return SDValue();
4100 }
4101 
4102 SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
4103  if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS))
4104  return Res;
4105 
4106  EVT VT = N->getValueType(0);
4107  SDLoc DL(N);
4108 
4109  // If the type twice as wide is legal, transform the multiply to a wider
4110  // multiply plus a shift.
4111  if (VT.isSimple() && !VT.isVector()) {
4112  MVT Simple = VT.getSimpleVT();
4113  unsigned SimpleSize = Simple.getSizeInBits();
4114  EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
4115  if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
4116  SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(0));
4117  SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(1));
4118  Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
4119  // Compute the high part as N1.
4120  Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
4121  DAG.getConstant(SimpleSize, DL,
4122  getShiftAmountTy(Lo.getValueType())));
4123  Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
4124  // Compute the low part as N0.
4125  Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
4126  return CombineTo(N, Lo, Hi);
4127  }
4128  }
4129 
4130  return SDValue();
4131 }
4132 
4133 SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
4134  if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU))
4135  return Res;
4136 
4137  EVT VT = N->getValueType(0);
4138  SDLoc DL(N);
4139 
4140  // (umul_lohi N0, 0) -> (0, 0)
4141  if (isNullConstant(N->getOperand(1))) {
4142  SDValue Zero = DAG.getConstant(0, DL, VT);
4143  return CombineTo(N, Zero, Zero);
4144  }
4145 
4146  // (umul_lohi N0, 1) -> (N0, 0)
4147  if (isOneConstant(N->getOperand(1))) {
4148  SDValue Zero = DAG.getConstant(0, DL, VT);
4149  return CombineTo(N, N->getOperand(0), Zero);
4150  }
4151 
4152  // If the type twice as wide is legal, transform the multiply to a wider
4153  // multiply plus a shift.
4154  if (VT.isSimple() && !VT.isVector()) {
4155  MVT Simple = VT.getSimpleVT();
4156  unsigned SimpleSize = Simple.getSizeInBits();
4157  EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
4158  if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
4159  SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(0));
4160  SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(1));
4161  Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
4162  // Compute the high part as N1.
4163  Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
4164  DAG.getConstant(SimpleSize, DL,
4165  getShiftAmountTy(Lo.getValueType())));
4166  Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
4167  // Compute the low part as N0.
4168  Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
4169  return CombineTo(N, Lo, Hi);
4170  }
4171  }
4172 
4173  return SDValue();
4174 }
4175 
4176 SDValue DAGCombiner::visitMULO(SDNode *N) {
4177  SDValue N0 = N->getOperand(0);
4178  SDValue N1 = N->getOperand(1);
4179  EVT VT = N0.getValueType();
4180  bool IsSigned = (ISD::SMULO == N->getOpcode());
4181 
4182  EVT CarryVT = N->getValueType(1);
4183  SDLoc DL(N);
4184 
4185  // canonicalize constant to RHS.
4186  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4187  !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4188  return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
4189 
4190  // fold (mulo x, 0) -> 0 + no carry out
4191  if (isNullOrNullSplat(N1))
4192  return CombineTo(N, DAG.getConstant(0, DL, VT),
4193  DAG.getConstant(0, DL, CarryVT));
4194 
4195  // (mulo x, 2) -> (addo x, x)
4196  if (ConstantSDNode *C2 = isConstOrConstSplat(N1))
4197  if (C2->getAPIntValue() == 2)
4198  return DAG.getNode(IsSigned ? ISD::SADDO : ISD::UADDO, DL,
4199  N->getVTList(), N0, N0);
4200 
4201  return SDValue();
4202 }
4203 
4204 SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
4205  SDValue N0 = N->getOperand(0);
4206  SDValue N1 = N->getOperand(1);
4207  EVT VT = N0.getValueType();
4208 
4209  // fold vector ops
4210  if (VT.isVector())
4211  if (SDValue FoldedVOp = SimplifyVBinOp(N))
4212  return FoldedVOp;
4213 
4214  // fold operation with constant operands.
4215  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
4216  ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
4217  if (N0C && N1C)
4218  return DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N), VT, N0C, N1C);
4219 
4220  // canonicalize constant to RHS
4221  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4222  !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4223  return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
4224 
4225  // If sign bits are zero, flip between UMIN/UMAX and SMIN/SMAX.
4226  // Only do this if the current op isn't legal and the flipped is.
4227  unsigned Opcode = N->getOpcode();
4228  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4229  if (!TLI.isOperationLegal(Opcode, VT) &&
4230  (N0.isUndef() || DAG.SignBitIsZero(N0)) &&
4231  (N1.isUndef() || DAG.SignBitIsZero(N1))) {
4232  unsigned AltOpcode;
4233  switch (Opcode) {
4234  case ISD::SMIN: AltOpcode = ISD::UMIN; break;
4235  case ISD::SMAX: AltOpcode = ISD::UMAX; break;
4236  case ISD::UMIN: AltOpcode = ISD::SMIN; break;
4237  case ISD::UMAX: AltOpcode = ISD::SMAX; break;
4238  default: llvm_unreachable("Unknown MINMAX opcode");
4239  }
4240  if (TLI.isOperationLegal(AltOpcode, VT))
4241  return DAG.getNode(AltOpcode, SDLoc(N), VT, N0, N1);
4242  }
4243 
4244  return SDValue();
4245 }
4246 
4247 /// If this is a bitwise logic instruction and both operands have the same
4248 /// opcode, try to sink the other opcode after the logic instruction.
4249 SDValue DAGCombiner::hoistLogicOpWithSameOpcodeHands(SDNode *N) {
4250  SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
4251  EVT VT = N0.getValueType();
4252  unsigned LogicOpcode = N->getOpcode();
4253  unsigned HandOpcode = N0.getOpcode();
4254  assert((LogicOpcode == ISD::AND || LogicOpcode == ISD::OR ||
4255  LogicOpcode == ISD::XOR) && "Expected logic opcode");
4256  assert(HandOpcode == N1.getOpcode() && "Bad input!");
4257 
4258  // Bail early if none of these transforms apply.
4259  if (N0.getNumOperands() == 0)
4260  return SDValue();
4261 
4262  // FIXME: We should check number of uses of the operands to not increase
4263  // the instruction count for all transforms.
4264 
4265  // Handle size-changing casts.
4266  SDValue X = N0.getOperand(0);
4267  SDValue Y = N1.getOperand(0);
4268  EVT XVT = X.getValueType();
4269  SDLoc DL(N);
4270  if (HandOpcode == ISD::ANY_EXTEND || HandOpcode == ISD::ZERO_EXTEND ||
4271  HandOpcode == ISD::SIGN_EXTEND) {
4272  // If both operands have other uses, this transform would create extra
4273  // instructions without eliminating anything.
4274  if (!N0.hasOneUse() && !N1.hasOneUse())
4275  return SDValue();
4276  // We need matching integer source types.
4277  if (XVT != Y.getValueType())
4278  return SDValue();
4279  // Don't create an illegal op during or after legalization. Don't ever
4280  // create an unsupported vector op.
4281  if ((VT.isVector() || LegalOperations) &&
4282  !TLI.isOperationLegalOrCustom(LogicOpcode, XVT))
4283  return SDValue();
4284  // Avoid infinite looping with PromoteIntBinOp.
4285  // TODO: Should we apply desirable/legal constraints to all opcodes?
4286  if (HandOpcode == ISD::ANY_EXTEND && LegalTypes &&
4287  !TLI.isTypeDesirableForOp(LogicOpcode, XVT))
4288  return SDValue();
4289  // logic_op (hand_op X), (hand_op Y) --> hand_op (logic_op X, Y)
4290  SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4291  return DAG.getNode(HandOpcode, DL, VT, Logic);
4292  }
4293 
4294  // logic_op (truncate x), (truncate y) --> truncate (logic_op x, y)
4295  if (HandOpcode == ISD::TRUNCATE) {
4296  // If both operands have other uses, this transform would create extra
4297  // instructions without eliminating anything.
4298  if (!N0.hasOneUse() && !N1.hasOneUse())
4299  return SDValue();
4300  // We need matching source types.
4301  if (XVT != Y.getValueType())
4302  return SDValue();
4303  // Don't create an illegal op during or after legalization.
4304  if (LegalOperations && !TLI.isOperationLegal(LogicOpcode, XVT))
4305  return SDValue();
4306  // Be extra careful sinking truncate. If it's free, there's no benefit in
4307  // widening a binop. Also, don't create a logic op on an illegal type.
4308  if (TLI.isZExtFree(VT, XVT) && TLI.isTruncateFree(XVT, VT))
4309  return SDValue();
4310  if (!TLI.isTypeLegal(XVT))
4311  return SDValue();
4312  SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4313  return DAG.getNode(HandOpcode, DL, VT, Logic);
4314  }
4315 
4316  // For binops SHL/SRL/SRA/AND:
4317  // logic_op (OP x, z), (OP y, z) --> OP (logic_op x, y), z
4318  if ((HandOpcode == ISD::SHL || HandOpcode == ISD::SRL ||
4319  HandOpcode == ISD::SRA || HandOpcode == ISD::AND) &&
4320  N0.getOperand(1) == N1.getOperand(1)) {
4321  // If either operand has other uses, this transform is not an improvement.
4322  if (!N0.hasOneUse() || !N1.hasOneUse())
4323  return SDValue();
4324  SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4325  return DAG.getNode(HandOpcode, DL, VT, Logic, N0.getOperand(1));
4326  }
4327 
4328  // Unary ops: logic_op (bswap x), (bswap y) --> bswap (logic_op x, y)
4329  if (HandOpcode == ISD::BSWAP) {
4330  // If either operand has other uses, this transform is not an improvement.
4331  if (!N0.hasOneUse() || !N1.hasOneUse())
4332  return SDValue();
4333  SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4334  return DAG.getNode(HandOpcode, DL, VT, Logic);
4335  }
4336 
4337  // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
4338  // Only perform this optimization up until type legalization, before
4339  // LegalizeVectorOps. LegalizeVectorOps promotes vector operations by
4340  // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and
4341  // we don't want to undo this promotion.
4342  // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
4343  // on scalars.
4344  if ((HandOpcode == ISD::BITCAST || HandOpcode == ISD::SCALAR_TO_VECTOR) &&
4345  Level <= AfterLegalizeTypes) {
4346  // Input types must be integer and the same.
4347  if (XVT.isInteger() && XVT == Y.getValueType() &&
4348  !(VT.isVector() && TLI.isTypeLegal(VT) &&
4349  !XVT.isVector() && !TLI.isTypeLegal(XVT))) {
4350  SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4351  return DAG.getNode(HandOpcode, DL, VT, Logic);
4352  }
4353  }
4354 
4355  // Xor/and/or are indifferent to the swizzle operation (shuffle of one value).
4356  // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B))
4357  // If both shuffles use the same mask, and both shuffle within a single
4358  // vector, then it is worthwhile to move the swizzle after the operation.
4359  // The type-legalizer generates this pattern when loading illegal
4360  // vector types from memory. In many cases this allows additional shuffle
4361  // optimizations.
4362  // There are other cases where moving the shuffle after the xor/and/or
4363  // is profitable even if shuffles don't perform a swizzle.
4364  // If both shuffles use the same mask, and both shuffles have the same first
4365  // or second operand, then it might still be profitable to move the shuffle
4366  // after the xor/and/or operation.
4367  if (HandOpcode == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) {
4368  auto *SVN0 = cast<ShuffleVectorSDNode>(N0);
4369  auto *SVN1 = cast<ShuffleVectorSDNode>(N1);
4370  assert(X.getValueType() == Y.getValueType() &&
4371  "Inputs to shuffles are not the same type");
4372 
4373  // Check that both shuffles use the same mask. The masks are known to be of
4374  // the same length because the result vector type is the same.
4375  // Check also that shuffles have only one use to avoid introducing extra
4376  // instructions.
4377  if (!SVN0->hasOneUse() || !SVN1->hasOneUse() ||
4378  !SVN0->getMask().equals(SVN1->getMask()))
4379  return SDValue();
4380 
4381  // Don't try to fold this node if it requires introducing a
4382  // build vector of all zeros that might be illegal at this stage.
4383  SDValue ShOp = N0.getOperand(1);
4384  if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
4385  ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
4386 
4387  // (logic_op (shuf (A, C), shuf (B, C))) --> shuf (logic_op (A, B), C)
4388  if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) {
4389  SDValue Logic = DAG.getNode(LogicOpcode, DL, VT,
4390  N0.getOperand(0), N1.getOperand(0));
4391  return DAG.getVectorShuffle(VT, DL, Logic, ShOp, SVN0->getMask());
4392  }
4393 
4394  // Don't try to fold this node if it requires introducing a
4395  // build vector of all zeros that might be illegal at this stage.
4396  ShOp = N0.getOperand(0);
4397  if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
4398  ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
4399 
4400  // (logic_op (shuf (C, A), shuf (C, B))) --> shuf (C, logic_op (A, B))
4401  if (N0.getOperand(0) == N1.getOperand(0) && ShOp.getNode()) {
4402  SDValue Logic = DAG.getNode(LogicOpcode, DL, VT, N0.getOperand(1),
4403  N1.getOperand(1));
4404  return DAG.getVectorShuffle(VT, DL, ShOp, Logic, SVN0->getMask());
4405  }
4406  }
4407 
4408  return SDValue();
4409 }
4410 
4411 /// Try to make (and/or setcc (LL, LR), setcc (RL, RR)) more efficient.
4412 SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
4413  const SDLoc &DL) {
4414  SDValue LL, LR, RL, RR, N0CC, N1CC;
4415  if (!isSetCCEquivalent(N0, LL, LR, N0CC) ||
4416  !isSetCCEquivalent(N1, RL, RR, N1CC))
4417  return SDValue();
4418 
4419  assert(N0.getValueType() == N1.getValueType() &&
4420  "Unexpected operand types for bitwise logic op");
4421  assert(LL.getValueType() == LR.getValueType() &&
4422  RL.getValueType() == RR.getValueType() &&
4423  "Unexpected operand types for setcc");
4424 
4425  // If we're here post-legalization or the logic op type is not i1, the logic
4426  // op type must match a setcc result type. Also, all folds require new
4427  // operations on the left and right operands, so those types must match.
4428  EVT VT = N0.getValueType();
4429  EVT OpVT = LL.getValueType();
4430  if (LegalOperations || VT.getScalarType() != MVT::i1)
4431  if (VT != getSetCCResultType(OpVT))
4432  return SDValue();
4433  if (OpVT != RL.getValueType())
4434  return SDValue();
4435 
4436  ISD::CondCode CC0 = cast<CondCodeSDNode>(N0CC)->get();
4437  ISD::CondCode CC1 = cast<CondCodeSDNode>(N1CC)->get();
4438  bool IsInteger = OpVT.isInteger();
4439  if (LR == RR && CC0 == CC1 && IsInteger) {
4440  bool IsZero = isNullOrNullSplat(LR);
4441  bool IsNeg1 = isAllOnesOrAllOnesSplat(LR);
4442 
4443  // All bits clear?
4444  bool AndEqZero = IsAnd && CC1 == ISD::SETEQ && IsZero;
4445  // All sign bits clear?
4446  bool AndGtNeg1 = IsAnd && CC1 == ISD::SETGT && IsNeg1;
4447  // Any bits set?
4448  bool OrNeZero = !IsAnd && CC1 == ISD::SETNE && IsZero;
4449  // Any sign bits set?
4450  bool OrLtZero = !IsAnd && CC1 == ISD::SETLT && IsZero;
4451 
4452  // (and (seteq X, 0), (seteq Y, 0)) --> (seteq (or X, Y), 0)
4453  // (and (setgt X, -1), (setgt Y, -1)) --> (setgt (or X, Y), -1)
4454  // (or (setne X, 0), (setne Y, 0)) --> (setne (or X, Y), 0)
4455  // (or (setlt X, 0), (setlt Y, 0)) --> (setlt (or X, Y), 0)
4456  if (AndEqZero || AndGtNeg1 || OrNeZero || OrLtZero) {
4457  SDValue Or = DAG.getNode(ISD::OR, SDLoc(N0), OpVT, LL, RL);
4458  AddToWorklist(Or.getNode());
4459  return DAG.getSetCC(DL, VT, Or, LR, CC1);
4460  }
4461 
4462  // All bits set?
4463  bool AndEqNeg1 = IsAnd && CC1 == ISD::SETEQ && IsNeg1;
4464  // All sign bits set?
4465  bool AndLtZero = IsAnd && CC1 == ISD::SETLT && IsZero;
4466  // Any bits clear?
4467  bool OrNeNeg1 = !IsAnd && CC1 == ISD::SETNE && IsNeg1;
4468  // Any sign bits clear?
4469  bool OrGtNeg1 = !IsAnd && CC1 == ISD::SETGT && IsNeg1;
4470 
4471  // (and (seteq X, -1), (seteq Y, -1)) --> (seteq (and X, Y), -1)
4472  // (and (setlt X, 0), (setlt Y, 0)) --> (setlt (and X, Y), 0)
4473  // (or (setne X, -1), (setne Y, -1)) --> (setne (and X, Y), -1)
4474  // (or (setgt X, -1), (setgt Y, -1)) --> (setgt (and X, Y), -1)
4475  if (AndEqNeg1 || AndLtZero || OrNeNeg1 || OrGtNeg1) {
4476  SDValue And = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, LL, RL);
4477  AddToWorklist(And.getNode());
4478  return DAG.getSetCC(DL, VT, And, LR, CC1);
4479  }
4480  }
4481 
4482  // TODO: What is the 'or' equivalent of this fold?
4483  // (and (setne X, 0), (setne X, -1)) --> (setuge (add X, 1), 2)
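  // (Adding 1 maps X == -1 to 0 and X == 0 to 1; every other X gives an
  //  unsigned value >= 2.)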
4484  if (IsAnd && LL == RL && CC0 == CC1 && OpVT.getScalarSizeInBits() > 1 &&
4485  IsInteger && CC0 == ISD::SETNE &&
4486  ((isNullConstant(LR) && isAllOnesConstant(RR)) ||
4487  (isAllOnesConstant(LR) && isNullConstant(RR)))) {
4488  SDValue One = DAG.getConstant(1, DL, OpVT);
4489  SDValue Two = DAG.getConstant(2, DL, OpVT);
4490  SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N0), OpVT, LL, One);
4491  AddToWorklist(Add.getNode());
4492  return DAG.getSetCC(DL, VT, Add, Two, ISD::SETUGE);
4493  }
4494 
4495  // Try more general transforms if the predicates match and the only user of
4496  // the compares is the 'and' or 'or'.
4497  if (IsInteger && TLI.convertSetCCLogicToBitwiseLogic(OpVT) && CC0 == CC1 &&
4498  N0.hasOneUse() && N1.hasOneUse()) {
4499  // and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0
4500  // or (setne A, B), (setne C, D) --> setne (or (xor A, B), (xor C, D)), 0
4501  if ((IsAnd && CC1 == ISD::SETEQ) || (!IsAnd && CC1 == ISD::SETNE)) {
4502  SDValue XorL = DAG.getNode(ISD::XOR, SDLoc(N0), OpVT, LL, LR);
4503  SDValue XorR = DAG.getNode(ISD::XOR, SDLoc(N1), OpVT, RL, RR);
4504  SDValue Or = DAG.getNode(ISD::OR, DL, OpVT, XorL, XorR);
4505  SDValue Zero = DAG.getConstant(0, DL, OpVT);
4506  return DAG.getSetCC(DL, VT, Or, Zero, CC1);
4507  }
4508 
4509  // Turn compare of constants whose difference is 1 bit into add+and+setcc.
4510  // TODO - support non-uniform vector amounts.
4511  if ((IsAnd && CC1 == ISD::SETNE) || (!IsAnd && CC1 == ISD::SETEQ)) {
4512  // Match a shared variable operand and 2 non-opaque constant operands.
4513  ConstantSDNode *C0 = isConstOrConstSplat(LR);
4514  ConstantSDNode *C1 = isConstOrConstSplat(RR);
4515  if (LL == RL && C0 && C1 && !C0->isOpaque() && !C1->isOpaque()) {
4516  // Canonicalize larger constant as C0.
4517  if (C1->getAPIntValue().ugt(C0->getAPIntValue()))
4518  std::swap(C0, C1);
4519 
4520  // The difference of the constants must be a single bit.
4521  const APInt &C0Val = C0->getAPIntValue();
4522  const APInt &C1Val = C1->getAPIntValue();
4523  if ((C0Val - C1Val).isPowerOf2()) {
4524  // and/or (setcc X, C0, ne), (setcc X, C1, ne/eq) -->
4525  // setcc ((add X, -C1), ~(C0 - C1)), 0, ne/eq
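  // E.g. with C0 == 6, C1 == 4: (X != 6) && (X != 4) becomes
  // ((X - 4) & ~2) != 0, which is false exactly for X == 4 and X == 6.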
4526  SDValue OffsetC = DAG.getConstant(-C1Val, DL, OpVT);
4527  SDValue Add = DAG.getNode(ISD::ADD, DL, OpVT, LL, OffsetC);
4528  SDValue MaskC = DAG.getConstant(~(C0Val - C1Val), DL, OpVT);
4529  SDValue And = DAG.getNode(ISD::AND, DL, OpVT, Add, MaskC);
4530  SDValue Zero = DAG.getConstant(0, DL, OpVT);
4531  return DAG.getSetCC(DL, VT, And, Zero, CC0);
4532  }
4533  }
4534  }
4535  }
4536 
4537  // Canonicalize equivalent operands to LL == RL.
4538  if (LL == RR && LR == RL) {
4539  CC1 = ISD::getSetCCSwappedOperands(CC1);
4540  std::swap(RL, RR);
4541  }
4542 
4543  // (and (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
4544  // (or (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
4545  if (LL == RL && LR == RR) {
4546  ISD::CondCode NewCC = IsAnd ? ISD::getSetCCAndOperation(CC0, CC1, IsInteger)
4547  : ISD::getSetCCOrOperation(CC0, CC1, IsInteger);
4548  if (NewCC != ISD::SETCC_INVALID &&
4549  (!LegalOperations ||
4550  (TLI.isCondCodeLegal(NewCC, LL.getSimpleValueType()) &&
4551  TLI.isOperationLegal(ISD::SETCC, OpVT))))
4552  return DAG.getSetCC(DL, VT, LL, LR, NewCC);
4553  }
4554 
4555  return SDValue();
4556 }
4557 
4558 /// This contains all DAGCombine rules which reduce two values combined by
4559 /// an And operation to a single value. This makes them reusable in the context
4560 /// of visitSELECT(). Rules involving constants are not included as
4561 /// visitSELECT() already handles those cases.
4562 SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, SDNode *N) {
4563  EVT VT = N1.getValueType();
4564  SDLoc DL(N);
4565 
4566  // fold (and x, undef) -> 0
4567  if (N0.isUndef() || N1.isUndef())
4568  return DAG.getConstant(0, DL, VT);
4569 
4570  if (SDValue V = foldLogicOfSetCCs(true, N0, N1, DL))
4571  return V;
4572 
4573  if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
4574  VT.getSizeInBits() <= 64) {
4575  if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
4576  if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
4577  // Look for (and (add x, c1), (lshr y, c2)). If c1 isn't a legal
4578  // immediate for an add, but becomes legal once its top c2 bits are set,
4579  // transform the ADD so the immediate doesn't need to be materialized
4580  // in a register.
4581  APInt ADDC = ADDI->getAPIntValue();
4582  APInt SRLC = SRLI->getAPIntValue();
4583  if (ADDC.getMinSignedBits() <= 64 &&
4584  SRLC.ult(VT.getSizeInBits()) &&
4585  !TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
4586  APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
4587  SRLC.getZExtValue());
4588  if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
4589  ADDC |= Mask;
4590  if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
4591  SDLoc DL0(N0);
4592  SDValue NewAdd =
4593  DAG.getNode(ISD::ADD, DL0, VT,
4594  N0.getOperand(0), DAG.getConstant(ADDC, DL, VT));
4595  CombineTo(N0.getNode(), NewAdd);
4596  // Return N so it doesn't get rechecked!
4597  return SDValue(N, 0);
4598  }
4599  }
4600  }
4601  }
4602  }
4603  }
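// For illustration (assuming a target whose legal add immediates are
// sign-extended 32-bit values): with i64 operands, c2 == 32 and
// c1 == 0x80000000, the constant 0x80000000 is not a legal immediate but
// 0xFFFFFFFF80000000 is. Because (srl y, 32) zeroes the top 32 bits of the
// AND result, setting the top 32 bits of the add constant cannot change the
// result, so the cheaper-to-materialize immediate is used instead.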
4604 
4605  // Reduce bit extract of low half of an integer to the narrower type.
4606  // (and (srl i64:x, K), KMask) ->
4607  // (i64 zero_extend (and (srl (i32 (trunc i64:x)), K)), KMask)
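// For illustration: with K == 8 and KMask == 0xff, the required bits all sit
// in the low 32-bit half of x, so the 64-bit srl/and pair can be done as a
// 32-bit srl/and on (trunc x) followed by a zero_extend, provided the target
// hooks queried below say the narrower operations are desirable and free.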
4608  if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
4609  if (ConstantSDNode *CAnd = dyn_cast<ConstantSDNode>(N1)) {
4610  if (ConstantSDNode *CShift = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
4611  unsigned Size = VT.getSizeInBits();
4612  const APInt &AndMask = CAnd->getAPIntValue();
4613  unsigned ShiftBits = CShift->getZExtValue();
4614 
4615  // Bail out, this node will probably disappear anyway.
4616  if (ShiftBits == 0)
4617  return SDValue();
4618 
4619  unsigned MaskBits = AndMask.countTrailingOnes();
4620  EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), Size / 2);
4621 
4622  if (AndMask.isMask() &&
4623  // Required bits must not span the two halves of the integer and
4624  // must fit in the half size type.
4625  (ShiftBits + MaskBits <= Size / 2) &&
4626  TLI.isNarrowingProfitable(VT, HalfVT) &&
4627  TLI.isTypeDesirableForOp(ISD::AND, HalfVT) &&
4628  TLI.isTypeDesirableForOp(ISD::SRL, HalfVT) &&
4629  TLI.isTruncateFree(VT, HalfVT) &&
4630  TLI.isZExtFree(HalfVT, VT)) {
4631  // The isNarrowingProfitable check is there to avoid regressions on PPC
4632  // and AArch64, which match a few 64-bit bit-insert / bit-extract patterns
4633  // on downstream users of this node. Those patterns could probably be
4634  // extended to handle extensions mixed in.
4635 
4636  SDValue SL(N0);
4637  assert(MaskBits <= Size);
4638 
4639  // Extracting the highest bit of the low half.
4640  EVT ShiftVT = TLI.getShiftAmountTy(HalfVT, DAG.getDataLayout());
4641  SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, HalfVT,
4642  N0.getOperand(0));
4643 
4644  SDValue NewMask = DAG.getConstant(AndMask.trunc(Size / 2), SL, HalfVT);
4645  SDValue ShiftK = DAG.getConstant(ShiftBits, SL, ShiftVT);
4646  SDValue Shift = DAG.getNode(ISD::SRL, SL, HalfVT, Trunc, ShiftK);
4647  SDValue And = DAG.getNode(ISD::AND, SL, HalfVT, Shift, NewMask);
4648  return DAG.getNode(ISD::ZERO_EXTEND, SL, VT, And);
4649  }
4650  }
4651  }
4652  }
4653 
4654  return SDValue();
4655 }
4656 
4657 bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
4658  EVT LoadResultTy, EVT &ExtVT) {
4659  if (!AndC->getAPIntValue().isMask())
4660  return false;
4661 
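// For illustration: a mask of 0x00ff has 8 trailing ones, so ExtVT below
// becomes i8 and the (and (load ...), 0xff) pair is a candidate for being
// folded into an i8 ZEXTLOAD, subject to the legality checks that follow.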
4662  unsigned ActiveBits = AndC->getAPIntValue().countTrailingOnes();
4663 
4664  ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
4665  EVT LoadedVT = LoadN->getMemoryVT();
4666 
4667  if (ExtVT == LoadedVT &&
4668  (!LegalOperations ||
4669  TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))) {
4670  // ZEXTLOAD will match without needing to change the size of the value being
4671  // loaded.
4672  return true;
4673  }
4674 
4675  // Do not change the width of volatile or atomic loads.
4676  if (!LoadN->isSimple())
4677  return false;
4678 
4679  // Do not generate loads of non-round integer types since these can
4680  // be expensive (and would be wrong if the type is not byte sized).
4681  if (!LoadedVT.bitsGT(ExtVT) || !ExtVT.isRound())
4682  return false;
4683 
4684  if (LegalOperations &&
4685  !TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))
4686  return false;
4687 
4688  if (!TLI.shouldReduceLoadWidth(LoadN, ISD::ZEXTLOAD, ExtVT))
4689  return false;
4690 
4691  return true;
4692 }
4693 
4694 bool DAGCombiner::isLegalNarrowLdSt(LSBaseSDNode *LDST,
4695  ISD::LoadExtType ExtType, EVT &MemVT,
4696  unsigned ShAmt) {
4697  if (!LDST)
4698  return false;
4699  // Only allow byte offsets.
4700  if (ShAmt % 8)
4701  return false;
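// For illustration (little-endian): narrowing (srl (load i32, p), 8) to an
// i8 access can simply load from p + 1, but a shift of, say, 4 bits has no
// byte-aligned starting address, so such shift amounts are rejected here.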
4702 
4703  // Do not generate loads of non-round integer types since these can
4704  // be expensive (and would be wrong if the type is not byte sized).
4705  if (!MemVT.isRound())
4706  return false;
4707 
4708  // Don't change the width of volatile or atomic loads.
4709  if (!LDST->isSimple())
4710  return false;
4711 
4712  // Verify that we are actually reducing a load width here.
4713  if (LDST->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits())
4714  return false;
4715 
4716  // Ensure that this isn't going to produce an unsupported memory access.
4717  if (ShAmt &&
4718  !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
4719  LDST->getAddressSpace(), ShAmt / 8,
4720  LDST->getMemOperand()->getFlags()))
4721  return false;
4722 
4723  // It's not possible to generate a constant of extended or untyped type.
4724  EVT PtrType = LDST->getBasePtr().getValueType();
4725  if (PtrType == MVT::Untyped || PtrType.isExtended())
4726  return false;
4727 
4728  if (isa<LoadSDNode>(LDST)) {
4729  LoadSDNode *Load = cast<LoadSDNode>(LDST);
4730  // Don't transform one with multiple uses; this would require adding a new
4731  // load.
4732  if (!SDValue(Load, 0).hasOneUse())
4733  return false;
4734 
4735  if (LegalOperations &&
4736  !TLI.isLoadExtLegal(ExtType, Load->getValueType(0), MemVT))
4737  return false;
4738 
4739  // For the transform to be legal, the load must produce only two values
4740  // (the value loaded and the chain). Don't transform a pre-increment
4741  // load, for example, which produces an extra value. Otherwise the
4742  // transformation is not equivalent, and the downstream logic to replace
4743  // uses gets things wrong.
4744  if (Load->getNumValues() > 2)
4745  return false;
4746 
4747  // If the load that we're shrinking is an extload and we're not just
4748  // discarding the extension, we can't simply shrink the load. Bail.
4749  // TODO: It would be possible to merge the extensions in some cases.
4750  if (Load->getExtensionType() != ISD::NON_EXTLOAD &&
4751  Load->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
4752  return false;
4753 
4754  if (!TLI.shouldReduceLoadWidth(Load, ExtType, MemVT))
4755  return false;
4756  } else {
4757  assert(isa<StoreSDNode>(LDST) && "It is not a Load nor a Store SDNode");
4758  StoreSDNode *Store = cast<StoreSDNode>(LDST);
4759  // Can't write outside the original store
4760  if (Store->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
4761  return false;
4762 
4763  if (LegalOperations &&
4764  !TLI.isTruncStoreLegal(Store->getValue().getValueType(), MemVT))
4765  return false;
4766  }
4767  return true;
4768 }
4769 
4770 bool DAGCombiner::SearchForAndLoads(SDNode *N,
4771  SmallVectorImpl<LoadSDNode*> &Loads,
4772  SmallPtrSetImpl<SDNode*> &NodesWithConsts,
4773  ConstantSDNode *Mask,
4774  SDNode *&NodeToMask) {
4775  // Recursively search for the operands, looking for loads which can be
4776  // narrowed.
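// For illustration: given (and (or (load i32 p1), (load i32 p2)), 0xff),
// the recursion walks the or and, if the target allows the narrower
// zextload, records both loads in Loads; a single non-load, non-constant
// operand may instead be remembered as NodeToMask and masked explicitly.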
4777  for (SDValue Op : N->op_values()) {
4778  if (Op.getValueType().isVector())
4779  return false;
4780 
4781  // Some constants may need fixing up later if they are too large.
4782  if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
4783  if ((N->getOpcode() == ISD::OR || N->getOpcode() == ISD::XOR) &&
4784  (Mask->getAPIntValue() & C->getAPIntValue()) != C->getAPIntValue())
4785  NodesWithConsts.insert(N);
4786  continue;
4787  }
4788 
4789  if (!Op.hasOneUse())
4790  return false;
4791 
4792  switch(Op.getOpcode()) {
4793  case ISD::LOAD: {
4794  auto *Load = cast<LoadSDNode>(Op);
4795  EVT ExtVT;
4796  if (isAndLoadExtLoad(Mask, Load, Load->getValueType(0), ExtVT) &&
4797  isLegalNarrowLdSt(Load, ISD::ZEXTLOAD, ExtVT)) {
4798 
4799  // ZEXTLOAD is already small enough.
4800  if (Load->getExtensionType() == ISD::ZEXTLOAD &&
4801  ExtVT.bitsGE(Load->getMemoryVT()))
4802  continue;
4803 
4804  // Use LE to convert equal sized loads to zext.
4805  if (ExtVT.bitsLE(Load->getMemoryVT()))
4806  Loads.push_back(Load);
4807 
4808  continue;
4809  }
4810  return false;
4811  }
4812  case ISD::ZERO_EXTEND:
4813  case ISD::AssertZext: {
4814  unsigned ActiveBits = Mask->getAPIntValue().countTrailingOnes();
4815  EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
4816  EVT VT = Op.getOpcode() == ISD::AssertZext ?
4817  cast<VTSDNode>(Op.getOperand(1))->getVT() :
4818  Op.getOperand(0).getValueType();
4819 
4820  // We can accept extending nodes if the mask is wider than, or equal in
4821  // width to, the original type.
4822  if (ExtVT.bitsGE(VT))
4823  continue;
4824  break;
4825  }
4826  case ISD::OR:
4827  case ISD::XOR:
4828  case ISD::AND:
4829  if (!SearchForAndLoads(Op.getNode(), Loads, NodesWithConsts, Mask,
4830  NodeToMask))
4831  return false;
4832  continue;
4833  }
4834 
4835  // Allow one node which will be masked along with any loads found.
4836  if (NodeToMask)
4837  return false;
4838 
4839  // Also ensure that the node to be masked only produces one data result.
4840  NodeToMask = Op.getNode();
4841  if (NodeToMask->getNumValues() > 1) {
4842  bool HasValue = false;
4843  for (unsigned i = 0, e = NodeToMask->getNumValues(); i < e; ++i) {
4844  MVT VT = SDValue(NodeToMask, i).getSimpleValueType();
4845  if (VT != MVT::Glue && VT != MVT::Other) {
4846  if (HasValue) {
4847  NodeToMask = nullptr;
4848  return false;
4849  }
4850  HasValue = true;
4851  }
4852  }
4853  assert(HasValue && "Node to be masked has no data result?");
4854  }
4855  }
4856  return true;
4857 }
4858 
4859 bool DAGCombiner::BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG) {
4860  auto *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
4861  if (!Mask)
4862  return false;
4863 
4864  if (!Mask->getAPIntValue().isMask())
4865  return false;
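// For illustration: only masks made of trailing ones qualify, e.g. 0xff or
// 0xffff; a constant such as 0xff00 is not an APInt "mask" and the
// propagation is not attempted.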
4866 
4867  // No need to do anything if the and directly uses a load.